前后端基本架构和完整 Excel 表的解析及统计图表的生成以及 Excel 表的导出
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
API 路由注册模块
|
||||
"""
|
||||
from fastapi import APIRouter
|
||||
from app.api.endpoints import upload, ai_analyze, visualization, analysis_charts
|
||||
|
||||
# 创建主路由
|
||||
api_router = APIRouter()
|
||||
|
||||
# 注册各模块路由
|
||||
api_router.include_router(upload.router)
|
||||
api_router.include_router(ai_analyze.router)
|
||||
api_router.include_router(visualization.router)
|
||||
api_router.include_router(analysis_charts.router)
|
||||
|
||||
BIN
backend/app/api/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
backend/app/api/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/api/endpoints/__pycache__/ai_analyze.cpython-312.pyc
Normal file
BIN
backend/app/api/endpoints/__pycache__/ai_analyze.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-312.pyc
Normal file
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-313.pyc
Normal file
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
144
backend/app/api/endpoints/ai_analyze.py
Normal file
144
backend/app/api/endpoints/ai_analyze.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
AI 分析 API 接口
|
||||
"""
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
from app.services.excel_ai_service import excel_ai_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/ai", tags=["AI 分析"])
|
||||
|
||||
|
||||
@router.post("/analyze/excel")
|
||||
async def analyze_excel(
|
||||
file: UploadFile = File(...),
|
||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
|
||||
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
|
||||
):
|
||||
"""
|
||||
上传并使用 AI 分析 Excel 文件
|
||||
|
||||
Args:
|
||||
file: 上传的 Excel 文件
|
||||
user_prompt: 用户自定义提示词
|
||||
analysis_type: 分析类型
|
||||
parse_all_sheets: 是否分析所有工作表
|
||||
|
||||
Returns:
|
||||
dict: 分析结果,包含 Excel 数据和 AI 分析结果
|
||||
"""
|
||||
# 检查文件类型
|
||||
if not file.filename:
|
||||
raise HTTPException(status_code=400, detail="文件名为空")
|
||||
|
||||
file_ext = file.filename.split('.')[-1].lower()
|
||||
if file_ext not in ['xlsx', 'xls']:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"不支持的文件类型: {file_ext},仅支持 .xlsx 和 .xls"
|
||||
)
|
||||
|
||||
# 验证分析类型
|
||||
supported_types = ['general', 'summary', 'statistics', 'insights']
|
||||
if analysis_type not in supported_types:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
|
||||
)
|
||||
|
||||
try:
|
||||
# 读取文件内容
|
||||
content = await file.read()
|
||||
|
||||
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}")
|
||||
|
||||
# 调用 AI 分析服务
|
||||
if parse_all_sheets:
|
||||
result = await excel_ai_service.batch_analyze_sheets(
|
||||
content,
|
||||
file.filename,
|
||||
user_prompt=user_prompt,
|
||||
analysis_type=analysis_type
|
||||
)
|
||||
else:
|
||||
# 解析选项
|
||||
parse_options = {"header_row": 0}
|
||||
|
||||
result = await excel_ai_service.analyze_excel_file(
|
||||
content,
|
||||
file.filename,
|
||||
user_prompt=user_prompt,
|
||||
analysis_type=analysis_type,
|
||||
parse_options=parse_options
|
||||
)
|
||||
|
||||
logger.info(f"文件分析完成: {file.filename}, 成功: {result['success']}")
|
||||
|
||||
return result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"AI 分析过程中出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/analysis/types")
|
||||
async def get_analysis_types():
|
||||
"""
|
||||
获取支持的分析类型列表
|
||||
|
||||
Returns:
|
||||
list: 支持的分析类型
|
||||
"""
|
||||
return {
|
||||
"types": excel_ai_service.get_supported_analysis_types()
|
||||
}
|
||||
|
||||
|
||||
@router.post("/analyze/text")
|
||||
async def analyze_text(
|
||||
excel_data: dict = Body(..., description="Excel 解析后的数据"),
|
||||
user_prompt: str = Body("", description="用户提示词"),
|
||||
analysis_type: str = Body("general", description="分析类型")
|
||||
):
|
||||
"""
|
||||
对已解析的 Excel 数据进行 AI 分析
|
||||
|
||||
Args:
|
||||
excel_data: Excel 数据
|
||||
user_prompt: 用户提示词
|
||||
analysis_type: 分析类型
|
||||
|
||||
Returns:
|
||||
dict: 分析结果
|
||||
"""
|
||||
try:
|
||||
logger.info(f"开始文本分析, 分析类型: {analysis_type}")
|
||||
|
||||
# 调用 LLM 服务
|
||||
from app.services.llm_service import llm_service
|
||||
|
||||
if user_prompt and user_prompt.strip():
|
||||
result = await llm_service.analyze_with_template(
|
||||
excel_data,
|
||||
user_prompt
|
||||
)
|
||||
else:
|
||||
result = await llm_service.analyze_excel_data(
|
||||
excel_data,
|
||||
user_prompt,
|
||||
analysis_type
|
||||
)
|
||||
|
||||
logger.info(f"文本分析完成, 成功: {result['success']}")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"文本分析失败: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
105
backend/app/api/endpoints/analysis_charts.py
Normal file
105
backend/app/api/endpoints/analysis_charts.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
分析结果图表 API - 根据文本分析结果生成图表
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
from app.services.text_analysis_service import text_analysis_service
|
||||
from app.services.chart_generator_service import chart_generator_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/analysis", tags=["分析结果图表"])
|
||||
|
||||
|
||||
class AnalysisChartRequest(BaseModel):
    """Request model for generating charts from an AI analysis text."""
    # Raw AI analysis text to mine for chartable structured data.
    analysis_text: str
    # Name of the originally analyzed file; defaults to empty when unknown.
    original_filename: Optional[str] = ""
    # Source kind hint, defaulting to "text" — presumably also accepts
    # "excel" or similar; TODO confirm against text_analysis_service.
    file_type: Optional[str] = "text"
|
||||
|
||||
|
||||
@router.post("/extract-and-chart")
|
||||
async def extract_and_generate_charts(request: AnalysisChartRequest):
|
||||
"""
|
||||
从 AI 分析结果中提取数据并生成图表
|
||||
|
||||
Args:
|
||||
request: 包含分析文本的请求
|
||||
|
||||
Returns:
|
||||
dict: 包含图表数据的结果
|
||||
"""
|
||||
if not request.analysis_text or not request.analysis_text.strip():
|
||||
raise HTTPException(status_code=400, detail="分析文本不能为空")
|
||||
|
||||
try:
|
||||
logger.info("开始从分析结果中提取结构化数据...")
|
||||
|
||||
# 1. 使用 LLM 提取结构化数据
|
||||
extract_result = await text_analysis_service.extract_structured_data(
|
||||
analysis_text=request.analysis_text,
|
||||
original_filename=request.original_filename or "unknown",
|
||||
file_type=request.file_type or "text"
|
||||
)
|
||||
|
||||
if not extract_result.get("success"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"提取结构化数据失败: {extract_result.get('error', '未知错误')}"
|
||||
)
|
||||
|
||||
logger.info("结构化数据提取成功,开始生成图表...")
|
||||
|
||||
# 2. 根据提取的数据生成图表
|
||||
chart_result = chart_generator_service.generate_charts_from_analysis(extract_result)
|
||||
|
||||
if not chart_result.get("success"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"生成图表失败: {chart_result.get('error', '未知错误')}"
|
||||
)
|
||||
|
||||
logger.info("图表生成成功")
|
||||
|
||||
return chart_result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"分析结果图表生成失败: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"图表生成失败: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/analyze-text")
|
||||
async def analyze_text_only(request: AnalysisChartRequest):
|
||||
"""
|
||||
仅提取结构化数据(不生成图表),用于调试
|
||||
|
||||
Args:
|
||||
request: 包含分析文本的请求
|
||||
|
||||
Returns:
|
||||
dict: 提取的结构化数据
|
||||
"""
|
||||
if not request.analysis_text or not request.analysis_text.strip():
|
||||
raise HTTPException(status_code=400, detail="分析文本不能为空")
|
||||
|
||||
try:
|
||||
result = await text_analysis_service.extract_structured_data(
|
||||
analysis_text=request.analysis_text,
|
||||
original_filename=request.original_filename or "unknown",
|
||||
file_type=request.file_type or "text"
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"文本分析失败: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"文本分析失败: {str(e)}"
|
||||
)
|
||||
205
backend/app/api/endpoints/upload.py
Normal file
205
backend/app/api/endpoints/upload.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""
|
||||
文件上传 API 接口
|
||||
"""
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
from typing import Optional
|
||||
import logging
|
||||
import pandas as pd
|
||||
import io
|
||||
|
||||
from app.services.file_service import file_service
|
||||
from app.core.document_parser import XlsxParser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/upload", tags=["文件上传"])
|
||||
|
||||
# 初始化解析器
|
||||
excel_parser = XlsxParser()
|
||||
|
||||
|
||||
@router.post("/excel")
|
||||
async def upload_excel(
|
||||
file: UploadFile = File(...),
|
||||
parse_all_sheets: bool = Query(False, description="是否解析所有工作表"),
|
||||
sheet_name: Optional[str] = Query(None, description="指定解析的工作表名称"),
|
||||
header_row: int = Query(0, description="表头所在的行索引")
|
||||
):
|
||||
"""
|
||||
上传并解析 Excel 文件
|
||||
|
||||
Args:
|
||||
file: 上传的 Excel 文件
|
||||
parse_all_sheets: 是否解析所有工作表
|
||||
sheet_name: 指定解析的工作表名称
|
||||
header_row: 表头所在的行索引
|
||||
|
||||
Returns:
|
||||
dict: 解析结果
|
||||
"""
|
||||
# 检查文件类型
|
||||
if not file.filename:
|
||||
raise HTTPException(status_code=400, detail="文件名为空")
|
||||
|
||||
file_ext = file.filename.split('.')[-1].lower()
|
||||
if file_ext not in ['xlsx', 'xls']:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"不支持的文件类型: {file_ext},仅支持 .xlsx 和 .xls"
|
||||
)
|
||||
|
||||
try:
|
||||
# 读取文件内容
|
||||
content = await file.read()
|
||||
|
||||
# 保存文件
|
||||
saved_path = file_service.save_uploaded_file(
|
||||
content,
|
||||
file.filename,
|
||||
subfolder="excel"
|
||||
)
|
||||
|
||||
logger.info(f"文件已保存: {saved_path}")
|
||||
|
||||
# 解析文件
|
||||
if parse_all_sheets:
|
||||
result = excel_parser.parse_all_sheets(saved_path)
|
||||
else:
|
||||
# 如果指定了 sheet_name,使用指定的,否则使用默认的第一个
|
||||
if sheet_name:
|
||||
result = excel_parser.parse(saved_path, sheet_name=sheet_name, header_row=header_row)
|
||||
else:
|
||||
result = excel_parser.parse(saved_path, header_row=header_row)
|
||||
|
||||
# 添加文件路径到元数据
|
||||
if result.metadata:
|
||||
result.metadata['saved_path'] = saved_path
|
||||
result.metadata['original_filename'] = file.filename
|
||||
|
||||
return result.to_dict()
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"解析 Excel 文件时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"解析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/excel/preview/{file_path:path}")
|
||||
async def get_excel_preview(
|
||||
file_path: str,
|
||||
sheet_name: Optional[str] = Query(None, description="工作表名称"),
|
||||
max_rows: int = Query(10, description="最多返回的行数", ge=1, le=100)
|
||||
):
|
||||
"""
|
||||
获取 Excel 文件的预览数据
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
sheet_name: 工作表名称
|
||||
max_rows: 最多返回的行数
|
||||
|
||||
Returns:
|
||||
dict: 预览数据
|
||||
"""
|
||||
try:
|
||||
# 解析工作表名称参数
|
||||
sheet_param = sheet_name if sheet_name else 0
|
||||
|
||||
result = excel_parser.get_sheet_preview(
|
||||
file_path,
|
||||
sheet_name=sheet_param,
|
||||
max_rows=max_rows
|
||||
)
|
||||
|
||||
return result.to_dict()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取预览数据时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取预览失败: {str(e)}")
|
||||
|
||||
|
||||
@router.delete("/file")
|
||||
async def delete_uploaded_file(file_path: str = Query(..., description="要删除的文件路径")):
|
||||
"""
|
||||
删除已上传的文件
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
dict: 删除结果
|
||||
"""
|
||||
try:
|
||||
success = file_service.delete_file(file_path)
|
||||
|
||||
if success:
|
||||
return {"success": True, "message": "文件删除成功"}
|
||||
else:
|
||||
return {"success": False, "message": "文件不存在或删除失败"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"删除文件时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/excel/export/{file_path:path}")
|
||||
async def export_excel(
|
||||
file_path: str,
|
||||
sheet_name: Optional[str] = Query(None, description="工作表名称"),
|
||||
columns: Optional[str] = Query(None, description="要导出的列,逗号分隔")
|
||||
):
|
||||
"""
|
||||
导出 Excel 文件(可选择工作表和列)
|
||||
|
||||
Args:
|
||||
file_path: 原始文件路径
|
||||
sheet_name: 工作表名称(可选)
|
||||
columns: 要导出的列名,逗号分隔(可选)
|
||||
|
||||
Returns:
|
||||
StreamingResponse: Excel 文件
|
||||
"""
|
||||
try:
|
||||
# 读取 Excel 文件
|
||||
if sheet_name:
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
else:
|
||||
df = pd.read_excel(file_path)
|
||||
|
||||
# 如果指定了列,只选择这些列
|
||||
if columns:
|
||||
column_list = [col.strip() for col in columns.split(',')]
|
||||
# 过滤掉不存在的列
|
||||
available_columns = [col for col in column_list if col in df.columns]
|
||||
if available_columns:
|
||||
df = df[available_columns]
|
||||
|
||||
# 创建 Excel 文件
|
||||
output = io.BytesIO()
|
||||
with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
||||
df.to_excel(writer, index=False, sheet_name=sheet_name or 'Sheet1')
|
||||
|
||||
output.seek(0)
|
||||
|
||||
# 生成文件名
|
||||
original_name = file_path.split('/')[-1] if '/' in file_path else file_path
|
||||
if columns:
|
||||
export_name = f"export_{sheet_name or 'data'}_{len(column_list) if columns else 'all'}_cols.xlsx"
|
||||
else:
|
||||
export_name = f"export_{original_name}"
|
||||
|
||||
# 返回文件流
|
||||
return StreamingResponse(
|
||||
io.BytesIO(output.getvalue()),
|
||||
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
headers={"Content-Disposition": f"attachment; filename={export_name}"}
|
||||
)
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.error(f"文件不存在: {file_path}")
|
||||
raise HTTPException(status_code=404, detail="文件不存在")
|
||||
except Exception as e:
|
||||
logger.error(f"导出 Excel 文件时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"导出失败: {str(e)}")
|
||||
90
backend/app/api/endpoints/visualization.py
Normal file
90
backend/app/api/endpoints/visualization.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
可视化 API 接口 - 生成统计图表
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Body
|
||||
from typing import Dict, Any
|
||||
import logging
|
||||
|
||||
from app.services.visualization_service import visualization_service
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/visualization", tags=["数据可视化"])
|
||||
|
||||
|
||||
class StatisticsRequest(BaseModel):
    """Request model for statistical chart generation."""
    # Parsed Excel data payload to analyze and visualize.
    excel_data: Dict[str, Any]
    # Analysis mode forwarded to the visualization service.
    analysis_type: str = "statistics"
|
||||
|
||||
@router.post("/statistics")
|
||||
async def generate_statistics(request: StatisticsRequest):
|
||||
"""
|
||||
生成统计信息和可视化图表
|
||||
|
||||
Args:
|
||||
request: 包含 excel_data 和 analysis_type 的请求体
|
||||
|
||||
Returns:
|
||||
dict: 包含统计信息和图表数据的结果
|
||||
"""
|
||||
excel_data = request.excel_data
|
||||
analysis_type = request.analysis_type
|
||||
|
||||
if not excel_data:
|
||||
raise HTTPException(status_code=400, detail="未提供 Excel 数据")
|
||||
|
||||
try:
|
||||
result = visualization_service.analyze_and_visualize(
|
||||
excel_data,
|
||||
analysis_type
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
raise HTTPException(status_code=500, detail=result.get("error", "分析失败"))
|
||||
|
||||
logger.info("统计图表生成成功")
|
||||
|
||||
return result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"统计图表生成失败: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"图表生成失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/chart-types")
|
||||
async def get_chart_types():
|
||||
"""
|
||||
获取支持的图表类型
|
||||
|
||||
Returns:
|
||||
dict: 支持的图表类型列表
|
||||
"""
|
||||
return {
|
||||
"chart_types": [
|
||||
{
|
||||
"value": "histogram",
|
||||
"label": "直方图",
|
||||
"description": "显示数值型列的分布情况"
|
||||
},
|
||||
{
|
||||
"value": "bar_chart",
|
||||
"label": "条形图",
|
||||
"description": "显示分类列的频次分布"
|
||||
},
|
||||
{
|
||||
"value": "box_plot",
|
||||
"label": "箱线图",
|
||||
"description": "显示数值列的四分位数和异常值"
|
||||
},
|
||||
{
|
||||
"value": "correlation_heatmap",
|
||||
"label": "相关性热力图",
|
||||
"description": "显示数值列之间的相关性"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user