添加系统架构图
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
AI 分析 API 接口
|
AI 分析 API 接口
|
||||||
"""
|
"""
|
||||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
|
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body, Form
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import logging
|
import logging
|
||||||
@@ -21,7 +21,8 @@ router = APIRouter(prefix="/ai", tags=["AI 分析"])
|
|||||||
|
|
||||||
@router.post("/analyze/excel")
|
@router.post("/analyze/excel")
|
||||||
async def analyze_excel(
|
async def analyze_excel(
|
||||||
file: UploadFile = File(...),
|
file: Optional[UploadFile] = File(None),
|
||||||
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||||
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
|
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
|
||||||
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
|
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
|
||||||
@@ -30,7 +31,8 @@ async def analyze_excel(
|
|||||||
上传并使用 AI 分析 Excel 文件
|
上传并使用 AI 分析 Excel 文件
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: 上传的 Excel 文件
|
file: 上传的 Excel 文件(与 doc_id 二选一)
|
||||||
|
doc_id: 文档ID(从数据库读取)
|
||||||
user_prompt: 用户自定义提示词
|
user_prompt: 用户自定义提示词
|
||||||
analysis_type: 分析类型
|
analysis_type: 分析类型
|
||||||
parse_all_sheets: 是否分析所有工作表
|
parse_all_sheets: 是否分析所有工作表
|
||||||
@@ -38,7 +40,57 @@ async def analyze_excel(
|
|||||||
Returns:
|
Returns:
|
||||||
dict: 分析结果,包含 Excel 数据和 AI 分析结果
|
dict: 分析结果,包含 Excel 数据和 AI 分析结果
|
||||||
"""
|
"""
|
||||||
# 检查文件类型
|
filename = None
|
||||||
|
|
||||||
|
# 从数据库读取模式
|
||||||
|
if doc_id:
|
||||||
|
try:
|
||||||
|
from app.core.database.mongodb import mongodb
|
||||||
|
doc = await mongodb.get_document(doc_id)
|
||||||
|
if not doc:
|
||||||
|
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
|
||||||
|
|
||||||
|
filename = doc.get("metadata", {}).get("original_filename", "unknown.xlsx")
|
||||||
|
file_ext = filename.split('.')[-1].lower()
|
||||||
|
|
||||||
|
if file_ext not in ['xlsx', 'xls']:
|
||||||
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Excel: {file_ext}")
|
||||||
|
|
||||||
|
file_path = doc.get("metadata", {}).get("file_path")
|
||||||
|
if not file_path:
|
||||||
|
raise HTTPException(status_code=400, detail="文档没有存储文件路径,请重新上传")
|
||||||
|
|
||||||
|
# 使用文件路径进行 AI 分析
|
||||||
|
if parse_all_sheets:
|
||||||
|
result = await excel_ai_service.batch_analyze_sheets_from_path(
|
||||||
|
file_path=file_path,
|
||||||
|
filename=filename,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
analysis_type=analysis_type
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result = await excel_ai_service.analyze_excel_file_from_path(
|
||||||
|
file_path=file_path,
|
||||||
|
filename=filename,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
analysis_type=analysis_type
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.get("success"):
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
return result
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库读取 Excel 文档失败: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
|
||||||
|
|
||||||
|
# 文件上传模式
|
||||||
|
if not file:
|
||||||
|
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
|
||||||
|
|
||||||
if not file.filename:
|
if not file.filename:
|
||||||
raise HTTPException(status_code=400, detail="文件名为空")
|
raise HTTPException(status_code=400, detail="文件名为空")
|
||||||
|
|
||||||
@@ -61,7 +113,11 @@ async def analyze_excel(
|
|||||||
# 读取文件内容
|
# 读取文件内容
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
|
|
||||||
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}")
|
# 验证文件内容不为空
|
||||||
|
if not content:
|
||||||
|
raise HTTPException(status_code=400, detail="文件内容为空,请确保文件已正确上传")
|
||||||
|
|
||||||
|
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}, 文件大小: {len(content)} bytes")
|
||||||
|
|
||||||
# 调用 AI 分析服务
|
# 调用 AI 分析服务
|
||||||
if parse_all_sheets:
|
if parse_all_sheets:
|
||||||
@@ -155,7 +211,7 @@ async def analyze_text(
|
|||||||
@router.post("/analyze/md")
|
@router.post("/analyze/md")
|
||||||
async def analyze_markdown(
|
async def analyze_markdown(
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
|
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
|
||||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||||
section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
|
section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
|
||||||
@@ -198,7 +254,7 @@ async def analyze_markdown(
|
|||||||
if file_ext not in ['md', 'markdown']:
|
if file_ext not in ['md', 'markdown']:
|
||||||
raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
|
||||||
|
|
||||||
content = doc.get("content", "")
|
content = doc.get("content") or ""
|
||||||
if not content:
|
if not content:
|
||||||
raise HTTPException(status_code=400, detail="文档内容为空")
|
raise HTTPException(status_code=400, detail="文档内容为空")
|
||||||
|
|
||||||
@@ -392,7 +448,7 @@ async def get_markdown_outline(
|
|||||||
@router.post("/analyze/txt")
|
@router.post("/analyze/txt")
|
||||||
async def analyze_txt(
|
async def analyze_txt(
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -427,7 +483,7 @@ async def analyze_txt(
|
|||||||
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
|
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
|
||||||
|
|
||||||
# 使用数据库中的 content
|
# 使用数据库中的 content
|
||||||
text_content = doc.get("content", "")
|
text_content = doc.get("content") or ""
|
||||||
|
|
||||||
if not text_content:
|
if not text_content:
|
||||||
raise HTTPException(status_code=400, detail="文档内容为空")
|
raise HTTPException(status_code=400, detail="文档内容为空")
|
||||||
@@ -498,8 +554,8 @@ async def analyze_txt(
|
|||||||
@router.post("/analyze/word")
|
@router.post("/analyze/word")
|
||||||
async def analyze_word(
|
async def analyze_word(
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"),
|
user_hint: str = Form("", description="用户提示词,如'请提取表格数据'"),
|
||||||
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -536,8 +592,9 @@ async def analyze_word(
|
|||||||
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
|
||||||
|
|
||||||
# 使用数据库中的 content 进行分析
|
# 使用数据库中的 content 进行分析
|
||||||
content = doc.get("content", "")
|
content = doc.get("content", "") or ""
|
||||||
tables = doc.get("structured_data", {}).get("tables", [])
|
structured_data = doc.get("structured_data") or {}
|
||||||
|
tables = structured_data.get("tables", [])
|
||||||
|
|
||||||
# 调用 AI 分析服务,传入数据库内容
|
# 调用 AI 分析服务,传入数据库内容
|
||||||
if analysis_type == "charts":
|
if analysis_type == "charts":
|
||||||
|
|||||||
@@ -223,6 +223,177 @@ class ExcelAIService:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def analyze_excel_file_from_path(
|
||||||
|
self,
|
||||||
|
file_path: str,
|
||||||
|
filename: str,
|
||||||
|
user_prompt: str = "",
|
||||||
|
analysis_type: str = "general",
|
||||||
|
parse_options: Optional[Dict[str, Any]] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
从文件路径分析 Excel 文件(用于从数据库加载的文档)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Excel 文件路径
|
||||||
|
filename: 文件名
|
||||||
|
user_prompt: 用户自定义提示词
|
||||||
|
analysis_type: 分析类型
|
||||||
|
parse_options: 解析选项
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, Any]: 分析结果
|
||||||
|
"""
|
||||||
|
# 1. 解析 Excel 文件
|
||||||
|
excel_data = None
|
||||||
|
parse_result_metadata = None
|
||||||
|
try:
|
||||||
|
parse_options = parse_options or {}
|
||||||
|
parse_result = self.parser.parse(file_path, **parse_options)
|
||||||
|
|
||||||
|
if not parse_result.success:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": parse_result.error,
|
||||||
|
"analysis": None
|
||||||
|
}
|
||||||
|
|
||||||
|
excel_data = parse_result.data
|
||||||
|
parse_result_metadata = parse_result.metadata
|
||||||
|
logger.info(f"Excel 解析成功: {parse_result_metadata}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Excel 解析失败: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Excel 解析失败: {str(e)}",
|
||||||
|
"analysis": None
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. 调用 LLM 进行分析
|
||||||
|
try:
|
||||||
|
if user_prompt and user_prompt.strip():
|
||||||
|
llm_result = await self.llm_service.analyze_with_template(
|
||||||
|
excel_data,
|
||||||
|
user_prompt
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
llm_result = await self.llm_service.analyze_excel_data(
|
||||||
|
excel_data,
|
||||||
|
user_prompt,
|
||||||
|
analysis_type
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"AI 分析完成: {llm_result['success']}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"excel": {
|
||||||
|
"data": excel_data,
|
||||||
|
"metadata": parse_result_metadata,
|
||||||
|
"saved_path": file_path
|
||||||
|
},
|
||||||
|
"analysis": llm_result
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"AI 分析失败: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": f"AI 分析失败: {str(e)}",
|
||||||
|
"excel": {
|
||||||
|
"data": excel_data,
|
||||||
|
"metadata": parse_result_metadata
|
||||||
|
},
|
||||||
|
"analysis": None
|
||||||
|
}
|
||||||
|
|
||||||
|
async def batch_analyze_sheets_from_path(
|
||||||
|
self,
|
||||||
|
file_path: str,
|
||||||
|
filename: str,
|
||||||
|
user_prompt: str = "",
|
||||||
|
analysis_type: str = "general"
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
从文件路径批量分析 Excel 文件的所有工作表(用于从数据库加载的文档)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Excel 文件路径
|
||||||
|
filename: 文件名
|
||||||
|
user_prompt: 用户自定义提示词
|
||||||
|
analysis_type: 分析类型
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, Any]: 分析结果
|
||||||
|
"""
|
||||||
|
# 1. 解析所有工作表
|
||||||
|
try:
|
||||||
|
parse_result = self.parser.parse_all_sheets(file_path)
|
||||||
|
|
||||||
|
if not parse_result.success:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": parse_result.error,
|
||||||
|
"analysis": None
|
||||||
|
}
|
||||||
|
|
||||||
|
sheets_data = parse_result.data.get("sheets", {})
|
||||||
|
logger.info(f"Excel 解析成功,共 {len(sheets_data)} 个工作表")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Excel 解析失败: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Excel 解析失败: {str(e)}",
|
||||||
|
"analysis": None
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. 批量分析每个工作表
|
||||||
|
sheet_analyses = {}
|
||||||
|
errors = {}
|
||||||
|
|
||||||
|
for sheet_name, sheet_data in sheets_data.items():
|
||||||
|
try:
|
||||||
|
if user_prompt and user_prompt.strip():
|
||||||
|
llm_result = await self.llm_service.analyze_with_template(
|
||||||
|
sheet_data,
|
||||||
|
user_prompt
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
llm_result = await self.llm_service.analyze_excel_data(
|
||||||
|
sheet_data,
|
||||||
|
user_prompt,
|
||||||
|
analysis_type
|
||||||
|
)
|
||||||
|
|
||||||
|
sheet_analyses[sheet_name] = llm_result
|
||||||
|
|
||||||
|
if not llm_result["success"]:
|
||||||
|
errors[sheet_name] = llm_result.get("error", "未知错误")
|
||||||
|
|
||||||
|
logger.info(f"工作表 '{sheet_name}' 分析完成")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"工作表 '{sheet_name}' 分析失败: {str(e)}")
|
||||||
|
errors[sheet_name] = str(e)
|
||||||
|
|
||||||
|
# 3. 组合结果
|
||||||
|
return {
|
||||||
|
"success": len(errors) == 0,
|
||||||
|
"excel": {
|
||||||
|
"sheets": sheets_data,
|
||||||
|
"metadata": parse_result.metadata,
|
||||||
|
"saved_path": file_path
|
||||||
|
},
|
||||||
|
"analysis": {
|
||||||
|
"sheets": sheet_analyses,
|
||||||
|
"total_sheets": len(sheets_data),
|
||||||
|
"successful": len(sheet_analyses) - len(errors),
|
||||||
|
"errors": errors
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def get_supported_analysis_types(self) -> List[str]:
|
def get_supported_analysis_types(self) -> List[str]:
|
||||||
"""获取支持的分析类型"""
|
"""获取支持的分析类型"""
|
||||||
return [
|
return [
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ class LLMService:
|
|||||||
_start_time = time.time()
|
_start_time = time.time()
|
||||||
logger.info(f"🤖 [LLM] 正在调用 DeepSeek API... 模型: {self.model_name}")
|
logger.info(f"🤖 [LLM] 正在调用 DeepSeek API... 模型: {self.model_name}")
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{self.base_url}/chat/completions",
|
f"{self.base_url}/chat/completions",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
@@ -84,7 +84,7 @@ class LLMService:
|
|||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LLM API 调用异常: {str(e)}")
|
logger.error(f"LLM API 调用异常: {repr(e)} - {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ class TxtAIService:
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.parser = TxtParser()
|
self.parser = TxtParser()
|
||||||
|
self.llm = llm_service
|
||||||
|
|
||||||
async def analyze_txt_with_ai(
|
async def analyze_txt_with_ai(
|
||||||
self,
|
self,
|
||||||
@@ -114,7 +115,7 @@ class TxtAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content_text = self.llm.extract_message_content(response)
|
content_text = self.llm.extract_message_content(response)
|
||||||
@@ -220,7 +221,7 @@ class TxtAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content_text = self.llm.extract_message_content(response)
|
content_text = self.llm.extract_message_content(response)
|
||||||
|
|||||||
@@ -53,7 +53,11 @@ class VisualizationService:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 转换为 DataFrame
|
# 转换为 DataFrame
|
||||||
df = pd.DataFrame(rows, columns=columns)
|
# 过滤掉行数与列数不匹配的数据
|
||||||
|
valid_rows = [row for row in rows if len(row) == len(columns)]
|
||||||
|
if len(valid_rows) < len(rows):
|
||||||
|
logger.warning(f"过滤了 {len(rows) - len(valid_rows)} 行无效数据(列数不匹配)")
|
||||||
|
df = pd.DataFrame(valid_rows, columns=columns)
|
||||||
|
|
||||||
# 根据列类型分类
|
# 根据列类型分类
|
||||||
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
||||||
@@ -141,18 +145,18 @@ class VisualizationService:
|
|||||||
charts = {}
|
charts = {}
|
||||||
|
|
||||||
# 1. 数值型列的直方图
|
# 1. 数值型列的直方图
|
||||||
charts["histograms"] = []
|
charts["numeric_charts"] = []
|
||||||
for col in numeric_columns[:5]: # 限制最多 5 个数值列
|
for col in numeric_columns[:5]: # 限制最多 5 个数值列
|
||||||
chart_data = self._create_histogram(df[col], col)
|
chart_data = self._create_histogram(df[col], col)
|
||||||
if chart_data:
|
if chart_data:
|
||||||
charts["histograms"].append(chart_data)
|
charts["numeric_charts"].append(chart_data)
|
||||||
|
|
||||||
# 2. 分类型列的条形图
|
# 2. 分类型列的条形图
|
||||||
charts["bar_charts"] = []
|
charts["categorical_charts"] = []
|
||||||
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
|
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
|
||||||
chart_data = self._create_bar_chart(df[col], col)
|
chart_data = self._create_bar_chart(df[col], col)
|
||||||
if chart_data:
|
if chart_data:
|
||||||
charts["bar_charts"].append(chart_data)
|
charts["categorical_charts"].append(chart_data)
|
||||||
|
|
||||||
# 3. 数值型列的箱线图
|
# 3. 数值型列的箱线图
|
||||||
charts["box_plots"] = []
|
charts["box_plots"] = []
|
||||||
|
|||||||
@@ -184,7 +184,7 @@ class WordAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content = self.llm.extract_message_content(response)
|
content = self.llm.extract_message_content(response)
|
||||||
@@ -276,7 +276,7 @@ class WordAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content = self.llm.extract_message_content(response)
|
content = self.llm.extract_message_content(response)
|
||||||
@@ -849,10 +849,12 @@ class WordAIService:
|
|||||||
|
|
||||||
# 提取可用于图表的数据
|
# 提取可用于图表的数据
|
||||||
chart_data = None
|
chart_data = None
|
||||||
|
logger.info(f"准备提取图表数据,structured_data type: {structured_data.get('type')}, keys: {list(structured_data.keys())}")
|
||||||
|
|
||||||
if structured_data.get("type") == "table_data":
|
if structured_data.get("type") == "table_data":
|
||||||
headers = structured_data.get("headers", [])
|
headers = structured_data.get("headers", [])
|
||||||
rows = structured_data.get("rows", [])
|
rows = structured_data.get("rows", [])
|
||||||
|
logger.info(f"table_data类型: headers数量={len(headers)}, rows数量={len(rows)}")
|
||||||
if headers and rows:
|
if headers and rows:
|
||||||
chart_data = {
|
chart_data = {
|
||||||
"columns": headers,
|
"columns": headers,
|
||||||
@@ -860,15 +862,19 @@ class WordAIService:
|
|||||||
}
|
}
|
||||||
elif structured_data.get("type") == "structured_text":
|
elif structured_data.get("type") == "structured_text":
|
||||||
tables_data = structured_data.get("tables", [])
|
tables_data = structured_data.get("tables", [])
|
||||||
|
logger.info(f"structured_text类型: tables数量={len(tables_data)}")
|
||||||
if tables_data and len(tables_data) > 0:
|
if tables_data and len(tables_data) > 0:
|
||||||
first_table = tables_data[0]
|
first_table = tables_data[0]
|
||||||
headers = first_table.get("headers", [])
|
headers = first_table.get("headers", [])
|
||||||
rows = first_table.get("rows", [])
|
rows = first_table.get("rows", [])
|
||||||
|
logger.info(f"第一个表格: headers={headers[:5]}, rows数量={len(rows)}")
|
||||||
if headers and rows:
|
if headers and rows:
|
||||||
chart_data = {
|
chart_data = {
|
||||||
"columns": headers,
|
"columns": headers,
|
||||||
"rows": rows
|
"rows": rows
|
||||||
}
|
}
|
||||||
|
else:
|
||||||
|
logger.warning(f"无法识别的structured_data类型: {structured_data.get('type')}")
|
||||||
|
|
||||||
# 生成可视化图表
|
# 生成可视化图表
|
||||||
if chart_data:
|
if chart_data:
|
||||||
@@ -904,3 +910,6 @@ class WordAIService:
|
|||||||
"success": False,
|
"success": False,
|
||||||
"error": str(e)
|
"error": str(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
word_ai_service = WordAIService()
|
||||||
|
|||||||
@@ -1187,11 +1187,19 @@ export const aiApi = {
|
|||||||
* 上传并使用 AI 分析 Excel 文件
|
* 上传并使用 AI 分析 Excel 文件
|
||||||
*/
|
*/
|
||||||
async analyzeExcel(
|
async analyzeExcel(
|
||||||
file: File,
|
file: File | null,
|
||||||
options: AIAnalyzeOptions = {}
|
options: AIAnalyzeOptions = {},
|
||||||
|
docId: string | null = null
|
||||||
): Promise<AIExcelAnalyzeResult> {
|
): Promise<AIExcelAnalyzeResult> {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
|
|
||||||
|
if (docId) {
|
||||||
|
formData.append('doc_id', docId);
|
||||||
|
} else if (file) {
|
||||||
formData.append('file', file);
|
formData.append('file', file);
|
||||||
|
} else {
|
||||||
|
throw new Error('必须提供文件或文档ID');
|
||||||
|
}
|
||||||
|
|
||||||
const params = new URLSearchParams();
|
const params = new URLSearchParams();
|
||||||
if (options.userPrompt) {
|
if (options.userPrompt) {
|
||||||
@@ -1268,7 +1276,9 @@ export const aiApi = {
|
|||||||
try {
|
try {
|
||||||
const response = await fetch(url);
|
const response = await fetch(url);
|
||||||
if (!response.ok) throw new Error('获取分析类型失败');
|
if (!response.ok) throw new Error('获取分析类型失败');
|
||||||
return await response.json();
|
const data = await response.json();
|
||||||
|
// 转换后端返回格式 {excel_types: [], markdown_types: []} 为前端期望的 {types: []}
|
||||||
|
return { types: data.excel_types || [] };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('获取分析类型失败:', error);
|
console.error('获取分析类型失败:', error);
|
||||||
throw error;
|
throw error;
|
||||||
|
|||||||
@@ -472,11 +472,17 @@ const Documents: React.FC = () => {
|
|||||||
setAnalysisCharts(null);
|
setAnalysisCharts(null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await aiApi.analyzeExcel(uploadedFile, {
|
// 判断是从历史文档还是本地上传
|
||||||
|
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
|
||||||
|
const result = await aiApi.analyzeExcel(
|
||||||
|
uploadedFile.size > 0 ? uploadedFile : null,
|
||||||
|
{
|
||||||
userPrompt: aiOptions.userPrompt,
|
userPrompt: aiOptions.userPrompt,
|
||||||
analysisType: aiOptions.analysisType,
|
analysisType: aiOptions.analysisType,
|
||||||
parseAllSheets: aiOptions.parseAllSheetsForAI
|
parseAllSheets: aiOptions.parseAllSheetsForAI
|
||||||
});
|
},
|
||||||
|
docId
|
||||||
|
);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
toast.success('AI 分析完成');
|
toast.success('AI 分析完成');
|
||||||
@@ -706,6 +712,12 @@ const Documents: React.FC = () => {
|
|||||||
|
|
||||||
const handleSelectDocument = async (docId: string) => {
|
const handleSelectDocument = async (docId: string) => {
|
||||||
setLoadingDocument(true);
|
setLoadingDocument(true);
|
||||||
|
// 重置所有 AI 分析结果,避免显示上一个文档的分析
|
||||||
|
setAiAnalysis(null);
|
||||||
|
setAnalysisCharts(null);
|
||||||
|
setMdAnalysis(null);
|
||||||
|
setWordAnalysis(null);
|
||||||
|
setTxtAnalysis(null);
|
||||||
try {
|
try {
|
||||||
const result = await backendApi.getDocument(docId);
|
const result = await backendApi.getDocument(docId);
|
||||||
if (result.success && result.document) {
|
if (result.success && result.document) {
|
||||||
@@ -2264,27 +2276,39 @@ const Documents: React.FC = () => {
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
// 数据表格组件
|
// 数据表格组件 - 滑动窗口样式
|
||||||
const DataTable: React.FC<{ columns: string[]; rows: Record<string, any>[] }> = ({ columns, rows }) => {
|
const DataTable: React.FC<{ columns: string[]; rows: Record<string, any>[] }> = ({ columns, rows }) => {
|
||||||
if (!columns.length || !rows.length) {
|
if (!columns.length || !rows.length) {
|
||||||
return <div className="text-center py-8 text-muted-foreground text-sm">暂无数据</div>;
|
return <div className="text-center py-8 text-muted-foreground text-sm">暂无数据</div>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const displayRows = rows.slice(0, 500); // 限制最多显示500行
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="rounded-lg border overflow-x-auto">
|
<div className="rounded-lg border overflow-hidden">
|
||||||
|
{/* 表头 - 固定 */}
|
||||||
|
<div className="overflow-x-auto">
|
||||||
<TableComponent>
|
<TableComponent>
|
||||||
<TableHeader>
|
<TableHeader>
|
||||||
<TableRow>
|
<TableRow className="bg-muted/50">
|
||||||
<TableHead className="w-16 text-center text-muted-foreground">#</TableHead>
|
<TableHead className="w-16 text-center text-muted-foreground">#</TableHead>
|
||||||
{columns.map((col, idx) => (
|
{columns.map((col, idx) => (
|
||||||
<TableHead key={idx} className="whitespace-nowrap">{col || `<列${idx + 1}>`}</TableHead>
|
<TableHead key={idx} className="whitespace-nowrap">{col || `<列${idx + 1}>`}</TableHead>
|
||||||
))}
|
))}
|
||||||
</TableRow>
|
</TableRow>
|
||||||
</TableHeader>
|
</TableHeader>
|
||||||
|
</TableComponent>
|
||||||
|
</div>
|
||||||
|
{/* 表体 - 可滚动 */}
|
||||||
|
<div
|
||||||
|
className="overflow-y-auto"
|
||||||
|
style={{ maxHeight: '400px' }}
|
||||||
|
>
|
||||||
|
<TableComponent>
|
||||||
<TableBody>
|
<TableBody>
|
||||||
{rows.slice(0, 100).map((row, rowIdx) => (
|
{displayRows.map((row, rowIdx) => (
|
||||||
<TableRow key={rowIdx}>
|
<TableRow key={rowIdx}>
|
||||||
<TableCell className="text-center text-muted-foreground font-medium">{rowIdx + 1}</TableCell>
|
<TableCell className="text-center text-muted-foreground font-medium w-16">{rowIdx + 1}</TableCell>
|
||||||
{columns.map((col, colIdx) => (
|
{columns.map((col, colIdx) => (
|
||||||
<TableCell key={colIdx} className="whitespace-nowrap">
|
<TableCell key={colIdx} className="whitespace-nowrap">
|
||||||
{row[col] !== null && row[col] !== undefined ? String(row[col]) : '-'}
|
{row[col] !== null && row[col] !== undefined ? String(row[col]) : '-'}
|
||||||
@@ -2294,9 +2318,15 @@ const DataTable: React.FC<{ columns: string[]; rows: Record<string, any>[] }> =
|
|||||||
))}
|
))}
|
||||||
</TableBody>
|
</TableBody>
|
||||||
</TableComponent>
|
</TableComponent>
|
||||||
{rows.length > 100 && (
|
</div>
|
||||||
|
{rows.length > 500 && (
|
||||||
<div className="p-3 text-center text-sm text-muted-foreground bg-muted/30">
|
<div className="p-3 text-center text-sm text-muted-foreground bg-muted/30">
|
||||||
仅显示前 100 行数据
|
仅显示前 500 行数据(共 {rows.length} 行)
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{rows.length > 100 && rows.length <= 500 && (
|
||||||
|
<div className="p-2 text-center text-xs text-muted-foreground bg-muted/20">
|
||||||
|
共 {rows.length} 行数据,向下滚动查看更多
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user