添加系统架构图

This commit is contained in:
dj
2026-04-16 23:08:21 +08:00
parent 38b0c7e62e
commit 975ebf536b
8 changed files with 339 additions and 57 deletions

View File

@@ -1,7 +1,7 @@
"""
AI 分析 API 接口
"""
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body, Form
from fastapi.responses import StreamingResponse
from typing import Optional
import logging
@@ -21,7 +21,8 @@ router = APIRouter(prefix="/ai", tags=["AI 分析"])
@router.post("/analyze/excel")
async def analyze_excel(
file: UploadFile = File(...),
file: Optional[UploadFile] = File(None),
doc_id: Optional[str] = Form(None, description="文档ID从数据库读取"),
user_prompt: str = Query("", description="用户自定义提示词"),
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
@@ -30,7 +31,8 @@ async def analyze_excel(
上传并使用 AI 分析 Excel 文件
Args:
file: 上传的 Excel 文件
file: 上传的 Excel 文件(与 doc_id 二选一)
doc_id: 文档ID从数据库读取
user_prompt: 用户自定义提示词
analysis_type: 分析类型
parse_all_sheets: 是否分析所有工作表
@@ -38,7 +40,57 @@ async def analyze_excel(
Returns:
dict: 分析结果,包含 Excel 数据和 AI 分析结果
"""
# 检查文件类型
filename = None
# 从数据库读取模式
if doc_id:
try:
from app.core.database.mongodb import mongodb
doc = await mongodb.get_document(doc_id)
if not doc:
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
filename = doc.get("metadata", {}).get("original_filename", "unknown.xlsx")
file_ext = filename.split('.')[-1].lower()
if file_ext not in ['xlsx', 'xls']:
raise HTTPException(status_code=400, detail=f"文档类型不是 Excel: {file_ext}")
file_path = doc.get("metadata", {}).get("file_path")
if not file_path:
raise HTTPException(status_code=400, detail="文档没有存储文件路径,请重新上传")
# 使用文件路径进行 AI 分析
if parse_all_sheets:
result = await excel_ai_service.batch_analyze_sheets_from_path(
file_path=file_path,
filename=filename,
user_prompt=user_prompt,
analysis_type=analysis_type
)
else:
result = await excel_ai_service.analyze_excel_file_from_path(
file_path=file_path,
filename=filename,
user_prompt=user_prompt,
analysis_type=analysis_type
)
if result.get("success"):
return result
else:
return result
except HTTPException:
raise
except Exception as e:
logger.error(f"从数据库读取 Excel 文档失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
# 文件上传模式
if not file:
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
if not file.filename:
raise HTTPException(status_code=400, detail="文件名为空")
@@ -61,7 +113,11 @@ async def analyze_excel(
# 读取文件内容
content = await file.read()
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}")
# 验证文件内容不为空
if not content:
raise HTTPException(status_code=400, detail="文件内容为空,请确保文件已正确上传")
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}, 文件大小: {len(content)} bytes")
# 调用 AI 分析服务
if parse_all_sheets:
@@ -155,7 +211,7 @@ async def analyze_text(
@router.post("/analyze/md")
async def analyze_markdown(
file: Optional[UploadFile] = File(None),
doc_id: Optional[str] = Query(None, description="文档ID从数据库读取"),
doc_id: Optional[str] = Form(None, description="文档ID从数据库读取"),
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
user_prompt: str = Query("", description="用户自定义提示词"),
section_number: Optional[str] = Query(None, description="指定章节编号,如 '''(一)'")
@@ -198,7 +254,7 @@ async def analyze_markdown(
if file_ext not in ['md', 'markdown']:
raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
content = doc.get("content", "")
content = doc.get("content") or ""
if not content:
raise HTTPException(status_code=400, detail="文档内容为空")
@@ -392,7 +448,7 @@ async def get_markdown_outline(
@router.post("/analyze/txt")
async def analyze_txt(
file: Optional[UploadFile] = File(None),
doc_id: Optional[str] = Query(None, description="文档ID从数据库读取"),
doc_id: Optional[str] = Form(None, description="文档ID从数据库读取"),
analysis_type: str = Query("structured", description="分析类型: structured, charts")
):
"""
@@ -427,7 +483,7 @@ async def analyze_txt(
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
# 使用数据库中的 content
text_content = doc.get("content", "")
text_content = doc.get("content") or ""
if not text_content:
raise HTTPException(status_code=400, detail="文档内容为空")
@@ -498,8 +554,8 @@ async def analyze_txt(
@router.post("/analyze/word")
async def analyze_word(
file: Optional[UploadFile] = File(None),
doc_id: Optional[str] = Query(None, description="文档ID从数据库读取"),
user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"),
doc_id: Optional[str] = Form(None, description="文档ID从数据库读取"),
user_hint: str = Form("", description="用户提示词,如'请提取表格数据'"),
analysis_type: str = Query("structured", description="分析类型: structured, charts")
):
"""
@@ -536,8 +592,9 @@ async def analyze_word(
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
# 使用数据库中的 content 进行分析
content = doc.get("content", "")
tables = doc.get("structured_data", {}).get("tables", [])
content = doc.get("content", "") or ""
structured_data = doc.get("structured_data") or {}
tables = structured_data.get("tables", [])
# 调用 AI 分析服务,传入数据库内容
if analysis_type == "charts":

View File

@@ -223,6 +223,177 @@ class ExcelAIService:
}
}
async def analyze_excel_file_from_path(
    self,
    file_path: str,
    filename: str,
    user_prompt: str = "",
    analysis_type: str = "general",
    parse_options: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Analyze an Excel file that already exists on disk (used for documents
    loaded from the database rather than uploaded in the request).

    Args:
        file_path: Path of the Excel file to parse.
        filename: Original file name. Accepted for signature parity with the
            batch variant; this method does not read it.
        user_prompt: Custom prompt. When non-blank, the template-based LLM
            call is used instead of the typed analysis.
        analysis_type: Analysis type forwarded to the LLM service when no
            custom prompt is given.
        parse_options: Extra keyword arguments forwarded to the parser.

    Returns:
        Dict[str, Any]: On success ``{"success": True, "excel": {...},
        "analysis": <llm result>}``. On failure ``{"success": False,
        "error": <message>, "analysis": None}``; when the failure happened
        in the LLM step the parsed ``"excel"`` data is included as well.
    """
    # 1. Parse the Excel file.
    excel_data = None
    parse_result_metadata = None

    try:
        parse_options = parse_options or {}
        parse_result = self.parser.parse(file_path, **parse_options)

        if not parse_result.success:
            return {
                "success": False,
                "error": parse_result.error,
                "analysis": None
            }

        excel_data = parse_result.data
        parse_result_metadata = parse_result.metadata
        logger.info(f"Excel 解析成功: {parse_result_metadata}")
    except Exception as e:
        logger.error(f"Excel 解析失败: {str(e)}")
        return {
            "success": False,
            "error": f"Excel 解析失败: {str(e)}",
            "analysis": None
        }

    # 2. Run the LLM analysis on the parsed data.
    try:
        if user_prompt and user_prompt.strip():
            llm_result = await self.llm_service.analyze_with_template(
                excel_data,
                user_prompt
            )
        else:
            llm_result = await self.llm_service.analyze_excel_data(
                excel_data,
                user_prompt,
                analysis_type
            )

        # Use .get() so that a result without a "success" key cannot turn
        # this purely informational log line into a reported failure.
        logger.info(f"AI 分析完成: {llm_result.get('success')}")

        return {
            "success": True,
            "excel": {
                "data": excel_data,
                "metadata": parse_result_metadata,
                "saved_path": file_path
            },
            "analysis": llm_result
        }
    except Exception as e:
        logger.error(f"AI 分析失败: {str(e)}")
        return {
            "success": False,
            "error": f"AI 分析失败: {str(e)}",
            "excel": {
                "data": excel_data,
                "metadata": parse_result_metadata
            },
            "analysis": None
        }
async def batch_analyze_sheets_from_path(
    self,
    file_path: str,
    filename: str,
    user_prompt: str = "",
    analysis_type: str = "general"
) -> Dict[str, Any]:
    """
    Analyze every worksheet of an Excel file that already exists on disk
    (used for documents loaded from the database).

    Args:
        file_path: Path of the Excel file to parse.
        filename: Original file name. Part of the public signature; this
            method does not read it.
        user_prompt: Custom prompt. When non-blank, the template-based LLM
            call is used for every sheet.
        analysis_type: Analysis type forwarded to the LLM service when no
            custom prompt is given.

    Returns:
        Dict[str, Any]: ``{"success": <no sheet failed>, "excel": {...},
        "analysis": {"sheets", "total_sheets", "successful", "errors"}}``,
        or ``{"success": False, "error": <message>, "analysis": None}`` when
        the workbook itself could not be parsed.
    """
    # 1. Parse all worksheets.
    try:
        parse_result = self.parser.parse_all_sheets(file_path)

        if not parse_result.success:
            return {
                "success": False,
                "error": parse_result.error,
                "analysis": None
            }

        sheets_data = parse_result.data.get("sheets", {})
        logger.info(f"Excel 解析成功,共 {len(sheets_data)} 个工作表")
    except Exception as e:
        logger.error(f"Excel 解析失败: {str(e)}")
        return {
            "success": False,
            "error": f"Excel 解析失败: {str(e)}",
            "analysis": None
        }

    # 2. Analyze each worksheet independently; one failing sheet must not
    #    abort the remaining ones.
    sheet_analyses = {}
    errors = {}

    for sheet_name, sheet_data in sheets_data.items():
        try:
            if user_prompt and user_prompt.strip():
                llm_result = await self.llm_service.analyze_with_template(
                    sheet_data,
                    user_prompt
                )
            else:
                llm_result = await self.llm_service.analyze_excel_data(
                    sheet_data,
                    user_prompt,
                    analysis_type
                )

            sheet_analyses[sheet_name] = llm_result

            # A result without a truthy "success" flag counts as a failure.
            if not llm_result.get("success"):
                errors[sheet_name] = llm_result.get("error", "未知错误")

            logger.info(f"工作表 '{sheet_name}' 分析完成")
        except Exception as e:
            logger.error(f"工作表 '{sheet_name}' 分析失败: {str(e)}")
            errors[sheet_name] = str(e)

    # 3. Assemble the combined result.
    # Every sheet either succeeded or has exactly one entry in `errors`.
    # Sheets whose LLM call raised never reach `sheet_analyses`, so the
    # count is derived from the total number of sheets rather than from
    # len(sheet_analyses) (which would undercount and could go negative).
    successful_count = len(sheets_data) - len(errors)

    return {
        "success": len(errors) == 0,
        "excel": {
            "sheets": sheets_data,
            "metadata": parse_result.metadata,
            "saved_path": file_path
        },
        "analysis": {
            "sheets": sheet_analyses,
            "total_sheets": len(sheets_data),
            "successful": successful_count,
            "errors": errors
        }
    }
def get_supported_analysis_types(self) -> List[str]:
"""获取支持的分析类型"""
return [

View File

@@ -58,7 +58,7 @@ class LLMService:
_start_time = time.time()
logger.info(f"🤖 [LLM] 正在调用 DeepSeek API... 模型: {self.model_name}")
try:
async with httpx.AsyncClient(timeout=60.0) as client:
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post(
f"{self.base_url}/chat/completions",
headers=headers,
@@ -84,7 +84,7 @@ class LLMService:
pass
raise
except Exception as e:
logger.error(f"LLM API 调用异常: {str(e)}")
logger.error(f"LLM API 调用异常: {repr(e)} - {str(e)}")
raise
def extract_message_content(self, response: Dict[str, Any]) -> str:

View File

@@ -19,6 +19,7 @@ class TxtAIService:
def __init__(self):
self.parser = TxtParser()
self.llm = llm_service
async def analyze_txt_with_ai(
self,
@@ -114,7 +115,7 @@ class TxtAIService:
response = await self.llm.chat(
messages=messages,
temperature=0.1,
max_tokens=50000
max_tokens=8000
)
content_text = self.llm.extract_message_content(response)
@@ -220,7 +221,7 @@ class TxtAIService:
response = await self.llm.chat(
messages=messages,
temperature=0.1,
max_tokens=50000
max_tokens=8000
)
content_text = self.llm.extract_message_content(response)

View File

@@ -53,7 +53,11 @@ class VisualizationService:
}
# 转换为 DataFrame
df = pd.DataFrame(rows, columns=columns)
# 过滤掉行数与列数不匹配的数据
valid_rows = [row for row in rows if len(row) == len(columns)]
if len(valid_rows) < len(rows):
logger.warning(f"过滤了 {len(rows) - len(valid_rows)} 行无效数据(列数不匹配)")
df = pd.DataFrame(valid_rows, columns=columns)
# 根据列类型分类
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
@@ -141,18 +145,18 @@ class VisualizationService:
charts = {}
# 1. 数值型列的直方图
charts["histograms"] = []
charts["numeric_charts"] = []
for col in numeric_columns[:5]: # 限制最多 5 个数值列
chart_data = self._create_histogram(df[col], col)
if chart_data:
charts["histograms"].append(chart_data)
charts["numeric_charts"].append(chart_data)
# 2. 分类型列的条形图
charts["bar_charts"] = []
charts["categorical_charts"] = []
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
chart_data = self._create_bar_chart(df[col], col)
if chart_data:
charts["bar_charts"].append(chart_data)
charts["categorical_charts"].append(chart_data)
# 3. 数值型列的箱线图
charts["box_plots"] = []

View File

@@ -184,7 +184,7 @@ class WordAIService:
response = await self.llm.chat(
messages=messages,
temperature=0.1,
max_tokens=50000
max_tokens=8000
)
content = self.llm.extract_message_content(response)
@@ -276,7 +276,7 @@ class WordAIService:
response = await self.llm.chat(
messages=messages,
temperature=0.1,
max_tokens=50000
max_tokens=8000
)
content = self.llm.extract_message_content(response)
@@ -849,10 +849,12 @@ class WordAIService:
# 提取可用于图表的数据
chart_data = None
logger.info(f"准备提取图表数据structured_data type: {structured_data.get('type')}, keys: {list(structured_data.keys())}")
if structured_data.get("type") == "table_data":
headers = structured_data.get("headers", [])
rows = structured_data.get("rows", [])
logger.info(f"table_data类型: headers数量={len(headers)}, rows数量={len(rows)}")
if headers and rows:
chart_data = {
"columns": headers,
@@ -860,15 +862,19 @@ class WordAIService:
}
elif structured_data.get("type") == "structured_text":
tables_data = structured_data.get("tables", [])
logger.info(f"structured_text类型: tables数量={len(tables_data)}")
if tables_data and len(tables_data) > 0:
first_table = tables_data[0]
headers = first_table.get("headers", [])
rows = first_table.get("rows", [])
logger.info(f"第一个表格: headers={headers[:5]}, rows数量={len(rows)}")
if headers and rows:
chart_data = {
"columns": headers,
"rows": rows
}
else:
logger.warning(f"无法识别的structured_data类型: {structured_data.get('type')}")
# 生成可视化图表
if chart_data:
@@ -904,3 +910,6 @@ class WordAIService:
"success": False,
"error": str(e)
}
word_ai_service = WordAIService()

View File

@@ -1187,11 +1187,19 @@ export const aiApi = {
* 上传并使用 AI 分析 Excel 文件
*/
async analyzeExcel(
file: File,
options: AIAnalyzeOptions = {}
file: File | null,
options: AIAnalyzeOptions = {},
docId: string | null = null
): Promise<AIExcelAnalyzeResult> {
const formData = new FormData();
formData.append('file', file);
if (docId) {
formData.append('doc_id', docId);
} else if (file) {
formData.append('file', file);
} else {
throw new Error('必须提供文件或文档ID');
}
const params = new URLSearchParams();
if (options.userPrompt) {
@@ -1268,7 +1276,9 @@ export const aiApi = {
try {
const response = await fetch(url);
if (!response.ok) throw new Error('获取分析类型失败');
return await response.json();
const data = await response.json();
// 转换后端返回格式 {excel_types: [], markdown_types: []} 为前端期望的 {types: []}
return { types: data.excel_types || [] };
} catch (error) {
console.error('获取分析类型失败:', error);
throw error;

View File

@@ -472,11 +472,17 @@ const Documents: React.FC = () => {
setAnalysisCharts(null);
try {
const result = await aiApi.analyzeExcel(uploadedFile, {
userPrompt: aiOptions.userPrompt,
analysisType: aiOptions.analysisType,
parseAllSheets: aiOptions.parseAllSheetsForAI
});
// 判断是从历史文档还是本地上传
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
const result = await aiApi.analyzeExcel(
uploadedFile.size > 0 ? uploadedFile : null,
{
userPrompt: aiOptions.userPrompt,
analysisType: aiOptions.analysisType,
parseAllSheets: aiOptions.parseAllSheetsForAI
},
docId
);
if (result.success) {
toast.success('AI 分析完成');
@@ -706,6 +712,12 @@ const Documents: React.FC = () => {
const handleSelectDocument = async (docId: string) => {
setLoadingDocument(true);
// 重置所有 AI 分析结果,避免显示上一个文档的分析
setAiAnalysis(null);
setAnalysisCharts(null);
setMdAnalysis(null);
setWordAnalysis(null);
setTxtAnalysis(null);
try {
const result = await backendApi.getDocument(docId);
if (result.success && result.document) {
@@ -2264,39 +2276,57 @@ const Documents: React.FC = () => {
);
};
// 数据表格组件
// 数据表格组件 - 滑动窗口样式
const DataTable: React.FC<{ columns: string[]; rows: Record<string, any>[] }> = ({ columns, rows }) => {
if (!columns.length || !rows.length) {
return <div className="text-center py-8 text-muted-foreground text-sm"></div>;
}
const displayRows = rows.slice(0, 500); // 限制最多显示500行
return (
<div className="rounded-lg border overflow-x-auto">
<TableComponent>
<TableHeader>
<TableRow>
<TableHead className="w-16 text-center text-muted-foreground">#</TableHead>
{columns.map((col, idx) => (
<TableHead key={idx} className="whitespace-nowrap">{col || `<列${idx + 1}>`}</TableHead>
))}
</TableRow>
</TableHeader>
<TableBody>
{rows.slice(0, 100).map((row, rowIdx) => (
<TableRow key={rowIdx}>
<TableCell className="text-center text-muted-foreground font-medium">{rowIdx + 1}</TableCell>
{columns.map((col, colIdx) => (
<TableCell key={colIdx} className="whitespace-nowrap">
{row[col] !== null && row[col] !== undefined ? String(row[col]) : '-'}
</TableCell>
<div className="rounded-lg border overflow-hidden">
{/* 表头 - 固定 */}
<div className="overflow-x-auto">
<TableComponent>
<TableHeader>
<TableRow className="bg-muted/50">
<TableHead className="w-16 text-center text-muted-foreground">#</TableHead>
{columns.map((col, idx) => (
<TableHead key={idx} className="whitespace-nowrap">{col || `<列${idx + 1}>`}</TableHead>
))}
</TableRow>
))}
</TableBody>
</TableComponent>
{rows.length > 100 && (
</TableHeader>
</TableComponent>
</div>
{/* 表体 - 可滚动 */}
<div
className="overflow-y-auto"
style={{ maxHeight: '400px' }}
>
<TableComponent>
<TableBody>
{displayRows.map((row, rowIdx) => (
<TableRow key={rowIdx}>
<TableCell className="text-center text-muted-foreground font-medium w-16">{rowIdx + 1}</TableCell>
{columns.map((col, colIdx) => (
<TableCell key={colIdx} className="whitespace-nowrap">
{row[col] !== null && row[col] !== undefined ? String(row[col]) : '-'}
</TableCell>
))}
</TableRow>
))}
</TableBody>
</TableComponent>
</div>
{rows.length > 500 && (
<div className="p-3 text-center text-sm text-muted-foreground bg-muted/30">
100
500 {rows.length}
</div>
)}
{rows.length > 100 && rows.length <= 500 && (
<div className="p-2 text-center text-xs text-muted-foreground bg-muted/20">
{rows.length}
</div>
)}
</div>