支持从数据库读取文档进行AI分析

新增 doc_id 参数支持从数据库读取文档内容,同时保留文件上传功能,
实现两种方式的灵活切换。修改了 Markdown、TXT 和 Word 文档的分析接口,
添加从数据库获取文档的逻辑,并相应更新前端 API 调用。

BREAKING CHANGE: 分析接口现在支持文件上传和数据库文档 ID 两种方式(二选一,同时提供时优先使用 doc_id);前端 analyzeTxt 与 analyzeWordWithAI 在第二个位置新增 docId 参数,已有调用需相应调整参数顺序
This commit is contained in:
2026-04-16 19:43:43 +08:00
parent 2adf9aef60
commit c2f50d3bd8
5 changed files with 762 additions and 120 deletions

View File

@@ -154,8 +154,9 @@ async def analyze_text(
@router.post("/analyze/md") @router.post("/analyze/md")
async def analyze_markdown( async def analyze_markdown(
file: UploadFile = File(...), file: Optional[UploadFile] = File(None),
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"), doc_id: Optional[str] = Query(None, description="文档ID从数据库读取"),
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
user_prompt: str = Query("", description="用户自定义提示词"), user_prompt: str = Query("", description="用户自定义提示词"),
section_number: Optional[str] = Query(None, description="指定章节编号,如 '''(一)'") section_number: Optional[str] = Query(None, description="指定章节编号,如 '''(一)'")
): ):
@@ -163,7 +164,8 @@ async def analyze_markdown(
上传并使用 AI 分析 Markdown 文件 上传并使用 AI 分析 Markdown 文件
Args: Args:
file: 上传的 Markdown 文件 file: 上传的 Markdown 文件(与 doc_id 二选一)
doc_id: 文档ID从数据库读取
analysis_type: 分析类型 analysis_type: 分析类型
user_prompt: 用户自定义提示词 user_prompt: 用户自定义提示词
section_number: 指定分析的章节编号 section_number: 指定分析的章节编号
@@ -171,16 +173,8 @@ async def analyze_markdown(
Returns: Returns:
dict: 分析结果 dict: 分析结果
""" """
# 检查文件类型 filename = None
if not file.filename: tmp_path = None
raise HTTPException(status_code=400, detail="文件名为空")
file_ext = file.filename.split('.')[-1].lower()
if file_ext not in ['md', 'markdown']:
raise HTTPException(
status_code=400,
detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
)
# 验证分析类型 # 验证分析类型
supported_types = markdown_ai_service.get_supported_analysis_types() supported_types = markdown_ai_service.get_supported_analysis_types()
@@ -190,46 +184,96 @@ async def analyze_markdown(
detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}" detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
) )
try: if doc_id:
# 读取文件内容 # 从数据库读取文档
content = await file.read()
# 保存到临时文件
with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
tmp.write(content)
tmp_path = tmp.name
try: try:
logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}") from app.core.database.mongodb import mongodb
doc = await mongodb.get_document(doc_id)
if not doc:
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
# 调用 AI 分析服务 filename = doc.get("metadata", {}).get("original_filename", "unknown.md")
result = await markdown_ai_service.analyze_markdown( file_ext = filename.split('.')[-1].lower()
file_path=tmp_path,
analysis_type=analysis_type, if file_ext not in ['md', 'markdown']:
user_prompt=user_prompt, raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
section_number=section_number
content = doc.get("content", "")
if not content:
raise HTTPException(status_code=400, detail="文档内容为空")
# 保存到临时文件
with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
tmp.write(content.encode('utf-8'))
tmp_path = tmp.name
logger.info(f"从数据库加载 Markdown 文档: {filename}, 长度: {len(content)}")
except HTTPException:
raise
except Exception as e:
logger.error(f"从数据库读取 Markdown 文档失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
else:
# 文件上传模式
if not file:
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
if not file.filename:
raise HTTPException(status_code=400, detail="文件名为空")
file_ext = file.filename.split('.')[-1].lower()
if file_ext not in ['md', 'markdown']:
raise HTTPException(
status_code=400,
detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
) )
logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}") try:
# 读取文件内容
content = await file.read()
if not result['success']: # 保存到临时文件
raise HTTPException(status_code=500, detail=result.get('error', '分析失败')) with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
tmp.write(content)
tmp_path = tmp.name
return result filename = file.filename
finally: except Exception as e:
# 清理临时文件,确保在所有情况下都能清理 logger.error(f"读取 Markdown 文件失败: {str(e)}")
try: raise HTTPException(status_code=500, detail=f"读取文件失败: {str(e)}")
if tmp_path and os.path.exists(tmp_path):
os.unlink(tmp_path) try:
except Exception as cleanup_error: logger.info(f"开始分析 Markdown 文件: {filename}, 分析类型: {analysis_type}, 章节: {section_number}")
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
# 调用 AI 分析服务
result = await markdown_ai_service.analyze_markdown(
file_path=tmp_path,
analysis_type=analysis_type,
user_prompt=user_prompt,
section_number=section_number
)
logger.info(f"Markdown 分析完成: {filename}, 成功: {result['success']}")
if not result['success']:
raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))
return result
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
logger.error(f"Markdown AI 分析过程中出错: {str(e)}") logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}") raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
finally:
# 清理临时文件
if tmp_path and os.path.exists(tmp_path):
try:
os.unlink(tmp_path)
except Exception as cleanup_error:
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
@router.post("/analyze/md/stream") @router.post("/analyze/md/stream")
@@ -347,7 +391,8 @@ async def get_markdown_outline(
@router.post("/analyze/txt") @router.post("/analyze/txt")
async def analyze_txt( async def analyze_txt(
file: UploadFile = File(...), file: Optional[UploadFile] = File(None),
doc_id: Optional[str] = Query(None, description="文档ID从数据库读取"),
analysis_type: str = Query("structured", description="分析类型: structured, charts") analysis_type: str = Query("structured", description="分析类型: structured, charts")
): ):
""" """
@@ -357,63 +402,89 @@ async def analyze_txt(
当 analysis_type=charts 时,可生成可视化图表 当 analysis_type=charts 时,可生成可视化图表
Args: Args:
file: 上传的 TXT 文件 file: 上传的 TXT 文件(与 doc_id 二选一)
doc_id: 文档ID从数据库读取
analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表) analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表)
Returns: Returns:
dict: 分析结果,包含结构化表格数据或图表数据 dict: 分析结果,包含结构化表格数据或图表数据
""" """
if not file.filename: filename = None
raise HTTPException(status_code=400, detail="文件名为空") text_content = None
file_ext = file.filename.split('.')[-1].lower() if doc_id:
if file_ext not in ['txt', 'text']: # 从数据库读取文档
raise HTTPException( try:
status_code=400, from app.core.database.mongodb import mongodb
detail=f"不支持的文件类型: {file_ext},仅支持 .txt" doc = await mongodb.get_document(doc_id)
) if not doc:
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
filename = doc.get("metadata", {}).get("original_filename", "unknown.txt")
file_ext = filename.split('.')[-1].lower()
if file_ext not in ['txt', 'text']:
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
# 使用数据库中的 content
text_content = doc.get("content", "")
if not text_content:
raise HTTPException(status_code=400, detail="文档内容为空")
logger.info(f"从数据库加载 TXT 文档: {filename}, 长度: {len(text_content)}")
except HTTPException:
raise
except Exception as e:
logger.error(f"从数据库读取 TXT 文档失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
else:
# 文件上传模式
if not file:
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
if not file.filename:
raise HTTPException(status_code=400, detail="文件名为空")
file_ext = file.filename.split('.')[-1].lower()
if file_ext not in ['txt', 'text']:
raise HTTPException(
status_code=400,
detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
)
try:
# 读取文件内容 # 读取文件内容
content = await file.read() content = await file.read()
text_content = content.decode('utf-8', errors='replace') text_content = content.decode('utf-8', errors='replace')
filename = file.filename
# 保存到临时文件 try:
with tempfile.NamedTemporaryFile(mode='wb', suffix='.txt', delete=False) as tmp: logger.info(f"开始 AI 分析 TXT 文件: {filename}, analysis_type={analysis_type}")
tmp.write(content)
tmp_path = tmp.name
try: # 使用 txt_ai_service 的 AI 分析方法
logger.info(f"开始 AI 分析 TXT 文件: {file.filename}, analysis_type={analysis_type}") result = await txt_ai_service.analyze_txt_with_ai(
content=text_content,
filename=filename,
analysis_type=analysis_type
)
# 使用 txt_ai_service 的 AI 分析方法 if result:
result = await txt_ai_service.analyze_txt_with_ai( logger.info(f"TXT AI 分析成功: {filename}")
content=text_content, return {
filename=file.filename, "success": result.get("success", True),
analysis_type=analysis_type "filename": filename,
) "analysis_type": analysis_type,
"result": result
if result: }
logger.info(f"TXT AI 分析成功: {file.filename}") else:
return { logger.warning(f"TXT AI 分析返回空结果: {filename}")
"success": result.get("success", True), return {
"filename": file.filename, "success": False,
"analysis_type": analysis_type, "filename": filename,
"result": result "error": "AI 分析未能提取到结构化数据",
} "result": None
else: }
logger.warning(f"TXT AI 分析返回空结果: {file.filename}")
return {
"success": False,
"filename": file.filename,
"error": "AI 分析未能提取到结构化数据",
"result": None
}
finally:
# 清理临时文件
if os.path.exists(tmp_path):
os.unlink(tmp_path)
except HTTPException: except HTTPException:
raise raise
@@ -426,7 +497,8 @@ async def analyze_txt(
@router.post("/analyze/word") @router.post("/analyze/word")
async def analyze_word( async def analyze_word(
file: UploadFile = File(...), file: Optional[UploadFile] = File(None),
doc_id: Optional[str] = Query(None, description="文档ID从数据库读取"),
user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"), user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"),
analysis_type: str = Query("structured", description="分析类型: structured, charts") analysis_type: str = Query("structured", description="分析类型: structured, charts")
): ):
@@ -437,13 +509,77 @@ async def analyze_word(
当 analysis_type=charts 时,可生成可视化图表 当 analysis_type=charts 时,可生成可视化图表
Args: Args:
file: 上传的 Word 文件 file: 上传的 Word 文件(与 doc_id 二选一)
doc_id: 文档ID从数据库读取
user_hint: 用户提示词 user_hint: 用户提示词
analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表) analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表)
Returns: Returns:
dict: 包含结构化数据的解析结果或图表数据 dict: 包含结构化数据的解析结果或图表数据
""" """
# 获取文件名和扩展名
filename = None
file_ext = None
if doc_id:
# 从数据库读取文档
try:
from app.core.database.mongodb import mongodb
doc = await mongodb.get_document(doc_id)
if not doc:
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
filename = doc.get("metadata", {}).get("original_filename", "unknown.docx")
file_ext = filename.split('.')[-1].lower()
if file_ext not in ['docx']:
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
# 使用数据库中的 content 进行分析
content = doc.get("content", "")
tables = doc.get("structured_data", {}).get("tables", [])
# 调用 AI 分析服务,传入数据库内容
if analysis_type == "charts":
result = await word_ai_service.generate_charts_from_db(
content=content,
tables=tables,
filename=filename,
user_hint=user_hint
)
else:
result = await word_ai_service.parse_word_with_ai_from_db(
content=content,
tables=tables,
filename=filename,
user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
)
if result.get("success"):
return {
"success": True,
"filename": filename,
"analysis_type": analysis_type,
"result": result
}
else:
return {
"success": False,
"filename": filename,
"error": result.get("error", "AI 解析失败"),
"result": None
}
except HTTPException:
raise
except Exception as e:
logger.error(f"从数据库读取 Word 文档失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
# 文件上传模式
if not file:
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
if not file.filename: if not file.filename:
raise HTTPException(status_code=400, detail="文件名为空") raise HTTPException(status_code=400, detail="文件名为空")

View File

@@ -405,7 +405,7 @@ async def process_documents_batch(task_id: str, files: List[dict]):
if content and len(content) > 50: if content and len(content) > 50:
await index_document_to_rag(doc_id, filename, result, file_info["ext"]) await index_document_to_rag(doc_id, filename, result, file_info["ext"])
return {"index": index, "filename": filename, "doc_id": doc_id, "success": True} return {"index": index, "filename": filename, "doc_id": doc_id, "file_path": file_info["path"], "success": True}
except Exception as e: except Exception as e:
logger.error(f"处理文件 {filename} 失败: {e}") logger.error(f"处理文件 {filename} 失败: {e}")

View File

@@ -757,5 +757,150 @@ class WordAIService:
} }
# 全局单例 async def parse_word_with_ai_from_db(
word_ai_service = WordAIService() self,
content: str,
tables: List[Dict],
filename: str = "",
user_hint: str = ""
) -> Dict[str, Any]:
"""
使用 AI 解析从数据库读取的 Word 文档内容,提取结构化数据
Args:
content: 文档文本内容
tables: 表格数据列表
filename: 文件名
user_hint: 用户提示词
Returns:
Dict: 包含结构化数据的解析结果
"""
try:
# 解析段落
paragraphs = [p.strip() for p in content.split('\n') if p.strip()]
logger.info(f"从数据库解析 Word: {len(paragraphs)} 个段落, {len(tables)} 个表格")
# 优先处理表格数据
if tables and len(tables) > 0:
structured_data = await self._extract_tables_with_ai(
tables, paragraphs, 0, user_hint, {"filename": filename}
)
elif paragraphs and len(paragraphs) > 0:
structured_data = await self._extract_from_text_with_ai(
paragraphs, content, 0, [], user_hint
)
else:
structured_data = {
"success": True,
"type": "empty",
"message": "文档内容为空"
}
return structured_data
except Exception as e:
logger.error(f"从数据库解析 Word 文档失败: {str(e)}")
return {
"success": False,
"error": str(e)
}
async def generate_charts_from_db(
    self,
    content: str,
    tables: List[Dict],
    filename: str = "",
    user_hint: str = ""
) -> Dict[str, Any]:
    """
    Build visualization charts from a Word document already stored in the database.

    The text is split into non-empty paragraphs. When ``tables`` is non-empty
    the tables are sent to the AI table extractor; otherwise the paragraphs
    are sent to the AI text extractor. The first usable headers/rows pair in
    the extraction result is passed to the visualization service.

    Args:
        content: Plain-text content of the document.
        tables: Table data stored alongside the document.
        filename: Original file name, forwarded to the table extractor.
        user_hint: Optional user prompt forwarded to the extractor.

    Returns:
        Dict: On success, ``success=True`` plus ``charts``, ``statistics``,
        ``distributions``, ``structured_data``, ``row_count`` and
        ``column_count``. On failure, ``success=False`` with an ``error``
        message (and ``structured_data`` when extraction already ran).
        Exceptions are logged and reported through the same failure shape.
    """
    try:
        def _as_chart_input(source):
            # A table is usable for charting only when it has both headers and rows.
            column_names = source.get("headers", [])
            data_rows = source.get("rows", [])
            if column_names and data_rows:
                return {"columns": column_names, "rows": data_rows}
            return None

        # Split the text into trimmed, non-empty paragraphs.
        paragraphs = []
        for raw_line in content.split('\n'):
            trimmed = raw_line.strip()
            if trimmed:
                paragraphs.append(trimmed)

        logger.info(f"从数据库生成 Word 图表: {len(paragraphs)} 个段落, {len(tables)} 个表格")

        # Tables take priority over free text as the extraction source.
        if tables:
            extracted = await self._extract_tables_with_ai(
                tables, paragraphs, 0, user_hint, {"filename": filename}
            )
        elif paragraphs:
            extracted = await self._extract_from_text_with_ai(
                paragraphs, content, 0, [], user_hint
            )
        else:
            return {
                "success": False,
                "error": "文档内容为空"
            }

        # Pick the table to chart according to the shape of the extraction result.
        result_kind = extracted.get("type")
        if result_kind == "table_data":
            chart_input = _as_chart_input(extracted)
        elif result_kind == "structured_text":
            extracted_tables = extracted.get("tables", [])
            # Only the first extracted table is charted.
            chart_input = _as_chart_input(extracted_tables[0]) if extracted_tables else None
        else:
            chart_input = None

        if chart_input is None:
            return {
                "success": False,
                "error": "文档中没有可用于图表的表格数据",
                "structured_data": extracted
            }

        logger.info(f"开始生成图表,列数: {len(chart_input['columns'])}, 行数: {len(chart_input['rows'])}")
        vis_outcome = visualization_service.analyze_and_visualize(chart_input)

        if not vis_outcome.get("success"):
            return {
                "success": False,
                "error": vis_outcome.get("error", "可视化生成失败"),
                "structured_data": extracted
            }

        return {
            "success": True,
            "charts": vis_outcome.get("charts", {}),
            "statistics": vis_outcome.get("statistics", {}),
            "distributions": vis_outcome.get("distributions", {}),
            "structured_data": extracted,
            "row_count": vis_outcome.get("row_count", 0),
            "column_count": vis_outcome.get("column_count", 0)
        }
    except Exception as exc:
        logger.error(f"从数据库生成 Word 图表失败: {str(exc)}")
        return {
            "success": False,
            "error": str(exc)
        }

View File

@@ -1279,15 +1279,21 @@ export const aiApi = {
* 上传并使用 AI 分析 Markdown 文件 * 上传并使用 AI 分析 Markdown 文件
*/ */
async analyzeMarkdown( async analyzeMarkdown(
file: File, file: File | null,
options: { options: {
docId?: string;
analysisType?: MarkdownAnalysisType; analysisType?: MarkdownAnalysisType;
userPrompt?: string; userPrompt?: string;
sectionNumber?: string; sectionNumber?: string;
} = {} } = {}
): Promise<AIMarkdownAnalyzeResult> { ): Promise<AIMarkdownAnalyzeResult> {
const formData = new FormData(); const formData = new FormData();
formData.append('file', file); if (file) {
formData.append('file', file);
}
if (options.docId) {
formData.append('doc_id', options.docId);
}
const params = new URLSearchParams(); const params = new URLSearchParams();
if (options.analysisType) { if (options.analysisType) {
@@ -1432,7 +1438,8 @@ export const aiApi = {
* 上传并使用 AI 分析 TXT 文本文件,提取结构化数据或生成图表 * 上传并使用 AI 分析 TXT 文本文件,提取结构化数据或生成图表
*/ */
async analyzeTxt( async analyzeTxt(
file: File, file: File | null,
docId: string | null = null,
analysisType: TxtAnalysisType = 'structured' analysisType: TxtAnalysisType = 'structured'
): Promise<{ ): Promise<{
success: boolean; success: boolean;
@@ -1442,7 +1449,12 @@ export const aiApi = {
error?: string; error?: string;
}> { }> {
const formData = new FormData(); const formData = new FormData();
formData.append('file', file); if (file) {
formData.append('file', file);
}
if (docId) {
formData.append('doc_id', docId);
}
const params = new URLSearchParams(); const params = new URLSearchParams();
params.append('analysis_type', analysisType); params.append('analysis_type', analysisType);
@@ -1572,7 +1584,8 @@ export const aiApi = {
* 使用 AI 解析 Word 文档,提取结构化数据或生成图表 * 使用 AI 解析 Word 文档,提取结构化数据或生成图表
*/ */
async analyzeWordWithAI( async analyzeWordWithAI(
file: File, file: File | null,
docId: string | null = null,
userHint: string = '', userHint: string = '',
analysisType: WordAnalysisType = 'structured' analysisType: WordAnalysisType = 'structured'
): Promise<{ ): Promise<{
@@ -1583,7 +1596,12 @@ export const aiApi = {
error?: string; error?: string;
}> { }> {
const formData = new FormData(); const formData = new FormData();
formData.append('file', file); if (file) {
formData.append('file', file);
}
if (docId) {
formData.append('doc_id', docId);
}
if (userHint) { if (userHint) {
formData.append('user_hint', userHint); formData.append('user_hint', userHint);
} }

View File

@@ -10,7 +10,7 @@ import {
ChevronDown, ChevronDown,
ChevronUp, ChevronUp,
FileSpreadsheet, FileSpreadsheet,
File, File as FileIcon,
Table, Table,
CheckCircle, CheckCircle,
AlertCircle, AlertCircle,
@@ -123,6 +123,17 @@ const Documents: React.FC = () => {
const [ragResults, setRagResults] = useState<any[]>([]); const [ragResults, setRagResults] = useState<any[]>([]);
const [ragRebuilding, setRagRebuilding] = useState(false); const [ragRebuilding, setRagRebuilding] = useState(false);
// 选中的文档详情
const [selectedDocument, setSelectedDocument] = useState<{
doc_id: string;
original_filename: string;
doc_type: string;
content?: string;
structured_data?: any;
metadata?: any;
} | null>(null);
const [loadingDocument, setLoadingDocument] = useState(false);
// 解析选项 // 解析选项
const [parseOptions, setParseOptions] = useState({ const [parseOptions, setParseOptions] = useState({
parseAllSheets: false, parseAllSheets: false,
@@ -277,6 +288,33 @@ const Documents: React.FC = () => {
return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' }; return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' };
} }
})); }));
// 设置第一个成功文件的 uploadedFile
const firstSuccessIdx = fileResults.findIndex((fr: any) => fr?.success);
if (firstSuccessIdx >= 0 && acceptedFiles[firstSuccessIdx]) {
const firstFile = acceptedFiles[firstSuccessIdx];
const firstResult = fileResults[firstSuccessIdx];
const ext = firstFile.name.split('.').pop()?.toLowerCase();
// 设置 uploadedFile
setUploadedFile(firstFile);
// 对于 Excel 文件,获取 parseResult
if (ext === 'xlsx' || ext === 'xls') {
// 调用 parseDocument 获取 parseResult
if (firstResult?.file_path) {
try {
const parseResult = await backendApi.parseDocument(firstResult.file_path);
if (parseResult.success) {
setParseResult(parseResult as any);
}
} catch (parseErr) {
console.warn('获取 parseResult 失败:', parseErr);
}
}
}
}
loadDocuments(); loadDocuments();
return; return;
} else if (status.status === 'failure') { } else if (status.status === 'failure') {
@@ -455,24 +493,79 @@ const Documents: React.FC = () => {
// 基于 AI 分析生成图表 // 基于 AI 分析生成图表
const handleGenerateCharts = async () => { const handleGenerateCharts = async () => {
if (!aiAnalysis || !aiAnalysis.success) { // 检查是否有任何 AI 分析结果
const hasExcelAI = aiAnalysis?.success;
const hasMdAI = mdAnalysis?.success;
const hasWordAI = wordAnalysis?.success;
const hasTxtAI = txtAnalysis?.success;
if (!hasExcelAI && !hasMdAI && !hasWordAI && !hasTxtAI) {
toast.error('请先进行 AI 分析'); toast.error('请先进行 AI 分析');
return; return;
} }
// 如果是 Markdown 分析已有图表,直接显示
if (hasMdAI && mdAnalysis?.chart_data?.tables) {
setAnalysisCharts({
success: true,
charts: { tables: mdAnalysis.chart_data.tables },
statistics: mdAnalysis.chart_data.key_statistics
});
toast.success('图表生成完成');
return;
}
// 如果是 Word 分析已有图表,直接显示
if (hasWordAI && wordAnalysis?.result?.charts) {
setAnalysisCharts({
success: true,
charts: wordAnalysis.result.charts,
statistics: wordAnalysis.result.statistics
});
toast.success('图表生成完成');
return;
}
// 如果是 TXT 分析已有图表,直接显示
if (hasTxtAI && txtAnalysis?.result?.charts) {
setAnalysisCharts({
success: true,
charts: txtAnalysis.result.charts,
statistics: txtAnalysis.result.statistics
});
toast.success('图表生成完成');
return;
}
// 尝试从各种分析结果中提取文本并生成图表
let analysisText = ''; let analysisText = '';
if (aiAnalysis.analysis?.analysis) { let fileType = 'unknown';
analysisText = aiAnalysis.analysis.analysis;
} else if (aiAnalysis.analysis?.sheets) { if (hasExcelAI) {
const sheets = aiAnalysis.analysis.sheets; if (aiAnalysis.analysis?.analysis) {
if (sheets && Object.keys(sheets).length > 0) { analysisText = aiAnalysis.analysis.analysis;
const firstSheet = Object.keys(sheets)[0]; fileType = 'excel';
analysisText = sheets[firstSheet]?.analysis || ''; } else if (aiAnalysis.analysis?.sheets) {
const sheets = aiAnalysis.analysis.sheets;
if (sheets && Object.keys(sheets).length > 0) {
const firstSheet = Object.keys(sheets)[0];
analysisText = sheets[firstSheet]?.analysis || '';
fileType = 'excel';
}
} }
} else if (hasMdAI && mdAnalysis?.analysis) {
analysisText = mdAnalysis.analysis;
fileType = 'markdown';
} else if (hasWordAI && wordAnalysis?.result?.summary) {
analysisText = wordAnalysis.result.summary;
fileType = 'word';
} else if (hasTxtAI && txtAnalysis?.result?.summary) {
analysisText = txtAnalysis.result.summary;
fileType = 'txt';
} }
if (!analysisText?.trim()) { if (!analysisText?.trim()) {
toast.error('无法获取 AI 分析结果'); toast.error('无法获取 AI 分析文本结果');
return; return;
} }
@@ -483,7 +576,7 @@ const Documents: React.FC = () => {
const result = await aiApi.extractAndGenerateCharts({ const result = await aiApi.extractAndGenerateCharts({
analysis_text: analysisText, analysis_text: analysisText,
original_filename: uploadedFile?.name || 'unknown', original_filename: uploadedFile?.name || 'unknown',
file_type: 'excel' file_type: fileType
}); });
if (result.success) { if (result.success) {
@@ -601,6 +694,9 @@ const Documents: React.FC = () => {
const result = await backendApi.deleteDocument(docId); const result = await backendApi.deleteDocument(docId);
if (result.success) { if (result.success) {
setDocuments(prev => prev.filter(d => d.doc_id !== docId)); setDocuments(prev => prev.filter(d => d.doc_id !== docId));
if (selectedDocument?.doc_id === docId) {
setSelectedDocument(null);
}
toast.success('文档已删除'); toast.success('文档已删除');
} }
} catch (err: any) { } catch (err: any) {
@@ -608,6 +704,95 @@ const Documents: React.FC = () => {
} }
}; };
/**
 * Load a stored document by id and populate the page state from it.
 *
 * Fetches the document with backendApi.getDocument and stores it as the
 * selected document. parseResult is then filled either from
 * backendApi.parseDocument (when metadata.file_path is present and parsing
 * succeeds) or, as a fallback, from the document's stored structured_data or
 * content. In both paths an empty File carrying the original filename is set
 * as uploadedFile. Failures are reported through toast.error, and
 * loadingDocument is cleared in the finally block.
 */
const handleSelectDocument = async (docId: string) => {
setLoadingDocument(true);
try {
const result = await backendApi.getDocument(docId);
if (result.success && result.document) {
setSelectedDocument(result.document);
const doc = result.document;
// Preferred path: re-parse the stored file via file_path to get the full parse result.
const filePath = doc.metadata?.file_path;
if (filePath) {
try {
const parseResult = await backendApi.parseDocument(filePath);
if (parseResult.success) {
setParseResult(parseResult as any);
// Extension comes from the filename; doc_type is used only when that yields nothing.
const ext = doc.original_filename.split('.').pop()?.toLowerCase() || doc.doc_type;
// Placeholder File with no content: only its name and MIME type are meaningful.
const fakeFile = new File([], doc.original_filename, { type: getMimeType(ext) });
setUploadedFile(fakeFile);
toast.success('已加载文档: ' + doc.original_filename);
// Redundant with the finally block below, which also runs on this return.
setLoadingDocument(false);
return;
} else {
console.warn('parseDocument returned success:false, using fallback');
}
} catch (parseErr) {
// Any exception raised inside the try above (not only from parseDocument)
// is logged here and execution continues with the fallback below.
console.warn('parseDocument failed, fallback to structured_data:', parseErr);
}
}
// Fallback: build a parseResult from the data stored with the document.
const ext = doc.original_filename.split('.').pop()?.toLowerCase() || doc.doc_type;
const fakeFile = new File([], doc.original_filename, { type: getMimeType(ext) });
if (doc.structured_data) {
const mockParseResult: ExcelParseResult = {
success: true,
data: {},
metadata: {
// NOTE(review): the selectedDocument state type in this component declares no
// `filename` field — confirm the API actually returns one.
filename: doc.filename,
original_filename: doc.original_filename,
extension: doc.doc_type,
doc_type: doc.doc_type as any,
file_size: doc.metadata?.file_size || 0,
}
};
// Only the first stored table is exposed as tabular data.
if (doc.structured_data.tables && doc.structured_data.tables.length > 0) {
const firstTable = doc.structured_data.tables[0];
mockParseResult.data = {
columns: firstTable.headers || [],
// Turn each row array into an object keyed by header; missing or falsy cells become ''.
rows: (firstTable.rows || []).map((row: string[]) => {
const obj: Record<string, any> = {};
(firstTable.headers || []).forEach((h: string, i: number) => {
obj[h] = row[i] || '';
});
return obj;
}),
row_count: firstTable.rows?.length || 0,
column_count: firstTable.headers?.length || 0,
};
}
if (doc.structured_data.sheets) {
mockParseResult.data.sheets = doc.structured_data.sheets;
}
setParseResult(mockParseResult);
} else if (doc.content) {
// Text-only document: expose the raw content as the parse result.
setParseResult({
success: true,
data: { content: doc.content },
metadata: {
filename: doc.filename,
original_filename: doc.original_filename,
extension: doc.doc_type,
doc_type: doc.doc_type as any,
file_size: doc.metadata?.file_size || 0,
}
});
}
// When the document has neither structured_data nor content, parseResult is left unchanged.
setUploadedFile(fakeFile);
toast.success('已加载文档: ' + doc.original_filename);
} else {
toast.error(result.error || '获取文档详情失败');
}
} catch (err: any) {
toast.error(err.message || '获取文档详情失败');
} finally {
setLoadingDocument(false);
}
};
const filteredDocs = documents.filter(doc => const filteredDocs = documents.filter(doc =>
doc.original_filename.toLowerCase().includes(search.toLowerCase()) doc.original_filename.toLowerCase().includes(search.toLowerCase())
); );
@@ -621,7 +806,7 @@ const Documents: React.FC = () => {
case 'doc': case 'doc':
return <FileText size={28} />; return <FileText size={28} />;
default: default:
return <File size={28} />; return <FileIcon size={28} />;
} }
}; };
@@ -641,11 +826,17 @@ const Documents: React.FC = () => {
setMdAnalysis(null); setMdAnalysis(null);
try { try {
const result = await aiApi.analyzeMarkdown(uploadedFile, { // 判断是从历史文档还是本地上传
analysisType: mdAnalysisType, const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : undefined;
userPrompt: mdUserPrompt, const result = await aiApi.analyzeMarkdown(
sectionNumber: mdSelectedSection || undefined uploadedFile.size > 0 ? uploadedFile : null,
}); {
docId: docId || undefined,
analysisType: mdAnalysisType,
userPrompt: mdUserPrompt,
sectionNumber: mdSelectedSection || undefined
}
);
if (result.success) { if (result.success) {
toast.success('Markdown AI 分析完成'); toast.success('Markdown AI 分析完成');
@@ -721,8 +912,11 @@ const Documents: React.FC = () => {
setWordAnalysis(null); setWordAnalysis(null);
try { try {
// 判断是从历史文档还是本地上传
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
const result = await aiApi.analyzeWordWithAI( const result = await aiApi.analyzeWordWithAI(
uploadedFile, uploadedFile.size > 0 ? uploadedFile : null,
docId,
wordUserHint, wordUserHint,
wordAnalysisType wordAnalysisType
); );
@@ -751,7 +945,13 @@ const Documents: React.FC = () => {
setTxtAnalysis(null); setTxtAnalysis(null);
try { try {
const result = await aiApi.analyzeTxt(uploadedFile, txtAnalysisType); // 判断是从历史文档还是本地上传
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
const result = await aiApi.analyzeTxt(
uploadedFile.size > 0 ? uploadedFile : null,
docId,
txtAnalysisType
);
if (result.success) { if (result.success) {
toast.success('TXT AI 分析完成'); toast.success('TXT AI 分析完成');
@@ -789,6 +989,18 @@ const Documents: React.FC = () => {
return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`; return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
}; };
/**
 * Map a file extension (without the leading dot) to its MIME type.
 *
 * The lookup is case-insensitive and uses a Map, so names inherited from
 * Object.prototype (e.g. "constructor", "toString") are never matched.
 * Unknown, empty or missing extensions yield 'application/octet-stream'.
 *
 * @param ext File extension such as 'xlsx' or 'md'.
 * @returns The MIME type string for the extension.
 */
const getMimeType = (ext: string): string => {
  const mimeTypes = new Map<string, string>([
    ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
    ['xls', 'application/vnd.ms-excel'],
    ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['doc', 'application/msword'],
    ['md', 'text/markdown'],
    // The analysis endpoints also accept the long-form extensions.
    ['markdown', 'text/markdown'],
    ['txt', 'text/plain'],
    ['text', 'text/plain'],
  ]);
  // Guard against a missing value at runtime before normalizing the case.
  const key = (ext || '').toLowerCase();
  return mimeTypes.get(key) || 'application/octet-stream';
};
const getAnalysisIcon = (type: string) => { const getAnalysisIcon = (type: string) => {
switch (type) { switch (type) {
case 'general': return <FileText size={20} />; case 'general': return <FileText size={20} />;
@@ -1130,7 +1342,7 @@ const Documents: React.FC = () => {
<FileText size={12} className="mr-1" /> Markdown <FileText size={12} className="mr-1" /> Markdown
</Badge> </Badge>
<Badge variant="outline" className="bg-gray-500/10 text-gray-600 border-gray-200 text-xs"> <Badge variant="outline" className="bg-gray-500/10 text-gray-600 border-gray-200 text-xs">
<File size={12} className="mr-1" /> <FileIcon size={12} className="mr-1" />
</Badge> </Badge>
</div> </div>
</div> </div>
@@ -1139,6 +1351,38 @@ const Documents: React.FC = () => {
)} )}
</Card> </Card>
{/* 从历史文档中选择 */}
{documents.length > 0 && (
<Card className="border-none shadow-md">
<CardHeader className="pb-4">
<CardTitle className="flex items-center gap-2">
<Clock className="text-primary" size={20} />
</CardTitle>
</CardHeader>
<CardContent className="space-y-3">
<Select
value=""
onValueChange={async (docId) => {
if (!docId) return;
await handleSelectDocument(docId);
}}
>
<SelectTrigger className="bg-background">
<SelectValue placeholder="选择历史文档..." />
</SelectTrigger>
<SelectContent>
{documents.slice(0, 20).map((doc) => (
<SelectItem key={doc.doc_id} value={doc.doc_id}>
{doc.original_filename}
</SelectItem>
))}
</SelectContent>
</Select>
</CardContent>
</Card>
)}
{/* Excel 解析选项 */} {/* Excel 解析选项 */}
{uploadedFile && isExcelFile(uploadedFile.name) && ( {uploadedFile && isExcelFile(uploadedFile.name) && (
<Card className="border-none shadow-md"> <Card className="border-none shadow-md">
@@ -1423,7 +1667,7 @@ const Documents: React.FC = () => {
)} )}
{/* 数据操作 */} {/* 数据操作 */}
{parseResult?.success && ( {(parseResult?.success || aiAnalysis?.success || mdAnalysis?.success || wordAnalysis?.success || txtAnalysis?.success) && (
<Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5"> <Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5">
<CardHeader className="pb-4"> <CardHeader className="pb-4">
<CardTitle className="flex items-center gap-2"> <CardTitle className="flex items-center gap-2">
@@ -1432,7 +1676,7 @@ const Documents: React.FC = () => {
</CardTitle> </CardTitle>
</CardHeader> </CardHeader>
<CardContent className="space-y-3"> <CardContent className="space-y-3">
<Button onClick={handleGenerateCharts} disabled={!aiAnalysis?.success || analyzingForCharts} className="w-full bg-gradient-to-r from-primary to-purple-600 hover:from-primary/90 hover:to-purple-600/90"> <Button onClick={handleGenerateCharts} disabled={!(aiAnalysis?.success || mdAnalysis?.success || wordAnalysis?.success || txtAnalysis?.success) || analyzingForCharts} className="w-full bg-gradient-to-r from-primary to-purple-600 hover:from-primary/90 hover:to-purple-600/90">
{analyzingForCharts ? <><Loader2 className="mr-2 animate-spin" size={16} />...</> : <><Brain size={16} className="mr-2" />AI </>} {analyzingForCharts ? <><Loader2 className="mr-2 animate-spin" size={16} />...</> : <><Brain size={16} className="mr-2" />AI </>}
</Button> </Button>
<Button onClick={openExportDialog} variant="outline" className="w-full"> <Button onClick={openExportDialog} variant="outline" className="w-full">
@@ -1774,6 +2018,95 @@ const Documents: React.FC = () => {
</CardContent> </CardContent>
</Card> </Card>
{/* 已上传文档详情 */}
{selectedDocument && (
<Card className="border-none shadow-md border-l-4 border-l-cyan-500">
<CardHeader>
<div className="flex items-center justify-between">
<div className="space-y-1">
<CardTitle className="flex items-center gap-2">
<FileText className="text-cyan-500" size={20} />
</CardTitle>
<CardDescription>
{selectedDocument.original_filename} {selectedDocument.doc_type.toUpperCase()}
</CardDescription>
</div>
<Button variant="ghost" size="sm" onClick={() => setSelectedDocument(null)}>
</Button>
</div>
</CardHeader>
<CardContent className="max-h-[500px] overflow-y-auto">
{loadingDocument ? (
<div className="flex items-center justify-center py-8">
<Loader2 className="animate-spin" size={24} />
<span className="ml-2">...</span>
</div>
) : (
<div className="space-y-4">
{selectedDocument.structured_data?.tables && selectedDocument.structured_data.tables.length > 0 && (
<div className="space-y-2">
<p className="text-sm font-medium"></p>
{selectedDocument.structured_data.tables.slice(0, 3).map((table: any, idx: number) => (
<div key={idx} className="border rounded-lg overflow-x-auto">
{table.headers && (
<TableComponent>
<TableHeader>
<TableRow>
{table.headers.map((header: string, hIdx: number) => (
<TableHead key={hIdx}>{header}</TableHead>
))}
</TableRow>
</TableHeader>
<TableBody>
{(table.rows || []).slice(0, 10).map((row: string[], rIdx: number) => (
<TableRow key={rIdx}>
{row.map((cell: string, cIdx: number) => (
<TableCell key={cIdx}>{cell}</TableCell>
))}
</TableRow>
))}
</TableBody>
</TableComponent>
)}
</div>
))}
</div>
)}
{selectedDocument.structured_data?.key_values && Object.keys(selectedDocument.structured_data.key_values || {}).length > 0 && (
<div className="space-y-2">
<p className="text-sm font-medium"></p>
<div className="grid grid-cols-2 gap-2">
{Object.entries(selectedDocument.structured_data.key_values || {}).map(([key, value]: [string, any]) => (
<div key={key} className="flex gap-2 p-2 bg-muted/30 rounded-lg">
<span className="font-medium text-sm">{key}:</span>
<span className="text-sm text-muted-foreground">{String(value)}</span>
</div>
))}
</div>
</div>
)}
{selectedDocument.content && (
<div className="space-y-2">
<p className="text-sm font-medium"></p>
<div className="p-3 bg-muted/30 rounded-lg max-h-[300px] overflow-y-auto">
<p className="text-sm whitespace-pre-wrap font-mono">
{selectedDocument.content.slice(0, 2000)}
{selectedDocument.content.length > 2000 && '...'}
</p>
</div>
</div>
)}
{!selectedDocument.content && !selectedDocument.structured_data?.tables && !selectedDocument.structured_data?.key_values && (
<p className="text-sm text-muted-foreground text-center py-4"></p>
)}
</div>
)}
</CardContent>
</Card>
)}
{/* 文档列表 */} {/* 文档列表 */}
<Card className="border-none shadow-md"> <Card className="border-none shadow-md">
<CardHeader> <CardHeader>
@@ -1801,7 +2134,14 @@ const Documents: React.FC = () => {
) : (filteredDocs?.length ?? 0) > 0 ? ( ) : (filteredDocs?.length ?? 0) > 0 ? (
<div className="space-y-3"> <div className="space-y-3">
{(filteredDocs || []).map(doc => ( {(filteredDocs || []).map(doc => (
<div key={doc.doc_id} className="flex items-center gap-4 p-4 rounded-xl border border-transparent hover:bg-muted/30 transition-all group"> <div
key={doc.doc_id}
className={cn(
"flex items-center gap-4 p-4 rounded-xl border border-transparent hover:bg-muted/30 transition-all group cursor-pointer",
selectedDocument?.doc_id === doc.doc_id && "bg-primary/5 border-primary/20"
)}
onClick={() => handleSelectDocument(doc.doc_id)}
>
<div className={cn( <div className={cn(
"w-10 h-10 rounded-lg flex items-center justify-center shrink-0", "w-10 h-10 rounded-lg flex items-center justify-center shrink-0",
doc.doc_type === 'xlsx' ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500" doc.doc_type === 'xlsx' ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
@@ -1814,7 +2154,10 @@ const Documents: React.FC = () => {
{doc.doc_type.toUpperCase()} {format(new Date(doc.created_at), 'yyyy-MM-dd HH:mm')} {doc.doc_type.toUpperCase()} {format(new Date(doc.created_at), 'yyyy-MM-dd HH:mm')}
</p> </p>
</div> </div>
<Button variant="ghost" size="icon" className="text-destructive hover:bg-destructive/10 opacity-0 group-hover:opacity-100" onClick={() => handleDelete(doc.doc_id)}> <Button variant="ghost" size="icon" className="text-destructive hover:bg-destructive/10 opacity-0 group-hover:opacity-100" onClick={(e) => {
e.stopPropagation();
handleDelete(doc.doc_id);
}}>
<Trash2 size={16} /> <Trash2 size={16} />
</Button> </Button>
</div> </div>