diff --git a/backend/app/api/endpoints/ai_analyze.py b/backend/app/api/endpoints/ai_analyze.py index ae13191..36dedfe 100644 --- a/backend/app/api/endpoints/ai_analyze.py +++ b/backend/app/api/endpoints/ai_analyze.py @@ -12,6 +12,7 @@ from app.services.excel_ai_service import excel_ai_service from app.services.markdown_ai_service import markdown_ai_service from app.services.template_fill_service import template_fill_service from app.services.word_ai_service import word_ai_service +from app.services.txt_ai_service import txt_ai_service logger = logging.getLogger(__name__) @@ -153,8 +154,9 @@ async def analyze_text( @router.post("/analyze/md") async def analyze_markdown( - file: UploadFile = File(...), - analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"), + file: Optional[UploadFile] = File(None), + doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"), + analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"), user_prompt: str = Query("", description="用户自定义提示词"), section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'") ): @@ -162,7 +164,8 @@ async def analyze_markdown( 上传并使用 AI 分析 Markdown 文件 Args: - file: 上传的 Markdown 文件 + file: 上传的 Markdown 文件(与 doc_id 二选一) + doc_id: 文档ID(从数据库读取) analysis_type: 分析类型 user_prompt: 用户自定义提示词 section_number: 指定分析的章节编号 @@ -170,16 +173,8 @@ async def analyze_markdown( Returns: dict: 分析结果 """ - # 检查文件类型 - if not file.filename: - raise HTTPException(status_code=400, detail="文件名为空") - - file_ext = file.filename.split('.')[-1].lower() - if file_ext not in ['md', 'markdown']: - raise HTTPException( - status_code=400, - detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown" - ) + filename = None + tmp_path = None # 验证分析类型 supported_types = markdown_ai_service.get_supported_analysis_types() @@ -189,46 +184,96 @@ async def analyze_markdown( detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}" ) - try: - # 读取文件内容 - content = await file.read() - - # 保存到临时文件 - with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp: - tmp.write(content) - tmp_path = tmp.name - + if doc_id: + # 从数据库读取文档 try: - logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}") + from app.core.database.mongodb import mongodb + doc = await mongodb.get_document(doc_id) + if not doc: + raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}") - # 调用 AI 分析服务 - result = await markdown_ai_service.analyze_markdown( - file_path=tmp_path, - analysis_type=analysis_type, - user_prompt=user_prompt, - section_number=section_number + filename = doc.get("metadata", {}).get("original_filename", "unknown.md") + file_ext = filename.split('.')[-1].lower() + + if file_ext not in ['md', 'markdown']: + raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}") + + content = doc.get("content", "") + if not content: + raise HTTPException(status_code=400, detail="文档内容为空") + + # 保存到临时文件 + with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp: + tmp.write(content.encode('utf-8')) + tmp_path = tmp.name + + logger.info(f"从数据库加载 Markdown 文档: {filename}, 长度: {len(content)}") + + except HTTPException: + raise + except Exception as e: + logger.error(f"从数据库读取 Markdown 文档失败: {str(e)}") + raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}") + else: + # 文件上传模式 + if not file: + raise HTTPException(status_code=400, detail="请提供文件或文档ID") + + if not file.filename: + raise HTTPException(status_code=400, detail="文件名为空") + + file_ext = file.filename.split('.')[-1].lower() + if file_ext not in ['md', 'markdown']: + raise HTTPException( + status_code=400, + detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown" ) - logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}") + try: + # 读取文件内容 + content = await file.read() - if not result['success']: - raise HTTPException(status_code=500, detail=result.get('error', '分析失败')) + # 保存到临时文件 + with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name - return result + filename = file.filename - finally: - # 清理临时文件,确保在所有情况下都能清理 - try: - if tmp_path and os.path.exists(tmp_path): - os.unlink(tmp_path) - except Exception as cleanup_error: - logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}") + except Exception as e: + logger.error(f"读取 Markdown 文件失败: {str(e)}") + raise HTTPException(status_code=500, detail=f"读取文件失败: {str(e)}") + + try: + logger.info(f"开始分析 Markdown 文件: {filename}, 分析类型: {analysis_type}, 章节: {section_number}") + + # 调用 AI 分析服务 + result = await markdown_ai_service.analyze_markdown( + file_path=tmp_path, + analysis_type=analysis_type, + user_prompt=user_prompt, + section_number=section_number + ) + + logger.info(f"Markdown 分析完成: {filename}, 成功: {result['success']}") + + if not result['success']: + raise HTTPException(status_code=500, detail=result.get('error', '分析失败')) + + return result except HTTPException: raise except Exception as e: logger.error(f"Markdown AI 分析过程中出错: {str(e)}") raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}") + finally: + # 清理临时文件 + if tmp_path and os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except Exception as cleanup_error: + logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}") @router.post("/analyze/md/stream") @@ -346,67 +391,100 @@ async def get_markdown_outline( @router.post("/analyze/txt") async def analyze_txt( - file: UploadFile = File(...), + file: Optional[UploadFile] = File(None), + doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"), + analysis_type: str = Query("structured", description="分析类型: structured, charts") ): """ - 上传并使用 AI 分析 TXT 文本文件,提取结构化数据 + 上传并使用 AI 分析 TXT 文本文件,提取结构化数据或生成图表 将非结构化文本转换为结构化表格数据,便于后续填表使用 + 当 analysis_type=charts 时,可生成可视化图表 Args: - file: 上传的 TXT 文件 + file: 上传的 TXT 文件(与 doc_id 二选一) + doc_id: 文档ID(从数据库读取) + analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表) Returns: - dict: 分析结果,包含结构化表格数据 + dict: 分析结果,包含结构化表格数据或图表数据 """ - if not file.filename: - raise HTTPException(status_code=400, detail="文件名为空") - - file_ext = file.filename.split('.')[-1].lower() - if file_ext not in ['txt', 'text']: - raise HTTPException( - status_code=400, - detail=f"不支持的文件类型: {file_ext},仅支持 .txt" - ) - - try: - # 读取文件内容 - content = await file.read() - - # 保存到临时文件 - with tempfile.NamedTemporaryFile(mode='wb', suffix='.txt', delete=False) as tmp: - tmp.write(content) - tmp_path = tmp.name + filename = None + text_content = None + if doc_id: + # 从数据库读取文档 try: - logger.info(f"开始 AI 分析 TXT 文件: {file.filename}") + from app.core.database.mongodb import mongodb + doc = await mongodb.get_document(doc_id) + if not doc: + raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}") - # 使用 template_fill_service 的 AI 分析方法 - result = await template_fill_service.analyze_txt_with_ai( - content=content.decode('utf-8', errors='replace'), - filename=file.filename + filename = doc.get("metadata", {}).get("original_filename", "unknown.txt") + file_ext = filename.split('.')[-1].lower() + + if file_ext not in ['txt', 'text']: + raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}") + + # 使用数据库中的 content + text_content = doc.get("content", "") + + if not text_content: + raise HTTPException(status_code=400, detail="文档内容为空") + + logger.info(f"从数据库加载 TXT 文档: {filename}, 长度: {len(text_content)}") + + except HTTPException: + raise + except Exception as e: + logger.error(f"从数据库读取 TXT 文档失败: {str(e)}") + raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}") + else: + # 文件上传模式 + if not file: + raise HTTPException(status_code=400, detail="请提供文件或文档ID") + + if not file.filename: + raise HTTPException(status_code=400, detail="文件名为空") + + file_ext = file.filename.split('.')[-1].lower() + if file_ext not in ['txt', 'text']: + raise HTTPException( + status_code=400, + detail=f"不支持的文件类型: {file_ext},仅支持 .txt" ) - if result: - logger.info(f"TXT AI 分析成功: {file.filename}") - return { - "success": True, - "filename": file.filename, - "structured_data": result - } - else: - logger.warning(f"TXT AI 分析返回空结果: {file.filename}") - return { - "success": False, - "filename": file.filename, - "error": "AI 分析未能提取到结构化数据", - "structured_data": None - } + # 读取文件内容 + content = await file.read() + text_content = content.decode('utf-8', errors='replace') + filename = file.filename - finally: - # 清理临时文件 - if os.path.exists(tmp_path): - os.unlink(tmp_path) + try: + logger.info(f"开始 AI 分析 TXT 文件: {filename}, analysis_type={analysis_type}") + + # 使用 txt_ai_service 的 AI 分析方法 + result = await txt_ai_service.analyze_txt_with_ai( + content=text_content, + filename=filename, + analysis_type=analysis_type + ) + + if result: + logger.info(f"TXT AI 分析成功: {filename}") + return { + "success": result.get("success", True), + "filename": filename, + "analysis_type": analysis_type, + "result": result + } + else: + logger.warning(f"TXT AI 分析返回空结果: {filename}") + return { + "success": False, + "filename": filename, + "error": "AI 分析未能提取到结构化数据", + "result": None + } except HTTPException: raise @@ -419,21 +497,89 @@ async def analyze_txt( @router.post("/analyze/word") async def analyze_word( - file: UploadFile = File(...), - user_hint: str = Query("", description="用户提示词,如'请提取表格数据'") + file: Optional[UploadFile] = File(None), + doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"), + user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"), + analysis_type: str = Query("structured", description="分析类型: structured, charts") ): """ - 使用 AI 解析 Word 文档,提取结构化数据 + 使用 AI 解析 Word 文档,提取结构化数据或生成图表 适用于从非结构化的 Word 文档中提取表格数据、键值对等信息 + 当 analysis_type=charts 时,可生成可视化图表 Args: - file: 上传的 Word 文件 + file: 上传的 Word 文件(与 doc_id 二选一) + doc_id: 文档ID(从数据库读取) user_hint: 用户提示词 + analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表) Returns: - dict: 包含结构化数据的解析结果 + dict: 包含结构化数据的解析结果或图表数据 """ + # 获取文件名和扩展名 + filename = None + file_ext = None + + if doc_id: + # 从数据库读取文档 + try: + from app.core.database.mongodb import mongodb + doc = await mongodb.get_document(doc_id) + if not doc: + raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}") + + filename = doc.get("metadata", {}).get("original_filename", "unknown.docx") + file_ext = filename.split('.')[-1].lower() + + if file_ext not in ['docx']: + raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}") + + # 使用数据库中的 content 进行分析 + content = doc.get("content", "") + tables = doc.get("structured_data", {}).get("tables", []) + + # 调用 AI 分析服务,传入数据库内容 + if analysis_type == "charts": + result = await word_ai_service.generate_charts_from_db( + content=content, + tables=tables, + filename=filename, + user_hint=user_hint + ) + else: + result = await word_ai_service.parse_word_with_ai_from_db( + content=content, + tables=tables, + filename=filename, + user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等" + ) + + if result.get("success"): + return { + "success": True, + "filename": filename, + "analysis_type": analysis_type, + "result": result + } + else: + return { + "success": False, + "filename": filename, + "error": result.get("error", "AI 解析失败"), + "result": None + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"从数据库读取 Word 文档失败: {str(e)}") + raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}") + + # 文件上传模式 + if not file: + raise HTTPException(status_code=400, detail="请提供文件或文档ID") + if not file.filename: raise HTTPException(status_code=400, detail="文件名为空") @@ -453,16 +599,25 @@ async def analyze_word( tmp_path = tmp.name try: - # 使用 AI 解析 Word 文档 - result = await word_ai_service.parse_word_with_ai( - file_path=tmp_path, - user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等" - ) + # 根据 analysis_type 选择处理方式 + if analysis_type == "charts": + # 生成图表 + result = await word_ai_service.generate_charts( + file_path=tmp_path, + user_hint=user_hint + ) + else: + # 提取结构化数据 + result = await word_ai_service.parse_word_with_ai( + file_path=tmp_path, + user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等" + ) if result.get("success"): return { "success": True, "filename": file.filename, + "analysis_type": analysis_type, "result": result } else: diff --git a/backend/app/api/endpoints/documents.py b/backend/app/api/endpoints/documents.py index 221e059..2f77714 100644 --- a/backend/app/api/endpoints/documents.py +++ b/backend/app/api/endpoints/documents.py @@ -405,7 +405,7 @@ async def process_documents_batch(task_id: str, files: List[dict]): if content and len(content) > 50: await index_document_to_rag(doc_id, filename, result, file_info["ext"]) - return {"index": index, "filename": filename, "doc_id": doc_id, "success": True} + return {"index": index, "filename": filename, "doc_id": doc_id, "file_path": file_info["path"], "success": True} except Exception as e: logger.error(f"处理文件 {filename} 失败: {e}") diff --git a/backend/app/services/txt_ai_service.py b/backend/app/services/txt_ai_service.py new file mode 100644 index 0000000..740121d --- /dev/null +++ b/backend/app/services/txt_ai_service.py @@ -0,0 +1,352 @@ +""" +TXT 文档 AI 分析服务 + +使用 LLM 对 TXT 文本文件进行深度分析,提取结构化数据并生成可视化图表 +""" +import logging +import re +from typing import Any, Dict, List, Optional + +from app.services.llm_service import llm_service +from app.services.visualization_service import visualization_service +from app.core.document_parser.txt_parser import TxtParser + +logger = logging.getLogger(__name__) + + +class TxtAIService: + """TXT 文档 AI 分析服务""" + + def __init__(self): + self.parser = TxtParser() + + async def analyze_txt_with_ai( + self, + content: str, + filename: str = "", + user_hint: str = "", + analysis_type: str = "structured" + ) -> Dict[str, Any]: + """ + 使用 AI 解析 TXT 文本文件 + + Args: + content: 文本内容 + filename: 文件名(可选) + user_hint: 用户提示词 + analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表) + + Returns: + Dict: 包含结构化数据的分析结果 + """ + try: + if not content or not content.strip(): + return { + "success": False, + "error": "文档内容为空" + } + + # 根据分析类型选择处理方式 + if analysis_type == "charts": + return await self.generate_charts(content, filename, user_hint) + + # 默认:提取结构化数据 + return await self._extract_structured_data(content, filename, user_hint) + + except Exception as e: + logger.error(f"TXT AI 分析失败: {str(e)}") + return { + "success": False, + "error": str(e) + } + + async def _extract_structured_data( + self, + content: str, + filename: str = "", + user_hint: str = "" + ) -> Dict[str, Any]: + """ + 从文本中提取结构化数据 + + Args: + content: 文本内容 + filename: 文件名 + user_hint: 用户提示词 + + Returns: + 结构化数据 + """ + try: + # 截断内容避免超出 token 限制 + max_content_len = 8000 + text_preview = content[:max_content_len] if len(content) > max_content_len else content + + prompt = f"""你是一个专业的数据提取专家。请从以下文本中提取结构化数据。 + +【用户需求】 +{user_hint if user_hint else "请提取文档中的所有结构化数据,包括表格数据、键值对、列表项等。"} + +【文档内容】({"前" + str(max_content_len) + "字符,仅显示部分" if len(content) > max_content_len else "全文"}) +{text_preview} + +请按照以下 JSON 格式输出: +{{ + "type": "structured_text", + "tables": [{{"headers": [...], "rows": [...]}}], + "key_values": {{"键1": "值1", "键2": "值2", ...}}, + "list_items": ["项1", "项2", ...], + "summary": "文档内容摘要" +}} + +重点: +- 如果文档包含表格数据(制表符、空格对齐等),提取到 tables 中 +- 如果文档包含键值对(如 名称: 张三),提取到 key_values 中 +- 如果文档包含列表项,提取到 list_items 中 +- 如果无法提取到结构化数据,至少提供一个详细的摘要 +""" + + messages = [ + {"role": "system", "content": "你是一个专业的数据提取助手。请严格按JSON格式输出。"}, + {"role": "user", "content": prompt} + ] + + response = await self.llm.chat( + messages=messages, + temperature=0.1, + max_tokens=50000 + ) + + content_text = self.llm.extract_message_content(response) + result = self._parse_json_response(content_text) + + if result: + logger.info(f"TXT 结构化数据提取成功: type={result.get('type')}") + return { + "success": True, + "type": result.get("type", "structured_text"), + "tables": result.get("tables", []), + "key_values": result.get("key_values", {}), + "list_items": result.get("list_items", []), + "summary": result.get("summary", "") + } + else: + return { + "success": True, + "type": "text", + "summary": text_preview[:500], + "raw_text_preview": text_preview[:500] + } + + except Exception as e: + logger.error(f"TXT 结构化数据提取失败: {str(e)}") + return { + "success": False, + "error": str(e) + } + + async def generate_charts( + self, + content: str, + filename: str = "", + user_hint: str = "" + ) -> Dict[str, Any]: + """ + 从文本中提取数据并生成可视化图表 + + Args: + content: 文本内容 + filename: 文件名 + user_hint: 用户提示词 + + Returns: + 包含图表数据和统计信息的结果 + """ + try: + # 截断内容避免超出 token 限制 + max_content_len = 8000 + text_preview = content[:max_content_len] if len(content) > max_content_len else content + + # 使用 LLM 提取可用于图表的数据 + prompt = f"""你是一个专业的数据可视化助手。请从以下文本中提取可用于可视化的数据。 + +文档标题:{filename} + +文档内容: +{text_preview} + +请完成以下任务: +1. 识别文本中的表格数据(制表符分隔、空格对齐的表格等) +2. 识别文本中的关键统计数据(百分比、数量、趋势等) +3. 识别可用于比较的分类数据 + +请用 JSON 格式返回以下结构的数据(如果没有表格数据,返回空结构): +{{ + "tables": [ + {{ + "description": "表格的描述", + "columns": ["列名1", "列名2", ...], + "rows": [ + ["值1", "值2", ...], + ["值1", "值2", ...] + ] + }} + ], + "key_statistics": [ + {{ + "name": "指标名称", + "value": "数值", + "trend": "增长/下降/持平", + "description": "指标说明" + }} + ], + "chart_suggestions": [ + {{ + "chart_type": "bar/line/pie", + "title": "图表标题", + "data_source": "数据来源说明" + }} + ] +}} + +如果没有表格数据,返回空结构:{{"tables": [], "key_statistics": [], "chart_suggestions": []}} +请确保返回的是合法的 JSON 格式。""" + + messages = [ + {"role": "system", "content": "你是一个专业的数据可视化助手,擅长从文本中提取数据并生成图表。"}, + {"role": "user", "content": prompt} + ] + + response = await self.llm.chat( + messages=messages, + temperature=0.1, + max_tokens=50000 + ) + + content_text = self.llm.extract_message_content(response) + chart_data = self._parse_json_response(content_text) + + if not chart_data: + return { + "success": False, + "error": "无法从文本中提取有效的数据结构" + } + + # 检查是否有表格数据 + tables = chart_data.get("tables", []) + key_statistics = chart_data.get("key_statistics", []) + + if not tables: + return { + "success": False, + "error": "文档中没有可用于图表的表格数据", + "key_statistics": key_statistics, + "chart_suggestions": chart_data.get("chart_suggestions", []) + } + + # 使用第一个表格生成图表 + first_table = tables[0] + columns = first_table.get("columns", []) + rows = first_table.get("rows", []) + + if not columns or not rows: + return { + "success": False, + "error": "表格数据为空" + } + + # 转换为 visualization_service 需要的格式 + viz_data = { + "columns": columns, + "rows": rows + } + + # 生成可视化图表 + logger.info(f"开始生成图表,列数: {len(columns)}, 行数: {len(rows)}") + vis_result = visualization_service.analyze_and_visualize(viz_data) + + if vis_result.get("success"): + return { + "success": True, + "charts": vis_result.get("charts", {}), + "statistics": vis_result.get("statistics", {}), + "distributions": vis_result.get("distributions", {}), + "row_count": vis_result.get("row_count", 0), + "column_count": vis_result.get("column_count", 0), + "key_statistics": key_statistics, + "chart_suggestions": chart_data.get("chart_suggestions", []), + "table_description": first_table.get("description", "") + } + else: + return { + "success": False, + "error": vis_result.get("error", "可视化生成失败"), + "key_statistics": key_statistics + } + + except Exception as e: + logger.error(f"TXT 图表生成失败: {str(e)}") + return { + "success": False, + "error": str(e) + } + + def _parse_json_response(self, content: str) -> Optional[Dict]: + """解析 JSON 响应,处理各种格式问题""" + if not content: + return None + + import json + + # 清理 markdown 标记 + cleaned = content.strip() + cleaned = re.sub(r'^```json\s*', '', cleaned, flags=re.MULTILINE) + cleaned = re.sub(r'^```\s*', '', cleaned, flags=re.MULTILINE) + cleaned = cleaned.strip() + + # 找到 JSON 开始位置 + json_start = -1 + for i, c in enumerate(cleaned): + if c == '{': + json_start = i + break + + if json_start == -1: + logger.warning("无法找到 JSON 开始位置") + return None + + json_text = cleaned[json_start:] + + # 尝试直接解析 + try: + return json.loads(json_text) + except json.JSONDecodeError: + pass + + # 尝试修复并解析 + try: + # 找到闭合括号 + depth = 0 + end_pos = -1 + for i, c in enumerate(json_text): + if c == '{': + depth += 1 + elif c == '}': + depth -= 1 + if depth == 0: + end_pos = i + 1 + break + + if end_pos > 0: + fixed = json_text[:end_pos] + # 移除末尾逗号 + fixed = re.sub(r',\s*([}]])', r'\1', fixed) + return json.loads(fixed) + except Exception as e: + logger.warning(f"JSON 修复失败: {e}") + + return None + + +# 全局单例 +txt_ai_service = TxtAIService() diff --git a/backend/app/services/word_ai_service.py b/backend/app/services/word_ai_service.py index 3a0ab16..a38d70d 100644 --- a/backend/app/services/word_ai_service.py +++ b/backend/app/services/word_ai_service.py @@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional import json from app.services.llm_service import llm_service +from app.services.visualization_service import visualization_service from app.core.document_parser.docx_parser import DocxParser logger = logging.getLogger(__name__) @@ -634,6 +635,272 @@ class WordAIService: return values + async def generate_charts( + self, + file_path: str, + user_hint: str = "" + ) -> Dict[str, Any]: + """ + 使用 AI 解析 Word 文档并生成可视化图表 -# 全局单例 -word_ai_service = WordAIService() + 从 Word 文档中提取表格数据,然后生成统计图表 + + Args: + file_path: Word 文件路径 + user_hint: 用户提示词,指定要提取的内容类型 + + Returns: + Dict: 包含图表数据和统计信息的结果 + """ + try: + # 1. 先用基础解析器提取原始内容 + parse_result = self.parser.parse(file_path) + + if not parse_result.success: + return { + "success": False, + "error": parse_result.error, + "structured_data": None + } + + # 2. 获取原始数据 + raw_data = parse_result.data + paragraphs = raw_data.get("paragraphs", []) + tables = raw_data.get("tables", []) + content = raw_data.get("content", "") + + logger.info(f"Word 基础解析完成: {len(paragraphs)} 个段落, {len(tables)} 个表格") + + # 3. 优先处理表格数据 + if tables and len(tables) > 0: + structured_data = await self._extract_tables_with_ai( + tables, paragraphs, 0, user_hint, parse_result.metadata + ) + elif paragraphs and len(paragraphs) > 0: + structured_data = await self._extract_from_text_with_ai( + paragraphs, content, 0, [], user_hint + ) + else: + return { + "success": False, + "error": "文档内容为空", + "structured_data": None + } + + # 4. 检查是否有表格数据用于可视化 + if not structured_data.get("success"): + return { + "success": False, + "error": structured_data.get("error", "解析失败"), + "structured_data": None + } + + parse_type = structured_data.get("type", "") + + # 5. 提取可用于图表的数据 + chart_data = None + + if parse_type == "table_data": + headers = structured_data.get("headers", []) + rows = structured_data.get("rows", []) + if headers and rows: + chart_data = { + "columns": headers, + "rows": rows + } + elif parse_type == "structured_text": + tables = structured_data.get("tables", []) + if tables and len(tables) > 0: + first_table = tables[0] + headers = first_table.get("headers", []) + rows = first_table.get("rows", []) + if headers and rows: + chart_data = { + "columns": headers, + "rows": rows + } + + # 6. 生成可视化图表 + if chart_data: + logger.info(f"开始生成图表,列数: {len(chart_data['columns'])}, 行数: {len(chart_data['rows'])}") + vis_result = visualization_service.analyze_and_visualize(chart_data) + + if vis_result.get("success"): + return { + "success": True, + "charts": vis_result.get("charts", {}), + "statistics": vis_result.get("statistics", {}), + "distributions": vis_result.get("distributions", {}), + "structured_data": structured_data, + "row_count": vis_result.get("row_count", 0), + "column_count": vis_result.get("column_count", 0) + } + else: + return { + "success": False, + "error": vis_result.get("error", "可视化生成失败"), + "structured_data": structured_data + } + else: + return { + "success": False, + "error": "文档中没有可用于图表的表格数据", + "structured_data": structured_data + } + + except Exception as e: + logger.error(f"Word 文档图表生成失败: {str(e)}") + return { + "success": False, + "error": str(e), + "structured_data": None + } + + + async def parse_word_with_ai_from_db( + self, + content: str, + tables: List[Dict], + filename: str = "", + user_hint: str = "" + ) -> Dict[str, Any]: + """ + 使用 AI 解析从数据库读取的 Word 文档内容,提取结构化数据 + + Args: + content: 文档文本内容 + tables: 表格数据列表 + filename: 文件名 + user_hint: 用户提示词 + + Returns: + Dict: 包含结构化数据的解析结果 + """ + try: + # 解析段落 + paragraphs = [p.strip() for p in content.split('\n') if p.strip()] + + logger.info(f"从数据库解析 Word: {len(paragraphs)} 个段落, {len(tables)} 个表格") + + # 优先处理表格数据 + if tables and len(tables) > 0: + structured_data = await self._extract_tables_with_ai( + tables, paragraphs, 0, user_hint, {"filename": filename} + ) + elif paragraphs and len(paragraphs) > 0: + structured_data = await self._extract_from_text_with_ai( + paragraphs, content, 0, [], user_hint + ) + else: + structured_data = { + "success": True, + "type": "empty", + "message": "文档内容为空" + } + + return structured_data + + except Exception as e: + logger.error(f"从数据库解析 Word 文档失败: {str(e)}") + return { + "success": False, + "error": str(e) + } + + async def generate_charts_from_db( + self, + content: str, + tables: List[Dict], + filename: str = "", + user_hint: str = "" + ) -> Dict[str, Any]: + """ + 使用 AI 解析从数据库读取的 Word 文档并生成可视化图表 + + Args: + content: 文档文本内容 + tables: 表格数据列表 + filename: 文件名 + user_hint: 用户提示词 + + Returns: + Dict: 包含图表数据和统计信息的结果 + """ + try: + # 解析段落 + paragraphs = [p.strip() for p in content.split('\n') if p.strip()] + + logger.info(f"从数据库生成 Word 图表: {len(paragraphs)} 个段落, {len(tables)} 个表格") + + # 优先处理表格数据 + if tables and len(tables) > 0: + structured_data = await self._extract_tables_with_ai( + tables, paragraphs, 0, user_hint, {"filename": filename} + ) + elif paragraphs and len(paragraphs) > 0: + structured_data = await self._extract_from_text_with_ai( + paragraphs, content, 0, [], user_hint + ) + else: + return { + "success": False, + "error": "文档内容为空" + } + + # 提取可用于图表的数据 + chart_data = None + + if structured_data.get("type") == "table_data": + headers = structured_data.get("headers", []) + rows = structured_data.get("rows", []) + if headers and rows: + chart_data = { + "columns": headers, + "rows": rows + } + elif structured_data.get("type") == "structured_text": + tables_data = structured_data.get("tables", []) + if tables_data and len(tables_data) > 0: + first_table = tables_data[0] + headers = first_table.get("headers", []) + rows = first_table.get("rows", []) + if headers and rows: + chart_data = { + "columns": headers, + "rows": rows + } + + # 生成可视化图表 + if chart_data: + logger.info(f"开始生成图表,列数: {len(chart_data['columns'])}, 行数: {len(chart_data['rows'])}") + vis_result = visualization_service.analyze_and_visualize(chart_data) + + if vis_result.get("success"): + return { + "success": True, + "charts": vis_result.get("charts", {}), + "statistics": vis_result.get("statistics", {}), + "distributions": vis_result.get("distributions", {}), + "structured_data": structured_data, + "row_count": vis_result.get("row_count", 0), + "column_count": vis_result.get("column_count", 0) + } + else: + return { + "success": False, + "error": vis_result.get("error", "可视化生成失败"), + "structured_data": structured_data + } + else: + return { + "success": False, + "error": "文档中没有可用于图表的表格数据", + "structured_data": structured_data + } + + except Exception as e: + logger.error(f"从数据库生成 Word 图表失败: {str(e)}") + return { + "success": False, + "error": str(e) + } diff --git a/frontend/src/db/backend-api.ts b/frontend/src/db/backend-api.ts index 7d43424..bb6c162 100644 --- a/frontend/src/db/backend-api.ts +++ b/frontend/src/db/backend-api.ts @@ -250,6 +250,98 @@ export interface AIExcelAnalyzeResult { error?: string; } +// ==================== Word/TXT AI 分析类型 ==================== + +export type WordAnalysisType = 'structured' | 'charts'; +export type TxtAnalysisType = 'structured' | 'charts'; + +export interface WordAIStructuredResult { + success: boolean; + result?: { + success?: boolean; + type?: string; + headers?: string[]; + rows?: string[][]; + key_values?: Record; + list_items?: string[]; + summary?: string; + error?: string; + }; + error?: string; +} + +export interface WordAIChartsResult { + success: boolean; + result?: { + success?: boolean; + charts?: { + histograms?: Array; + bar_charts?: Array; + box_plots?: Array; + correlation?: any; + }; + statistics?: { + numeric?: Record; + categorical?: Record; + }; + distributions?: Record; + row_count?: number; + column_count?: number; + error?: string; + }; + error?: string; +} + +export interface TxtAIStructuredResult { + success: boolean; + result?: { + success?: boolean; + type?: string; + tables?: Array<{ + headers?: string[]; + rows?: string[][]; + }>; + key_values?: Record; + list_items?: string[]; + summary?: string; + error?: string; + }; + error?: string; +} + +export interface TxtAIChartsResult { + success: boolean; + result?: { + success?: boolean; + charts?: { + histograms?: Array; + bar_charts?: Array; + box_plots?: Array; + correlation?: any; + }; + statistics?: { + numeric?: Record; + categorical?: Record; + }; + distributions?: Record; + row_count?: number; + column_count?: number; + key_statistics?: Array<{ + name?: string; + value?: string; + trend?: string; + description?: string; + }>; + chart_suggestions?: Array<{ + chart_type?: string; + title?: string; + data_source?: string; + }>; + error?: string; + }; + error?: string; +} + // ==================== API 封装 ==================== export const backendApi = { @@ -1187,15 +1279,21 @@ export const aiApi = { * 上传并使用 AI 分析 Markdown 文件 */ async analyzeMarkdown( - file: File, + file: File | null, options: { + docId?: string; analysisType?: MarkdownAnalysisType; userPrompt?: string; sectionNumber?: string; } = {} ): Promise { const formData = new FormData(); - formData.append('file', file); + if (file) { + formData.append('file', file); + } + if (options.docId) { + formData.append('doc_id', options.docId); + } const params = new URLSearchParams(); if (options.analysisType) { @@ -1337,28 +1435,31 @@ export const aiApi = { }, /** - * 上传并使用 AI 分析 TXT 文本文件,提取结构化数据 + * 上传并使用 AI 分析 TXT 文本文件,提取结构化数据或生成图表 */ async analyzeTxt( - file: File + file: File | null, + docId: string | null = null, + analysisType: TxtAnalysisType = 'structured' ): Promise<{ success: boolean; filename?: string; - structured_data?: { - table?: { - columns?: string[]; - rows?: string[][]; - }; - summary?: string; - key_value_pairs?: Array<{ key: string; value: string }>; - numeric_data?: Array<{ name: string; value: number; unit?: string }>; - }; + analysis_type?: string; + result?: any; error?: string; }> { const formData = new FormData(); - formData.append('file', file); + if (file) { + formData.append('file', file); + } + if (docId) { + formData.append('doc_id', docId); + } - const url = `${BACKEND_BASE_URL}/ai/analyze/txt`; + const params = new URLSearchParams(); + params.append('analysis_type', analysisType); + + const url = `${BACKEND_BASE_URL}/ai/analyze/txt?${params.toString()}`; try { const response = await fetch(url, { @@ -1480,28 +1581,35 @@ export const aiApi = { // ==================== Word AI 解析 ==================== /** - * 使用 AI 解析 Word 文档,提取结构化数据 + * 使用 AI 解析 Word 文档,提取结构化数据或生成图表 */ async analyzeWordWithAI( - file: File, - userHint: string = '' + file: File | null, + docId: string | null = null, + userHint: string = '', + analysisType: WordAnalysisType = 'structured' ): Promise<{ success: boolean; - type?: string; - headers?: string[]; - rows?: string[][]; - key_values?: Record; - list_items?: string[]; - summary?: string; + filename?: string; + analysis_type?: string; + result?: any; error?: string; }> { const formData = new FormData(); - formData.append('file', file); + if (file) { + formData.append('file', file); + } + if (docId) { + formData.append('doc_id', docId); + } if (userHint) { formData.append('user_hint', userHint); } - const url = `${BACKEND_BASE_URL}/ai/analyze/word`; + const params = new URLSearchParams(); + params.append('analysis_type', analysisType); + + const url = `${BACKEND_BASE_URL}/ai/analyze/word?${params.toString()}`; try { const response = await fetch(url, { diff --git a/frontend/src/pages/Documents.tsx b/frontend/src/pages/Documents.tsx index 79af9f5..a71b113 100644 --- a/frontend/src/pages/Documents.tsx +++ b/frontend/src/pages/Documents.tsx @@ -10,7 +10,7 @@ import { ChevronDown, ChevronUp, FileSpreadsheet, - File, + File as FileIcon, Table, CheckCircle, AlertCircle, @@ -107,6 +107,15 @@ const Documents: React.FC = () => { const [mdStreaming, setMdStreaming] = useState(false); const [mdStreamingContent, setMdStreamingContent] = useState(''); + // Word AI 分析相关状态 + const [wordAnalysis, setWordAnalysis] = useState(null); + const [wordAnalysisType, setWordAnalysisType] = useState<'structured' | 'charts'>('structured'); + const [wordUserHint, setWordUserHint] = useState(''); + + // TXT AI 分析相关状态 + const [txtAnalysis, setTxtAnalysis] = useState(null); + const [txtAnalysisType, setTxtAnalysisType] = useState<'structured' | 'charts'>('structured'); + // RAG 向量检索相关状态 const [ragStatus, setRagStatus] = useState<{ vector_count: number; collections: string[] } | null>(null); const [ragSearchQuery, setRagSearchQuery] = useState(''); @@ -114,6 +123,17 @@ const Documents: React.FC = () => { const [ragResults, setRagResults] = useState([]); const [ragRebuilding, setRagRebuilding] = useState(false); + // 选中的文档详情 + const [selectedDocument, setSelectedDocument] = useState<{ + doc_id: string; + original_filename: string; + doc_type: string; + content?: string; + structured_data?: any; + metadata?: any; + } | null>(null); + const [loadingDocument, setLoadingDocument] = useState(false); + // 解析选项 const [parseOptions, setParseOptions] = useState({ parseAllSheets: false, @@ -268,6 +288,33 @@ const Documents: React.FC = () => { return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' }; } })); + + // 设置第一个成功文件的 uploadedFile + const firstSuccessIdx = fileResults.findIndex((fr: any) => fr?.success); + if (firstSuccessIdx >= 0 && acceptedFiles[firstSuccessIdx]) { + const firstFile = acceptedFiles[firstSuccessIdx]; + const firstResult = fileResults[firstSuccessIdx]; + const ext = firstFile.name.split('.').pop()?.toLowerCase(); + + // 设置 uploadedFile + setUploadedFile(firstFile); + + // 对于 Excel 文件,获取 parseResult + if (ext === 'xlsx' || ext === 'xls') { + // 调用 parseDocument 获取 parseResult + if (firstResult?.file_path) { + try { + const parseResult = await backendApi.parseDocument(firstResult.file_path); + if (parseResult.success) { + setParseResult(parseResult as any); + } + } catch (parseErr) { + console.warn('获取 parseResult 失败:', parseErr); + } + } + } + } + loadDocuments(); return; } else if (status.status === 'failure') { @@ -446,24 +493,79 @@ const Documents: React.FC = () => { // 基于 AI 分析生成图表 const handleGenerateCharts = async () => { - if (!aiAnalysis || !aiAnalysis.success) { + // 检查是否有任何 AI 分析结果 + const hasExcelAI = aiAnalysis?.success; + const hasMdAI = mdAnalysis?.success; + const hasWordAI = wordAnalysis?.success; + const hasTxtAI = txtAnalysis?.success; + + if (!hasExcelAI && !hasMdAI && !hasWordAI && !hasTxtAI) { toast.error('请先进行 AI 分析'); return; } + // 如果是 Markdown 分析已有图表,直接显示 + if (hasMdAI && mdAnalysis?.chart_data?.tables) { + setAnalysisCharts({ + success: true, + charts: { tables: mdAnalysis.chart_data.tables }, + statistics: mdAnalysis.chart_data.key_statistics + }); + toast.success('图表生成完成'); + return; + } + + // 如果是 Word 分析已有图表,直接显示 + if (hasWordAI && wordAnalysis?.result?.charts) { + setAnalysisCharts({ + success: true, + charts: wordAnalysis.result.charts, + statistics: wordAnalysis.result.statistics + }); + toast.success('图表生成完成'); + return; + } + + // 如果是 TXT 分析已有图表,直接显示 + if (hasTxtAI && txtAnalysis?.result?.charts) { + setAnalysisCharts({ + success: true, + charts: txtAnalysis.result.charts, + statistics: txtAnalysis.result.statistics + }); + toast.success('图表生成完成'); + return; + } + + // 尝试从各种分析结果中提取文本并生成图表 let analysisText = ''; - if (aiAnalysis.analysis?.analysis) { - analysisText = aiAnalysis.analysis.analysis; - } else if (aiAnalysis.analysis?.sheets) { - const sheets = aiAnalysis.analysis.sheets; - if (sheets && Object.keys(sheets).length > 0) { - const firstSheet = Object.keys(sheets)[0]; - analysisText = sheets[firstSheet]?.analysis || ''; + let fileType = 'unknown'; + + if (hasExcelAI) { + if (aiAnalysis.analysis?.analysis) { + analysisText = aiAnalysis.analysis.analysis; + fileType = 'excel'; + } else if (aiAnalysis.analysis?.sheets) { + const sheets = aiAnalysis.analysis.sheets; + if (sheets && Object.keys(sheets).length > 0) { + const firstSheet = Object.keys(sheets)[0]; + analysisText = sheets[firstSheet]?.analysis || ''; + fileType = 'excel'; + } } + } else if (hasMdAI && mdAnalysis?.analysis) { + analysisText = mdAnalysis.analysis; + fileType = 'markdown'; + } else if (hasWordAI && wordAnalysis?.result?.summary) { + analysisText = wordAnalysis.result.summary; + fileType = 'word'; + } else if (hasTxtAI && txtAnalysis?.result?.summary) { + analysisText = txtAnalysis.result.summary; + fileType = 'txt'; } if (!analysisText?.trim()) { - toast.error('无法获取 AI 分析结果'); + toast.error('无法获取 AI 分析文本结果'); return; } @@ -474,7 +576,7 @@ const Documents: React.FC = () => { const result = await aiApi.extractAndGenerateCharts({ analysis_text: analysisText, original_filename: uploadedFile?.name || 'unknown', - file_type: 'excel' + file_type: fileType }); if (result.success) { @@ -592,6 +694,9 @@ const Documents: React.FC = () => { const result = await backendApi.deleteDocument(docId); if (result.success) { setDocuments(prev => prev.filter(d => d.doc_id !== docId)); + if (selectedDocument?.doc_id === docId) { + setSelectedDocument(null); + } toast.success('文档已删除'); } } catch (err: any) { @@ -599,6 +704,95 @@ const Documents: React.FC = () => { } }; + const handleSelectDocument = async (docId: string) => { + setLoadingDocument(true); + try { + const result = await backendApi.getDocument(docId); + if (result.success && result.document) { + setSelectedDocument(result.document); + const doc = result.document; + + // 优先使用 file_path 调用 parseDocument 获取完整解析结果 + const filePath = doc.metadata?.file_path; + if (filePath) { + try { + const parseResult = await backendApi.parseDocument(filePath); + if (parseResult.success) { + setParseResult(parseResult as any); + const ext = doc.original_filename.split('.').pop()?.toLowerCase() || doc.doc_type; + const fakeFile = new File([], doc.original_filename, { type: getMimeType(ext) }); + setUploadedFile(fakeFile); + toast.success('已加载文档: ' + doc.original_filename); + setLoadingDocument(false); + return; + } else { + console.warn('parseDocument returned success:false, using fallback'); + } + } catch (parseErr) { + console.warn('parseDocument failed, fallback to structured_data:', parseErr); + } + } + + // 后备:使用 structured_data 构建 parseResult + const ext = doc.original_filename.split('.').pop()?.toLowerCase() || doc.doc_type; + const fakeFile = new File([], doc.original_filename, { type: getMimeType(ext) }); + + if (doc.structured_data) { + const mockParseResult: ExcelParseResult = { + success: true, + data: {}, + metadata: { + filename: doc.filename, + original_filename: doc.original_filename, + extension: doc.doc_type, + doc_type: doc.doc_type as any, + file_size: doc.metadata?.file_size || 0, + } + }; + if (doc.structured_data.tables && doc.structured_data.tables.length > 0) { + const firstTable = doc.structured_data.tables[0]; + mockParseResult.data = { + columns: firstTable.headers || [], + rows: (firstTable.rows || []).map((row: string[]) => { + const obj: Record = {}; + (firstTable.headers || []).forEach((h: string, i: number) => { + obj[h] = row[i] || ''; + }); + return obj; + }), + row_count: firstTable.rows?.length || 0, + column_count: firstTable.headers?.length || 0, + }; + } + if (doc.structured_data.sheets) { + mockParseResult.data.sheets = doc.structured_data.sheets; + } + setParseResult(mockParseResult); + } else if (doc.content) { + setParseResult({ + success: true, + data: { content: doc.content }, + metadata: { + filename: doc.filename, + original_filename: doc.original_filename, + extension: doc.doc_type, + doc_type: doc.doc_type as any, + file_size: doc.metadata?.file_size || 0, + } + }); + } + setUploadedFile(fakeFile); + toast.success('已加载文档: ' + doc.original_filename); + } else { + toast.error(result.error || '获取文档详情失败'); + } + } catch (err: any) { + toast.error(err.message || '获取文档详情失败'); + } finally { + setLoadingDocument(false); + } + }; + const filteredDocs = documents.filter(doc => doc.original_filename.toLowerCase().includes(search.toLowerCase()) ); @@ -612,7 +806,7 @@ const Documents: React.FC = () => { case 'doc': return ; default: - return ; + return ; } }; @@ -632,11 +826,17 @@ const Documents: React.FC = () => { setMdAnalysis(null); try { - const result = await aiApi.analyzeMarkdown(uploadedFile, { - analysisType: mdAnalysisType, - userPrompt: mdUserPrompt, - sectionNumber: mdSelectedSection || undefined - }); + // 判断是从历史文档还是本地上传 + const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : undefined; + const result = await aiApi.analyzeMarkdown( + uploadedFile.size > 0 ? uploadedFile : null, + { + docId: docId || undefined, + analysisType: mdAnalysisType, + userPrompt: mdUserPrompt, + sectionNumber: mdSelectedSection || undefined + } + ); if (result.success) { toast.success('Markdown AI 分析完成'); @@ -701,6 +901,71 @@ const Documents: React.FC = () => { } }; + // Word AI 分析处理 + const handleWordAnalyze = async () => { + if (!uploadedFile || !isWordFile(uploadedFile.name)) { + toast.error('请先上传 Word 文件'); + return; + } + + setAnalyzing(true); + setWordAnalysis(null); + + try { + // 判断是从历史文档还是本地上传 + const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null; + const result = await aiApi.analyzeWordWithAI( + uploadedFile.size > 0 ? uploadedFile : null, + docId, + wordUserHint, + wordAnalysisType + ); + + if (result.success) { + toast.success('Word AI 分析完成'); + setWordAnalysis(result); + } else { + toast.error(result.error || 'AI 分析失败'); + } + } catch (error: any) { + toast.error(error.message || 'AI 分析失败'); + } finally { + setAnalyzing(false); + } + }; + + // TXT AI 分析处理 + const handleTxtAnalyze = async () => { + if (!uploadedFile || !isTxtFile(uploadedFile.name)) { + toast.error('请先上传 TXT 文件'); + return; + } + + setAnalyzing(true); + setTxtAnalysis(null); + + try { + // 判断是从历史文档还是本地上传 + const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null; + const result = await aiApi.analyzeTxt( + uploadedFile.size > 0 ? uploadedFile : null, + docId, + txtAnalysisType + ); + + if (result.success) { + toast.success('TXT AI 分析完成'); + setTxtAnalysis(result); + } else { + toast.error(result.error || 'AI 分析失败'); + } + } catch (error: any) { + toast.error(error.message || 'AI 分析失败'); + } finally { + setAnalyzing(false); + } + }; + const getMdAnalysisIcon = (type: string) => { switch (type) { case 'summary': return ; @@ -724,6 +989,18 @@ const Documents: React.FC = () => { return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`; }; + const getMimeType = (ext: string): string => { + const mimeTypes: Record = { + 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'xls': 'application/vnd.ms-excel', + 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'doc': 'application/msword', + 'md': 'text/markdown', + 'txt': 'text/plain', + }; + return mimeTypes[ext] || 'application/octet-stream'; + }; + const getAnalysisIcon = (type: string) => { switch (type) { case 'general': return ; @@ -739,6 +1016,16 @@ const Documents: React.FC = () => { return ext === 'xlsx' || ext === 'xls'; }; + const isWordFile = (filename: string) => { + const ext = filename.split('.').pop()?.toLowerCase(); + return ext === 'docx'; + }; + + const isTxtFile = (filename: string) => { + const ext = filename.split('.').pop()?.toLowerCase(); + return ext === 'txt'; + }; + return (
@@ -1055,7 +1342,7 @@ const Documents: React.FC = () => { Markdown - 文本 + 文本
@@ -1064,6 +1351,38 @@ const Documents: React.FC = () => { )} + {/* 从历史文档中选择 */} + {documents.length > 0 && ( + + + + + 从历史文档选择 + + + + + + + )} + {/* Excel 解析选项 */} {uploadedFile && isExcelFile(uploadedFile.name) && ( @@ -1238,8 +1557,117 @@ const Documents: React.FC = () => { )} + {/* Word AI 分析选项 */} + {uploadedFile && isWordFile(uploadedFile.name) && ( + + + + + Word AI 分析 + + + +
+ + +
+
+ +