"""
AI analysis API endpoints.

Exposes routes under ``/ai`` that accept Excel, Markdown, TXT and Word
documents (uploaded directly or, for some routes, loaded from the database
by document ID) and forward them to the matching AI analysis service.
"""
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
from fastapi.responses import StreamingResponse
from typing import Optional
import logging
import tempfile
import os

from app.services.excel_ai_service import excel_ai_service
from app.services.markdown_ai_service import markdown_ai_service
from app.services.template_fill_service import template_fill_service
from app.services.word_ai_service import word_ai_service
from app.services.txt_ai_service import txt_ai_service

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/ai", tags=["AI 分析"])


def _file_extension(filename: str) -> str:
    """Return the lower-cased text after the last dot in *filename*.

    A name that contains no dot is returned whole (lower-cased), so it simply
    fails the callers' allow-list checks.
    """
    return filename.rsplit(".", 1)[-1].lower()


def _write_temp_file(data: bytes, suffix: str) -> str:
    """Write *data* to a new named temporary file and return its path.

    The file is created with ``delete=False``, so the caller owns it and must
    remove it (see ``_remove_temp_file``). If the write itself fails, the
    partially written file is removed before the error propagates.
    """
    tmp = tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False)
    try:
        with tmp:
            tmp.write(data)
    except Exception:
        _remove_temp_file(tmp.name)
        raise
    return tmp.name


def _remove_temp_file(path: Optional[str]) -> None:
    """Best-effort removal of a temporary file.

    Does nothing when *path* is empty or the file is already gone. A failure
    to delete is logged as a warning and never raised, so cleanup cannot mask
    the real result or error of a request.
    """
    if not path:
        return
    try:
        if os.path.exists(path):
            os.unlink(path)
    except Exception as cleanup_error:
        logger.warning(f"临时文件清理失败: {path}, error: {cleanup_error}")


def _word_result_payload(result: dict, filename: str, analysis_type: str) -> dict:
    """Wrap a Word AI service result in the response envelope of this API."""
    if result.get("success"):
        return {
            "success": True,
            "filename": filename,
            "analysis_type": analysis_type,
            "result": result
        }
    return {
        "success": False,
        "filename": filename,
        "error": result.get("error", "AI 解析失败"),
        "result": None
    }


@router.post("/analyze/excel")
async def analyze_excel(
    file: UploadFile = File(...),
    user_prompt: str = Query("", description="用户自定义提示词"),
    analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
    parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
):
    """
    Upload an Excel file and analyze it with AI.

    Args:
        file: The uploaded Excel file (.xlsx or .xls)
        user_prompt: Custom prompt supplied by the user
        analysis_type: One of general, summary, statistics, insights
        parse_all_sheets: Analyze every worksheet instead of a single one

    Returns:
        dict: The result produced by the Excel AI service

    Raises:
        HTTPException: 400 for a missing file name, an unsupported file type
            or an unsupported analysis type; 500 when the analysis fails
    """
    # Validate the file type
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = _file_extension(file.filename)
    if file_ext not in ['xlsx', 'xls']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .xlsx 和 .xls"
        )

    # Validate the analysis type
    supported_types = ['general', 'summary', 'statistics', 'insights']
    if analysis_type not in supported_types:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
        )

    try:
        # Read the uploaded content
        content = await file.read()

        logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}")

        # Call the AI analysis service
        if parse_all_sheets:
            result = await excel_ai_service.batch_analyze_sheets(
                content,
                file.filename,
                user_prompt=user_prompt,
                analysis_type=analysis_type
            )
        else:
            # Parse options for the single-sheet path
            parse_options = {"header_row": 0}

            result = await excel_ai_service.analyze_excel_file(
                content,
                file.filename,
                user_prompt=user_prompt,
                analysis_type=analysis_type,
                parse_options=parse_options
            )

        # .get(): a missing key in a log line must not fail a finished analysis
        logger.info(f"文件分析完成: {file.filename}, 成功: {result.get('success')}")

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")


@router.get("/analysis/types")
async def get_analysis_types():
    """
    List the supported analysis types.

    Returns:
        dict: ``excel_types`` and ``markdown_types`` as reported by the
            Excel and Markdown AI services
    """
    return {
        "excel_types": excel_ai_service.get_supported_analysis_types(),
        "markdown_types": markdown_ai_service.get_supported_analysis_types()
    }


@router.post("/analyze/text")
async def analyze_text(
    excel_data: dict = Body(..., description="Excel 解析后的数据"),
    user_prompt: str = Body("", description="用户提示词"),
    analysis_type: str = Body("general", description="分析类型")
):
    """
    Run AI analysis on Excel data that has already been parsed.

    A non-blank ``user_prompt`` selects the template-based analysis;
    otherwise the standard analysis for ``analysis_type`` is used.

    Args:
        excel_data: Parsed Excel data
        user_prompt: Prompt supplied by the user
        analysis_type: Analysis type

    Returns:
        dict: The result produced by the LLM service

    Raises:
        HTTPException: 500 when the analysis fails
    """
    try:
        logger.info(f"开始文本分析, 分析类型: {analysis_type}")

        # Call the LLM service (imported at call time, not at module import)
        from app.services.llm_service import llm_service

        if user_prompt and user_prompt.strip():
            result = await llm_service.analyze_with_template(
                excel_data,
                user_prompt
            )
        else:
            result = await llm_service.analyze_excel_data(
                excel_data,
                user_prompt,
                analysis_type
            )

        # .get(): a missing key in a log line must not fail a finished analysis
        logger.info(f"文本分析完成, 成功: {result.get('success')}")

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"文本分析失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")


@router.post("/analyze/md")
async def analyze_markdown(
    file: Optional[UploadFile] = File(None),
    doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
    analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
):
    """
    Analyze a Markdown document with AI.

    The document comes either from an upload or from the database; when
    ``doc_id`` is given it takes precedence over ``file``. The content is
    written to a temporary file for the service and removed afterwards.

    Args:
        file: The uploaded Markdown file (alternative to doc_id)
        doc_id: Document ID to load from the database
        analysis_type: Analysis type
        user_prompt: Custom prompt supplied by the user
        section_number: Section number to restrict the analysis to

    Returns:
        dict: The analysis result

    Raises:
        HTTPException: 400 for invalid input, 404 when the document does not
            exist, 500 when reading or analyzing fails
    """
    filename = None
    tmp_path = None

    # Validate the analysis type
    supported_types = markdown_ai_service.get_supported_analysis_types()
    if analysis_type not in supported_types:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
        )

    if doc_id:
        # Database mode
        try:
            from app.core.database.mongodb import mongodb
            doc = await mongodb.get_document(doc_id)
            if not doc:
                raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")

            filename = doc.get("metadata", {}).get("original_filename", "unknown.md")
            file_ext = _file_extension(filename)

            if file_ext not in ['md', 'markdown']:
                raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")

            content = doc.get("content", "")
            if not content:
                raise HTTPException(status_code=400, detail="文档内容为空")

            # The service works on a file path, so persist to a temp file
            tmp_path = _write_temp_file(content.encode('utf-8'), '.md')

            logger.info(f"从数据库加载 Markdown 文档: {filename}, 长度: {len(content)}")

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"从数据库读取 Markdown 文档失败: {str(e)}")
            raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
    else:
        # Upload mode
        if not file:
            raise HTTPException(status_code=400, detail="请提供文件或文档ID")

        if not file.filename:
            raise HTTPException(status_code=400, detail="文件名为空")

        file_ext = _file_extension(file.filename)
        if file_ext not in ['md', 'markdown']:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
            )

        try:
            # Read the upload and persist it to a temp file
            content = await file.read()
            tmp_path = _write_temp_file(content, '.md')
            filename = file.filename

        except Exception as e:
            logger.error(f"读取 Markdown 文件失败: {str(e)}")
            raise HTTPException(status_code=500, detail=f"读取文件失败: {str(e)}")

    try:
        logger.info(f"开始分析 Markdown 文件: {filename}, 分析类型: {analysis_type}, 章节: {section_number}")

        # Call the AI analysis service
        result = await markdown_ai_service.analyze_markdown(
            file_path=tmp_path,
            analysis_type=analysis_type,
            user_prompt=user_prompt,
            section_number=section_number
        )

        succeeded = result.get('success')
        logger.info(f"Markdown 分析完成: {filename}, 成功: {succeeded}")

        if not succeeded:
            raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
    finally:
        # Remove the temp file on every exit path
        _remove_temp_file(tmp_path)


@router.post("/analyze/md/stream")
async def analyze_markdown_stream(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号")
):
    """
    Analyze an uploaded Markdown file and stream the result (SSE).

    Args:
        file: The uploaded Markdown file
        analysis_type: Analysis type
        user_prompt: Custom prompt supplied by the user
        section_number: Section number to restrict the analysis to

    Returns:
        StreamingResponse: ``text/event-stream`` response

    Raises:
        HTTPException: 400 for a missing file name or unsupported file type;
            500 when preparing the stream fails
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = _file_extension(file.filename)
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    tmp_path = None
    try:
        content = await file.read()
        tmp_path = _write_temp_file(content, '.md')

        logger.info(f"开始流式分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}")

        stream_path = tmp_path

        async def stream_generator():
            # The response body is produced only after this handler has
            # returned, so the temp file must outlive the handler. It is
            # removed here, once streaming ends (normally, on error, or when
            # the generator is closed).
            try:
                async for chunk in markdown_ai_service.analyze_markdown_stream(
                    file_path=stream_path,
                    analysis_type=analysis_type,
                    user_prompt=user_prompt,
                    section_number=section_number
                ):
                    yield chunk
            finally:
                _remove_temp_file(stream_path)

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"
            }
        )

    except HTTPException:
        # No response was created, so the generator will never clean up
        _remove_temp_file(tmp_path)
        raise
    except Exception as e:
        _remove_temp_file(tmp_path)
        logger.error(f"Markdown AI 流式分析出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}")


@router.post("/analyze/md/outline")
async def get_markdown_outline(
    file: UploadFile = File(...)
):
    """
    Extract the outline (section structure) of a Markdown document.

    Args:
        file: The uploaded Markdown file

    Returns:
        dict: The outline produced by the Markdown AI service

    Raises:
        HTTPException: 400 for a missing file name or unsupported file type;
            500 when the outline cannot be extracted
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = _file_extension(file.filename)
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    tmp_path = None
    try:
        content = await file.read()
        tmp_path = _write_temp_file(content, '.md')

        return await markdown_ai_service.extract_outline(tmp_path)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"获取 Markdown 大纲失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")
    finally:
        # Remove the temp file on every exit path
        _remove_temp_file(tmp_path)


@router.post("/analyze/txt")
async def analyze_txt(
    file: Optional[UploadFile] = File(None),
    doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
    analysis_type: str = Query("structured", description="分析类型: structured, charts")
):
    """
    Analyze a TXT document with AI to extract structured data or charts.

    Turns unstructured text into structured table data for later form
    filling; with ``analysis_type=charts`` chart data is generated instead.
    When ``doc_id`` is given it takes precedence over ``file``.

    Args:
        file: The uploaded TXT file (alternative to doc_id)
        doc_id: Document ID to load from the database
        analysis_type: "structured" (default) or "charts"

    Returns:
        dict: Response envelope with ``success``, ``filename`` and ``result``

    Raises:
        HTTPException: 400 for invalid input, 404 when the document does not
            exist, 500 when reading or analyzing fails
    """
    filename = None
    text_content = None

    if doc_id:
        # Database mode
        try:
            from app.core.database.mongodb import mongodb
            doc = await mongodb.get_document(doc_id)
            if not doc:
                raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")

            filename = doc.get("metadata", {}).get("original_filename", "unknown.txt")
            file_ext = _file_extension(filename)

            if file_ext not in ['txt', 'text']:
                raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")

            # Use the content stored in the database
            text_content = doc.get("content", "")
            if not text_content:
                raise HTTPException(status_code=400, detail="文档内容为空")

            logger.info(f"从数据库加载 TXT 文档: {filename}, 长度: {len(text_content)}")

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"从数据库读取 TXT 文档失败: {str(e)}")
            raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
    else:
        # Upload mode
        if not file:
            raise HTTPException(status_code=400, detail="请提供文件或文档ID")

        if not file.filename:
            raise HTTPException(status_code=400, detail="文件名为空")

        file_ext = _file_extension(file.filename)
        if file_ext not in ['txt', 'text']:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
            )

        # Read the upload; undecodable bytes are replaced rather than rejected
        content = await file.read()
        text_content = content.decode('utf-8', errors='replace')
        filename = file.filename

    try:
        logger.info(f"开始 AI 分析 TXT 文件: {filename}, analysis_type={analysis_type}")

        # Delegate to the TXT AI service
        result = await txt_ai_service.analyze_txt_with_ai(
            content=text_content,
            filename=filename,
            analysis_type=analysis_type
        )

        if result:
            logger.info(f"TXT AI 分析成功: {filename}")
            return {
                "success": result.get("success", True),
                "filename": filename,
                "analysis_type": analysis_type,
                "result": result
            }

        logger.warning(f"TXT AI 分析返回空结果: {filename}")
        return {
            "success": False,
            "filename": filename,
            "error": "AI 分析未能提取到结构化数据",
            "result": None
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"TXT AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")


# ==================== Word document AI parsing ====================

@router.post("/analyze/word")
async def analyze_word(
    file: Optional[UploadFile] = File(None),
    doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
    user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"),
    analysis_type: str = Query("structured", description="分析类型: structured, charts")
):
    """
    Parse a Word document with AI to extract structured data or charts.

    Intended for pulling tables, key-value pairs and similar data out of
    unstructured Word documents; with ``analysis_type=charts`` chart data is
    generated instead. When ``doc_id`` is given it takes precedence over
    ``file``.

    Args:
        file: The uploaded Word file (alternative to doc_id)
        doc_id: Document ID to load from the database
        user_hint: Hint supplied by the user
        analysis_type: "structured" (default) or "charts"

    Returns:
        dict: Response envelope with ``success``, ``filename`` and either
            ``result`` or ``error``

    Raises:
        HTTPException: 400 for invalid input, 404 when the document does not
            exist, 500 when reading or analyzing fails
    """
    # File name and extension of the document being analyzed
    filename = None
    file_ext = None

    if doc_id:
        # Database mode
        try:
            from app.core.database.mongodb import mongodb
            doc = await mongodb.get_document(doc_id)
            if not doc:
                raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")

            filename = doc.get("metadata", {}).get("original_filename", "unknown.docx")
            file_ext = _file_extension(filename)

            if file_ext not in ['docx']:
                raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")

            # Analyze the content and tables stored in the database
            content = doc.get("content", "")
            tables = doc.get("structured_data", {}).get("tables", [])

            if analysis_type == "charts":
                result = await word_ai_service.generate_charts_from_db(
                    content=content,
                    tables=tables,
                    filename=filename,
                    user_hint=user_hint
                )
            else:
                result = await word_ai_service.parse_word_with_ai_from_db(
                    content=content,
                    tables=tables,
                    filename=filename,
                    user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
                )

            return _word_result_payload(result, filename, analysis_type)

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"从数据库读取 Word 文档失败: {str(e)}")
            raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")

    # Upload mode
    if not file:
        raise HTTPException(status_code=400, detail="请提供文件或文档ID")

    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = _file_extension(file.filename)
    if file_ext not in ['docx']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .docx"
        )

    tmp_path = None
    try:
        # Persist the upload; the service works on a file path
        content = await file.read()
        tmp_path = _write_temp_file(content, f".{file_ext}")

        # Pick the processing mode from analysis_type
        if analysis_type == "charts":
            # Generate charts
            result = await word_ai_service.generate_charts(
                file_path=tmp_path,
                user_hint=user_hint
            )
        else:
            # Extract structured data
            result = await word_ai_service.parse_word_with_ai(
                file_path=tmp_path,
                user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
            )

        return _word_result_payload(result, file.filename, analysis_type)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Word AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
    finally:
        # Remove the temp file on every exit path
        _remove_temp_file(tmp_path)