feat(ai-analyze): 新增 Markdown 文件 AI 分析功能

- 添加 Markdown 文件上传和解析接口
- 实现流式分析和大纲提取功能
- 支持多种分析类型：摘要、大纲、关键点等
- 新增 markdown_ai_service 服务类
- 扩展 LLMService 支持流式调用
- 更新前端 API 接口定义和实现
2026-04-02 11:53:12 +08:00
parent ddf30078f0
commit d189ea9620
6 changed files with 1286 additions and 118 deletions
--- a/backend/app/api/endpoints/ai_analyze.py
+++ b/backend/app/api/endpoints/ai_analyze.py
@@ -2,10 +2,14 @@
 AI 分析 API 接口
 """
 from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
+from fastapi.responses import StreamingResponse
 from typing import Optional
 import logging
+import tempfile
+import os

 from app.services.excel_ai_service import excel_ai_service
+from app.services.markdown_ai_service import markdown_ai_service

 logger = logging.getLogger(__name__)

@@ -93,10 +97,11 @@ async def get_analysis_types():
    获取支持的分析类型列表

    Returns:
-        list: 支持的分析类型
+        dict: 支持的分析类型（包含 Excel 和 Markdown）
    """
    return {
-        "types": excel_ai_service.get_supported_analysis_types()
+        "excel_types": excel_ai_service.get_supported_analysis_types(),
+        "markdown_types": markdown_ai_service.get_supported_analysis_types()
    }


@@ -142,3 +147,185 @@ async def analyze_text(
    except Exception as e:
        logger.error(f"文本分析失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
+
+
+@router.post("/analyze/md")
+async def analyze_markdown(
+    file: UploadFile = File(...),
+    analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"),
+    user_prompt: str = Query("", description="用户自定义提示词"),
+    section_number: Optional[str] = Query(None, description="指定章节编号，如 '一' 或 '（一）'")
+):
+    """
+    Upload a Markdown file and analyze it with the AI service.
+
+    Args:
+        file: The uploaded Markdown file (.md or .markdown).
+        analysis_type: Which analysis to run; must be one of the values
+            returned by markdown_ai_service.get_supported_analysis_types().
+        user_prompt: Optional custom prompt forwarded to the service.
+        section_number: Optional section identifier forwarded to the service.
+
+    Returns:
+        dict: The result dictionary produced by the analysis service.
+
+    Raises:
+        HTTPException: 400 for a missing filename, an unsupported extension
+            or an unsupported analysis type; 500 when the service reports
+            failure or any unexpected error occurs.
+    """
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="文件名为空")
+
+    # Only text after the last dot counts as an extension. A plain
+    # split('.')[-1] would accept a dot-less file literally named "md".
+    _, dot, suffix = file.filename.rpartition('.')
+    file_ext = suffix.lower() if dot else ''
+    if file_ext not in ('md', 'markdown'):
+        raise HTTPException(
+            status_code=400,
+            detail=f"不支持的文件类型: {file_ext or file.filename}，仅支持 .md 和 .markdown"
+        )
+
+    supported_types = markdown_ai_service.get_supported_analysis_types()
+    if analysis_type not in supported_types:
+        raise HTTPException(
+            status_code=400,
+            detail=f"不支持的分析类型: {analysis_type}，支持的类型: {', '.join(supported_types)}"
+        )
+
+    tmp_path = None
+    try:
+        content = await file.read()
+
+        # The service takes a file path, so the upload is spooled to disk.
+        # Record the path before writing so a failed write is still cleaned up.
+        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
+            tmp_path = tmp.name
+            tmp.write(content)
+
+        logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}")
+
+        result = await markdown_ai_service.analyze_markdown(
+            file_path=tmp_path,
+            analysis_type=analysis_type,
+            user_prompt=user_prompt,
+            section_number=section_number
+        )
+
+        logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}")
+
+        if not result['success']:
+            raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))
+
+        return result
+
+    except HTTPException:
+        # Deliberate HTTP errors pass through untouched.
+        raise
+    except Exception as e:
+        # logger.exception keeps the traceback for diagnosis.
+        logger.exception(f"Markdown AI 分析过程中出错: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}") from e
+    finally:
+        # Remove the temporary file on every exit path.
+        if tmp_path and os.path.exists(tmp_path):
+            os.unlink(tmp_path)
+
+
+@router.post("/analyze/md/stream")
+async def analyze_markdown_stream(
+    file: UploadFile = File(...),
+    analysis_type: str = Query("summary", description="分析类型"),
+    user_prompt: str = Query("", description="用户自定义提示词"),
+    section_number: Optional[str] = Query(None, description="指定章节编号")
+):
+    """
+    Analyze an uploaded Markdown file and stream the result (SSE).
+
+    Args:
+        file: The uploaded Markdown file (.md or .markdown).
+        analysis_type: Which analysis to run; must be one of the values
+            returned by markdown_ai_service.get_supported_analysis_types().
+        user_prompt: Optional custom prompt forwarded to the service.
+        section_number: Optional section identifier forwarded to the service.
+
+    Returns:
+        StreamingResponse: A text/event-stream response that relays the
+        chunks yielded by markdown_ai_service.analyze_markdown_stream().
+
+    Raises:
+        HTTPException: 400 for a missing filename, an unsupported extension
+            or an unsupported analysis type; 500 when the upload cannot be
+            read or spooled to disk.
+    """
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="文件名为空")
+
+    # Only text after the last dot counts as an extension. A plain
+    # split('.')[-1] would accept a dot-less file literally named "md".
+    _, dot, suffix = file.filename.rpartition('.')
+    file_ext = suffix.lower() if dot else ''
+    if file_ext not in ('md', 'markdown'):
+        raise HTTPException(
+            status_code=400,
+            detail=f"不支持的文件类型: {file_ext or file.filename}，仅支持 .md 和 .markdown"
+        )
+
+    # Reject unknown analysis types up front, as the non-streaming endpoint does.
+    supported_types = markdown_ai_service.get_supported_analysis_types()
+    if analysis_type not in supported_types:
+        raise HTTPException(
+            status_code=400,
+            detail=f"不支持的分析类型: {analysis_type}，支持的类型: {', '.join(supported_types)}"
+        )
+
+    tmp_path = None
+    try:
+        content = await file.read()
+
+        # Record the path before writing so a failed write is still cleaned up.
+        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
+            tmp_path = tmp.name
+            tmp.write(content)
+    except Exception as e:
+        if tmp_path and os.path.exists(tmp_path):
+            os.unlink(tmp_path)
+        logger.exception(f"Markdown AI 流式分析出错: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}") from e
+
+    logger.info(f"开始流式分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}")
+
+    async def stream_generator():
+        # The response body is produced after this endpoint function has
+        # returned, so the temporary file must outlive the function. It is
+        # removed here, once streaming finishes, fails, or is closed.
+        try:
+            async for chunk in markdown_ai_service.analyze_markdown_stream(
+                file_path=tmp_path,
+                analysis_type=analysis_type,
+                user_prompt=user_prompt,
+                section_number=section_number
+            ):
+                yield chunk
+        except Exception as e:
+            # Headers are already sent at this point; log and let the
+            # stream terminate.
+            logger.exception(f"Markdown AI 流式分析出错: {str(e)}")
+            raise
+        finally:
+            if os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+
+    return StreamingResponse(
+        stream_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no"
+        }
+    )
+
+
+# NOTE(review): this route is registered as GET but requires a multipart
+# file body; many HTTP clients cannot send a body with GET. Confirm with the
+# frontend whether this should become a POST route.
+@router.get("/analyze/md/outline")
+async def get_markdown_outline(
+    file: UploadFile = File(...)
+):
+    """
+    Return the outline structure of an uploaded Markdown document.
+
+    Args:
+        file: The uploaded Markdown file (.md or .markdown).
+
+    Returns:
+        dict: Whatever markdown_ai_service.extract_outline() returns for
+        the uploaded file.
+
+    Raises:
+        HTTPException: 400 for a missing filename or an unsupported
+            extension; 500 when reading the upload or extracting the
+            outline fails.
+    """
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="文件名为空")
+
+    # Only text after the last dot counts as an extension. A plain
+    # split('.')[-1] would accept a dot-less file literally named "md".
+    _, dot, suffix = file.filename.rpartition('.')
+    file_ext = suffix.lower() if dot else ''
+    if file_ext not in ('md', 'markdown'):
+        raise HTTPException(
+            status_code=400,
+            detail=f"不支持的文件类型: {file_ext or file.filename}，仅支持 .md 和 .markdown"
+        )
+
+    tmp_path = None
+    try:
+        content = await file.read()
+
+        # Record the path before writing so a failed write is still cleaned up.
+        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
+            tmp_path = tmp.name
+            tmp.write(content)
+
+        return await markdown_ai_service.extract_outline(tmp_path)
+
+    except HTTPException:
+        # Keep deliberate HTTP errors intact, as the sibling endpoints do.
+        raise
+    except Exception as e:
+        logger.exception(f"获取 Markdown 大纲失败: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}") from e
+    finally:
+        # Remove the temporary file on every exit path.
+        if tmp_path and os.path.exists(tmp_path):
+            os.unlink(tmp_path)