feat(ai-analyze): 新增 Markdown 文件 AI 分析功能

- 添加 Markdown 文件上传和解析接口
- 实现流式分析和大纲提取功能
- 支持多种分析类型:摘要、大纲、关键点等
- 新增 markdown_ai_service 服务类
- 扩展 LLMService 支持流式调用
- 更新前端 API 接口定义和实现
This commit is contained in:
2026-04-02 11:53:12 +08:00
parent ddf30078f0
commit d189ea9620
6 changed files with 1286 additions and 118 deletions

View File

@@ -2,10 +2,14 @@
AI 分析 API 接口 AI 分析 API 接口
""" """
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
from fastapi.responses import StreamingResponse
from typing import Optional from typing import Optional
import logging import logging
import tempfile
import os
from app.services.excel_ai_service import excel_ai_service from app.services.excel_ai_service import excel_ai_service
from app.services.markdown_ai_service import markdown_ai_service
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -93,10 +97,11 @@ async def get_analysis_types():
获取支持的分析类型列表 获取支持的分析类型列表
Returns: Returns:
list: 支持的分析类型 dict: 支持的分析类型(包含 Excel 和 Markdown)
""" """
return { return {
"types": excel_ai_service.get_supported_analysis_types() "excel_types": excel_ai_service.get_supported_analysis_types(),
"markdown_types": markdown_ai_service.get_supported_analysis_types()
} }
@@ -142,3 +147,185 @@ async def analyze_text(
except Exception as e: except Exception as e:
logger.error(f"文本分析失败: {str(e)}") logger.error(f"文本分析失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}") raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
@router.post("/analyze/md")
async def analyze_markdown(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号,如 '''(一)'")
):
    """
    Upload a Markdown file and run an AI analysis on it.

    Args:
        file: Uploaded Markdown file (.md or .markdown only).
        analysis_type: One of the types reported by
            markdown_ai_service.get_supported_analysis_types().
        user_prompt: Optional extra instructions appended to the prompt.
        section_number: Optional section number (e.g. "一" / "(一)") that
            restricts the analysis to a single section.

    Returns:
        dict: Analysis result produced by markdown_ai_service.

    Raises:
        HTTPException: 400 for an empty filename, unsupported extension or
            unknown analysis type; 500 when the analysis itself fails.
    """
    # Validate the file extension before doing any work.
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")
    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )
    # Reject unknown analysis types early with an explicit message.
    supported_types = markdown_ai_service.get_supported_analysis_types()
    if analysis_type not in supported_types:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
        )
    try:
        # Read the upload fully, then spill it to a temp file because the
        # analysis service works on file paths.
        content = await file.read()
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp.write(content)
            tmp_path = tmp.name
        try:
            logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}")
            # The service is awaited to completion here, so the temp file may
            # safely be removed in the finally block below.
            result = await markdown_ai_service.analyze_markdown(
                file_path=tmp_path,
                analysis_type=analysis_type,
                user_prompt=user_prompt,
                section_number=section_number
            )
            logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}")
            if not result['success']:
                raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))
            return result
        finally:
            # Always remove the temp file, even on failure.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    except HTTPException:
        # Re-raise FastAPI errors unchanged so the status codes survive.
        raise
    except Exception as e:
        logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
@router.post("/analyze/md/stream")
async def analyze_markdown_stream(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号")
):
    """
    Stream the AI analysis of an uploaded Markdown file as Server-Sent Events.

    Args:
        file: Uploaded Markdown file (.md or .markdown only).
        analysis_type: One of the service's supported analysis types.
        user_prompt: Optional extra instructions appended to the prompt.
        section_number: Optional section number restricting the analysis.

    Returns:
        StreamingResponse: text/event-stream relaying the service's SSE chunks.

    Raises:
        HTTPException: 400 for a bad filename/extension; 500 on setup failure.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")
    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )
    try:
        content = await file.read()
        # The analysis service reads from disk, so persist the upload first.
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp.write(content)
            tmp_path = tmp.name

        logger.info(f"开始流式分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}")

        async def stream_generator():
            # BUGFIX: cleanup must happen here, not in a finally around the
            # `return StreamingResponse(...)` — the service generator opens
            # file_path lazily on first iteration, which only happens AFTER
            # this endpoint has returned. The old code deleted the temp file
            # before the stream ever read it.
            try:
                async for chunk in markdown_ai_service.analyze_markdown_stream(
                    file_path=tmp_path,
                    analysis_type=analysis_type,
                    user_prompt=user_prompt,
                    section_number=section_number
                ):
                    yield chunk
            finally:
                # Runs when streaming completes or the client disconnects.
                if os.path.exists(tmp_path):
                    os.unlink(tmp_path)

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",   # never cache a live stream
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"      # disable nginx proxy buffering
            }
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Markdown AI 流式分析出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}")
@router.post("/analyze/md/outline")
async def get_markdown_outline(
    file: UploadFile = File(...)
):
    """
    Return the outline (section structure) of an uploaded Markdown file.

    BUGFIX: this route was registered as GET, but a GET request cannot carry
    a multipart file body (browsers' fetch() rejects GET+body outright), so
    the endpoint was unreachable from the frontend. Registered as POST.

    Args:
        file: Uploaded Markdown file (.md or .markdown only).

    Returns:
        dict: {"success": bool, "outline": [...]} from the service.

    Raises:
        HTTPException: 400 for a bad filename/extension; 500 on failure.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")
    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )
    try:
        content = await file.read()
        # The parser consumes a path, so persist the upload to a temp file.
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp.write(content)
            tmp_path = tmp.name
        try:
            result = await markdown_ai_service.extract_outline(tmp_path)
            return result
        finally:
            # Always clean up the temp file.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    except Exception as e:
        logger.error(f"获取 Markdown 大纲失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")

View File

@@ -2,7 +2,7 @@
LLM 服务模块 - 封装大模型 API 调用 LLM 服务模块 - 封装大模型 API 调用
""" """
import logging import logging
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional, AsyncGenerator
import httpx import httpx
from app.config import settings from app.config import settings
@@ -87,6 +87,71 @@ class LLMService:
logger.error(f"解析 API 响应失败: {str(e)}") logger.error(f"解析 API 响应失败: {str(e)}")
raise raise
async def chat_stream(
self,
messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: Optional[int] = None,
**kwargs
) -> AsyncGenerator[Dict[str, Any], None]:
"""
流式调用聊天 API
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大 token 数
**kwargs: 其他参数
Yields:
Dict[str, Any]: 包含 delta 内容的块
"""
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
payload = {
"model": self.model_name,
"messages": messages,
"temperature": temperature,
"stream": True
}
if max_tokens:
payload["max_tokens"] = max_tokens
payload.update(kwargs)
try:
async with httpx.AsyncClient(timeout=120.0) as client:
async with client.stream(
"POST",
f"{self.base_url}/chat/completions",
headers=headers,
json=payload
) as response:
async for line in response.aiter_lines():
if line.startswith("data: "):
data = line[6:] # Remove "data: " prefix
if data == "[DONE]":
break
try:
import json as json_module
chunk = json_module.loads(data)
delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
if delta:
yield {"content": delta}
except json_module.JSONDecodeError:
continue
except httpx.HTTPStatusError as e:
logger.error(f"LLM 流式 API 请求失败: {e.response.status_code}")
raise
except Exception as e:
logger.error(f"LLM 流式 API 调用异常: {str(e)}")
raise
async def analyze_excel_data( async def analyze_excel_data(
self, self,
excel_data: Dict[str, Any], excel_data: Dict[str, Any],

View File

@@ -0,0 +1,591 @@
"""
Markdown 文档 AI 分析服务
支持:
- 分章节解析(中文章节编号:一、二、三, (一)(二)(三))
- 结构化数据提取
- 流式输出
- 多种分析类型
"""
import asyncio
import json
import logging
import re
from typing import Any, AsyncGenerator, Dict, List, Optional
from app.services.llm_service import llm_service
from app.core.document_parser import MarkdownParser
logger = logging.getLogger(__name__)
class MarkdownSection:
    """A single section of a Markdown document; may hold nested subsections."""

    def __init__(self, number: str, title: str, level: int, content: str, line_start: int, line_end: int):
        # Section number, e.g. "一", "(一)", "1".
        self.number = number
        self.title = title
        # Nesting depth: 1 = top level.
        self.level = level
        # Body text of this section, excluding nested subsections.
        self.content = content
        self.line_start = line_start
        self.line_end = line_end
        self.subsections: List[MarkdownSection] = []

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this section (recursively) into a plain dict."""
        # Cap the preview at 200 characters with an ellipsis marker.
        preview = self.content
        if len(preview) > 200:
            preview = preview[:200] + "..."
        return {
            "number": self.number,
            "title": self.title,
            "level": self.level,
            "content_preview": preview,
            "line_start": self.line_start,
            "line_end": self.line_end,
            "subsections": [child.to_dict() for child in self.subsections],
        }
class MarkdownAIService:
    """Markdown document AI analysis service."""

    # BUGFIX (best-effort restoration): these literals arrived as empty
    # strings — the Chinese characters were lost in a copy/encoding step.
    # Restored from the surviving character class [一二三四五六七八九十]
    # used by the patterns below. TODO confirm against the original file.
    CHINESE_NUMBERS = ["一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
    CHINESE_SUFFIX = "、"
    # Level-2 headings: "(一) 标题" / "（一）标题". The parentheses were also
    # lost in transit; without them the pattern could never match a
    # parenthesized heading. Accept ASCII and full-width parentheses.
    PARENTHESIS_PATTERN = re.compile(r'^[(（]([一二三四五六七八九十]+)[)）]\s*(.+)$')
    # Level-1 headings: "一、标题"
    CHINESE_SECTION_PATTERN = re.compile(r'^([一二三四五六七八九十]+)、\s*(.+)$')
    # Level-3 headings: "1. 标题"
    ARABIC_SECTION_PATTERN = re.compile(r'^(\d+)\.\s+(.+)$')

    def __init__(self):
        # Reusable project-local Markdown parser instance.
        self.parser = MarkdownParser()
    def get_supported_analysis_types(self) -> list:
        """Return the analysis-type identifiers accepted by this service."""
        return [
            "summary",     # document summary
            "outline",     # outline extraction
            "key_points",  # key point extraction
            "questions",   # generate comprehension questions
            "tags",        # generate topic tags
            "qa",          # question/answer pairs
            "statistics",  # statistical-data analysis (suits government bulletins)
            "section"      # detailed per-section analysis
        ]
    def extract_sections(self, content: str, titles: List[Dict]) -> List[MarkdownSection]:
        """
        Extract the hierarchical section structure from document text.

        Recognized heading formats:
            - level 1: 一、二、三...
            - level 2: (一)(二)(三)...
            - level 3: 1. 2. 3. ...

        Args:
            content: Full Markdown document text.
            titles: Title entries from the parser; each is expected to carry
                a 'line' key. NOTE(review): only used to build title_lines,
                which is never read afterwards — effectively unused.

        Returns:
            List[MarkdownSection]: top-level sections with nested subsections.
        """
        sections = []
        lines = content.split('\n')
        # Map of title line number -> title metadata.
        # NOTE(review): built but never consumed below — confirm before removing.
        title_lines = {}
        for t in titles:
            title_lines[t.get('line', 0)] = t
        current_section = None
        section_stack = []  # currently-open sections, outermost first
        for i, line in enumerate(lines, 1):  # i is a 1-based line number
            stripped = line.strip()
            # Level-1 heading: Chinese numeral followed by 、
            match = self.CHINESE_SECTION_PATTERN.match(stripped)
            if match:
                # Close the previous top-level section at the preceding line.
                if current_section:
                    current_section.content = self._get_section_content(
                        lines, current_section.line_start, i - 1
                    )
                current_section = MarkdownSection(
                    number=match.group(1),
                    title=match.group(2),
                    level=1,
                    content="",
                    line_start=i,
                    line_end=len(lines)  # provisional; never narrowed later
                )
                sections.append(current_section)
                section_stack = [current_section]
                continue
            # Level-2 heading: (一)(二)... — only recognized inside a level-1 section.
            match = self.PARENTHESIS_PATTERN.match(stripped)
            if match and current_section:
                # Close the currently open subsection, if any.
                # NOTE(review): section_stack[-1] may be a level-3 entry here,
                # so its content is finalized at this boundary — verify intended.
                if section_stack and len(section_stack) > 1:
                    parent = section_stack[-1]
                    parent.content = self._get_section_content(
                        lines, parent.line_start, i - 1
                    )
                subsection = MarkdownSection(
                    number=match.group(1),
                    title=match.group(2),
                    level=2,
                    content="",
                    line_start=i,
                    line_end=len(lines)
                )
                current_section.subsections.append(subsection)
                section_stack = [current_section, subsection]
                continue
            # Level-3 heading: "1. ..." — only when some subsection is open.
            match = self.ARABIC_SECTION_PATTERN.match(stripped)
            if match and len(section_stack) > 1:
                # NOTE(review): the stack never grows beyond two entries in
                # this method, so this close branch appears unreachable.
                if len(section_stack) > 2:
                    parent = section_stack[-1]
                    parent.content = self._get_section_content(
                        lines, parent.line_start, i - 1
                    )
                sub_subsection = MarkdownSection(
                    number=match.group(1),
                    title=match.group(2),
                    level=3,
                    content="",
                    line_start=i,
                    line_end=len(lines)
                )
                # NOTE(review): appending to section_stack[-1] and then
                # replacing the top means a second "N." item nests under the
                # previous level-3 item rather than as its sibling — verify.
                section_stack[-1].subsections.append(sub_subsection)
                section_stack = section_stack[:-1] + [sub_subsection]
                continue
        # Finalize the last open top-level section through end of document.
        if current_section:
            current_section.content = self._get_section_content(
                lines, current_section.line_start, len(lines)
            )
        return sections
def _get_section_content(self, lines: List[str], start: int, end: int) -> str:
"""获取指定行范围的内容"""
if start > end:
return ""
content_lines = lines[start-1:end]
# 清理:移除标题行和空行
cleaned = []
for line in content_lines:
stripped = line.strip()
if not stripped:
continue
# 跳过章节标题行
if self.CHINESE_SECTION_PATTERN.match(stripped):
continue
if self.PARENTHESIS_PATTERN.match(stripped):
continue
if self.ARABIC_SECTION_PATTERN.match(stripped):
continue
cleaned.append(stripped)
return '\n'.join(cleaned)
    async def analyze_markdown(
        self,
        file_path: str,
        analysis_type: str = "summary",
        user_prompt: str = "",
        section_number: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Analyze a Markdown document with the LLM (non-streaming).

        Args:
            file_path: Path of the Markdown file on disk.
            analysis_type: One of get_supported_analysis_types().
            user_prompt: Optional extra instructions appended to the prompt.
            section_number: Optional section number (e.g. "一" / "(一)") that
                restricts the analysis to a single section.

        Returns:
            dict: On success — filename, analysis type, structure counters,
            up to 10 top-level sections and the LLM's analysis text; on
            failure — {"success": False, "error": ...}.
        """
        try:
            parse_result = self.parser.parse(file_path)
            if not parse_result.success:
                return {
                    "success": False,
                    "error": parse_result.error
                }
            data = parse_result.data
            # Build the section tree from the raw text.
            sections = self.extract_sections(data.get("content", ""), data.get("titles", []))
            # Default target: the whole document.
            target_content = data.get("content", "")
            target_title = parse_result.metadata.get("filename", "")
            if section_number:
                # Narrow the analysis to the requested section, if found.
                section = self._find_section(sections, section_number)
                if section:
                    target_content = section.content
                    target_title = f"{section.number}{section.title}"
                else:
                    return {
                        "success": False,
                        "error": f"未找到章节: {section_number}"
                    }
            # Build the user prompt for the chosen analysis type.
            prompt = self._build_prompt(
                content=target_content,
                analysis_type=analysis_type,
                user_prompt=user_prompt,
                title=target_title
            )
            messages = [
                {"role": "system", "content": self._get_system_prompt(analysis_type)},
                {"role": "user", "content": prompt}
            ]
            # Low temperature for more deterministic, factual output.
            response = await llm_service.chat(
                messages=messages,
                temperature=0.3,
                max_tokens=4000
            )
            analysis = llm_service.extract_message_content(response)
            return {
                "success": True,
                "filename": parse_result.metadata.get("filename", ""),
                "analysis_type": analysis_type,
                "section": target_title if section_number else None,
                "word_count": len(target_content),
                "structure": {
                    "title_count": parse_result.metadata.get("title_count", 0),
                    "code_block_count": parse_result.metadata.get("code_block_count", 0),
                    "table_count": parse_result.metadata.get("table_count", 0),
                    "section_count": len(sections)
                },
                "sections": [s.to_dict() for s in sections[:10]],  # cap at 10 top-level sections
                "analysis": analysis
            }
        except Exception as e:
            logger.error(f"Markdown AI 分析失败: {str(e)}")
            return {
                "success": False,
                "error": str(e)
            }
async def analyze_markdown_stream(
self,
file_path: str,
analysis_type: str = "summary",
user_prompt: str = "",
section_number: Optional[str] = None
) -> AsyncGenerator[str, None]:
"""
流式分析 Markdown 文档 (SSE)
Yields:
str: SSE 格式的数据块
"""
try:
parse_result = self.parser.parse(file_path)
if not parse_result.success:
yield f"data: {json.dumps({'error': parse_result.error}, ensure_ascii=False)}\n\n"
return
data = parse_result.data
sections = self.extract_sections(data.get("content", ""), data.get("titles", []))
target_content = data.get("content", "")
target_title = parse_result.metadata.get("filename", "")
if section_number:
section = self._find_section(sections, section_number)
if section:
target_content = section.content
target_title = f"{section.number}{section.title}"
else:
yield f"data: {json.dumps({'error': f'未找到章节: {section_number}'}, ensure_ascii=False)}\n\n"
return
prompt = self._build_prompt(
content=target_content,
analysis_type=analysis_type,
user_prompt=user_prompt,
title=target_title
)
messages = [
{"role": "system", "content": self._get_system_prompt(analysis_type)},
{"role": "user", "content": prompt}
]
# 发送初始元数据
yield f"data: {json.dumps({
'type': 'start',
'filename': parse_result.metadata.get("filename", ""),
'analysis_type': analysis_type,
'section': target_title if section_number else None,
'word_count': len(target_content)
}, ensure_ascii=False)}\n\n"
# 流式调用 LLM
full_response = ""
async for chunk in llm_service.chat_stream(messages, temperature=0.3, max_tokens=4000):
content = chunk.get("content", "")
if content:
full_response += content
yield f"data: {json.dumps({'type': 'content', 'delta': content}, ensure_ascii=False)}\n\n"
# 发送完成消息
yield f"data: {json.dumps({'type': 'done', 'full_response': full_response}, ensure_ascii=False)}\n\n"
except Exception as e:
logger.error(f"Markdown AI 流式分析失败: {str(e)}")
yield f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"
def _find_section(self, sections: List[MarkdownSection], number: str) -> Optional[MarkdownSection]:
"""查找指定编号的章节"""
# 标准化编号
num = number.strip()
for section in sections:
if section.number == num or section.title == num:
return section
# 在子章节中查找
found = self._find_section(section.subsections, number)
if found:
return found
return None
def _get_system_prompt(self, analysis_type: str) -> str:
"""根据分析类型获取系统提示词"""
prompts = {
"summary": "你是一个专业的文档摘要助手,擅长从长文档中提取核心信息。",
"outline": "你是一个专业的文档结构分析助手,擅长提取文档大纲和层级结构。",
"key_points": "你是一个专业的知识提取助手,擅长从文档中提取关键信息和要点。",
"questions": "你是一个专业的教育助手,擅长生成帮助理解文档的问题。",
"tags": "你是一个专业的标签生成助手,擅长提取文档的主题标签。",
"qa": "你是一个专业的问答助手,擅长基于文档内容生成问答对。",
"statistics": "你是一个专业的统计数据分析助手,擅长分析政府统计公报中的数据。",
"section": "你是一个专业的章节分析助手,擅长对文档的特定章节进行深入分析。"
}
return prompts.get(analysis_type, "你是一个专业的文档分析助手。")
    def _build_prompt(
        self,
        content: str,
        analysis_type: str,
        user_prompt: str,
        title: str = ""
    ) -> str:
        """
        Build the user prompt for the given analysis type.

        Args:
            content: Text to analyze; truncated to keep the request within
                the model's token budget (tags/qa templates truncate further).
            analysis_type: Analysis type key; unknown keys fall back to "summary".
            user_prompt: Extra user instructions appended at the end, if any.
            title: Document or section title interpolated into the template.

        Returns:
            str: Complete prompt text (Chinese, by design).
        """
        # Global truncation guard against oversized documents.
        max_content_len = 6000
        if len(content) > max_content_len:
            content = content[:max_content_len] + "\n\n[内容已截断...]"
        base_prompts = {
            "summary": f"""请对以下文档进行摘要分析:
文档标题:{title}
文档内容:
{content}
请提供:
1. 文档主要内容摘要300字以内
2. 文档的目的和用途
3. 适合的读者群体
请用中文回答,结构清晰。""",
            "outline": f"""请提取以下文档的大纲结构:
文档标题:{title}
文档内容:
{content}
请按层级列出文档大纲,用缩进表示层级关系。
格式:
一、一级标题
(一)二级标题
1. 三级标题
请用中文回答。""",
            "key_points": f"""请从以下文档中提取关键要点:
文档标题:{title}
文档内容:
{content}
请列出文档的关键要点5-10条每条用简洁的语言描述并说明其在文档中的重要性。
请用中文回答,格式清晰。""",
            "questions": f"""请根据以下文档生成有助于理解内容的问题:
文档标题:{title}
文档内容:
{content}
请生成5-10个问题帮助读者更好地理解文档内容。每个问题应该
1. 涵盖文档的重要信息点
2. 易于理解和回答
3. 具有思考价值
请用中文回答。""",
            "tags": f"""请为以下文档生成标签:
文档标题:{title}
文档内容:
{content[:3000]}
请生成5-8个标签用逗号分隔。标签应该反映
- 文档的主题领域
- 文档的类型
- 文档的关键特征
请用中文回答,只需输出标签,不要其他内容。""",
            "qa": f"""请根据以下文档生成问答对:
文档标题:{title}
文档内容:
{content[:4000]}
请生成3-5个问答对帮助读者通过问答形式理解文档内容。
格式:
Q1: 问题
A1: 回答
Q2: 问题
A2: 回答
请用中文回答,内容准确。""",
            "statistics": f"""请分析以下政府统计公报中的数据和结论:
文档标题:{title}
文档内容:
{content}
请提供:
1. 文档中涉及的主要统计数据(列出关键数字和指标)
2. 数据的变化趋势(增长/下降)
3. 重要的百分比和对比
4. 数据来源和统计口径说明
请用中文回答,数据准确。""",
            "section": f"""请详细分析以下文档章节:
章节标题:{title}
章节内容:
{content}
请提供:
1. 章节主要内容概括
2. 关键信息和数据
3. 与其他部分的关联(如有)
4. 重要结论
请用中文回答,分析深入。"""
        }
        # Unknown types degrade gracefully to the summary prompt.
        prompt = base_prompts.get(analysis_type, base_prompts["summary"])
        if user_prompt and user_prompt.strip():
            # Append caller-supplied instructions verbatim.
            prompt += f"\n\n用户额外需求:{user_prompt}"
        return prompt
async def extract_outline(self, file_path: str) -> Dict[str, Any]:
"""提取文档大纲"""
try:
parse_result = self.parser.parse(file_path)
if not parse_result.success:
return {"success": False, "error": parse_result.error}
data = parse_result.data
sections = self.extract_sections(data.get("content", ""), data.get("titles", []))
# 构建结构化大纲
outline = []
for section in sections:
outline.append({
"number": section.number,
"title": section.title,
"level": section.level,
"line": section.line_start,
"content_preview": section.content[:100] + "..." if len(section.content) > 100 else section.content,
"subsections": [{
"number": s.number,
"title": s.title,
"level": s.level,
"line": s.line_start
} for s in section.subsections]
})
return {
"success": True,
"outline": outline
}
except Exception as e:
logger.error(f"大纲提取失败: {str(e)}")
return {"success": False, "error": str(e)}
async def extract_tables_summary(self, file_path: str) -> Dict[str, Any]:
"""提取并总结文档中的表格"""
try:
parse_result = self.parser.parse(file_path)
if not parse_result.success:
return {"success": False, "error": parse_result.error}
tables = parse_result.data.get("tables", [])
if not tables:
return {"success": True, "tables": [], "message": "文档中没有表格"}
# 提取每个表格的关键信息
table_summaries = []
for i, table in enumerate(tables):
summary = {
"index": i + 1,
"headers": table.get("headers", []),
"row_count": table.get("row_count", 0),
"column_count": table.get("column_count", 0),
"preview_rows": table.get("rows", [])[:3], # 只取前3行预览
"first_column": [row[0] if row else "" for row in table.get("rows", [])[:5]]
}
table_summaries.append(summary)
return {
"success": True,
"tables": table_summaries,
"table_count": len(tables)
}
except Exception as e:
logger.error(f"表格提取失败: {str(e)}")
return {"success": False, "error": str(e)}
# Module-level singleton shared by the API endpoints.
markdown_ai_service = MarkdownAIService()

View File

@@ -1,113 +0,0 @@
✅ Excel 文件解析功能已完成并测试通过
已完成的工作
后端部分
1. 文件服务层 (backend/app/services/file_service.py)
- 文件保存、读取、删除功能
- 文件信息获取
2. Excel 解析模块 (backend/app/core/document_parser/)
- base.py - 解析器基类
- xlsx_parser.py - Excel 文件解析器
- utils.py - 工具函数
3. API 接口 (backend/app/api/endpoints/upload.py)
- POST /upload/excel - 上传并解析 Excel 文件
- GET /upload/excel/preview/{file_path} - 获取预览
- DELETE /upload/file - 删除文件
4. 主应用配置 (backend/app/main.py)
- CORS 中间件配置
- API 路由注册
前端部分
1. 后端 API 封装 (frontendTest/app-a6ww9j3ja3nl/src/db/backend-api.ts)
- Excel 上传、预览、删除函数
2. Excel 解析页面 (frontendTest/app-a6ww9j3ja3nl/src/pages/ExcelParse.tsx)
- 拖拽上传界面
- 解析选项配置
- 数据表格展示
- 多工作表支持
3. 路由配置 (frontendTest/app-a6ww9j3ja3nl/src/routes.tsx)
- 添加 /excel-parse 路由
4. 环境配置 (.env)
- 后端 API URL 配置
测试结果
后端 API 测试通过:
- ✅ 健康检查http://127.0.0.1:8000/health
- ✅ Excel 上传解析:成功解析 5 行 6 列的测试数据
访问方式
1. 后端 API 文档http://127.0.0.1:8000/api/v1/docs
2. 前端页面:
- 启动前端cd frontendTest/app-a6ww9j3ja3nl && npm run dev
- 访问 Excel 解析页面:/excel-parse
下一步建议
1. 启动前端开发服务器测试完整的前端界面
2. 添加更多文档格式解析支持docx, md, txt
3. 完善错误处理和用户提示
✻ Worked for 4m 18s · 1 background task still running (↓ to manage)
> 如何启动后端呢
● 后端启动步骤如下:
方法一:使用命令行启动(开发模式)
# 1. 进入后端目录
cd H:/OwnProject/FilesReadSysteam/backend
# 2. 激活虚拟环境
# PowerShell:
.\venv\Scripts\Activate.ps1
# 或 CMD:
.\venv\Scripts\Activate.bat
# 3. 启动后端服务器
python -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload
方法二:直接使用虚拟环境中的 Python
cd H:/OwnProject/FilesReadSysteam/backend
./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload
方法三:直接运行 main.py开发调试
cd H:/OwnProject/FilesReadSysteam/backend
./venv/Scripts/python.exe app/main.py
启动成功的标志
INFO: Uvicorn running on http://127.0.0.1:8000
INFO: Started server process [xxxxx]
INFO: Waiting for application startup.
INFO: Application startup complete.
验证服务是否正常
# 健康检查
curl http://127.0.0.1:8000/health
# 访问 API 文档
# 浏览器打开: http://127.0.0.1:8000/api/v1/docs
---
当前状态:后端已在后台运行(任务 ID: b22jkg69j可以直接访问 http://127.0.0.1:8000
需要停止的话告诉我即可。

View File

@@ -166,6 +166,43 @@ export interface AIAnalysisResult {
error?: string; error?: string;
} }
// ==================== Markdown AI analysis types ====================

/** Result returned by POST /ai/analyze/md. */
export interface AIMarkdownAnalyzeResult {
  success: boolean;
  filename?: string;
  analysis_type?: string;
  /** Section title when a single section was analyzed. */
  section?: string;
  word_count?: number;
  /** Document structure counters reported by the backend parser. */
  structure?: {
    title_count?: number;
    code_block_count?: number;
    table_count?: number;
    section_count?: number;
  };
  sections?: MarkdownSection[];
  /** The LLM-generated analysis text. */
  analysis?: string;
  error?: string;
}

/** One section of a Markdown document (recursive via subsections). */
export interface MarkdownSection {
  number: string;
  title: string;
  level: number;
  content_preview?: string;
  line_start: number;
  line_end?: number;
  subsections?: MarkdownSection[];
}

/** Result returned by the Markdown outline endpoint. */
export interface MarkdownOutlineResult {
  success: boolean;
  outline?: MarkdownSection[];
  error?: string;
}

/** Analysis types supported by the backend Markdown AI service. */
export type MarkdownAnalysisType = 'summary' | 'outline' | 'key_points' | 'questions' | 'tags' | 'qa' | 'statistics' | 'section';
export interface AIExcelAnalyzeResult { export interface AIExcelAnalyzeResult {
success: boolean; success: boolean;
excel?: { excel?: {
@@ -842,6 +879,159 @@ export const aiApi = {
} }
}, },
  /**
   * Upload a Markdown file and analyze it with the backend AI service.
   *
   * Options travel as query parameters; the file travels as multipart
   * form data. Throws on HTTP errors, preferring the backend's `detail`.
   */
  async analyzeMarkdown(
    file: File,
    options: {
      analysisType?: MarkdownAnalysisType;
      userPrompt?: string;
      sectionNumber?: string;
    } = {}
  ): Promise<AIMarkdownAnalyzeResult> {
    const formData = new FormData();
    formData.append('file', file);
    // Only append options that were actually provided.
    const params = new URLSearchParams();
    if (options.analysisType) {
      params.append('analysis_type', options.analysisType);
    }
    if (options.userPrompt) {
      params.append('user_prompt', options.userPrompt);
    }
    if (options.sectionNumber) {
      params.append('section_number', options.sectionNumber);
    }
    const url = `${BACKEND_BASE_URL}/ai/analyze/md?${params.toString()}`;
    try {
      const response = await fetch(url, {
        method: 'POST',
        body: formData,
      });
      if (!response.ok) {
        const error = await response.json();
        throw new Error(error.detail || 'Markdown AI 分析失败');
      }
      return await response.json();
    } catch (error) {
      console.error('Markdown AI 分析失败:', error);
      throw error;
    }
  },
  /**
   * Analyze a Markdown file via the SSE streaming endpoint.
   *
   * Posts the file as multipart form data, reads the response body
   * incrementally, parses `data: {...}` lines and forwards each parsed
   * event to `onChunk`. Resolves with the full concatenated analysis text.
   *
   * NOTE(review): decoder.decode(value) is called without { stream: true },
   * and a `data:` line split across two network chunks fails JSON.parse and
   * is silently dropped — confirm this is acceptable for real traffic.
   */
  async analyzeMarkdownStream(
    file: File,
    options: {
      analysisType?: MarkdownAnalysisType;
      userPrompt?: string;
      sectionNumber?: string;
    } = {},
    onChunk?: (chunk: { type: string; delta?: string; error?: string }) => void
  ): Promise<string> {
    const formData = new FormData();
    formData.append('file', file);
    const params = new URLSearchParams();
    if (options.analysisType) {
      params.append('analysis_type', options.analysisType);
    }
    if (options.userPrompt) {
      params.append('user_prompt', options.userPrompt);
    }
    if (options.sectionNumber) {
      params.append('section_number', options.sectionNumber);
    }
    const url = `${BACKEND_BASE_URL}/ai/analyze/md/stream?${params.toString()}`;
    try {
      const response = await fetch(url, {
        method: 'POST',
        body: formData,
      });
      if (!response.ok) {
        const error = await response.json();
        throw new Error(error.detail || 'Markdown AI 流式分析失败');
      }
      const reader = response.body?.getReader();
      if (!reader) throw new Error('无法读取响应流');
      const decoder = new TextDecoder();
      let fullResponse = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        const chunk = decoder.decode(value);
        const lines = chunk.split('\n');
        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6); // strip the "data: " SSE prefix
            if (data === '[DONE]') continue;
            try {
              const parsed = JSON.parse(data);
              if (parsed.type === 'content' && parsed.delta) {
                fullResponse += parsed.delta;
                onChunk?.({ type: 'content', delta: parsed.delta });
              } else if (parsed.type === 'done') {
                // Prefer the server's authoritative full text when provided.
                fullResponse = parsed.full_response || fullResponse;
              } else if (parsed.error) {
                onChunk?.({ type: 'error', error: parsed.error });
              }
            } catch {
              // Ignore parse errors for incomplete JSON
            }
          }
        }
      }
      return fullResponse;
    } catch (error) {
      console.error('Markdown AI 流式分析失败:', error);
      throw error;
    }
  },
/**
* 获取 Markdown 文档大纲(分章节信息)
*/
async getMarkdownOutline(file: File): Promise<MarkdownOutlineResult> {
const formData = new FormData();
formData.append('file', file);
const url = `${BACKEND_BASE_URL}/ai/analyze/md/outline`;
try {
const response = await fetch(url, {
method: 'GET',
body: formData,
});
if (!response.ok) {
const error = await response.json();
throw new Error(error.detail || '获取 Markdown 大纲失败');
}
return await response.json();
} catch (error) {
console.error('获取 Markdown 大纲失败:', error);
throw error;
}
},
/** /**
* 生成统计信息和图表 * 生成统计信息和图表
*/ */

View File

@@ -19,7 +19,11 @@ import {
TrendingUp, TrendingUp,
Download, Download,
Brain, Brain,
Settings2 Settings2,
List,
MessageSquareCode,
Tag,
HelpCircle
} from 'lucide-react'; } from 'lucide-react';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import { Input } from '@/components/ui/input'; import { Input } from '@/components/ui/input';
@@ -33,7 +37,7 @@ import { Checkbox } from '@/components/ui/checkbox';
import { toast } from 'sonner'; import { toast } from 'sonner';
import { cn } from '@/lib/utils'; import { cn } from '@/lib/utils';
import { Skeleton } from '@/components/ui/skeleton'; import { Skeleton } from '@/components/ui/skeleton';
import { backendApi, type ExcelParseResult, aiApi } from '@/db/backend-api'; import { backendApi, type ExcelParseResult, type AIMarkdownAnalyzeResult, type MarkdownSection, aiApi } from '@/db/backend-api';
import { import {
Table as TableComponent, Table as TableComponent,
TableBody, TableBody,
@@ -78,6 +82,15 @@ const Documents: React.FC = () => {
const [analysisCharts, setAnalysisCharts] = useState<any>(null); const [analysisCharts, setAnalysisCharts] = useState<any>(null);
const [analysisTypes, setAnalysisTypes] = useState<Array<{ value: string; label: string; description: string }>>([]); const [analysisTypes, setAnalysisTypes] = useState<Array<{ value: string; label: string; description: string }>>([]);
// Markdown AI 分析相关状态
const [mdAnalysis, setMdAnalysis] = useState<AIMarkdownAnalyzeResult | null>(null);
const [mdAnalysisType, setMdAnalysisType] = useState<'summary' | 'outline' | 'key_points' | 'questions' | 'tags' | 'qa' | 'statistics' | 'section'>('summary');
const [mdUserPrompt, setMdUserPrompt] = useState('');
const [mdSections, setMdSections] = useState<MarkdownSection[]>([]);
const [mdSelectedSection, setMdSelectedSection] = useState<string>('');
const [mdStreaming, setMdStreaming] = useState(false);
const [mdStreamingContent, setMdStreamingContent] = useState('');
// 解析选项 // 解析选项
const [parseOptions, setParseOptions] = useState({ const [parseOptions, setParseOptions] = useState({
parseAllSheets: false, parseAllSheets: false,
@@ -144,6 +157,9 @@ const Documents: React.FC = () => {
setAiAnalysis(null); setAiAnalysis(null);
setAnalysisCharts(null); setAnalysisCharts(null);
setExpandedSheet(null); setExpandedSheet(null);
setMdAnalysis(null);
setMdSections([]);
setMdStreamingContent('');
const ext = file.name.split('.').pop()?.toLowerCase(); const ext = file.name.split('.').pop()?.toLowerCase();
@@ -163,6 +179,9 @@ const Documents: React.FC = () => {
} else { } else {
toast.error(result.error || '解析失败'); toast.error(result.error || '解析失败');
} }
} else if (ext === 'md' || ext === 'markdown') {
// Markdown 文件:获取大纲
await fetchMdOutline();
} else { } else {
// 其他文档使用通用上传接口 // 其他文档使用通用上传接口
const result = await backendApi.uploadDocument(file); const result = await backendApi.uploadDocument(file);
@@ -403,6 +422,105 @@ const Documents: React.FC = () => {
} }
}; };
const isMarkdownFile = (filename: string) => {
const ext = filename.split('.').pop()?.toLowerCase();
return ext === 'md' || ext === 'markdown';
};
  // Run a one-shot (non-streaming) Markdown AI analysis of the uploaded file.
  const handleMdAnalyze = async () => {
    // Guard: requires a previously uploaded .md/.markdown file.
    if (!uploadedFile || !isMarkdownFile(uploadedFile.name)) {
      toast.error('请先上传 Markdown 文件');
      return;
    }
    setAnalyzing(true);
    setMdAnalysis(null); // clear any previous result before re-analyzing
    try {
      const result = await aiApi.analyzeMarkdown(uploadedFile, {
        analysisType: mdAnalysisType,
        userPrompt: mdUserPrompt,
        sectionNumber: mdSelectedSection || undefined // '' -> whole document
      });
      if (result.success) {
        toast.success('Markdown AI 分析完成');
        setMdAnalysis(result);
      } else {
        toast.error(result.error || 'AI 分析失败');
      }
    } catch (error: any) {
      toast.error(error.message || 'AI 分析失败');
    } finally {
      setAnalyzing(false);
    }
  };
  // Run a streaming (SSE) Markdown AI analysis, appending deltas as they arrive.
  const handleMdAnalyzeStream = async () => {
    if (!uploadedFile || !isMarkdownFile(uploadedFile.name)) {
      toast.error('请先上传 Markdown 文件');
      return;
    }
    setAnalyzing(true);
    setMdStreaming(true);
    setMdStreamingContent(''); // reset the incremental output buffer
    setMdAnalysis(null);
    try {
      await aiApi.analyzeMarkdownStream(
        uploadedFile,
        {
          analysisType: mdAnalysisType,
          userPrompt: mdUserPrompt,
          sectionNumber: mdSelectedSection || undefined
        },
        (chunk: { type: string; delta?: string; error?: string }) => {
          if (chunk.type === 'content' && chunk.delta) {
            // Functional update keeps appends correct across rapid chunks.
            setMdStreamingContent(prev => prev + chunk.delta);
          } else if (chunk.type === 'error') {
            toast.error(chunk.error || '流式分析出错');
          }
        }
      );
    } catch (error: any) {
      toast.error(error.message || 'AI 分析失败');
    } finally {
      setAnalyzing(false);
      setMdStreaming(false);
    }
  };
  // Fetch the document's section outline (populates the section picker).
  const fetchMdOutline = async () => {
    if (!uploadedFile || !isMarkdownFile(uploadedFile.name)) return;
    try {
      const result = await aiApi.getMarkdownOutline(uploadedFile);
      if (result.success && result.outline) {
        setMdSections(result.outline);
      }
    } catch (error) {
      // The outline is a nice-to-have; failures are logged, not surfaced.
      console.error('获取大纲失败:', error);
    }
  };
  // Map an analysis type to its display icon; unknown types fall back to Sparkles.
  const getMdAnalysisIcon = (type: string) => {
    switch (type) {
      case 'summary': return <FileText size={20} />;
      case 'outline': return <List size={20} />;
      case 'key_points': return <TrendingUp size={20} />;
      case 'statistics': return <TrendingUp size={20} />;
      case 'section': return <FileText size={20} />;
      case 'questions': return <MessageSquareCode size={20} />;
      case 'tags': return <Tag size={20} />;
      case 'qa': return <HelpCircle size={20} />;
      default: return <Sparkles size={20} />;
    }
  };
const formatFileSize = (bytes: number): string => { const formatFileSize = (bytes: number): string => {
if (bytes === 0) return '0 B'; if (bytes === 0) return '0 B';
const k = 1024; const k = 1024;
@@ -600,6 +718,97 @@ const Documents: React.FC = () => {
</Card> </Card>
)} )}
{/* Markdown AI analysis options — rendered only when the uploaded file is Markdown */}
{uploadedFile && isMarkdownFile(uploadedFile.name) && (
<Card className="border-none shadow-md bg-gradient-to-br from-purple-500/5 to-primary/5">
<CardHeader className="pb-4">
<CardTitle className="flex items-center gap-2">
<Sparkles className="text-purple-500" size={20} />
Markdown AI
</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
{/* Section selector — shown only after an outline has been fetched */}
{mdSections.length > 0 && (
<div className="space-y-2">
{/* NOTE(review): Label text is empty — possibly lost in encoding; confirm intended label */}
<Label htmlFor="md-section" className="text-sm"></Label>
<Select value={mdSelectedSection} onValueChange={setMdSelectedSection}>
<SelectTrigger id="md-section" className="bg-background">
<SelectValue placeholder="全文分析" />
</SelectTrigger>
<SelectContent>
{/* NOTE(review): an empty-string SelectItem value is rejected by Radix-based
    Select components at runtime — verify this "whole document" option works */}
<SelectItem value=""></SelectItem>
{mdSections.map((section) => (
<SelectItem key={section.number} value={section.number}>
{section.number}{section.title}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
)}
{/* Analysis-type selector: one entry per supported Markdown analysis */}
<div className="space-y-2">
{/* NOTE(review): Label text is empty — possibly lost in encoding; confirm intended label */}
<Label htmlFor="md-analysis-type" className="text-sm"></Label>
<Select value={mdAnalysisType} onValueChange={(value: any) => setMdAnalysisType(value)}>
<SelectTrigger id="md-analysis-type" className="bg-background">
<SelectValue />
</SelectTrigger>
<SelectContent>
{[
{ value: 'summary', label: '文档摘要', desc: '主要内容摘要' },
{ value: 'outline', label: '大纲提取', desc: '提取文档结构' },
{ value: 'key_points', label: '关键要点', desc: '提取关键信息' },
{ value: 'statistics', label: '统计分析', desc: '统计数据分析' },
{ value: 'section', label: '章节分析', desc: '分章节详细分析' },
{ value: 'questions', label: '生成问题', desc: '生成理解性问题' },
{ value: 'tags', label: '生成标签', desc: '提取主题标签' },
{ value: 'qa', label: '问答对', desc: '生成问答内容' }
].map(type => (
<SelectItem key={type.value} value={type.value}>
<div className="flex items-center gap-2">
{getMdAnalysisIcon(type.value)}
<div className="flex flex-col">
<span className="font-medium">{type.label}</span>
<span className="text-xs text-muted-foreground">{type.desc}</span>
</div>
</div>
</SelectItem>
))}
</SelectContent>
</Select>
</div>
{/* Optional free-form prompt appended to the analysis request */}
<div className="space-y-2">
{/* NOTE(review): Label text is empty — possibly lost in encoding; confirm intended label */}
<Label htmlFor="md-user-prompt" className="text-sm"></Label>
<Textarea
id="md-user-prompt"
placeholder="例如:请重点关注技术实现部分..."
value={mdUserPrompt}
onChange={(e) => setMdUserPrompt(e.target.value)}
className="bg-background resize-none"
rows={2}
/>
</div>
{/* Two triggers: one-shot analysis vs. streaming analysis; both disabled while a run is active */}
<div className="flex gap-2">
<Button
onClick={handleMdAnalyze}
disabled={analyzing}
className="flex-1 bg-gradient-to-r from-purple-500 to-primary hover:from-purple-500/90 hover:to-primary/90"
>
{analyzing && !mdStreaming ? <><Loader2 className="mr-2 animate-spin" size={16} /> ...</> : <><Sparkles className="mr-2" size={16} /></>}
</Button>
<Button
onClick={handleMdAnalyzeStream}
disabled={analyzing}
variant="outline"
className="flex-1"
>
{analyzing && mdStreaming ? <><Loader2 className="mr-2 animate-spin" size={16} /> ...</> : <><Sparkles className="mr-2" size={16} /></>}
</Button>
</div>
</CardContent>
</Card>
)}
{/* 数据操作 */} {/* 数据操作 */}
{parseResult?.success && ( {parseResult?.success && (
<Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5"> <Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5">
@@ -661,6 +870,45 @@ const Documents: React.FC = () => {
</Card> </Card>
)} )}
{/* Markdown AI analysis results — visible while streaming or once a result exists */}
{(mdAnalysis || mdStreamingContent) && (
<Card className="border-none shadow-md border-l-4 border-l-purple-500">
<CardHeader>
<div className="flex items-center justify-between">
<div className="space-y-1">
<CardTitle className="flex items-center gap-2">
<Sparkles className="text-purple-500" size={20} />
Markdown AI
{/* NOTE(review): Badge text is empty — likely a lost "streaming" label; confirm */}
{mdStreaming && <Badge variant="default" className="ml-2 bg-purple-500"></Badge>}
</CardTitle>
{/* Summary line of the completed analysis (filename, word count, type, optional section).
    NOTE(review): separators between these values appear missing — possibly lost in encoding */}
{mdAnalysis && (
<CardDescription>
{mdAnalysis.filename} {mdAnalysis.word_count || 0} {mdAnalysis.analysis_type}
{mdAnalysis.section && `${mdAnalysis.section}`}
</CardDescription>
)}
</div>
{/* Document structure stats (title/section counts) when the backend returned them */}
{mdAnalysis?.structure && (
<Badge variant="secondary">
{mdAnalysis.structure.title_count || 0} {mdAnalysis.structure.section_count || 0}
</Badge>
)}
</div>
</CardHeader>
<CardContent className="max-h-[500px] overflow-y-auto">
{/* Streaming output takes precedence over the final result while chunks arrive */}
{mdStreamingContent && (
<div className="animate-pulse text-sm text-muted-foreground mb-4">
...
</div>
)}
{mdStreamingContent && <Markdown content={mdStreamingContent} />}
{/* Final rendered analysis, only when no streaming buffer is shown */}
{mdAnalysis?.analysis && !mdStreamingContent && <Markdown content={mdAnalysis.analysis} />}
{/* Error fallback when the analysis failed and nothing is streaming */}
{!mdAnalysis?.success && !mdStreamingContent && <p className="text-sm text-destructive">{mdAnalysis?.error || '分析失败'}</p>}
</CardContent>
</Card>
)}
{/* 图表显示 */} {/* 图表显示 */}
{analysisCharts && ( {analysisCharts && (
<Card className="border-none shadow-md border-l-4 border-l-indigo-500"> <Card className="border-none shadow-md border-l-4 border-l-indigo-500">