feat(ai-analyze): 新增 Markdown 文件 AI 分析功能

- 添加 Markdown 文件上传和解析接口
- 实现流式分析和大纲提取功能
- 支持多种分析类型:摘要、大纲、关键点等
- 新增 markdown_ai_service 服务类
- 扩展 LLMService 支持流式调用
- 更新前端 API 接口定义和实现
This commit is contained in:
2026-04-02 11:53:12 +08:00
parent ddf30078f0
commit d189ea9620
6 changed files with 1286 additions and 118 deletions

View File

@@ -2,10 +2,14 @@
AI 分析 API 接口
"""
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
from fastapi.responses import StreamingResponse
from typing import Optional
import logging
import tempfile
import os
from app.services.excel_ai_service import excel_ai_service
from app.services.markdown_ai_service import markdown_ai_service
logger = logging.getLogger(__name__)
@@ -93,10 +97,11 @@ async def get_analysis_types():
获取支持的分析类型列表
Returns:
list: 支持的分析类型
dict: 支持的分析类型(包含 Excel 和 Markdown)
"""
return {
"types": excel_ai_service.get_supported_analysis_types()
"excel_types": excel_ai_service.get_supported_analysis_types(),
"markdown_types": markdown_ai_service.get_supported_analysis_types()
}
@@ -142,3 +147,185 @@ async def analyze_text(
except Exception as e:
logger.error(f"文本分析失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
@router.post("/analyze/md")
async def analyze_markdown(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号,如 '''(一)'")
):
    """
    Upload a Markdown file and analyze it with the AI service.

    The upload is written to a temporary ``.md`` file, whose path is handed
    to ``markdown_ai_service.analyze_markdown``; the temporary file is
    removed before the request finishes, whether or not analysis succeeds.

    Args:
        file: Uploaded Markdown file (``.md`` or ``.markdown``).
        analysis_type: Analysis type; must be one of
            ``markdown_ai_service.get_supported_analysis_types()``.
        user_prompt: Optional user-supplied prompt forwarded to the service.
        section_number: Optional section identifier forwarded to the service.

    Returns:
        The result object returned by the service (it is indexed with
        ``'success'`` and queried with ``.get('error')`` here).

    Raises:
        HTTPException: 400 for a missing filename, an unsupported extension
            or an unsupported analysis type; 500 when the service reports
            failure or any other error occurs.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    # Only text after an actual dot counts as an extension; a bare file
    # named "md" or "markdown" has no extension and must be rejected.
    _, dot, suffix = file.filename.rpartition('.')
    file_ext = suffix.lower() if dot else ""
    if file_ext not in ('md', 'markdown'):
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    # Reject unknown analysis types before doing any I/O.
    supported_types = markdown_ai_service.get_supported_analysis_types()
    if analysis_type not in supported_types:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
        )

    tmp_path = None
    try:
        content = await file.read()

        # delete=False because the service reopens the file by path.
        # Record the path *before* writing so a failed write is still
        # cleaned up by the finally clause below.
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(content)

        logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}")

        result = await markdown_ai_service.analyze_markdown(
            file_path=tmp_path,
            analysis_type=analysis_type,
            user_prompt=user_prompt,
            section_number=section_number
        )

        logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}")

        if not result['success']:
            raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))

        return result
    except HTTPException:
        # Already carries the intended status code; do not wrap it.
        raise
    except Exception as e:
        logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
    finally:
        # Remove the temporary file on every exit path.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@router.post("/analyze/md/stream")
async def analyze_markdown_stream(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号")
):
    """
    Stream the AI analysis of an uploaded Markdown file (SSE).

    The upload is written to a temporary ``.md`` file and the chunks yielded
    by ``markdown_ai_service.analyze_markdown_stream`` are forwarded
    unchanged as a ``text/event-stream`` response.

    Args:
        file: Uploaded Markdown file (``.md`` or ``.markdown``).
        analysis_type: Analysis type forwarded to the service.
        user_prompt: Optional user-supplied prompt forwarded to the service.
        section_number: Optional section identifier forwarded to the service.

    Returns:
        StreamingResponse: SSE streaming response.

    Raises:
        HTTPException: 400 for a missing filename or unsupported extension;
            500 when the upload cannot be read or staged on disk.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    # Only text after an actual dot counts as an extension; a bare file
    # named "md" or "markdown" has no extension and must be rejected.
    _, dot, suffix = file.filename.rpartition('.')
    file_ext = suffix.lower() if dot else ""
    if file_ext not in ('md', 'markdown'):
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    tmp_path = None
    try:
        content = await file.read()

        # delete=False because the service reopens the file by path.
        # Record the path before writing so a failed write can be cleaned up.
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(content)
    except Exception as e:
        # Staging failed: nothing will stream, so clean up here.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
        logger.error(f"Markdown AI 流式分析出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}")

    logger.info(f"开始流式分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}")

    async def stream_generator():
        # The response body is produced lazily, *after* this endpoint has
        # returned. The temporary file must therefore outlive the endpoint
        # and be removed only once streaming ends (normally, on error, or
        # when the generator is closed).
        try:
            async for chunk in markdown_ai_service.analyze_markdown_stream(
                file_path=tmp_path,
                analysis_type=analysis_type,
                user_prompt=user_prompt,
                section_number=section_number
            ):
                yield chunk
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    return StreamingResponse(
        stream_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Ask nginx-style reverse proxies not to buffer the event stream.
            "X-Accel-Buffering": "no"
        }
    )
# A multipart file upload needs a request body, which browser clients cannot
# send with GET; POST is therefore accepted as well. GET stays registered so
# existing callers keep working.
@router.post("/analyze/md/outline")
@router.get("/analyze/md/outline")
async def get_markdown_outline(
    file: UploadFile = File(...)
):
    """
    Return the outline structure of an uploaded Markdown document.

    The upload is written to a temporary ``.md`` file, whose path is passed
    to ``markdown_ai_service.extract_outline``; the temporary file is removed
    before the request finishes.

    Args:
        file: Uploaded Markdown file (``.md`` or ``.markdown``).

    Returns:
        The value returned by ``markdown_ai_service.extract_outline``.

    Raises:
        HTTPException: 400 for a missing filename or unsupported extension;
            500 when reading, staging or outline extraction fails.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    # Only text after an actual dot counts as an extension; a bare file
    # named "md" or "markdown" has no extension and must be rejected.
    _, dot, suffix = file.filename.rpartition('.')
    file_ext = suffix.lower() if dot else ""
    if file_ext not in ('md', 'markdown'):
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    tmp_path = None
    try:
        content = await file.read()

        # delete=False because the service reopens the file by path.
        # Record the path *before* writing so a failed write is still
        # cleaned up by the finally clause below.
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(content)

        return await markdown_ai_service.extract_outline(tmp_path)
    except HTTPException:
        # Keep any status code chosen further down the stack, matching the
        # sibling Markdown endpoints.
        raise
    except Exception as e:
        logger.error(f"获取 Markdown 大纲失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")
    finally:
        # Remove the temporary file on every exit path.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)