feat(ai-analyze): 新增 Markdown 文件 AI 分析功能
- 添加 Markdown 文件上传和解析接口 - 实现流式分析和大纲提取功能 - 支持多种分析类型:摘要、大纲、关键点等 - 新增 markdown_ai_service 服务类 - 扩展 LLMService 支持流式调用 - 更新前端 API 接口定义和实现
This commit is contained in:
@@ -2,10 +2,14 @@
|
||||
AI 分析 API 接口
|
||||
"""
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
|
||||
from fastapi.responses import StreamingResponse
|
||||
from typing import Optional
|
||||
import logging
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
from app.services.excel_ai_service import excel_ai_service
|
||||
from app.services.markdown_ai_service import markdown_ai_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -93,10 +97,11 @@ async def get_analysis_types():
|
||||
获取支持的分析类型列表
|
||||
|
||||
Returns:
|
||||
list: 支持的分析类型
|
||||
dict: 支持的分析类型(包含 Excel 和 Markdown)
|
||||
"""
|
||||
return {
|
||||
"types": excel_ai_service.get_supported_analysis_types()
|
||||
"excel_types": excel_ai_service.get_supported_analysis_types(),
|
||||
"markdown_types": markdown_ai_service.get_supported_analysis_types()
|
||||
}
|
||||
|
||||
|
||||
@@ -142,3 +147,185 @@ async def analyze_text(
|
||||
except Exception as e:
|
||||
logger.error(f"文本分析失败: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/analyze/md")
async def analyze_markdown(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
):
    """
    Upload a Markdown file and run a one-shot AI analysis on it.

    Args:
        file: uploaded Markdown document (.md / .markdown only).
        analysis_type: one of the types reported by the service.
        user_prompt: optional extra instructions appended to the prompt.
        section_number: restrict the analysis to one numbered section.

    Returns:
        dict: analysis result produced by markdown_ai_service.
    """
    # Guard clauses: filename, extension and analysis type are validated
    # before any file I/O happens.
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    extension = file.filename.split('.')[-1].lower()
    if extension not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {extension},仅支持 .md 和 .markdown"
        )

    supported_types = markdown_ai_service.get_supported_analysis_types()
    if analysis_type not in supported_types:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
        )

    try:
        raw = await file.read()

        # The service API is path-based, so persist the upload to a temp file.
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp.write(raw)
            temp_path = tmp.name

        try:
            logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}")

            result = await markdown_ai_service.analyze_markdown(
                file_path=temp_path,
                analysis_type=analysis_type,
                user_prompt=user_prompt,
                section_number=section_number
            )

            logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}")

            if not result['success']:
                raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))
            return result
        finally:
            # The temp copy is removed whether the analysis succeeded or not.
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/analyze/md/stream")
async def analyze_markdown_stream(
    file: UploadFile = File(...),
    analysis_type: str = Query("summary", description="分析类型"),
    user_prompt: str = Query("", description="用户自定义提示词"),
    section_number: Optional[str] = Query(None, description="指定章节编号")
):
    """
    Stream the AI analysis of an uploaded Markdown file as Server-Sent Events.

    Args:
        file: uploaded Markdown document (.md / .markdown only).
        analysis_type: analysis mode forwarded to the service.
        user_prompt: optional extra instructions.
        section_number: restrict the analysis to one numbered section.

    Returns:
        StreamingResponse: text/event-stream of analysis chunks.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    try:
        content = await file.read()

        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp.write(content)
            tmp_path = tmp.name

        logger.info(f"开始流式分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}")

        async def stream_generator():
            # FIX: the temp file must outlive this handler. StreamingResponse
            # consumes the generator only AFTER the endpoint returns, so a
            # try/finally around the `return` (as before) deleted the file
            # before the service ever opened it. Cleanup belongs here, in the
            # generator's own finally, which runs when the stream finishes or
            # the client disconnects.
            try:
                async for chunk in markdown_ai_service.analyze_markdown_stream(
                    file_path=tmp_path,
                    analysis_type=analysis_type,
                    user_prompt=user_prompt,
                    section_number=section_number
                ):
                    yield chunk
            finally:
                if os.path.exists(tmp_path):
                    os.unlink(tmp_path)

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                # disable proxy (nginx) buffering so chunks flush immediately
                "X-Accel-Buffering": "no"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Markdown AI 流式分析出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/analyze/md/outline")
async def get_markdown_outline(
    file: UploadFile = File(...)
):
    """
    Extract the section outline (chapter structure) of an uploaded Markdown file.

    FIX: registered as POST instead of GET — a GET request cannot carry a
    multipart file body (standard HTTP clients and fetch() refuse to send one),
    so the previous GET route was uncallable.

    Args:
        file: uploaded Markdown document (.md / .markdown only).

    Returns:
        dict: the document outline structure from markdown_ai_service.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['md', 'markdown']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
        )

    try:
        content = await file.read()

        with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
            tmp.write(content)
            tmp_path = tmp.name

        try:
            return await markdown_ai_service.extract_outline(tmp_path)
        finally:
            # Remove the temp copy in all cases.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    except Exception as e:
        logger.error(f"获取 Markdown 大纲失败: {str(e)}")
        raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
LLM 服务模块 - 封装大模型 API 调用
|
||||
"""
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from typing import Dict, Any, List, Optional, AsyncGenerator
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
@@ -87,6 +87,71 @@ class LLMService:
|
||||
logger.error(f"解析 API 响应失败: {str(e)}")
|
||||
raise
|
||||
|
||||
async def chat_stream(
|
||||
self,
|
||||
messages: List[Dict[str, str]],
|
||||
temperature: float = 0.7,
|
||||
max_tokens: Optional[int] = None,
|
||||
**kwargs
|
||||
) -> AsyncGenerator[Dict[str, Any], None]:
|
||||
"""
|
||||
流式调用聊天 API
|
||||
|
||||
Args:
|
||||
messages: 消息列表
|
||||
temperature: 温度参数
|
||||
max_tokens: 最大 token 数
|
||||
**kwargs: 其他参数
|
||||
|
||||
Yields:
|
||||
Dict[str, Any]: 包含 delta 内容的块
|
||||
"""
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
"stream": True
|
||||
}
|
||||
|
||||
if max_tokens:
|
||||
payload["max_tokens"] = max_tokens
|
||||
|
||||
payload.update(kwargs)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
async with client.stream(
|
||||
"POST",
|
||||
f"{self.base_url}/chat/completions",
|
||||
headers=headers,
|
||||
json=payload
|
||||
) as response:
|
||||
async for line in response.aiter_lines():
|
||||
if line.startswith("data: "):
|
||||
data = line[6:] # Remove "data: " prefix
|
||||
if data == "[DONE]":
|
||||
break
|
||||
try:
|
||||
import json as json_module
|
||||
chunk = json_module.loads(data)
|
||||
delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
|
||||
if delta:
|
||||
yield {"content": delta}
|
||||
except json_module.JSONDecodeError:
|
||||
continue
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"LLM 流式 API 请求失败: {e.response.status_code}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"LLM 流式 API 调用异常: {str(e)}")
|
||||
raise
|
||||
|
||||
async def analyze_excel_data(
|
||||
self,
|
||||
excel_data: Dict[str, Any],
|
||||
|
||||
591
backend/app/services/markdown_ai_service.py
Normal file
591
backend/app/services/markdown_ai_service.py
Normal file
@@ -0,0 +1,591 @@
|
||||
"""
|
||||
Markdown 文档 AI 分析服务
|
||||
|
||||
支持:
|
||||
- 分章节解析(中文章节编号:一、二、三, (一)(二)(三))
|
||||
- 结构化数据提取
|
||||
- 流式输出
|
||||
- 多种分析类型
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, AsyncGenerator, Dict, List, Optional
|
||||
|
||||
from app.services.llm_service import llm_service
|
||||
from app.core.document_parser import MarkdownParser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MarkdownSection:
|
||||
"""文档章节结构"""
|
||||
def __init__(self, number: str, title: str, level: int, content: str, line_start: int, line_end: int):
|
||||
self.number = number # 章节编号,如 "一", "(一)", "1"
|
||||
self.title = title
|
||||
self.level = level # 层级深度
|
||||
self.content = content # 章节内容(不含子章节)
|
||||
self.line_start = line_start
|
||||
self.line_end = line_end
|
||||
self.subsections: List[MarkdownSection] = []
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"number": self.number,
|
||||
"title": self.title,
|
||||
"level": self.level,
|
||||
"content_preview": self.content[:200] + "..." if len(self.content) > 200 else self.content,
|
||||
"line_start": self.line_start,
|
||||
"line_end": self.line_end,
|
||||
"subsections": [s.to_dict() for s in self.subsections]
|
||||
}
|
||||
|
||||
|
||||
class MarkdownAIService:
|
||||
"""Markdown 文档 AI 分析服务"""
|
||||
|
||||
# 中文章节编号模式
|
||||
CHINESE_NUMBERS = ["一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
|
||||
CHINESE_SUFFIX = "、"
|
||||
PARENTHESIS_PATTERN = re.compile(r'^(([一二三四五六七八九十]+))\s*(.+)$')
|
||||
CHINESE_SECTION_PATTERN = re.compile(r'^([一二三四五六七八九十]+)、\s*(.+)$')
|
||||
ARABIC_SECTION_PATTERN = re.compile(r'^(\d+)\.\s+(.+)$')
|
||||
|
||||
def __init__(self):
|
||||
self.parser = MarkdownParser()
|
||||
|
||||
def get_supported_analysis_types(self) -> list:
|
||||
"""获取支持的分析类型"""
|
||||
return [
|
||||
"summary", # 文档摘要
|
||||
"outline", # 大纲提取
|
||||
"key_points", # 关键点提取
|
||||
"questions", # 生成问题
|
||||
"tags", # 生成标签
|
||||
"qa", # 问答对
|
||||
"statistics", # 统计数据分析(适合政府公报)
|
||||
"section" # 分章节详细分析
|
||||
]
|
||||
|
||||
def extract_sections(self, content: str, titles: List[Dict]) -> List[MarkdownSection]:
|
||||
"""
|
||||
从文档内容中提取章节结构
|
||||
|
||||
识别以下章节格式:
|
||||
- 一级:一、二、三...
|
||||
- 二级:(一)(二)(三)...
|
||||
- 三级:1. 2. 3. ...
|
||||
"""
|
||||
sections = []
|
||||
lines = content.split('\n')
|
||||
|
||||
# 构建标题行到内容的映射
|
||||
title_lines = {}
|
||||
for t in titles:
|
||||
title_lines[t.get('line', 0)] = t
|
||||
|
||||
current_section = None
|
||||
section_stack = []
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
|
||||
# 检查是否是一级标题(中文数字 + 、)
|
||||
match = self.CHINESE_SECTION_PATTERN.match(stripped)
|
||||
if match:
|
||||
# 结束当前章节
|
||||
if current_section:
|
||||
current_section.content = self._get_section_content(
|
||||
lines, current_section.line_start, i - 1
|
||||
)
|
||||
|
||||
current_section = MarkdownSection(
|
||||
number=match.group(1),
|
||||
title=match.group(2),
|
||||
level=1,
|
||||
content="",
|
||||
line_start=i,
|
||||
line_end=len(lines)
|
||||
)
|
||||
sections.append(current_section)
|
||||
section_stack = [current_section]
|
||||
continue
|
||||
|
||||
# 检查是否是二级标题((一)(二)...)
|
||||
match = self.PARENTHESIS_PATTERN.match(stripped)
|
||||
if match and current_section:
|
||||
# 结束当前子章节
|
||||
if section_stack and len(section_stack) > 1:
|
||||
parent = section_stack[-1]
|
||||
parent.content = self._get_section_content(
|
||||
lines, parent.line_start, i - 1
|
||||
)
|
||||
|
||||
subsection = MarkdownSection(
|
||||
number=match.group(1),
|
||||
title=match.group(2),
|
||||
level=2,
|
||||
content="",
|
||||
line_start=i,
|
||||
line_end=len(lines)
|
||||
)
|
||||
current_section.subsections.append(subsection)
|
||||
section_stack = [current_section, subsection]
|
||||
continue
|
||||
|
||||
# 检查是否是三级标题(1. 2. 3.)
|
||||
match = self.ARABIC_SECTION_PATTERN.match(stripped)
|
||||
if match and len(section_stack) > 1:
|
||||
# 结束当前子章节
|
||||
if len(section_stack) > 2:
|
||||
parent = section_stack[-1]
|
||||
parent.content = self._get_section_content(
|
||||
lines, parent.line_start, i - 1
|
||||
)
|
||||
|
||||
sub_subsection = MarkdownSection(
|
||||
number=match.group(1),
|
||||
title=match.group(2),
|
||||
level=3,
|
||||
content="",
|
||||
line_start=i,
|
||||
line_end=len(lines)
|
||||
)
|
||||
section_stack[-1].subsections.append(sub_subsection)
|
||||
section_stack = section_stack[:-1] + [sub_subsection]
|
||||
continue
|
||||
|
||||
# 处理最后一个章节
|
||||
if current_section:
|
||||
current_section.content = self._get_section_content(
|
||||
lines, current_section.line_start, len(lines)
|
||||
)
|
||||
|
||||
return sections
|
||||
|
||||
def _get_section_content(self, lines: List[str], start: int, end: int) -> str:
|
||||
"""获取指定行范围的内容"""
|
||||
if start > end:
|
||||
return ""
|
||||
content_lines = lines[start-1:end]
|
||||
# 清理:移除标题行和空行
|
||||
cleaned = []
|
||||
for line in content_lines:
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
# 跳过章节标题行
|
||||
if self.CHINESE_SECTION_PATTERN.match(stripped):
|
||||
continue
|
||||
if self.PARENTHESIS_PATTERN.match(stripped):
|
||||
continue
|
||||
if self.ARABIC_SECTION_PATTERN.match(stripped):
|
||||
continue
|
||||
cleaned.append(stripped)
|
||||
return '\n'.join(cleaned)
|
||||
|
||||
async def analyze_markdown(
|
||||
self,
|
||||
file_path: str,
|
||||
analysis_type: str = "summary",
|
||||
user_prompt: str = "",
|
||||
section_number: Optional[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
使用 AI 分析 Markdown 文档
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
analysis_type: 分析类型
|
||||
user_prompt: 用户自定义提示词
|
||||
section_number: 指定分析的章节编号(如 "一" 或 "(一)")
|
||||
|
||||
Returns:
|
||||
dict: 分析结果
|
||||
"""
|
||||
try:
|
||||
parse_result = self.parser.parse(file_path)
|
||||
|
||||
if not parse_result.success:
|
||||
return {
|
||||
"success": False,
|
||||
"error": parse_result.error
|
||||
}
|
||||
|
||||
data = parse_result.data
|
||||
|
||||
# 提取章节结构
|
||||
sections = self.extract_sections(data.get("content", ""), data.get("titles", []))
|
||||
|
||||
# 如果指定了章节,只分析该章节
|
||||
target_content = data.get("content", "")
|
||||
target_title = parse_result.metadata.get("filename", "")
|
||||
|
||||
if section_number:
|
||||
section = self._find_section(sections, section_number)
|
||||
if section:
|
||||
target_content = section.content
|
||||
target_title = f"{section.number}、{section.title}"
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"未找到章节: {section_number}"
|
||||
}
|
||||
|
||||
# 根据分析类型构建提示词
|
||||
prompt = self._build_prompt(
|
||||
content=target_content,
|
||||
analysis_type=analysis_type,
|
||||
user_prompt=user_prompt,
|
||||
title=target_title
|
||||
)
|
||||
|
||||
# 调用 LLM 分析
|
||||
messages = [
|
||||
{"role": "system", "content": self._get_system_prompt(analysis_type)},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
response = await llm_service.chat(
|
||||
messages=messages,
|
||||
temperature=0.3,
|
||||
max_tokens=4000
|
||||
)
|
||||
|
||||
analysis = llm_service.extract_message_content(response)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"filename": parse_result.metadata.get("filename", ""),
|
||||
"analysis_type": analysis_type,
|
||||
"section": target_title if section_number else None,
|
||||
"word_count": len(target_content),
|
||||
"structure": {
|
||||
"title_count": parse_result.metadata.get("title_count", 0),
|
||||
"code_block_count": parse_result.metadata.get("code_block_count", 0),
|
||||
"table_count": parse_result.metadata.get("table_count", 0),
|
||||
"section_count": len(sections)
|
||||
},
|
||||
"sections": [s.to_dict() for s in sections[:10]], # 最多返回10个一级章节
|
||||
"analysis": analysis
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Markdown AI 分析失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
async def analyze_markdown_stream(
|
||||
self,
|
||||
file_path: str,
|
||||
analysis_type: str = "summary",
|
||||
user_prompt: str = "",
|
||||
section_number: Optional[str] = None
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""
|
||||
流式分析 Markdown 文档 (SSE)
|
||||
|
||||
Yields:
|
||||
str: SSE 格式的数据块
|
||||
"""
|
||||
try:
|
||||
parse_result = self.parser.parse(file_path)
|
||||
|
||||
if not parse_result.success:
|
||||
yield f"data: {json.dumps({'error': parse_result.error}, ensure_ascii=False)}\n\n"
|
||||
return
|
||||
|
||||
data = parse_result.data
|
||||
sections = self.extract_sections(data.get("content", ""), data.get("titles", []))
|
||||
|
||||
target_content = data.get("content", "")
|
||||
target_title = parse_result.metadata.get("filename", "")
|
||||
|
||||
if section_number:
|
||||
section = self._find_section(sections, section_number)
|
||||
if section:
|
||||
target_content = section.content
|
||||
target_title = f"{section.number}、{section.title}"
|
||||
else:
|
||||
yield f"data: {json.dumps({'error': f'未找到章节: {section_number}'}, ensure_ascii=False)}\n\n"
|
||||
return
|
||||
|
||||
prompt = self._build_prompt(
|
||||
content=target_content,
|
||||
analysis_type=analysis_type,
|
||||
user_prompt=user_prompt,
|
||||
title=target_title
|
||||
)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": self._get_system_prompt(analysis_type)},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
# 发送初始元数据
|
||||
yield f"data: {json.dumps({
|
||||
'type': 'start',
|
||||
'filename': parse_result.metadata.get("filename", ""),
|
||||
'analysis_type': analysis_type,
|
||||
'section': target_title if section_number else None,
|
||||
'word_count': len(target_content)
|
||||
}, ensure_ascii=False)}\n\n"
|
||||
|
||||
# 流式调用 LLM
|
||||
full_response = ""
|
||||
async for chunk in llm_service.chat_stream(messages, temperature=0.3, max_tokens=4000):
|
||||
content = chunk.get("content", "")
|
||||
if content:
|
||||
full_response += content
|
||||
yield f"data: {json.dumps({'type': 'content', 'delta': content}, ensure_ascii=False)}\n\n"
|
||||
|
||||
# 发送完成消息
|
||||
yield f"data: {json.dumps({'type': 'done', 'full_response': full_response}, ensure_ascii=False)}\n\n"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Markdown AI 流式分析失败: {str(e)}")
|
||||
yield f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"
|
||||
|
||||
def _find_section(self, sections: List[MarkdownSection], number: str) -> Optional[MarkdownSection]:
|
||||
"""查找指定编号的章节"""
|
||||
# 标准化编号
|
||||
num = number.strip()
|
||||
for section in sections:
|
||||
if section.number == num or section.title == num:
|
||||
return section
|
||||
# 在子章节中查找
|
||||
found = self._find_section(section.subsections, number)
|
||||
if found:
|
||||
return found
|
||||
return None
|
||||
|
||||
def _get_system_prompt(self, analysis_type: str) -> str:
|
||||
"""根据分析类型获取系统提示词"""
|
||||
prompts = {
|
||||
"summary": "你是一个专业的文档摘要助手,擅长从长文档中提取核心信息。",
|
||||
"outline": "你是一个专业的文档结构分析助手,擅长提取文档大纲和层级结构。",
|
||||
"key_points": "你是一个专业的知识提取助手,擅长从文档中提取关键信息和要点。",
|
||||
"questions": "你是一个专业的教育助手,擅长生成帮助理解文档的问题。",
|
||||
"tags": "你是一个专业的标签生成助手,擅长提取文档的主题标签。",
|
||||
"qa": "你是一个专业的问答助手,擅长基于文档内容生成问答对。",
|
||||
"statistics": "你是一个专业的统计数据分析助手,擅长分析政府统计公报中的数据。",
|
||||
"section": "你是一个专业的章节分析助手,擅长对文档的特定章节进行深入分析。"
|
||||
}
|
||||
return prompts.get(analysis_type, "你是一个专业的文档分析助手。")
|
||||
|
||||
def _build_prompt(
|
||||
self,
|
||||
content: str,
|
||||
analysis_type: str,
|
||||
user_prompt: str,
|
||||
title: str = ""
|
||||
) -> str:
|
||||
"""根据分析类型构建提示词"""
|
||||
|
||||
# 截断内容避免超出 token 限制
|
||||
max_content_len = 6000
|
||||
if len(content) > max_content_len:
|
||||
content = content[:max_content_len] + "\n\n[内容已截断...]"
|
||||
|
||||
base_prompts = {
|
||||
"summary": f"""请对以下文档进行摘要分析:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content}
|
||||
|
||||
请提供:
|
||||
1. 文档主要内容摘要(300字以内)
|
||||
2. 文档的目的和用途
|
||||
3. 适合的读者群体
|
||||
|
||||
请用中文回答,结构清晰。""",
|
||||
|
||||
"outline": f"""请提取以下文档的大纲结构:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content}
|
||||
|
||||
请按层级列出文档大纲,用缩进表示层级关系。
|
||||
格式:
|
||||
一、一级标题
|
||||
(一)二级标题
|
||||
1. 三级标题
|
||||
|
||||
请用中文回答。""",
|
||||
|
||||
"key_points": f"""请从以下文档中提取关键要点:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content}
|
||||
|
||||
请列出文档的关键要点(5-10条),每条用简洁的语言描述,并说明其在文档中的重要性。
|
||||
|
||||
请用中文回答,格式清晰。""",
|
||||
|
||||
"questions": f"""请根据以下文档生成有助于理解内容的问题:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content}
|
||||
|
||||
请生成5-10个问题,帮助读者更好地理解文档内容。每个问题应该:
|
||||
1. 涵盖文档的重要信息点
|
||||
2. 易于理解和回答
|
||||
3. 具有思考价值
|
||||
|
||||
请用中文回答。""",
|
||||
|
||||
"tags": f"""请为以下文档生成标签:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content[:3000]}
|
||||
|
||||
请生成5-8个标签,用逗号分隔。标签应该反映:
|
||||
- 文档的主题领域
|
||||
- 文档的类型
|
||||
- 文档的关键特征
|
||||
|
||||
请用中文回答,只需输出标签,不要其他内容。""",
|
||||
|
||||
"qa": f"""请根据以下文档生成问答对:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content[:4000]}
|
||||
|
||||
请生成3-5个问答对,帮助读者通过问答形式理解文档内容。
|
||||
格式:
|
||||
Q1: 问题
|
||||
A1: 回答
|
||||
Q2: 问题
|
||||
A2: 回答
|
||||
|
||||
请用中文回答,内容准确。""",
|
||||
|
||||
"statistics": f"""请分析以下政府统计公报中的数据和结论:
|
||||
|
||||
文档标题:{title}
|
||||
|
||||
文档内容:
|
||||
{content}
|
||||
|
||||
请提供:
|
||||
1. 文档中涉及的主要统计数据(列出关键数字和指标)
|
||||
2. 数据的变化趋势(增长/下降)
|
||||
3. 重要的百分比和对比
|
||||
4. 数据来源和统计口径说明
|
||||
|
||||
请用中文回答,数据准确。""",
|
||||
|
||||
"section": f"""请详细分析以下文档章节:
|
||||
|
||||
章节标题:{title}
|
||||
|
||||
章节内容:
|
||||
{content}
|
||||
|
||||
请提供:
|
||||
1. 章节主要内容概括
|
||||
2. 关键信息和数据
|
||||
3. 与其他部分的关联(如有)
|
||||
4. 重要结论
|
||||
|
||||
请用中文回答,分析深入。"""
|
||||
}
|
||||
|
||||
prompt = base_prompts.get(analysis_type, base_prompts["summary"])
|
||||
|
||||
if user_prompt and user_prompt.strip():
|
||||
prompt += f"\n\n用户额外需求:{user_prompt}"
|
||||
|
||||
return prompt
|
||||
|
||||
async def extract_outline(self, file_path: str) -> Dict[str, Any]:
|
||||
"""提取文档大纲"""
|
||||
try:
|
||||
parse_result = self.parser.parse(file_path)
|
||||
|
||||
if not parse_result.success:
|
||||
return {"success": False, "error": parse_result.error}
|
||||
|
||||
data = parse_result.data
|
||||
sections = self.extract_sections(data.get("content", ""), data.get("titles", []))
|
||||
|
||||
# 构建结构化大纲
|
||||
outline = []
|
||||
for section in sections:
|
||||
outline.append({
|
||||
"number": section.number,
|
||||
"title": section.title,
|
||||
"level": section.level,
|
||||
"line": section.line_start,
|
||||
"content_preview": section.content[:100] + "..." if len(section.content) > 100 else section.content,
|
||||
"subsections": [{
|
||||
"number": s.number,
|
||||
"title": s.title,
|
||||
"level": s.level,
|
||||
"line": s.line_start
|
||||
} for s in section.subsections]
|
||||
})
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"outline": outline
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"大纲提取失败: {str(e)}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def extract_tables_summary(self, file_path: str) -> Dict[str, Any]:
|
||||
"""提取并总结文档中的表格"""
|
||||
try:
|
||||
parse_result = self.parser.parse(file_path)
|
||||
|
||||
if not parse_result.success:
|
||||
return {"success": False, "error": parse_result.error}
|
||||
|
||||
tables = parse_result.data.get("tables", [])
|
||||
|
||||
if not tables:
|
||||
return {"success": True, "tables": [], "message": "文档中没有表格"}
|
||||
|
||||
# 提取每个表格的关键信息
|
||||
table_summaries = []
|
||||
for i, table in enumerate(tables):
|
||||
summary = {
|
||||
"index": i + 1,
|
||||
"headers": table.get("headers", []),
|
||||
"row_count": table.get("row_count", 0),
|
||||
"column_count": table.get("column_count", 0),
|
||||
"preview_rows": table.get("rows", [])[:3], # 只取前3行预览
|
||||
"first_column": [row[0] if row else "" for row in table.get("rows", [])[:5]]
|
||||
}
|
||||
table_summaries.append(summary)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"tables": table_summaries,
|
||||
"table_count": len(tables)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"表格提取失败: {str(e)}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
# 全局单例
|
||||
markdown_ai_service = MarkdownAIService()
|
||||
@@ -1,113 +0,0 @@
|
||||
✅ Excel 文件解析功能已完成并测试通过
|
||||
|
||||
已完成的工作
|
||||
|
||||
后端部分
|
||||
|
||||
1. 文件服务层 (backend/app/services/file_service.py)
|
||||
|
||||
- 文件保存、读取、删除功能
|
||||
- 文件信息获取
|
||||
2. Excel 解析模块 (backend/app/core/document_parser/)
|
||||
|
||||
- base.py - 解析器基类
|
||||
- xlsx_parser.py - Excel 文件解析器
|
||||
- utils.py - 工具函数
|
||||
3. API 接口 (backend/app/api/endpoints/upload.py)
|
||||
|
||||
- POST /upload/excel - 上传并解析 Excel 文件
|
||||
- GET /upload/excel/preview/{file_path} - 获取预览
|
||||
- DELETE /upload/file - 删除文件
|
||||
4. 主应用配置 (backend/app/main.py)
|
||||
|
||||
- CORS 中间件配置
|
||||
- API 路由注册
|
||||
|
||||
前端部分
|
||||
|
||||
1. 后端 API 封装 (frontendTest/app-a6ww9j3ja3nl/src/db/backend-api.ts)
|
||||
|
||||
- Excel 上传、预览、删除函数
|
||||
2. Excel 解析页面 (frontendTest/app-a6ww9j3ja3nl/src/pages/ExcelParse.tsx)
|
||||
|
||||
- 拖拽上传界面
|
||||
- 解析选项配置
|
||||
- 数据表格展示
|
||||
- 多工作表支持
|
||||
3. 路由配置 (frontendTest/app-a6ww9j3ja3nl/src/routes.tsx)
|
||||
|
||||
- 添加 /excel-parse 路由
|
||||
4. 环境配置 (.env)
|
||||
|
||||
- 后端 API URL 配置
|
||||
|
||||
测试结果
|
||||
|
||||
后端 API 测试通过:
|
||||
- ✅ 健康检查:http://127.0.0.1:8000/health
|
||||
- ✅ Excel 上传解析:成功解析 5 行 6 列的测试数据
|
||||
|
||||
访问方式
|
||||
|
||||
1. 后端 API 文档:http://127.0.0.1:8000/api/v1/docs
|
||||
2. 前端页面:
|
||||
|
||||
- 启动前端:cd frontendTest/app-a6ww9j3ja3nl && npm run dev
|
||||
- 访问 Excel 解析页面:/excel-parse
|
||||
|
||||
下一步建议
|
||||
|
||||
1. 启动前端开发服务器测试完整的前端界面
|
||||
2. 添加更多文档格式解析支持(docx, md, txt)
|
||||
3. 完善错误处理和用户提示
|
||||
|
||||
✻ Worked for 4m 18s · 1 background task still running (↓ to manage)
|
||||
|
||||
> 如何启动后端呢
|
||||
|
||||
● 后端启动步骤如下:
|
||||
|
||||
方法一:使用命令行启动(开发模式)
|
||||
|
||||
# 1. 进入后端目录
|
||||
cd H:/OwnProject/FilesReadSysteam/backend
|
||||
|
||||
# 2. 激活虚拟环境
|
||||
# PowerShell:
|
||||
.\venv\Scripts\Activate.ps1
|
||||
|
||||
# 或 CMD:
|
||||
.\venv\Scripts\Activate.bat
|
||||
|
||||
# 3. 启动后端服务器
|
||||
python -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload
|
||||
|
||||
方法二:直接使用虚拟环境中的 Python
|
||||
|
||||
cd H:/OwnProject/FilesReadSysteam/backend
|
||||
./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload
|
||||
|
||||
方法三:直接运行 main.py(开发调试)
|
||||
|
||||
cd H:/OwnProject/FilesReadSysteam/backend
|
||||
./venv/Scripts/python.exe app/main.py
|
||||
|
||||
启动成功的标志
|
||||
|
||||
INFO: Uvicorn running on http://127.0.0.1:8000
|
||||
INFO: Started server process [xxxxx]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
|
||||
验证服务是否正常
|
||||
|
||||
# 健康检查
|
||||
curl http://127.0.0.1:8000/health
|
||||
|
||||
# 访问 API 文档
|
||||
# 浏览器打开: http://127.0.0.1:8000/api/v1/docs
|
||||
|
||||
---
|
||||
当前状态:后端已在后台运行(任务 ID: b22jkg69j),可以直接访问 http://127.0.0.1:8000
|
||||
|
||||
需要停止的话告诉我即可。
|
||||
@@ -166,6 +166,43 @@ export interface AIAnalysisResult {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
// ==================== Markdown AI 分析类型 ====================
|
||||
|
||||
export interface AIMarkdownAnalyzeResult {
|
||||
success: boolean;
|
||||
filename?: string;
|
||||
analysis_type?: string;
|
||||
section?: string;
|
||||
word_count?: number;
|
||||
structure?: {
|
||||
title_count?: number;
|
||||
code_block_count?: number;
|
||||
table_count?: number;
|
||||
section_count?: number;
|
||||
};
|
||||
sections?: MarkdownSection[];
|
||||
analysis?: string;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface MarkdownSection {
|
||||
number: string;
|
||||
title: string;
|
||||
level: number;
|
||||
content_preview?: string;
|
||||
line_start: number;
|
||||
line_end?: number;
|
||||
subsections?: MarkdownSection[];
|
||||
}
|
||||
|
||||
export interface MarkdownOutlineResult {
|
||||
success: boolean;
|
||||
outline?: MarkdownSection[];
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export type MarkdownAnalysisType = 'summary' | 'outline' | 'key_points' | 'questions' | 'tags' | 'qa' | 'statistics' | 'section';
|
||||
|
||||
export interface AIExcelAnalyzeResult {
|
||||
success: boolean;
|
||||
excel?: {
|
||||
@@ -842,6 +879,159 @@ export const aiApi = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* 上传并使用 AI 分析 Markdown 文件
|
||||
*/
|
||||
async analyzeMarkdown(
|
||||
file: File,
|
||||
options: {
|
||||
analysisType?: MarkdownAnalysisType;
|
||||
userPrompt?: string;
|
||||
sectionNumber?: string;
|
||||
} = {}
|
||||
): Promise<AIMarkdownAnalyzeResult> {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
|
||||
const params = new URLSearchParams();
|
||||
if (options.analysisType) {
|
||||
params.append('analysis_type', options.analysisType);
|
||||
}
|
||||
if (options.userPrompt) {
|
||||
params.append('user_prompt', options.userPrompt);
|
||||
}
|
||||
if (options.sectionNumber) {
|
||||
params.append('section_number', options.sectionNumber);
|
||||
}
|
||||
|
||||
const url = `${BACKEND_BASE_URL}/ai/analyze/md?${params.toString()}`;
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json();
|
||||
throw new Error(error.detail || 'Markdown AI 分析失败');
|
||||
}
|
||||
|
||||
return await response.json();
|
||||
} catch (error) {
|
||||
console.error('Markdown AI 分析失败:', error);
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* 流式分析 Markdown 文件 (SSE)
|
||||
*/
|
||||
async analyzeMarkdownStream(
|
||||
file: File,
|
||||
options: {
|
||||
analysisType?: MarkdownAnalysisType;
|
||||
userPrompt?: string;
|
||||
sectionNumber?: string;
|
||||
} = {},
|
||||
onChunk?: (chunk: { type: string; delta?: string; error?: string }) => void
|
||||
): Promise<string> {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
|
||||
const params = new URLSearchParams();
|
||||
if (options.analysisType) {
|
||||
params.append('analysis_type', options.analysisType);
|
||||
}
|
||||
if (options.userPrompt) {
|
||||
params.append('user_prompt', options.userPrompt);
|
||||
}
|
||||
if (options.sectionNumber) {
|
||||
params.append('section_number', options.sectionNumber);
|
||||
}
|
||||
|
||||
const url = `${BACKEND_BASE_URL}/ai/analyze/md/stream?${params.toString()}`;
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json();
|
||||
throw new Error(error.detail || 'Markdown AI 流式分析失败');
|
||||
}
|
||||
|
||||
const reader = response.body?.getReader();
|
||||
if (!reader) throw new Error('无法读取响应流');
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
let fullResponse = '';
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') continue;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
if (parsed.type === 'content' && parsed.delta) {
|
||||
fullResponse += parsed.delta;
|
||||
onChunk?.({ type: 'content', delta: parsed.delta });
|
||||
} else if (parsed.type === 'done') {
|
||||
fullResponse = parsed.full_response || fullResponse;
|
||||
} else if (parsed.error) {
|
||||
onChunk?.({ type: 'error', error: parsed.error });
|
||||
}
|
||||
} catch {
|
||||
// Ignore parse errors for incomplete JSON
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return fullResponse;
|
||||
} catch (error) {
|
||||
console.error('Markdown AI 流式分析失败:', error);
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* 获取 Markdown 文档大纲(分章节信息)
|
||||
*/
|
||||
async getMarkdownOutline(file: File): Promise<MarkdownOutlineResult> {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
|
||||
const url = `${BACKEND_BASE_URL}/ai/analyze/md/outline`;
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: 'GET',
|
||||
body: formData,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json();
|
||||
throw new Error(error.detail || '获取 Markdown 大纲失败');
|
||||
}
|
||||
|
||||
return await response.json();
|
||||
} catch (error) {
|
||||
console.error('获取 Markdown 大纲失败:', error);
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* 生成统计信息和图表
|
||||
*/
|
||||
|
||||
@@ -19,7 +19,11 @@ import {
|
||||
TrendingUp,
|
||||
Download,
|
||||
Brain,
|
||||
Settings2
|
||||
Settings2,
|
||||
List,
|
||||
MessageSquareCode,
|
||||
Tag,
|
||||
HelpCircle
|
||||
} from 'lucide-react';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Input } from '@/components/ui/input';
|
||||
@@ -33,7 +37,7 @@ import { Checkbox } from '@/components/ui/checkbox';
|
||||
import { toast } from 'sonner';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { Skeleton } from '@/components/ui/skeleton';
|
||||
import { backendApi, type ExcelParseResult, aiApi } from '@/db/backend-api';
|
||||
import { backendApi, type ExcelParseResult, type AIMarkdownAnalyzeResult, type MarkdownSection, aiApi } from '@/db/backend-api';
|
||||
import {
|
||||
Table as TableComponent,
|
||||
TableBody,
|
||||
@@ -78,6 +82,15 @@ const Documents: React.FC = () => {
|
||||
const [analysisCharts, setAnalysisCharts] = useState<any>(null);
|
||||
const [analysisTypes, setAnalysisTypes] = useState<Array<{ value: string; label: string; description: string }>>([]);
|
||||
|
||||
// Markdown AI 分析相关状态
|
||||
const [mdAnalysis, setMdAnalysis] = useState<AIMarkdownAnalyzeResult | null>(null);
|
||||
const [mdAnalysisType, setMdAnalysisType] = useState<'summary' | 'outline' | 'key_points' | 'questions' | 'tags' | 'qa' | 'statistics' | 'section'>('summary');
|
||||
const [mdUserPrompt, setMdUserPrompt] = useState('');
|
||||
const [mdSections, setMdSections] = useState<MarkdownSection[]>([]);
|
||||
const [mdSelectedSection, setMdSelectedSection] = useState<string>('');
|
||||
const [mdStreaming, setMdStreaming] = useState(false);
|
||||
const [mdStreamingContent, setMdStreamingContent] = useState('');
|
||||
|
||||
// 解析选项
|
||||
const [parseOptions, setParseOptions] = useState({
|
||||
parseAllSheets: false,
|
||||
@@ -144,6 +157,9 @@ const Documents: React.FC = () => {
|
||||
setAiAnalysis(null);
|
||||
setAnalysisCharts(null);
|
||||
setExpandedSheet(null);
|
||||
setMdAnalysis(null);
|
||||
setMdSections([]);
|
||||
setMdStreamingContent('');
|
||||
|
||||
const ext = file.name.split('.').pop()?.toLowerCase();
|
||||
|
||||
@@ -163,6 +179,9 @@ const Documents: React.FC = () => {
|
||||
} else {
|
||||
toast.error(result.error || '解析失败');
|
||||
}
|
||||
} else if (ext === 'md' || ext === 'markdown') {
|
||||
// Markdown 文件:获取大纲
|
||||
await fetchMdOutline();
|
||||
} else {
|
||||
// 其他文档使用通用上传接口
|
||||
const result = await backendApi.uploadDocument(file);
|
||||
@@ -403,6 +422,105 @@ const Documents: React.FC = () => {
|
||||
}
|
||||
};
|
||||
|
||||
const isMarkdownFile = (filename: string) => {
|
||||
const ext = filename.split('.').pop()?.toLowerCase();
|
||||
return ext === 'md' || ext === 'markdown';
|
||||
};
|
||||
|
||||
// Markdown AI 分析处理
|
||||
const handleMdAnalyze = async () => {
|
||||
if (!uploadedFile || !isMarkdownFile(uploadedFile.name)) {
|
||||
toast.error('请先上传 Markdown 文件');
|
||||
return;
|
||||
}
|
||||
|
||||
setAnalyzing(true);
|
||||
setMdAnalysis(null);
|
||||
|
||||
try {
|
||||
const result = await aiApi.analyzeMarkdown(uploadedFile, {
|
||||
analysisType: mdAnalysisType,
|
||||
userPrompt: mdUserPrompt,
|
||||
sectionNumber: mdSelectedSection || undefined
|
||||
});
|
||||
|
||||
if (result.success) {
|
||||
toast.success('Markdown AI 分析完成');
|
||||
setMdAnalysis(result);
|
||||
} else {
|
||||
toast.error(result.error || 'AI 分析失败');
|
||||
}
|
||||
} catch (error: any) {
|
||||
toast.error(error.message || 'AI 分析失败');
|
||||
} finally {
|
||||
setAnalyzing(false);
|
||||
}
|
||||
};
|
||||
|
||||
// 流式分析 Markdown
|
||||
const handleMdAnalyzeStream = async () => {
|
||||
if (!uploadedFile || !isMarkdownFile(uploadedFile.name)) {
|
||||
toast.error('请先上传 Markdown 文件');
|
||||
return;
|
||||
}
|
||||
|
||||
setAnalyzing(true);
|
||||
setMdStreaming(true);
|
||||
setMdStreamingContent('');
|
||||
setMdAnalysis(null);
|
||||
|
||||
try {
|
||||
await aiApi.analyzeMarkdownStream(
|
||||
uploadedFile,
|
||||
{
|
||||
analysisType: mdAnalysisType,
|
||||
userPrompt: mdUserPrompt,
|
||||
sectionNumber: mdSelectedSection || undefined
|
||||
},
|
||||
(chunk: { type: string; delta?: string; error?: string }) => {
|
||||
if (chunk.type === 'content' && chunk.delta) {
|
||||
setMdStreamingContent(prev => prev + chunk.delta);
|
||||
} else if (chunk.type === 'error') {
|
||||
toast.error(chunk.error || '流式分析出错');
|
||||
}
|
||||
}
|
||||
);
|
||||
} catch (error: any) {
|
||||
toast.error(error.message || 'AI 分析失败');
|
||||
} finally {
|
||||
setAnalyzing(false);
|
||||
setMdStreaming(false);
|
||||
}
|
||||
};
|
||||
|
||||
// 获取 Markdown 文档大纲(分章节)
|
||||
const fetchMdOutline = async () => {
|
||||
if (!uploadedFile || !isMarkdownFile(uploadedFile.name)) return;
|
||||
|
||||
try {
|
||||
const result = await aiApi.getMarkdownOutline(uploadedFile);
|
||||
if (result.success && result.outline) {
|
||||
setMdSections(result.outline);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('获取大纲失败:', error);
|
||||
}
|
||||
};
|
||||
|
||||
const getMdAnalysisIcon = (type: string) => {
|
||||
switch (type) {
|
||||
case 'summary': return <FileText size={20} />;
|
||||
case 'outline': return <List size={20} />;
|
||||
case 'key_points': return <TrendingUp size={20} />;
|
||||
case 'statistics': return <TrendingUp size={20} />;
|
||||
case 'section': return <FileText size={20} />;
|
||||
case 'questions': return <MessageSquareCode size={20} />;
|
||||
case 'tags': return <Tag size={20} />;
|
||||
case 'qa': return <HelpCircle size={20} />;
|
||||
default: return <Sparkles size={20} />;
|
||||
}
|
||||
};
|
||||
|
||||
const formatFileSize = (bytes: number): string => {
|
||||
if (bytes === 0) return '0 B';
|
||||
const k = 1024;
|
||||
@@ -600,6 +718,97 @@ const Documents: React.FC = () => {
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Markdown AI 分析选项 */}
|
||||
{uploadedFile && isMarkdownFile(uploadedFile.name) && (
|
||||
<Card className="border-none shadow-md bg-gradient-to-br from-purple-500/5 to-primary/5">
|
||||
<CardHeader className="pb-4">
|
||||
<CardTitle className="flex items-center gap-2">
|
||||
<Sparkles className="text-purple-500" size={20} />
|
||||
Markdown AI 分析
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
{/* 章节选择 */}
|
||||
{mdSections.length > 0 && (
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="md-section" className="text-sm">指定章节(可选)</Label>
|
||||
<Select value={mdSelectedSection} onValueChange={setMdSelectedSection}>
|
||||
<SelectTrigger id="md-section" className="bg-background">
|
||||
<SelectValue placeholder="全文分析" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
<SelectItem value="">全文分析</SelectItem>
|
||||
{mdSections.map((section) => (
|
||||
<SelectItem key={section.number} value={section.number}>
|
||||
{section.number}、{section.title}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
)}
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="md-analysis-type" className="text-sm">分析类型</Label>
|
||||
<Select value={mdAnalysisType} onValueChange={(value: any) => setMdAnalysisType(value)}>
|
||||
<SelectTrigger id="md-analysis-type" className="bg-background">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{[
|
||||
{ value: 'summary', label: '文档摘要', desc: '主要内容摘要' },
|
||||
{ value: 'outline', label: '大纲提取', desc: '提取文档结构' },
|
||||
{ value: 'key_points', label: '关键要点', desc: '提取关键信息' },
|
||||
{ value: 'statistics', label: '统计分析', desc: '统计数据分析' },
|
||||
{ value: 'section', label: '章节分析', desc: '分章节详细分析' },
|
||||
{ value: 'questions', label: '生成问题', desc: '生成理解性问题' },
|
||||
{ value: 'tags', label: '生成标签', desc: '提取主题标签' },
|
||||
{ value: 'qa', label: '问答对', desc: '生成问答内容' }
|
||||
].map(type => (
|
||||
<SelectItem key={type.value} value={type.value}>
|
||||
<div className="flex items-center gap-2">
|
||||
{getMdAnalysisIcon(type.value)}
|
||||
<div className="flex flex-col">
|
||||
<span className="font-medium">{type.label}</span>
|
||||
<span className="text-xs text-muted-foreground">{type.desc}</span>
|
||||
</div>
|
||||
</div>
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="md-user-prompt" className="text-sm">自定义提示词(可选)</Label>
|
||||
<Textarea
|
||||
id="md-user-prompt"
|
||||
placeholder="例如:请重点关注技术实现部分..."
|
||||
value={mdUserPrompt}
|
||||
onChange={(e) => setMdUserPrompt(e.target.value)}
|
||||
className="bg-background resize-none"
|
||||
rows={2}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Button
|
||||
onClick={handleMdAnalyze}
|
||||
disabled={analyzing}
|
||||
className="flex-1 bg-gradient-to-r from-purple-500 to-primary hover:from-purple-500/90 hover:to-primary/90"
|
||||
>
|
||||
{analyzing && !mdStreaming ? <><Loader2 className="mr-2 animate-spin" size={16} /> 分析中...</> : <><Sparkles className="mr-2" size={16} />普通分析</>}
|
||||
</Button>
|
||||
<Button
|
||||
onClick={handleMdAnalyzeStream}
|
||||
disabled={analyzing}
|
||||
variant="outline"
|
||||
className="flex-1"
|
||||
>
|
||||
{analyzing && mdStreaming ? <><Loader2 className="mr-2 animate-spin" size={16} /> 流式...</> : <><Sparkles className="mr-2" size={16} />流式分析</>}
|
||||
</Button>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* 数据操作 */}
|
||||
{parseResult?.success && (
|
||||
<Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5">
|
||||
@@ -661,6 +870,45 @@ const Documents: React.FC = () => {
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Markdown AI 分析结果 */}
|
||||
{(mdAnalysis || mdStreamingContent) && (
|
||||
<Card className="border-none shadow-md border-l-4 border-l-purple-500">
|
||||
<CardHeader>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="space-y-1">
|
||||
<CardTitle className="flex items-center gap-2">
|
||||
<Sparkles className="text-purple-500" size={20} />
|
||||
Markdown AI 分析结果
|
||||
{mdStreaming && <Badge variant="default" className="ml-2 bg-purple-500">流式输出中</Badge>}
|
||||
</CardTitle>
|
||||
{mdAnalysis && (
|
||||
<CardDescription>
|
||||
{mdAnalysis.filename} • {mdAnalysis.word_count || 0} 字 • {mdAnalysis.analysis_type}
|
||||
{mdAnalysis.section && ` • ${mdAnalysis.section}`}
|
||||
</CardDescription>
|
||||
)}
|
||||
</div>
|
||||
{mdAnalysis?.structure && (
|
||||
<Badge variant="secondary">
|
||||
{mdAnalysis.structure.title_count || 0} 标题 • {mdAnalysis.structure.section_count || 0} 章节
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent className="max-h-[500px] overflow-y-auto">
|
||||
{/* 流式内容优先显示 */}
|
||||
{mdStreamingContent && (
|
||||
<div className="animate-pulse text-sm text-muted-foreground mb-4">
|
||||
流式输出中...
|
||||
</div>
|
||||
)}
|
||||
{mdStreamingContent && <Markdown content={mdStreamingContent} />}
|
||||
{mdAnalysis?.analysis && !mdStreamingContent && <Markdown content={mdAnalysis.analysis} />}
|
||||
{!mdAnalysis?.success && !mdStreamingContent && <p className="text-sm text-destructive">{mdAnalysis?.error || '分析失败'}</p>}
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* 图表显示 */}
|
||||
{analysisCharts && (
|
||||
<Card className="border-none shadow-md border-l-4 border-l-indigo-500">
|
||||
|
||||
Reference in New Issue
Block a user