Merge branch 'main' of https://gitea.kronecker.cc/OurCodesAreAllRight/FilesReadSystem
This commit is contained in:
@@ -12,6 +12,7 @@ from app.services.excel_ai_service import excel_ai_service
|
|||||||
from app.services.markdown_ai_service import markdown_ai_service
|
from app.services.markdown_ai_service import markdown_ai_service
|
||||||
from app.services.template_fill_service import template_fill_service
|
from app.services.template_fill_service import template_fill_service
|
||||||
from app.services.word_ai_service import word_ai_service
|
from app.services.word_ai_service import word_ai_service
|
||||||
|
from app.services.txt_ai_service import txt_ai_service
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -153,8 +154,9 @@ async def analyze_text(
|
|||||||
|
|
||||||
@router.post("/analyze/md")
|
@router.post("/analyze/md")
|
||||||
async def analyze_markdown(
|
async def analyze_markdown(
|
||||||
file: UploadFile = File(...),
|
file: Optional[UploadFile] = File(None),
|
||||||
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section"),
|
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
||||||
|
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
|
||||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||||
section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
|
section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
|
||||||
):
|
):
|
||||||
@@ -162,7 +164,8 @@ async def analyze_markdown(
|
|||||||
上传并使用 AI 分析 Markdown 文件
|
上传并使用 AI 分析 Markdown 文件
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: 上传的 Markdown 文件
|
file: 上传的 Markdown 文件(与 doc_id 二选一)
|
||||||
|
doc_id: 文档ID(从数据库读取)
|
||||||
analysis_type: 分析类型
|
analysis_type: 分析类型
|
||||||
user_prompt: 用户自定义提示词
|
user_prompt: 用户自定义提示词
|
||||||
section_number: 指定分析的章节编号
|
section_number: 指定分析的章节编号
|
||||||
@@ -170,16 +173,8 @@ async def analyze_markdown(
|
|||||||
Returns:
|
Returns:
|
||||||
dict: 分析结果
|
dict: 分析结果
|
||||||
"""
|
"""
|
||||||
# 检查文件类型
|
filename = None
|
||||||
if not file.filename:
|
tmp_path = None
|
||||||
raise HTTPException(status_code=400, detail="文件名为空")
|
|
||||||
|
|
||||||
file_ext = file.filename.split('.')[-1].lower()
|
|
||||||
if file_ext not in ['md', 'markdown']:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
|
|
||||||
)
|
|
||||||
|
|
||||||
# 验证分析类型
|
# 验证分析类型
|
||||||
supported_types = markdown_ai_service.get_supported_analysis_types()
|
supported_types = markdown_ai_service.get_supported_analysis_types()
|
||||||
@@ -189,46 +184,96 @@ async def analyze_markdown(
|
|||||||
detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
|
detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
if doc_id:
|
||||||
# 读取文件内容
|
# 从数据库读取文档
|
||||||
content = await file.read()
|
|
||||||
|
|
||||||
# 保存到临时文件
|
|
||||||
with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
|
|
||||||
tmp.write(content)
|
|
||||||
tmp_path = tmp.name
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"开始分析 Markdown 文件: {file.filename}, 分析类型: {analysis_type}, 章节: {section_number}")
|
from app.core.database.mongodb import mongodb
|
||||||
|
doc = await mongodb.get_document(doc_id)
|
||||||
|
if not doc:
|
||||||
|
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
|
||||||
|
|
||||||
# 调用 AI 分析服务
|
filename = doc.get("metadata", {}).get("original_filename", "unknown.md")
|
||||||
result = await markdown_ai_service.analyze_markdown(
|
file_ext = filename.split('.')[-1].lower()
|
||||||
file_path=tmp_path,
|
|
||||||
analysis_type=analysis_type,
|
if file_ext not in ['md', 'markdown']:
|
||||||
user_prompt=user_prompt,
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
|
||||||
section_number=section_number
|
|
||||||
|
content = doc.get("content", "")
|
||||||
|
if not content:
|
||||||
|
raise HTTPException(status_code=400, detail="文档内容为空")
|
||||||
|
|
||||||
|
# 保存到临时文件
|
||||||
|
with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
|
||||||
|
tmp.write(content.encode('utf-8'))
|
||||||
|
tmp_path = tmp.name
|
||||||
|
|
||||||
|
logger.info(f"从数据库加载 Markdown 文档: {filename}, 长度: {len(content)}")
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库读取 Markdown 文档失败: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
|
||||||
|
else:
|
||||||
|
# 文件上传模式
|
||||||
|
if not file:
|
||||||
|
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
|
||||||
|
|
||||||
|
if not file.filename:
|
||||||
|
raise HTTPException(status_code=400, detail="文件名为空")
|
||||||
|
|
||||||
|
file_ext = file.filename.split('.')[-1].lower()
|
||||||
|
if file_ext not in ['md', 'markdown']:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"不支持的文件类型: {file_ext},仅支持 .md 和 .markdown"
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Markdown 分析完成: {file.filename}, 成功: {result['success']}")
|
try:
|
||||||
|
# 读取文件内容
|
||||||
|
content = await file.read()
|
||||||
|
|
||||||
if not result['success']:
|
# 保存到临时文件
|
||||||
raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))
|
with tempfile.NamedTemporaryFile(mode='wb', suffix='.md', delete=False) as tmp:
|
||||||
|
tmp.write(content)
|
||||||
|
tmp_path = tmp.name
|
||||||
|
|
||||||
return result
|
filename = file.filename
|
||||||
|
|
||||||
finally:
|
except Exception as e:
|
||||||
# 清理临时文件,确保在所有情况下都能清理
|
logger.error(f"读取 Markdown 文件失败: {str(e)}")
|
||||||
try:
|
raise HTTPException(status_code=500, detail=f"读取文件失败: {str(e)}")
|
||||||
if tmp_path and os.path.exists(tmp_path):
|
|
||||||
os.unlink(tmp_path)
|
try:
|
||||||
except Exception as cleanup_error:
|
logger.info(f"开始分析 Markdown 文件: {filename}, 分析类型: {analysis_type}, 章节: {section_number}")
|
||||||
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
|
|
||||||
|
# 调用 AI 分析服务
|
||||||
|
result = await markdown_ai_service.analyze_markdown(
|
||||||
|
file_path=tmp_path,
|
||||||
|
analysis_type=analysis_type,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
section_number=section_number
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Markdown 分析完成: {filename}, 成功: {result['success']}")
|
||||||
|
|
||||||
|
if not result['success']:
|
||||||
|
raise HTTPException(status_code=500, detail=result.get('error', '分析失败'))
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
|
logger.error(f"Markdown AI 分析过程中出错: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||||
|
finally:
|
||||||
|
# 清理临时文件
|
||||||
|
if tmp_path and os.path.exists(tmp_path):
|
||||||
|
try:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except Exception as cleanup_error:
|
||||||
|
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
|
||||||
|
|
||||||
|
|
||||||
@router.post("/analyze/md/stream")
|
@router.post("/analyze/md/stream")
|
||||||
@@ -346,67 +391,100 @@ async def get_markdown_outline(
|
|||||||
|
|
||||||
@router.post("/analyze/txt")
|
@router.post("/analyze/txt")
|
||||||
async def analyze_txt(
|
async def analyze_txt(
|
||||||
file: UploadFile = File(...),
|
file: Optional[UploadFile] = File(None),
|
||||||
|
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
||||||
|
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
上传并使用 AI 分析 TXT 文本文件,提取结构化数据
|
上传并使用 AI 分析 TXT 文本文件,提取结构化数据或生成图表
|
||||||
|
|
||||||
将非结构化文本转换为结构化表格数据,便于后续填表使用
|
将非结构化文本转换为结构化表格数据,便于后续填表使用
|
||||||
|
当 analysis_type=charts 时,可生成可视化图表
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: 上传的 TXT 文件
|
file: 上传的 TXT 文件(与 doc_id 二选一)
|
||||||
|
doc_id: 文档ID(从数据库读取)
|
||||||
|
analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: 分析结果,包含结构化表格数据
|
dict: 分析结果,包含结构化表格数据或图表数据
|
||||||
"""
|
"""
|
||||||
if not file.filename:
|
filename = None
|
||||||
raise HTTPException(status_code=400, detail="文件名为空")
|
text_content = None
|
||||||
|
|
||||||
file_ext = file.filename.split('.')[-1].lower()
|
|
||||||
if file_ext not in ['txt', 'text']:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 读取文件内容
|
|
||||||
content = await file.read()
|
|
||||||
|
|
||||||
# 保存到临时文件
|
|
||||||
with tempfile.NamedTemporaryFile(mode='wb', suffix='.txt', delete=False) as tmp:
|
|
||||||
tmp.write(content)
|
|
||||||
tmp_path = tmp.name
|
|
||||||
|
|
||||||
|
if doc_id:
|
||||||
|
# 从数据库读取文档
|
||||||
try:
|
try:
|
||||||
logger.info(f"开始 AI 分析 TXT 文件: {file.filename}")
|
from app.core.database.mongodb import mongodb
|
||||||
|
doc = await mongodb.get_document(doc_id)
|
||||||
|
if not doc:
|
||||||
|
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
|
||||||
|
|
||||||
# 使用 template_fill_service 的 AI 分析方法
|
filename = doc.get("metadata", {}).get("original_filename", "unknown.txt")
|
||||||
result = await template_fill_service.analyze_txt_with_ai(
|
file_ext = filename.split('.')[-1].lower()
|
||||||
content=content.decode('utf-8', errors='replace'),
|
|
||||||
filename=file.filename
|
if file_ext not in ['txt', 'text']:
|
||||||
|
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
|
||||||
|
|
||||||
|
# 使用数据库中的 content
|
||||||
|
text_content = doc.get("content", "")
|
||||||
|
|
||||||
|
if not text_content:
|
||||||
|
raise HTTPException(status_code=400, detail="文档内容为空")
|
||||||
|
|
||||||
|
logger.info(f"从数据库加载 TXT 文档: {filename}, 长度: {len(text_content)}")
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库读取 TXT 文档失败: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
|
||||||
|
else:
|
||||||
|
# 文件上传模式
|
||||||
|
if not file:
|
||||||
|
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
|
||||||
|
|
||||||
|
if not file.filename:
|
||||||
|
raise HTTPException(status_code=400, detail="文件名为空")
|
||||||
|
|
||||||
|
file_ext = file.filename.split('.')[-1].lower()
|
||||||
|
if file_ext not in ['txt', 'text']:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
|
||||||
)
|
)
|
||||||
|
|
||||||
if result:
|
# 读取文件内容
|
||||||
logger.info(f"TXT AI 分析成功: {file.filename}")
|
content = await file.read()
|
||||||
return {
|
text_content = content.decode('utf-8', errors='replace')
|
||||||
"success": True,
|
filename = file.filename
|
||||||
"filename": file.filename,
|
|
||||||
"structured_data": result
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
logger.warning(f"TXT AI 分析返回空结果: {file.filename}")
|
|
||||||
return {
|
|
||||||
"success": False,
|
|
||||||
"filename": file.filename,
|
|
||||||
"error": "AI 分析未能提取到结构化数据",
|
|
||||||
"structured_data": None
|
|
||||||
}
|
|
||||||
|
|
||||||
finally:
|
try:
|
||||||
# 清理临时文件
|
logger.info(f"开始 AI 分析 TXT 文件: {filename}, analysis_type={analysis_type}")
|
||||||
if os.path.exists(tmp_path):
|
|
||||||
os.unlink(tmp_path)
|
# 使用 txt_ai_service 的 AI 分析方法
|
||||||
|
result = await txt_ai_service.analyze_txt_with_ai(
|
||||||
|
content=text_content,
|
||||||
|
filename=filename,
|
||||||
|
analysis_type=analysis_type
|
||||||
|
)
|
||||||
|
|
||||||
|
if result:
|
||||||
|
logger.info(f"TXT AI 分析成功: {filename}")
|
||||||
|
return {
|
||||||
|
"success": result.get("success", True),
|
||||||
|
"filename": filename,
|
||||||
|
"analysis_type": analysis_type,
|
||||||
|
"result": result
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
logger.warning(f"TXT AI 分析返回空结果: {filename}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"filename": filename,
|
||||||
|
"error": "AI 分析未能提取到结构化数据",
|
||||||
|
"result": None
|
||||||
|
}
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
@@ -419,21 +497,89 @@ async def analyze_txt(
|
|||||||
|
|
||||||
@router.post("/analyze/word")
|
@router.post("/analyze/word")
|
||||||
async def analyze_word(
|
async def analyze_word(
|
||||||
file: UploadFile = File(...),
|
file: Optional[UploadFile] = File(None),
|
||||||
user_hint: str = Query("", description="用户提示词,如'请提取表格数据'")
|
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
||||||
|
user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"),
|
||||||
|
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
使用 AI 解析 Word 文档,提取结构化数据
|
使用 AI 解析 Word 文档,提取结构化数据或生成图表
|
||||||
|
|
||||||
适用于从非结构化的 Word 文档中提取表格数据、键值对等信息
|
适用于从非结构化的 Word 文档中提取表格数据、键值对等信息
|
||||||
|
当 analysis_type=charts 时,可生成可视化图表
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: 上传的 Word 文件
|
file: 上传的 Word 文件(与 doc_id 二选一)
|
||||||
|
doc_id: 文档ID(从数据库读取)
|
||||||
user_hint: 用户提示词
|
user_hint: 用户提示词
|
||||||
|
analysis_type: 分析类型 - "structured"(默认,提取结构化数据)或 "charts"(生成图表)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: 包含结构化数据的解析结果
|
dict: 包含结构化数据的解析结果或图表数据
|
||||||
"""
|
"""
|
||||||
|
# 获取文件名和扩展名
|
||||||
|
filename = None
|
||||||
|
file_ext = None
|
||||||
|
|
||||||
|
if doc_id:
|
||||||
|
# 从数据库读取文档
|
||||||
|
try:
|
||||||
|
from app.core.database.mongodb import mongodb
|
||||||
|
doc = await mongodb.get_document(doc_id)
|
||||||
|
if not doc:
|
||||||
|
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
|
||||||
|
|
||||||
|
filename = doc.get("metadata", {}).get("original_filename", "unknown.docx")
|
||||||
|
file_ext = filename.split('.')[-1].lower()
|
||||||
|
|
||||||
|
if file_ext not in ['docx']:
|
||||||
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
|
||||||
|
|
||||||
|
# 使用数据库中的 content 进行分析
|
||||||
|
content = doc.get("content", "")
|
||||||
|
tables = doc.get("structured_data", {}).get("tables", [])
|
||||||
|
|
||||||
|
# 调用 AI 分析服务,传入数据库内容
|
||||||
|
if analysis_type == "charts":
|
||||||
|
result = await word_ai_service.generate_charts_from_db(
|
||||||
|
content=content,
|
||||||
|
tables=tables,
|
||||||
|
filename=filename,
|
||||||
|
user_hint=user_hint
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result = await word_ai_service.parse_word_with_ai_from_db(
|
||||||
|
content=content,
|
||||||
|
tables=tables,
|
||||||
|
filename=filename,
|
||||||
|
user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.get("success"):
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"filename": filename,
|
||||||
|
"analysis_type": analysis_type,
|
||||||
|
"result": result
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"filename": filename,
|
||||||
|
"error": result.get("error", "AI 解析失败"),
|
||||||
|
"result": None
|
||||||
|
}
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库读取 Word 文档失败: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
|
||||||
|
|
||||||
|
# 文件上传模式
|
||||||
|
if not file:
|
||||||
|
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
|
||||||
|
|
||||||
if not file.filename:
|
if not file.filename:
|
||||||
raise HTTPException(status_code=400, detail="文件名为空")
|
raise HTTPException(status_code=400, detail="文件名为空")
|
||||||
|
|
||||||
@@ -453,16 +599,25 @@ async def analyze_word(
|
|||||||
tmp_path = tmp.name
|
tmp_path = tmp.name
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 使用 AI 解析 Word 文档
|
# 根据 analysis_type 选择处理方式
|
||||||
result = await word_ai_service.parse_word_with_ai(
|
if analysis_type == "charts":
|
||||||
file_path=tmp_path,
|
# 生成图表
|
||||||
user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
|
result = await word_ai_service.generate_charts(
|
||||||
)
|
file_path=tmp_path,
|
||||||
|
user_hint=user_hint
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# 提取结构化数据
|
||||||
|
result = await word_ai_service.parse_word_with_ai(
|
||||||
|
file_path=tmp_path,
|
||||||
|
user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
|
||||||
|
)
|
||||||
|
|
||||||
if result.get("success"):
|
if result.get("success"):
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"filename": file.filename,
|
"filename": file.filename,
|
||||||
|
"analysis_type": analysis_type,
|
||||||
"result": result
|
"result": result
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -405,7 +405,7 @@ async def process_documents_batch(task_id: str, files: List[dict]):
|
|||||||
if content and len(content) > 50:
|
if content and len(content) > 50:
|
||||||
await index_document_to_rag(doc_id, filename, result, file_info["ext"])
|
await index_document_to_rag(doc_id, filename, result, file_info["ext"])
|
||||||
|
|
||||||
return {"index": index, "filename": filename, "doc_id": doc_id, "success": True}
|
return {"index": index, "filename": filename, "doc_id": doc_id, "file_path": file_info["path"], "success": True}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"处理文件 {filename} 失败: {e}")
|
logger.error(f"处理文件 {filename} 失败: {e}")
|
||||||
|
|||||||
352
backend/app/services/txt_ai_service.py
Normal file
352
backend/app/services/txt_ai_service.py
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
"""
|
||||||
|
TXT 文档 AI 分析服务
|
||||||
|
|
||||||
|
使用 LLM 对 TXT 文本文件进行深度分析,提取结构化数据并生成可视化图表
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from app.services.llm_service import llm_service
|
||||||
|
from app.services.visualization_service import visualization_service
|
||||||
|
from app.core.document_parser.txt_parser import TxtParser
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TxtAIService:
|
||||||
|
"""TXT 文档 AI 分析服务"""
|
||||||
|
|
||||||
|
def __init__(self):
    """Create the service with its text parser and LLM client.

    ``self.llm`` is bound to the module-level ``llm_service`` because
    ``_extract_structured_data`` and ``generate_charts`` both call
    ``self.llm.chat`` and ``self.llm.extract_message_content``. Without
    this attribute every analysis request fails with ``AttributeError``
    (reported to the caller as ``success: False``).
    """
    self.parser = TxtParser()
    # NOTE(review): assumes llm_service exposes chat() and
    # extract_message_content(), as the calls in this class expect.
    self.llm = llm_service
|
||||||
|
|
||||||
|
async def analyze_txt_with_ai(
    self,
    content: str,
    filename: str = "",
    user_hint: str = "",
    analysis_type: str = "structured"
) -> Dict[str, Any]:
    """
    Analyze the text of a TXT document with the LLM.

    Args:
        content: Text to analyze.
        filename: Optional file name, forwarded to the selected handler.
        user_hint: Optional user prompt, forwarded to the selected handler.
        analysis_type: "charts" routes to ``generate_charts``; any other
            value (default "structured") routes to
            ``_extract_structured_data``.

    Returns:
        Dict: the handler's result, or ``{"success": False, "error": ...}``
        when the content is empty/blank or an exception is raised.
    """
    try:
        has_text = bool(content) and bool(content.strip())
        if not has_text:
            return {
                "success": False,
                "error": "文档内容为空"
            }

        # Pick the coroutine once, then call it with the shared arguments.
        if analysis_type == "charts":
            handler = self.generate_charts
        else:
            handler = self._extract_structured_data

        return await handler(content, filename, user_hint)

    except Exception as exc:
        message = str(exc)
        logger.error(f"TXT AI 分析失败: {message}")
        return {
            "success": False,
            "error": message
        }
|
||||||
|
|
||||||
|
async def _extract_structured_data(
    self,
    content: str,
    filename: str = "",
    user_hint: str = ""
) -> Dict[str, Any]:
    """
    Ask the LLM to extract structured data from plain text.

    Args:
        content: Text to analyze; only the first 8000 characters are sent.
        filename: File name (accepted for signature parity; not used here).
        user_hint: Optional user requirement; a default extraction
            instruction is used when it is empty.

    Returns:
        Dict: on a parsed reply, ``success``, ``type``, ``tables``,
        ``key_values``, ``list_items`` and ``summary``; when the reply cannot
        be parsed, a ``type: "text"`` result carrying the first 500
        characters of the text; on an exception,
        ``{"success": False, "error": ...}``.
    """
    try:
        # Cap the prompt size to stay within the model's token budget.
        max_content_len = 8000
        is_truncated = len(content) > max_content_len
        text_preview = content[:max_content_len] if is_truncated else content

        requirement = user_hint if user_hint else "请提取文档中的所有结构化数据,包括表格数据、键值对、列表项等。"
        scope_label = "前" + str(max_content_len) + "字符,仅显示部分" if is_truncated else "全文"

        prompt = f"""你是一个专业的数据提取专家。请从以下文本中提取结构化数据。

【用户需求】
{requirement}

【文档内容】({scope_label})
{text_preview}

请按照以下 JSON 格式输出:
{{
"type": "structured_text",
"tables": [{{"headers": [...], "rows": [...]}}],
"key_values": {{"键1": "值1", "键2": "值2", ...}},
"list_items": ["项1", "项2", ...],
"summary": "文档内容摘要"
}}

重点:
- 如果文档包含表格数据(制表符、空格对齐等),提取到 tables 中
- 如果文档包含键值对(如 名称: 张三),提取到 key_values 中
- 如果文档包含列表项,提取到 list_items 中
- 如果无法提取到结构化数据,至少提供一个详细的摘要
"""

        system_message = {"role": "system", "content": "你是一个专业的数据提取助手。请严格按JSON格式输出。"}
        user_message = {"role": "user", "content": prompt}

        response = await self.llm.chat(
            messages=[system_message, user_message],
            temperature=0.1,
            max_tokens=50000
        )

        reply_text = self.llm.extract_message_content(response)
        parsed = self._parse_json_response(reply_text)

        if not parsed:
            # Unparseable reply: fall back to a raw-text preview.
            head = text_preview[:500]
            return {
                "success": True,
                "type": "text",
                "summary": head,
                "raw_text_preview": text_preview[:500]
            }

        logger.info(f"TXT 结构化数据提取成功: type={parsed.get('type')}")
        return {
            "success": True,
            "type": parsed.get("type", "structured_text"),
            "tables": parsed.get("tables", []),
            "key_values": parsed.get("key_values", {}),
            "list_items": parsed.get("list_items", []),
            "summary": parsed.get("summary", "")
        }

    except Exception as exc:
        logger.error(f"TXT 结构化数据提取失败: {str(exc)}")
        return {
            "success": False,
            "error": str(exc)
        }
|
||||||
|
|
||||||
|
async def generate_charts(
    self,
    content: str,
    filename: str = "",
    user_hint: str = ""
) -> Dict[str, Any]:
    """
    Extract chartable data from text with the LLM and build visualizations.

    Args:
        content: Text to analyze; only the first 8000 characters are sent.
        filename: File name, inserted into the prompt as the document title.
        user_hint: User prompt (accepted for signature parity; not used here).

    Returns:
        Dict: on success, the chart/statistics fields returned by
        ``visualization_service.analyze_and_visualize`` plus
        ``key_statistics``, ``chart_suggestions`` and ``table_description``;
        otherwise ``success: False`` with an ``error`` message.
    """
    try:
        # Cap the prompt size to stay within the model's token budget.
        max_content_len = 8000
        if len(content) > max_content_len:
            text_preview = content[:max_content_len]
        else:
            text_preview = content

        # Ask the LLM for tables, key statistics and chart suggestions.
        prompt = f"""你是一个专业的数据可视化助手。请从以下文本中提取可用于可视化的数据。

文档标题:{filename}

文档内容:
{text_preview}

请完成以下任务:
1. 识别文本中的表格数据(制表符分隔、空格对齐的表格等)
2. 识别文本中的关键统计数据(百分比、数量、趋势等)
3. 识别可用于比较的分类数据

请用 JSON 格式返回以下结构的数据(如果没有表格数据,返回空结构):
{{
"tables": [
{{
"description": "表格的描述",
"columns": ["列名1", "列名2", ...],
"rows": [
["值1", "值2", ...],
["值1", "值2", ...]
]
}}
],
"key_statistics": [
{{
"name": "指标名称",
"value": "数值",
"trend": "增长/下降/持平",
"description": "指标说明"
}}
],
"chart_suggestions": [
{{
"chart_type": "bar/line/pie",
"title": "图表标题",
"data_source": "数据来源说明"
}}
]
}}

如果没有表格数据,返回空结构:{{"tables": [], "key_statistics": [], "chart_suggestions": []}}
请确保返回的是合法的 JSON 格式。"""

        system_message = {"role": "system", "content": "你是一个专业的数据可视化助手,擅长从文本中提取数据并生成图表。"}
        user_message = {"role": "user", "content": prompt}

        response = await self.llm.chat(
            messages=[system_message, user_message],
            temperature=0.1,
            max_tokens=50000
        )

        reply_text = self.llm.extract_message_content(response)
        extracted = self._parse_json_response(reply_text)

        if not extracted:
            return {
                "success": False,
                "error": "无法从文本中提取有效的数据结构"
            }

        table_list = extracted.get("tables", [])
        key_statistics = extracted.get("key_statistics", [])

        # Without any table there is nothing to plot; still return the
        # statistics and suggestions the model produced.
        if not table_list:
            return {
                "success": False,
                "error": "文档中没有可用于图表的表格数据",
                "key_statistics": key_statistics,
                "chart_suggestions": extracted.get("chart_suggestions", [])
            }

        # Only the first table is charted.
        primary_table = table_list[0]
        column_names = primary_table.get("columns", [])
        row_values = primary_table.get("rows", [])

        if not (column_names and row_values):
            return {
                "success": False,
                "error": "表格数据为空"
            }

        # Shape expected by visualization_service.
        viz_input = {"columns": column_names, "rows": row_values}

        logger.info(f"开始生成图表,列数: {len(column_names)}, 行数: {len(row_values)}")
        viz_output = visualization_service.analyze_and_visualize(viz_input)

        if not viz_output.get("success"):
            return {
                "success": False,
                "error": viz_output.get("error", "可视化生成失败"),
                "key_statistics": key_statistics
            }

        return {
            "success": True,
            "charts": viz_output.get("charts", {}),
            "statistics": viz_output.get("statistics", {}),
            "distributions": viz_output.get("distributions", {}),
            "row_count": viz_output.get("row_count", 0),
            "column_count": viz_output.get("column_count", 0),
            "key_statistics": key_statistics,
            "chart_suggestions": extracted.get("chart_suggestions", []),
            "table_description": primary_table.get("description", "")
        }

    except Exception as exc:
        logger.error(f"TXT 图表生成失败: {str(exc)}")
        return {
            "success": False,
            "error": str(exc)
        }
|
||||||
|
|
||||||
|
def _parse_json_response(self, content: str) -> Optional[Dict]:
    """
    Parse a JSON object out of an LLM reply, tolerating formatting noise.

    Steps:
      1. Strip Markdown code fences and skip any text before the first '{'.
      2. Try to parse the remainder as-is.
      3. Otherwise cut the text at the brace that closes the first object
         (ignoring braces inside string values), drop trailing commas
         before '}' or ']', and parse again.

    Args:
        content: Raw text returned by the model.

    Returns:
        The parsed object, or None when the input is empty, contains no
        '{', or cannot be repaired.
    """
    if not content:
        return None

    # Kept function-local, as in the original block.
    import json

    # Remove Markdown fences (``` or ```json) at line starts.
    cleaned = content.strip()
    cleaned = re.sub(r'^```json\s*', '', cleaned, flags=re.MULTILINE)
    cleaned = re.sub(r'^```\s*', '', cleaned, flags=re.MULTILINE)
    cleaned = cleaned.strip()

    # Locate the start of the JSON object.
    json_start = cleaned.find('{')
    if json_start == -1:
        logger.warning("无法找到 JSON 开始位置")
        return None

    json_text = cleaned[json_start:]

    # Fast path: the text is already valid JSON.
    try:
        return json.loads(json_text)
    except json.JSONDecodeError:
        pass

    # Find the brace closing the first object. Braces inside string
    # literals are skipped so a value such as "}" does not end the scan early.
    depth = 0
    end_pos = -1
    in_string = False
    escaped = False
    for i, ch in enumerate(json_text):
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                end_pos = i + 1
                break

    if end_pos > 0:
        # Drop trailing commas before a closing '}' or ']'. The class must
        # be written [}\]]: the previous [}]] only matched the sequence ",}]".
        # This regex is not string-aware, so it runs only on this fallback path.
        fixed = re.sub(r',\s*([}\]])', r'\1', json_text[:end_pos])
        try:
            return json.loads(fixed)
        except ValueError as e:
            logger.warning(f"JSON 修复失败: {e}")

    return None
|
||||||
|
|
||||||
|
|
||||||
|
# 全局单例
|
||||||
|
txt_ai_service = TxtAIService()
|
||||||
@@ -8,6 +8,7 @@ from typing import Dict, Any, List, Optional
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from app.services.llm_service import llm_service
|
from app.services.llm_service import llm_service
|
||||||
|
from app.services.visualization_service import visualization_service
|
||||||
from app.core.document_parser.docx_parser import DocxParser
|
from app.core.document_parser.docx_parser import DocxParser
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -634,6 +635,272 @@ class WordAIService:
|
|||||||
|
|
||||||
return values
|
return values
|
||||||
|
|
||||||
|
async def generate_charts(
|
||||||
|
self,
|
||||||
|
file_path: str,
|
||||||
|
user_hint: str = ""
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
使用 AI 解析 Word 文档并生成可视化图表
|
||||||
|
|
||||||
# 全局单例
|
从 Word 文档中提取表格数据,然后生成统计图表
|
||||||
word_ai_service = WordAIService()
|
|
||||||
|
Args:
|
||||||
|
file_path: Word 文件路径
|
||||||
|
user_hint: 用户提示词,指定要提取的内容类型
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict: 包含图表数据和统计信息的结果
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 1. 先用基础解析器提取原始内容
|
||||||
|
parse_result = self.parser.parse(file_path)
|
||||||
|
|
||||||
|
if not parse_result.success:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": parse_result.error,
|
||||||
|
"structured_data": None
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. 获取原始数据
|
||||||
|
raw_data = parse_result.data
|
||||||
|
paragraphs = raw_data.get("paragraphs", [])
|
||||||
|
tables = raw_data.get("tables", [])
|
||||||
|
content = raw_data.get("content", "")
|
||||||
|
|
||||||
|
logger.info(f"Word 基础解析完成: {len(paragraphs)} 个段落, {len(tables)} 个表格")
|
||||||
|
|
||||||
|
# 3. 优先处理表格数据
|
||||||
|
if tables and len(tables) > 0:
|
||||||
|
structured_data = await self._extract_tables_with_ai(
|
||||||
|
tables, paragraphs, 0, user_hint, parse_result.metadata
|
||||||
|
)
|
||||||
|
elif paragraphs and len(paragraphs) > 0:
|
||||||
|
structured_data = await self._extract_from_text_with_ai(
|
||||||
|
paragraphs, content, 0, [], user_hint
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "文档内容为空",
|
||||||
|
"structured_data": None
|
||||||
|
}
|
||||||
|
|
||||||
|
# 4. 检查是否有表格数据用于可视化
|
||||||
|
if not structured_data.get("success"):
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": structured_data.get("error", "解析失败"),
|
||||||
|
"structured_data": None
|
||||||
|
}
|
||||||
|
|
||||||
|
parse_type = structured_data.get("type", "")
|
||||||
|
|
||||||
|
# 5. 提取可用于图表的数据
|
||||||
|
chart_data = None
|
||||||
|
|
||||||
|
if parse_type == "table_data":
|
||||||
|
headers = structured_data.get("headers", [])
|
||||||
|
rows = structured_data.get("rows", [])
|
||||||
|
if headers and rows:
|
||||||
|
chart_data = {
|
||||||
|
"columns": headers,
|
||||||
|
"rows": rows
|
||||||
|
}
|
||||||
|
elif parse_type == "structured_text":
|
||||||
|
tables = structured_data.get("tables", [])
|
||||||
|
if tables and len(tables) > 0:
|
||||||
|
first_table = tables[0]
|
||||||
|
headers = first_table.get("headers", [])
|
||||||
|
rows = first_table.get("rows", [])
|
||||||
|
if headers and rows:
|
||||||
|
chart_data = {
|
||||||
|
"columns": headers,
|
||||||
|
"rows": rows
|
||||||
|
}
|
||||||
|
|
||||||
|
# 6. 生成可视化图表
|
||||||
|
if chart_data:
|
||||||
|
logger.info(f"开始生成图表,列数: {len(chart_data['columns'])}, 行数: {len(chart_data['rows'])}")
|
||||||
|
vis_result = visualization_service.analyze_and_visualize(chart_data)
|
||||||
|
|
||||||
|
if vis_result.get("success"):
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"charts": vis_result.get("charts", {}),
|
||||||
|
"statistics": vis_result.get("statistics", {}),
|
||||||
|
"distributions": vis_result.get("distributions", {}),
|
||||||
|
"structured_data": structured_data,
|
||||||
|
"row_count": vis_result.get("row_count", 0),
|
||||||
|
"column_count": vis_result.get("column_count", 0)
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": vis_result.get("error", "可视化生成失败"),
|
||||||
|
"structured_data": structured_data
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "文档中没有可用于图表的表格数据",
|
||||||
|
"structured_data": structured_data
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Word 文档图表生成失败: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"structured_data": None
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def parse_word_with_ai_from_db(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
tables: List[Dict],
|
||||||
|
filename: str = "",
|
||||||
|
user_hint: str = ""
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
使用 AI 解析从数据库读取的 Word 文档内容,提取结构化数据
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: 文档文本内容
|
||||||
|
tables: 表格数据列表
|
||||||
|
filename: 文件名
|
||||||
|
user_hint: 用户提示词
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict: 包含结构化数据的解析结果
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 解析段落
|
||||||
|
paragraphs = [p.strip() for p in content.split('\n') if p.strip()]
|
||||||
|
|
||||||
|
logger.info(f"从数据库解析 Word: {len(paragraphs)} 个段落, {len(tables)} 个表格")
|
||||||
|
|
||||||
|
# 优先处理表格数据
|
||||||
|
if tables and len(tables) > 0:
|
||||||
|
structured_data = await self._extract_tables_with_ai(
|
||||||
|
tables, paragraphs, 0, user_hint, {"filename": filename}
|
||||||
|
)
|
||||||
|
elif paragraphs and len(paragraphs) > 0:
|
||||||
|
structured_data = await self._extract_from_text_with_ai(
|
||||||
|
paragraphs, content, 0, [], user_hint
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
structured_data = {
|
||||||
|
"success": True,
|
||||||
|
"type": "empty",
|
||||||
|
"message": "文档内容为空"
|
||||||
|
}
|
||||||
|
|
||||||
|
return structured_data
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库解析 Word 文档失败: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
async def generate_charts_from_db(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
tables: List[Dict],
|
||||||
|
filename: str = "",
|
||||||
|
user_hint: str = ""
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
使用 AI 解析从数据库读取的 Word 文档并生成可视化图表
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: 文档文本内容
|
||||||
|
tables: 表格数据列表
|
||||||
|
filename: 文件名
|
||||||
|
user_hint: 用户提示词
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict: 包含图表数据和统计信息的结果
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 解析段落
|
||||||
|
paragraphs = [p.strip() for p in content.split('\n') if p.strip()]
|
||||||
|
|
||||||
|
logger.info(f"从数据库生成 Word 图表: {len(paragraphs)} 个段落, {len(tables)} 个表格")
|
||||||
|
|
||||||
|
# 优先处理表格数据
|
||||||
|
if tables and len(tables) > 0:
|
||||||
|
structured_data = await self._extract_tables_with_ai(
|
||||||
|
tables, paragraphs, 0, user_hint, {"filename": filename}
|
||||||
|
)
|
||||||
|
elif paragraphs and len(paragraphs) > 0:
|
||||||
|
structured_data = await self._extract_from_text_with_ai(
|
||||||
|
paragraphs, content, 0, [], user_hint
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "文档内容为空"
|
||||||
|
}
|
||||||
|
|
||||||
|
# 提取可用于图表的数据
|
||||||
|
chart_data = None
|
||||||
|
|
||||||
|
if structured_data.get("type") == "table_data":
|
||||||
|
headers = structured_data.get("headers", [])
|
||||||
|
rows = structured_data.get("rows", [])
|
||||||
|
if headers and rows:
|
||||||
|
chart_data = {
|
||||||
|
"columns": headers,
|
||||||
|
"rows": rows
|
||||||
|
}
|
||||||
|
elif structured_data.get("type") == "structured_text":
|
||||||
|
tables_data = structured_data.get("tables", [])
|
||||||
|
if tables_data and len(tables_data) > 0:
|
||||||
|
first_table = tables_data[0]
|
||||||
|
headers = first_table.get("headers", [])
|
||||||
|
rows = first_table.get("rows", [])
|
||||||
|
if headers and rows:
|
||||||
|
chart_data = {
|
||||||
|
"columns": headers,
|
||||||
|
"rows": rows
|
||||||
|
}
|
||||||
|
|
||||||
|
# 生成可视化图表
|
||||||
|
if chart_data:
|
||||||
|
logger.info(f"开始生成图表,列数: {len(chart_data['columns'])}, 行数: {len(chart_data['rows'])}")
|
||||||
|
vis_result = visualization_service.analyze_and_visualize(chart_data)
|
||||||
|
|
||||||
|
if vis_result.get("success"):
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"charts": vis_result.get("charts", {}),
|
||||||
|
"statistics": vis_result.get("statistics", {}),
|
||||||
|
"distributions": vis_result.get("distributions", {}),
|
||||||
|
"structured_data": structured_data,
|
||||||
|
"row_count": vis_result.get("row_count", 0),
|
||||||
|
"column_count": vis_result.get("column_count", 0)
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": vis_result.get("error", "可视化生成失败"),
|
||||||
|
"structured_data": structured_data
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "文档中没有可用于图表的表格数据",
|
||||||
|
"structured_data": structured_data
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库生成 Word 图表失败: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e)
|
||||||
|
}
|
||||||
|
|||||||
@@ -250,6 +250,98 @@ export interface AIExcelAnalyzeResult {
|
|||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ==================== Word/TXT AI 分析类型 ====================
|
||||||
|
|
||||||
|
export type WordAnalysisType = 'structured' | 'charts';
|
||||||
|
export type TxtAnalysisType = 'structured' | 'charts';
|
||||||
|
|
||||||
|
export interface WordAIStructuredResult {
|
||||||
|
success: boolean;
|
||||||
|
result?: {
|
||||||
|
success?: boolean;
|
||||||
|
type?: string;
|
||||||
|
headers?: string[];
|
||||||
|
rows?: string[][];
|
||||||
|
key_values?: Record<string, string>;
|
||||||
|
list_items?: string[];
|
||||||
|
summary?: string;
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface WordAIChartsResult {
|
||||||
|
success: boolean;
|
||||||
|
result?: {
|
||||||
|
success?: boolean;
|
||||||
|
charts?: {
|
||||||
|
histograms?: Array<any>;
|
||||||
|
bar_charts?: Array<any>;
|
||||||
|
box_plots?: Array<any>;
|
||||||
|
correlation?: any;
|
||||||
|
};
|
||||||
|
statistics?: {
|
||||||
|
numeric?: Record<string, any>;
|
||||||
|
categorical?: Record<string, any>;
|
||||||
|
};
|
||||||
|
distributions?: Record<string, any>;
|
||||||
|
row_count?: number;
|
||||||
|
column_count?: number;
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TxtAIStructuredResult {
|
||||||
|
success: boolean;
|
||||||
|
result?: {
|
||||||
|
success?: boolean;
|
||||||
|
type?: string;
|
||||||
|
tables?: Array<{
|
||||||
|
headers?: string[];
|
||||||
|
rows?: string[][];
|
||||||
|
}>;
|
||||||
|
key_values?: Record<string, string>;
|
||||||
|
list_items?: string[];
|
||||||
|
summary?: string;
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TxtAIChartsResult {
|
||||||
|
success: boolean;
|
||||||
|
result?: {
|
||||||
|
success?: boolean;
|
||||||
|
charts?: {
|
||||||
|
histograms?: Array<any>;
|
||||||
|
bar_charts?: Array<any>;
|
||||||
|
box_plots?: Array<any>;
|
||||||
|
correlation?: any;
|
||||||
|
};
|
||||||
|
statistics?: {
|
||||||
|
numeric?: Record<string, any>;
|
||||||
|
categorical?: Record<string, any>;
|
||||||
|
};
|
||||||
|
distributions?: Record<string, any>;
|
||||||
|
row_count?: number;
|
||||||
|
column_count?: number;
|
||||||
|
key_statistics?: Array<{
|
||||||
|
name?: string;
|
||||||
|
value?: string;
|
||||||
|
trend?: string;
|
||||||
|
description?: string;
|
||||||
|
}>;
|
||||||
|
chart_suggestions?: Array<{
|
||||||
|
chart_type?: string;
|
||||||
|
title?: string;
|
||||||
|
data_source?: string;
|
||||||
|
}>;
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
// ==================== API 封装 ====================
|
// ==================== API 封装 ====================
|
||||||
|
|
||||||
export const backendApi = {
|
export const backendApi = {
|
||||||
@@ -1187,15 +1279,21 @@ export const aiApi = {
|
|||||||
* 上传并使用 AI 分析 Markdown 文件
|
* 上传并使用 AI 分析 Markdown 文件
|
||||||
*/
|
*/
|
||||||
async analyzeMarkdown(
|
async analyzeMarkdown(
|
||||||
file: File,
|
file: File | null,
|
||||||
options: {
|
options: {
|
||||||
|
docId?: string;
|
||||||
analysisType?: MarkdownAnalysisType;
|
analysisType?: MarkdownAnalysisType;
|
||||||
userPrompt?: string;
|
userPrompt?: string;
|
||||||
sectionNumber?: string;
|
sectionNumber?: string;
|
||||||
} = {}
|
} = {}
|
||||||
): Promise<AIMarkdownAnalyzeResult> {
|
): Promise<AIMarkdownAnalyzeResult> {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('file', file);
|
if (file) {
|
||||||
|
formData.append('file', file);
|
||||||
|
}
|
||||||
|
if (options.docId) {
|
||||||
|
formData.append('doc_id', options.docId);
|
||||||
|
}
|
||||||
|
|
||||||
const params = new URLSearchParams();
|
const params = new URLSearchParams();
|
||||||
if (options.analysisType) {
|
if (options.analysisType) {
|
||||||
@@ -1337,28 +1435,31 @@ export const aiApi = {
|
|||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 上传并使用 AI 分析 TXT 文本文件,提取结构化数据
|
* 上传并使用 AI 分析 TXT 文本文件,提取结构化数据或生成图表
|
||||||
*/
|
*/
|
||||||
async analyzeTxt(
|
async analyzeTxt(
|
||||||
file: File
|
file: File | null,
|
||||||
|
docId: string | null = null,
|
||||||
|
analysisType: TxtAnalysisType = 'structured'
|
||||||
): Promise<{
|
): Promise<{
|
||||||
success: boolean;
|
success: boolean;
|
||||||
filename?: string;
|
filename?: string;
|
||||||
structured_data?: {
|
analysis_type?: string;
|
||||||
table?: {
|
result?: any;
|
||||||
columns?: string[];
|
|
||||||
rows?: string[][];
|
|
||||||
};
|
|
||||||
summary?: string;
|
|
||||||
key_value_pairs?: Array<{ key: string; value: string }>;
|
|
||||||
numeric_data?: Array<{ name: string; value: number; unit?: string }>;
|
|
||||||
};
|
|
||||||
error?: string;
|
error?: string;
|
||||||
}> {
|
}> {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('file', file);
|
if (file) {
|
||||||
|
formData.append('file', file);
|
||||||
|
}
|
||||||
|
if (docId) {
|
||||||
|
formData.append('doc_id', docId);
|
||||||
|
}
|
||||||
|
|
||||||
const url = `${BACKEND_BASE_URL}/ai/analyze/txt`;
|
const params = new URLSearchParams();
|
||||||
|
params.append('analysis_type', analysisType);
|
||||||
|
|
||||||
|
const url = `${BACKEND_BASE_URL}/ai/analyze/txt?${params.toString()}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
@@ -1480,28 +1581,35 @@ export const aiApi = {
|
|||||||
// ==================== Word AI 解析 ====================
|
// ==================== Word AI 解析 ====================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 使用 AI 解析 Word 文档,提取结构化数据
|
* 使用 AI 解析 Word 文档,提取结构化数据或生成图表
|
||||||
*/
|
*/
|
||||||
async analyzeWordWithAI(
|
async analyzeWordWithAI(
|
||||||
file: File,
|
file: File | null,
|
||||||
userHint: string = ''
|
docId: string | null = null,
|
||||||
|
userHint: string = '',
|
||||||
|
analysisType: WordAnalysisType = 'structured'
|
||||||
): Promise<{
|
): Promise<{
|
||||||
success: boolean;
|
success: boolean;
|
||||||
type?: string;
|
filename?: string;
|
||||||
headers?: string[];
|
analysis_type?: string;
|
||||||
rows?: string[][];
|
result?: any;
|
||||||
key_values?: Record<string, string>;
|
|
||||||
list_items?: string[];
|
|
||||||
summary?: string;
|
|
||||||
error?: string;
|
error?: string;
|
||||||
}> {
|
}> {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('file', file);
|
if (file) {
|
||||||
|
formData.append('file', file);
|
||||||
|
}
|
||||||
|
if (docId) {
|
||||||
|
formData.append('doc_id', docId);
|
||||||
|
}
|
||||||
if (userHint) {
|
if (userHint) {
|
||||||
formData.append('user_hint', userHint);
|
formData.append('user_hint', userHint);
|
||||||
}
|
}
|
||||||
|
|
||||||
const url = `${BACKEND_BASE_URL}/ai/analyze/word`;
|
const params = new URLSearchParams();
|
||||||
|
params.append('analysis_type', analysisType);
|
||||||
|
|
||||||
|
const url = `${BACKEND_BASE_URL}/ai/analyze/word?${params.toString()}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import {
|
|||||||
ChevronDown,
|
ChevronDown,
|
||||||
ChevronUp,
|
ChevronUp,
|
||||||
FileSpreadsheet,
|
FileSpreadsheet,
|
||||||
File,
|
File as FileIcon,
|
||||||
Table,
|
Table,
|
||||||
CheckCircle,
|
CheckCircle,
|
||||||
AlertCircle,
|
AlertCircle,
|
||||||
@@ -107,6 +107,15 @@ const Documents: React.FC = () => {
|
|||||||
const [mdStreaming, setMdStreaming] = useState(false);
|
const [mdStreaming, setMdStreaming] = useState(false);
|
||||||
const [mdStreamingContent, setMdStreamingContent] = useState('');
|
const [mdStreamingContent, setMdStreamingContent] = useState('');
|
||||||
|
|
||||||
|
// Word AI 分析相关状态
|
||||||
|
const [wordAnalysis, setWordAnalysis] = useState<any>(null);
|
||||||
|
const [wordAnalysisType, setWordAnalysisType] = useState<'structured' | 'charts'>('structured');
|
||||||
|
const [wordUserHint, setWordUserHint] = useState('');
|
||||||
|
|
||||||
|
// TXT AI 分析相关状态
|
||||||
|
const [txtAnalysis, setTxtAnalysis] = useState<any>(null);
|
||||||
|
const [txtAnalysisType, setTxtAnalysisType] = useState<'structured' | 'charts'>('structured');
|
||||||
|
|
||||||
// RAG 向量检索相关状态
|
// RAG 向量检索相关状态
|
||||||
const [ragStatus, setRagStatus] = useState<{ vector_count: number; collections: string[] } | null>(null);
|
const [ragStatus, setRagStatus] = useState<{ vector_count: number; collections: string[] } | null>(null);
|
||||||
const [ragSearchQuery, setRagSearchQuery] = useState('');
|
const [ragSearchQuery, setRagSearchQuery] = useState('');
|
||||||
@@ -114,6 +123,17 @@ const Documents: React.FC = () => {
|
|||||||
const [ragResults, setRagResults] = useState<any[]>([]);
|
const [ragResults, setRagResults] = useState<any[]>([]);
|
||||||
const [ragRebuilding, setRagRebuilding] = useState(false);
|
const [ragRebuilding, setRagRebuilding] = useState(false);
|
||||||
|
|
||||||
|
// 选中的文档详情
|
||||||
|
const [selectedDocument, setSelectedDocument] = useState<{
|
||||||
|
doc_id: string;
|
||||||
|
original_filename: string;
|
||||||
|
doc_type: string;
|
||||||
|
content?: string;
|
||||||
|
structured_data?: any;
|
||||||
|
metadata?: any;
|
||||||
|
} | null>(null);
|
||||||
|
const [loadingDocument, setLoadingDocument] = useState(false);
|
||||||
|
|
||||||
// 解析选项
|
// 解析选项
|
||||||
const [parseOptions, setParseOptions] = useState({
|
const [parseOptions, setParseOptions] = useState({
|
||||||
parseAllSheets: false,
|
parseAllSheets: false,
|
||||||
@@ -268,6 +288,33 @@ const Documents: React.FC = () => {
|
|||||||
return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' };
|
return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' };
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// 设置第一个成功文件的 uploadedFile
|
||||||
|
const firstSuccessIdx = fileResults.findIndex((fr: any) => fr?.success);
|
||||||
|
if (firstSuccessIdx >= 0 && acceptedFiles[firstSuccessIdx]) {
|
||||||
|
const firstFile = acceptedFiles[firstSuccessIdx];
|
||||||
|
const firstResult = fileResults[firstSuccessIdx];
|
||||||
|
const ext = firstFile.name.split('.').pop()?.toLowerCase();
|
||||||
|
|
||||||
|
// 设置 uploadedFile
|
||||||
|
setUploadedFile(firstFile);
|
||||||
|
|
||||||
|
// 对于 Excel 文件,获取 parseResult
|
||||||
|
if (ext === 'xlsx' || ext === 'xls') {
|
||||||
|
// 调用 parseDocument 获取 parseResult
|
||||||
|
if (firstResult?.file_path) {
|
||||||
|
try {
|
||||||
|
const parseResult = await backendApi.parseDocument(firstResult.file_path);
|
||||||
|
if (parseResult.success) {
|
||||||
|
setParseResult(parseResult as any);
|
||||||
|
}
|
||||||
|
} catch (parseErr) {
|
||||||
|
console.warn('获取 parseResult 失败:', parseErr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
loadDocuments();
|
loadDocuments();
|
||||||
return;
|
return;
|
||||||
} else if (status.status === 'failure') {
|
} else if (status.status === 'failure') {
|
||||||
@@ -446,24 +493,79 @@ const Documents: React.FC = () => {
|
|||||||
|
|
||||||
// 基于 AI 分析生成图表
|
// 基于 AI 分析生成图表
|
||||||
const handleGenerateCharts = async () => {
|
const handleGenerateCharts = async () => {
|
||||||
if (!aiAnalysis || !aiAnalysis.success) {
|
// 检查是否有任何 AI 分析结果
|
||||||
|
const hasExcelAI = aiAnalysis?.success;
|
||||||
|
const hasMdAI = mdAnalysis?.success;
|
||||||
|
const hasWordAI = wordAnalysis?.success;
|
||||||
|
const hasTxtAI = txtAnalysis?.success;
|
||||||
|
|
||||||
|
if (!hasExcelAI && !hasMdAI && !hasWordAI && !hasTxtAI) {
|
||||||
toast.error('请先进行 AI 分析');
|
toast.error('请先进行 AI 分析');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 如果是 Markdown 分析已有图表,直接显示
|
||||||
|
if (hasMdAI && mdAnalysis?.chart_data?.tables) {
|
||||||
|
setAnalysisCharts({
|
||||||
|
success: true,
|
||||||
|
charts: { tables: mdAnalysis.chart_data.tables },
|
||||||
|
statistics: mdAnalysis.chart_data.key_statistics
|
||||||
|
});
|
||||||
|
toast.success('图表生成完成');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果是 Word 分析已有图表,直接显示
|
||||||
|
if (hasWordAI && wordAnalysis?.result?.charts) {
|
||||||
|
setAnalysisCharts({
|
||||||
|
success: true,
|
||||||
|
charts: wordAnalysis.result.charts,
|
||||||
|
statistics: wordAnalysis.result.statistics
|
||||||
|
});
|
||||||
|
toast.success('图表生成完成');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果是 TXT 分析已有图表,直接显示
|
||||||
|
if (hasTxtAI && txtAnalysis?.result?.charts) {
|
||||||
|
setAnalysisCharts({
|
||||||
|
success: true,
|
||||||
|
charts: txtAnalysis.result.charts,
|
||||||
|
statistics: txtAnalysis.result.statistics
|
||||||
|
});
|
||||||
|
toast.success('图表生成完成');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 尝试从各种分析结果中提取文本并生成图表
|
||||||
let analysisText = '';
|
let analysisText = '';
|
||||||
if (aiAnalysis.analysis?.analysis) {
|
let fileType = 'unknown';
|
||||||
analysisText = aiAnalysis.analysis.analysis;
|
|
||||||
} else if (aiAnalysis.analysis?.sheets) {
|
if (hasExcelAI) {
|
||||||
const sheets = aiAnalysis.analysis.sheets;
|
if (aiAnalysis.analysis?.analysis) {
|
||||||
if (sheets && Object.keys(sheets).length > 0) {
|
analysisText = aiAnalysis.analysis.analysis;
|
||||||
const firstSheet = Object.keys(sheets)[0];
|
fileType = 'excel';
|
||||||
analysisText = sheets[firstSheet]?.analysis || '';
|
} else if (aiAnalysis.analysis?.sheets) {
|
||||||
|
const sheets = aiAnalysis.analysis.sheets;
|
||||||
|
if (sheets && Object.keys(sheets).length > 0) {
|
||||||
|
const firstSheet = Object.keys(sheets)[0];
|
||||||
|
analysisText = sheets[firstSheet]?.analysis || '';
|
||||||
|
fileType = 'excel';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else if (hasMdAI && mdAnalysis?.analysis) {
|
||||||
|
analysisText = mdAnalysis.analysis;
|
||||||
|
fileType = 'markdown';
|
||||||
|
} else if (hasWordAI && wordAnalysis?.result?.summary) {
|
||||||
|
analysisText = wordAnalysis.result.summary;
|
||||||
|
fileType = 'word';
|
||||||
|
} else if (hasTxtAI && txtAnalysis?.result?.summary) {
|
||||||
|
analysisText = txtAnalysis.result.summary;
|
||||||
|
fileType = 'txt';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!analysisText?.trim()) {
|
if (!analysisText?.trim()) {
|
||||||
toast.error('无法获取 AI 分析结果');
|
toast.error('无法获取 AI 分析文本结果');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -474,7 +576,7 @@ const Documents: React.FC = () => {
|
|||||||
const result = await aiApi.extractAndGenerateCharts({
|
const result = await aiApi.extractAndGenerateCharts({
|
||||||
analysis_text: analysisText,
|
analysis_text: analysisText,
|
||||||
original_filename: uploadedFile?.name || 'unknown',
|
original_filename: uploadedFile?.name || 'unknown',
|
||||||
file_type: 'excel'
|
file_type: fileType
|
||||||
});
|
});
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
@@ -592,6 +694,9 @@ const Documents: React.FC = () => {
|
|||||||
const result = await backendApi.deleteDocument(docId);
|
const result = await backendApi.deleteDocument(docId);
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
setDocuments(prev => prev.filter(d => d.doc_id !== docId));
|
setDocuments(prev => prev.filter(d => d.doc_id !== docId));
|
||||||
|
if (selectedDocument?.doc_id === docId) {
|
||||||
|
setSelectedDocument(null);
|
||||||
|
}
|
||||||
toast.success('文档已删除');
|
toast.success('文档已删除');
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
@@ -599,6 +704,95 @@ const Documents: React.FC = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const handleSelectDocument = async (docId: string) => {
|
||||||
|
setLoadingDocument(true);
|
||||||
|
try {
|
||||||
|
const result = await backendApi.getDocument(docId);
|
||||||
|
if (result.success && result.document) {
|
||||||
|
setSelectedDocument(result.document);
|
||||||
|
const doc = result.document;
|
||||||
|
|
||||||
|
// 优先使用 file_path 调用 parseDocument 获取完整解析结果
|
||||||
|
const filePath = doc.metadata?.file_path;
|
||||||
|
if (filePath) {
|
||||||
|
try {
|
||||||
|
const parseResult = await backendApi.parseDocument(filePath);
|
||||||
|
if (parseResult.success) {
|
||||||
|
setParseResult(parseResult as any);
|
||||||
|
const ext = doc.original_filename.split('.').pop()?.toLowerCase() || doc.doc_type;
|
||||||
|
const fakeFile = new File([], doc.original_filename, { type: getMimeType(ext) });
|
||||||
|
setUploadedFile(fakeFile);
|
||||||
|
toast.success('已加载文档: ' + doc.original_filename);
|
||||||
|
setLoadingDocument(false);
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
console.warn('parseDocument returned success:false, using fallback');
|
||||||
|
}
|
||||||
|
} catch (parseErr) {
|
||||||
|
console.warn('parseDocument failed, fallback to structured_data:', parseErr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 后备:使用 structured_data 构建 parseResult
|
||||||
|
const ext = doc.original_filename.split('.').pop()?.toLowerCase() || doc.doc_type;
|
||||||
|
const fakeFile = new File([], doc.original_filename, { type: getMimeType(ext) });
|
||||||
|
|
||||||
|
if (doc.structured_data) {
|
||||||
|
const mockParseResult: ExcelParseResult = {
|
||||||
|
success: true,
|
||||||
|
data: {},
|
||||||
|
metadata: {
|
||||||
|
filename: doc.filename,
|
||||||
|
original_filename: doc.original_filename,
|
||||||
|
extension: doc.doc_type,
|
||||||
|
doc_type: doc.doc_type as any,
|
||||||
|
file_size: doc.metadata?.file_size || 0,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if (doc.structured_data.tables && doc.structured_data.tables.length > 0) {
|
||||||
|
const firstTable = doc.structured_data.tables[0];
|
||||||
|
mockParseResult.data = {
|
||||||
|
columns: firstTable.headers || [],
|
||||||
|
rows: (firstTable.rows || []).map((row: string[]) => {
|
||||||
|
const obj: Record<string, any> = {};
|
||||||
|
(firstTable.headers || []).forEach((h: string, i: number) => {
|
||||||
|
obj[h] = row[i] || '';
|
||||||
|
});
|
||||||
|
return obj;
|
||||||
|
}),
|
||||||
|
row_count: firstTable.rows?.length || 0,
|
||||||
|
column_count: firstTable.headers?.length || 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (doc.structured_data.sheets) {
|
||||||
|
mockParseResult.data.sheets = doc.structured_data.sheets;
|
||||||
|
}
|
||||||
|
setParseResult(mockParseResult);
|
||||||
|
} else if (doc.content) {
|
||||||
|
setParseResult({
|
||||||
|
success: true,
|
||||||
|
data: { content: doc.content },
|
||||||
|
metadata: {
|
||||||
|
filename: doc.filename,
|
||||||
|
original_filename: doc.original_filename,
|
||||||
|
extension: doc.doc_type,
|
||||||
|
doc_type: doc.doc_type as any,
|
||||||
|
file_size: doc.metadata?.file_size || 0,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
setUploadedFile(fakeFile);
|
||||||
|
toast.success('已加载文档: ' + doc.original_filename);
|
||||||
|
} else {
|
||||||
|
toast.error(result.error || '获取文档详情失败');
|
||||||
|
}
|
||||||
|
} catch (err: any) {
|
||||||
|
toast.error(err.message || '获取文档详情失败');
|
||||||
|
} finally {
|
||||||
|
setLoadingDocument(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const filteredDocs = documents.filter(doc =>
|
const filteredDocs = documents.filter(doc =>
|
||||||
doc.original_filename.toLowerCase().includes(search.toLowerCase())
|
doc.original_filename.toLowerCase().includes(search.toLowerCase())
|
||||||
);
|
);
|
||||||
@@ -612,7 +806,7 @@ const Documents: React.FC = () => {
|
|||||||
case 'doc':
|
case 'doc':
|
||||||
return <FileText size={28} />;
|
return <FileText size={28} />;
|
||||||
default:
|
default:
|
||||||
return <File size={28} />;
|
return <FileIcon size={28} />;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -632,11 +826,17 @@ const Documents: React.FC = () => {
|
|||||||
setMdAnalysis(null);
|
setMdAnalysis(null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await aiApi.analyzeMarkdown(uploadedFile, {
|
// 判断是从历史文档还是本地上传
|
||||||
analysisType: mdAnalysisType,
|
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : undefined;
|
||||||
userPrompt: mdUserPrompt,
|
const result = await aiApi.analyzeMarkdown(
|
||||||
sectionNumber: mdSelectedSection || undefined
|
uploadedFile.size > 0 ? uploadedFile : null,
|
||||||
});
|
{
|
||||||
|
docId: docId || undefined,
|
||||||
|
analysisType: mdAnalysisType,
|
||||||
|
userPrompt: mdUserPrompt,
|
||||||
|
sectionNumber: mdSelectedSection || undefined
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
toast.success('Markdown AI 分析完成');
|
toast.success('Markdown AI 分析完成');
|
||||||
@@ -701,6 +901,71 @@ const Documents: React.FC = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Word AI 分析处理
|
||||||
|
const handleWordAnalyze = async () => {
|
||||||
|
if (!uploadedFile || !isWordFile(uploadedFile.name)) {
|
||||||
|
toast.error('请先上传 Word 文件');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setAnalyzing(true);
|
||||||
|
setWordAnalysis(null);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// 判断是从历史文档还是本地上传
|
||||||
|
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
|
||||||
|
const result = await aiApi.analyzeWordWithAI(
|
||||||
|
uploadedFile.size > 0 ? uploadedFile : null,
|
||||||
|
docId,
|
||||||
|
wordUserHint,
|
||||||
|
wordAnalysisType
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
toast.success('Word AI 分析完成');
|
||||||
|
setWordAnalysis(result);
|
||||||
|
} else {
|
||||||
|
toast.error(result.error || 'AI 分析失败');
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
toast.error(error.message || 'AI 分析失败');
|
||||||
|
} finally {
|
||||||
|
setAnalyzing(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// TXT AI 分析处理
|
||||||
|
const handleTxtAnalyze = async () => {
|
||||||
|
if (!uploadedFile || !isTxtFile(uploadedFile.name)) {
|
||||||
|
toast.error('请先上传 TXT 文件');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setAnalyzing(true);
|
||||||
|
setTxtAnalysis(null);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// 判断是从历史文档还是本地上传
|
||||||
|
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
|
||||||
|
const result = await aiApi.analyzeTxt(
|
||||||
|
uploadedFile.size > 0 ? uploadedFile : null,
|
||||||
|
docId,
|
||||||
|
txtAnalysisType
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
toast.success('TXT AI 分析完成');
|
||||||
|
setTxtAnalysis(result);
|
||||||
|
} else {
|
||||||
|
toast.error(result.error || 'AI 分析失败');
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
toast.error(error.message || 'AI 分析失败');
|
||||||
|
} finally {
|
||||||
|
setAnalyzing(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const getMdAnalysisIcon = (type: string) => {
|
const getMdAnalysisIcon = (type: string) => {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case 'summary': return <FileText size={20} />;
|
case 'summary': return <FileText size={20} />;
|
||||||
@@ -724,6 +989,18 @@ const Documents: React.FC = () => {
|
|||||||
return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
|
return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const getMimeType = (ext: string): string => {
|
||||||
|
const mimeTypes: Record<string, string> = {
|
||||||
|
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||||
|
'xls': 'application/vnd.ms-excel',
|
||||||
|
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
|
'doc': 'application/msword',
|
||||||
|
'md': 'text/markdown',
|
||||||
|
'txt': 'text/plain',
|
||||||
|
};
|
||||||
|
return mimeTypes[ext] || 'application/octet-stream';
|
||||||
|
};
|
||||||
|
|
||||||
const getAnalysisIcon = (type: string) => {
|
const getAnalysisIcon = (type: string) => {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case 'general': return <FileText size={20} />;
|
case 'general': return <FileText size={20} />;
|
||||||
@@ -739,6 +1016,16 @@ const Documents: React.FC = () => {
|
|||||||
return ext === 'xlsx' || ext === 'xls';
|
return ext === 'xlsx' || ext === 'xls';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const isWordFile = (filename: string) => {
|
||||||
|
const ext = filename.split('.').pop()?.toLowerCase();
|
||||||
|
return ext === 'docx';
|
||||||
|
};
|
||||||
|
|
||||||
|
const isTxtFile = (filename: string) => {
|
||||||
|
const ext = filename.split('.').pop()?.toLowerCase();
|
||||||
|
return ext === 'txt';
|
||||||
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-8 pb-10">
|
<div className="space-y-8 pb-10">
|
||||||
<section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
|
<section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
|
||||||
@@ -1055,7 +1342,7 @@ const Documents: React.FC = () => {
|
|||||||
<FileText size={12} className="mr-1" /> Markdown
|
<FileText size={12} className="mr-1" /> Markdown
|
||||||
</Badge>
|
</Badge>
|
||||||
<Badge variant="outline" className="bg-gray-500/10 text-gray-600 border-gray-200 text-xs">
|
<Badge variant="outline" className="bg-gray-500/10 text-gray-600 border-gray-200 text-xs">
|
||||||
<File size={12} className="mr-1" /> 文本
|
<FileIcon size={12} className="mr-1" /> 文本
|
||||||
</Badge>
|
</Badge>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -1064,6 +1351,38 @@ const Documents: React.FC = () => {
|
|||||||
)}
|
)}
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
|
{/* 从历史文档中选择 */}
|
||||||
|
{documents.length > 0 && (
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Clock className="text-primary" size={20} />
|
||||||
|
从历史文档选择
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="space-y-3">
|
||||||
|
<Select
|
||||||
|
value=""
|
||||||
|
onValueChange={async (docId) => {
|
||||||
|
if (!docId) return;
|
||||||
|
await handleSelectDocument(docId);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<SelectTrigger className="bg-background">
|
||||||
|
<SelectValue placeholder="选择历史文档..." />
|
||||||
|
</SelectTrigger>
|
||||||
|
<SelectContent>
|
||||||
|
{documents.slice(0, 20).map((doc) => (
|
||||||
|
<SelectItem key={doc.doc_id} value={doc.doc_id}>
|
||||||
|
{doc.original_filename}
|
||||||
|
</SelectItem>
|
||||||
|
))}
|
||||||
|
</SelectContent>
|
||||||
|
</Select>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Excel 解析选项 */}
|
{/* Excel 解析选项 */}
|
||||||
{uploadedFile && isExcelFile(uploadedFile.name) && (
|
{uploadedFile && isExcelFile(uploadedFile.name) && (
|
||||||
<Card className="border-none shadow-md">
|
<Card className="border-none shadow-md">
|
||||||
@@ -1238,8 +1557,117 @@ const Documents: React.FC = () => {
|
|||||||
</Card>
|
</Card>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Word AI 分析选项 */}
|
||||||
|
{uploadedFile && isWordFile(uploadedFile.name) && (
|
||||||
|
<Card className="border-none shadow-md bg-gradient-to-br from-blue-500/5 to-cyan-500/5">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Sparkles className="text-blue-500" size={20} />
|
||||||
|
Word AI 分析
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="space-y-4">
|
||||||
|
<div className="space-y-2">
|
||||||
|
<Label htmlFor="word-analysis-type" className="text-sm">分析类型</Label>
|
||||||
|
<Select value={wordAnalysisType} onValueChange={(value: any) => setWordAnalysisType(value)}>
|
||||||
|
<SelectTrigger id="word-analysis-type" className="bg-background">
|
||||||
|
<SelectValue />
|
||||||
|
</SelectTrigger>
|
||||||
|
<SelectContent>
|
||||||
|
<SelectItem value="structured">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<FileText size={16} />
|
||||||
|
<div className="flex flex-col">
|
||||||
|
<span className="font-medium">结构化提取</span>
|
||||||
|
<span className="text-xs text-muted-foreground">提取表格、键值对等</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</SelectItem>
|
||||||
|
<SelectItem value="charts">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<TrendingUp size={16} />
|
||||||
|
<div className="flex flex-col">
|
||||||
|
<span className="font-medium">数据图表</span>
|
||||||
|
<span className="text-xs text-muted-foreground">生成可视化图表</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</SelectItem>
|
||||||
|
</SelectContent>
|
||||||
|
</Select>
|
||||||
|
</div>
|
||||||
|
<div className="space-y-2">
|
||||||
|
<Label htmlFor="word-user-prompt" className="text-sm">自定义提示词(可选)</Label>
|
||||||
|
<Textarea
|
||||||
|
id="word-user-prompt"
|
||||||
|
placeholder="例如:请提取文档中的表格数据..."
|
||||||
|
value={wordUserHint}
|
||||||
|
onChange={(e) => setWordUserHint(e.target.value)}
|
||||||
|
className="bg-background resize-none"
|
||||||
|
rows={2}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<Button
|
||||||
|
onClick={handleWordAnalyze}
|
||||||
|
disabled={analyzing}
|
||||||
|
className="w-full bg-gradient-to-r from-blue-500 to-cyan-600 hover:from-blue-500/90 hover:to-cyan-600/90"
|
||||||
|
>
|
||||||
|
{analyzing ? <><Loader2 className="mr-2 animate-spin" size={16} /> 分析中...</> : <><Sparkles className="mr-2" size={16} />开始 AI 分析</>}
|
||||||
|
</Button>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* TXT AI 分析选项 */}
|
||||||
|
{uploadedFile && isTxtFile(uploadedFile.name) && (
|
||||||
|
<Card className="border-none shadow-md bg-gradient-to-br from-amber-500/5 to-orange-500/5">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Sparkles className="text-amber-500" size={20} />
|
||||||
|
TXT AI 分析
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="space-y-4">
|
||||||
|
<div className="space-y-2">
|
||||||
|
<Label htmlFor="txt-analysis-type" className="text-sm">分析类型</Label>
|
||||||
|
<Select value={txtAnalysisType} onValueChange={(value: any) => setTxtAnalysisType(value)}>
|
||||||
|
<SelectTrigger id="txt-analysis-type" className="bg-background">
|
||||||
|
<SelectValue />
|
||||||
|
</SelectTrigger>
|
||||||
|
<SelectContent>
|
||||||
|
<SelectItem value="structured">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<FileText size={16} />
|
||||||
|
<div className="flex flex-col">
|
||||||
|
<span className="font-medium">结构化提取</span>
|
||||||
|
<span className="text-xs text-muted-foreground">提取表格、键值对等</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</SelectItem>
|
||||||
|
<SelectItem value="charts">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<TrendingUp size={16} />
|
||||||
|
<div className="flex flex-col">
|
||||||
|
<span className="font-medium">数据图表</span>
|
||||||
|
<span className="text-xs text-muted-foreground">生成可视化图表</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</SelectItem>
|
||||||
|
</SelectContent>
|
||||||
|
</Select>
|
||||||
|
</div>
|
||||||
|
<Button
|
||||||
|
onClick={handleTxtAnalyze}
|
||||||
|
disabled={analyzing}
|
||||||
|
className="w-full bg-gradient-to-r from-amber-500 to-orange-600 hover:from-amber-500/90 hover:to-orange-600/90"
|
||||||
|
>
|
||||||
|
{analyzing ? <><Loader2 className="mr-2 animate-spin" size={16} /> 分析中...</> : <><Sparkles className="mr-2" size={16} />开始 AI 分析</>}
|
||||||
|
</Button>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* 数据操作 */}
|
{/* 数据操作 */}
|
||||||
{parseResult?.success && (
|
{(parseResult?.success || aiAnalysis?.success || mdAnalysis?.success || wordAnalysis?.success || txtAnalysis?.success) && (
|
||||||
<Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5">
|
<Card className="border-none shadow-md bg-gradient-to-br from-emerald-500/5 to-blue-500/5">
|
||||||
<CardHeader className="pb-4">
|
<CardHeader className="pb-4">
|
||||||
<CardTitle className="flex items-center gap-2">
|
<CardTitle className="flex items-center gap-2">
|
||||||
@@ -1248,7 +1676,7 @@ const Documents: React.FC = () => {
|
|||||||
</CardTitle>
|
</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent className="space-y-3">
|
<CardContent className="space-y-3">
|
||||||
<Button onClick={handleGenerateCharts} disabled={!aiAnalysis?.success || analyzingForCharts} className="w-full bg-gradient-to-r from-primary to-purple-600 hover:from-primary/90 hover:to-purple-600/90">
|
<Button onClick={handleGenerateCharts} disabled={!(aiAnalysis?.success || mdAnalysis?.success || wordAnalysis?.success || txtAnalysis?.success) || analyzingForCharts} className="w-full bg-gradient-to-r from-primary to-purple-600 hover:from-primary/90 hover:to-purple-600/90">
|
||||||
{analyzingForCharts ? <><Loader2 className="mr-2 animate-spin" size={16} />生成中...</> : <><Brain size={16} className="mr-2" />AI 分析图表</>}
|
{analyzingForCharts ? <><Loader2 className="mr-2 animate-spin" size={16} />生成中...</> : <><Brain size={16} className="mr-2" />AI 分析图表</>}
|
||||||
</Button>
|
</Button>
|
||||||
<Button onClick={openExportDialog} variant="outline" className="w-full">
|
<Button onClick={openExportDialog} variant="outline" className="w-full">
|
||||||
@@ -1338,6 +1766,114 @@ const Documents: React.FC = () => {
|
|||||||
</Card>
|
</Card>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Word AI 分析结果 */}
|
||||||
|
{wordAnalysis && (
|
||||||
|
<Card className="border-none shadow-md border-l-4 border-l-blue-500">
|
||||||
|
<CardHeader>
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="space-y-1">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Sparkles className="text-blue-500" size={20} />
|
||||||
|
Word AI 分析结果
|
||||||
|
</CardTitle>
|
||||||
|
{wordAnalysis.filename && (
|
||||||
|
<CardDescription>
|
||||||
|
{wordAnalysis.filename} • {wordAnalysis.analysis_type}
|
||||||
|
</CardDescription>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="max-h-[500px] overflow-y-auto">
|
||||||
|
{wordAnalysis.analysis_type === 'charts' && wordAnalysis.result?.charts ? (
|
||||||
|
<AIChartDisplay
|
||||||
|
charts={wordAnalysis.result.charts}
|
||||||
|
statistics={wordAnalysis.result.statistics}
|
||||||
|
distributions={wordAnalysis.result.distributions}
|
||||||
|
/>
|
||||||
|
) : wordAnalysis.result?.success === false ? (
|
||||||
|
<p className="text-sm text-destructive">{wordAnalysis.result?.error || wordAnalysis.error || '分析失败'}</p>
|
||||||
|
) : wordAnalysis.result?.summary ? (
|
||||||
|
<Markdown content={wordAnalysis.result.summary} />
|
||||||
|
) : wordAnalysis.result?.headers && wordAnalysis.result?.rows ? (
|
||||||
|
<div className="space-y-2">
|
||||||
|
<p className="text-sm font-medium">提取的表格数据:</p>
|
||||||
|
<div className="border rounded-lg overflow-x-auto">
|
||||||
|
<TableComponent>
|
||||||
|
<TableHeader>
|
||||||
|
<TableRow>
|
||||||
|
{wordAnalysis.result.headers.map((header: string, idx: number) => (
|
||||||
|
<TableHead key={idx}>{header}</TableHead>
|
||||||
|
))}
|
||||||
|
</TableRow>
|
||||||
|
</TableHeader>
|
||||||
|
<TableBody>
|
||||||
|
{wordAnalysis.result.rows.slice(0, 20).map((row: string[], idx: number) => (
|
||||||
|
<TableRow key={idx}>
|
||||||
|
{row.map((cell: string, cidx: number) => (
|
||||||
|
<TableCell key={cidx}>{cell}</TableCell>
|
||||||
|
))}
|
||||||
|
</TableRow>
|
||||||
|
))}
|
||||||
|
</TableBody>
|
||||||
|
</TableComponent>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<p className="text-sm text-muted-foreground">分析完成,无数据</p>
|
||||||
|
)}
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* TXT AI 分析结果 */}
|
||||||
|
{txtAnalysis && (
|
||||||
|
<Card className="border-none shadow-md border-l-4 border-l-amber-500">
|
||||||
|
<CardHeader>
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="space-y-1">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Sparkles className="text-amber-500" size={20} />
|
||||||
|
TXT AI 分析结果
|
||||||
|
</CardTitle>
|
||||||
|
{txtAnalysis.filename && (
|
||||||
|
<CardDescription>
|
||||||
|
{txtAnalysis.filename} • {txtAnalysis.analysis_type}
|
||||||
|
</CardDescription>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="max-h-[500px] overflow-y-auto">
|
||||||
|
{txtAnalysis.analysis_type === 'charts' && txtAnalysis.result?.charts ? (
|
||||||
|
<AIChartDisplay
|
||||||
|
charts={txtAnalysis.result.charts}
|
||||||
|
statistics={txtAnalysis.result.statistics}
|
||||||
|
distributions={txtAnalysis.result.distributions}
|
||||||
|
/>
|
||||||
|
) : txtAnalysis.result?.success === false ? (
|
||||||
|
<p className="text-sm text-destructive">{txtAnalysis.result?.error || txtAnalysis.error || '分析失败'}</p>
|
||||||
|
) : txtAnalysis.result?.summary ? (
|
||||||
|
<Markdown content={txtAnalysis.result.summary} />
|
||||||
|
) : txtAnalysis.result?.key_values && Object.keys(txtAnalysis.result.key_values || {}).length > 0 ? (
|
||||||
|
<div className="space-y-2">
|
||||||
|
<p className="text-sm font-medium">提取的键值对:</p>
|
||||||
|
<div className="grid grid-cols-2 gap-2">
|
||||||
|
{Object.entries(txtAnalysis.result.key_values || {}).map(([key, value]: [string, any]) => (
|
||||||
|
<div key={key} className="flex gap-2 p-2 bg-muted/30 rounded-lg">
|
||||||
|
<span className="font-medium text-sm">{key}:</span>
|
||||||
|
<span className="text-sm text-muted-foreground">{String(value)}</span>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<p className="text-sm text-muted-foreground">分析完成,无数据</p>
|
||||||
|
)}
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* 图表显示 */}
|
{/* 图表显示 */}
|
||||||
{analysisCharts && (
|
{analysisCharts && (
|
||||||
<Card className="border-none shadow-md border-l-4 border-l-indigo-500">
|
<Card className="border-none shadow-md border-l-4 border-l-indigo-500">
|
||||||
@@ -1482,6 +2018,95 @@ const Documents: React.FC = () => {
|
|||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
|
{/* 已上传文档详情 */}
|
||||||
|
{selectedDocument && (
|
||||||
|
<Card className="border-none shadow-md border-l-4 border-l-cyan-500">
|
||||||
|
<CardHeader>
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="space-y-1">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<FileText className="text-cyan-500" size={20} />
|
||||||
|
文档详情
|
||||||
|
</CardTitle>
|
||||||
|
<CardDescription>
|
||||||
|
{selectedDocument.original_filename} • {selectedDocument.doc_type.toUpperCase()}
|
||||||
|
</CardDescription>
|
||||||
|
</div>
|
||||||
|
<Button variant="ghost" size="sm" onClick={() => setSelectedDocument(null)}>
|
||||||
|
关闭
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="max-h-[500px] overflow-y-auto">
|
||||||
|
{loadingDocument ? (
|
||||||
|
<div className="flex items-center justify-center py-8">
|
||||||
|
<Loader2 className="animate-spin" size={24} />
|
||||||
|
<span className="ml-2">加载中...</span>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="space-y-4">
|
||||||
|
{selectedDocument.structured_data?.tables && selectedDocument.structured_data.tables.length > 0 && (
|
||||||
|
<div className="space-y-2">
|
||||||
|
<p className="text-sm font-medium">表格数据:</p>
|
||||||
|
{selectedDocument.structured_data.tables.slice(0, 3).map((table: any, idx: number) => (
|
||||||
|
<div key={idx} className="border rounded-lg overflow-x-auto">
|
||||||
|
{table.headers && (
|
||||||
|
<TableComponent>
|
||||||
|
<TableHeader>
|
||||||
|
<TableRow>
|
||||||
|
{table.headers.map((header: string, hIdx: number) => (
|
||||||
|
<TableHead key={hIdx}>{header}</TableHead>
|
||||||
|
))}
|
||||||
|
</TableRow>
|
||||||
|
</TableHeader>
|
||||||
|
<TableBody>
|
||||||
|
{(table.rows || []).slice(0, 10).map((row: string[], rIdx: number) => (
|
||||||
|
<TableRow key={rIdx}>
|
||||||
|
{row.map((cell: string, cIdx: number) => (
|
||||||
|
<TableCell key={cIdx}>{cell}</TableCell>
|
||||||
|
))}
|
||||||
|
</TableRow>
|
||||||
|
))}
|
||||||
|
</TableBody>
|
||||||
|
</TableComponent>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{selectedDocument.structured_data?.key_values && Object.keys(selectedDocument.structured_data.key_values || {}).length > 0 && (
|
||||||
|
<div className="space-y-2">
|
||||||
|
<p className="text-sm font-medium">键值对数据:</p>
|
||||||
|
<div className="grid grid-cols-2 gap-2">
|
||||||
|
{Object.entries(selectedDocument.structured_data.key_values || {}).map(([key, value]: [string, any]) => (
|
||||||
|
<div key={key} className="flex gap-2 p-2 bg-muted/30 rounded-lg">
|
||||||
|
<span className="font-medium text-sm">{key}:</span>
|
||||||
|
<span className="text-sm text-muted-foreground">{String(value)}</span>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{selectedDocument.content && (
|
||||||
|
<div className="space-y-2">
|
||||||
|
<p className="text-sm font-medium">文本内容预览:</p>
|
||||||
|
<div className="p-3 bg-muted/30 rounded-lg max-h-[300px] overflow-y-auto">
|
||||||
|
<p className="text-sm whitespace-pre-wrap font-mono">
|
||||||
|
{selectedDocument.content.slice(0, 2000)}
|
||||||
|
{selectedDocument.content.length > 2000 && '...'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{!selectedDocument.content && !selectedDocument.structured_data?.tables && !selectedDocument.structured_data?.key_values && (
|
||||||
|
<p className="text-sm text-muted-foreground text-center py-4">该文档没有可显示的内容</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* 文档列表 */}
|
{/* 文档列表 */}
|
||||||
<Card className="border-none shadow-md">
|
<Card className="border-none shadow-md">
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
@@ -1509,7 +2134,14 @@ const Documents: React.FC = () => {
|
|||||||
) : (filteredDocs?.length ?? 0) > 0 ? (
|
) : (filteredDocs?.length ?? 0) > 0 ? (
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
{(filteredDocs || []).map(doc => (
|
{(filteredDocs || []).map(doc => (
|
||||||
<div key={doc.doc_id} className="flex items-center gap-4 p-4 rounded-xl border border-transparent hover:bg-muted/30 transition-all group">
|
<div
|
||||||
|
key={doc.doc_id}
|
||||||
|
className={cn(
|
||||||
|
"flex items-center gap-4 p-4 rounded-xl border border-transparent hover:bg-muted/30 transition-all group cursor-pointer",
|
||||||
|
selectedDocument?.doc_id === doc.doc_id && "bg-primary/5 border-primary/20"
|
||||||
|
)}
|
||||||
|
onClick={() => handleSelectDocument(doc.doc_id)}
|
||||||
|
>
|
||||||
<div className={cn(
|
<div className={cn(
|
||||||
"w-10 h-10 rounded-lg flex items-center justify-center shrink-0",
|
"w-10 h-10 rounded-lg flex items-center justify-center shrink-0",
|
||||||
doc.doc_type === 'xlsx' ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
doc.doc_type === 'xlsx' ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
||||||
@@ -1522,7 +2154,10 @@ const Documents: React.FC = () => {
|
|||||||
{doc.doc_type.toUpperCase()} • {format(new Date(doc.created_at), 'yyyy-MM-dd HH:mm')}
|
{doc.doc_type.toUpperCase()} • {format(new Date(doc.created_at), 'yyyy-MM-dd HH:mm')}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<Button variant="ghost" size="icon" className="text-destructive hover:bg-destructive/10 opacity-0 group-hover:opacity-100" onClick={() => handleDelete(doc.doc_id)}>
|
<Button variant="ghost" size="icon" className="text-destructive hover:bg-destructive/10 opacity-0 group-hover:opacity-100" onClick={(e) => {
|
||||||
|
e.stopPropagation();
|
||||||
|
handleDelete(doc.doc_id);
|
||||||
|
}}>
|
||||||
<Trash2 size={16} />
|
<Trash2 size={16} />
|
||||||
</Button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user