增强 Word 文档 AI 解析和模板填充功能
This commit is contained in:
@@ -10,6 +10,7 @@ import os
|
||||
|
||||
from app.services.excel_ai_service import excel_ai_service
|
||||
from app.services.markdown_ai_service import markdown_ai_service
|
||||
from app.services.template_fill_service import template_fill_service
|
||||
from app.services.word_ai_service import word_ai_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -343,6 +344,77 @@ async def get_markdown_outline(
|
||||
raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/analyze/txt")
async def analyze_txt(
    file: UploadFile = File(...),
):
    """
    Upload a TXT file and use AI to extract structured data from it.

    The uploaded bytes are decoded as UTF-8 (undecodable bytes are replaced)
    and passed to ``template_fill_service.analyze_txt_with_ai`` together with
    the original filename.

    Args:
        file: The uploaded text file. Its extension must be ``txt`` or ``text``.

    Returns:
        dict: ``success``, ``filename`` and ``structured_data``. When the
        service returns a falsy result, ``success`` is False,
        ``structured_data`` is None and an ``error`` message is included.

    Raises:
        HTTPException: 400 when the filename is empty or the extension is not
            supported; 500 when reading or analysing the file raises.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['txt', 'text']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
        )

    try:
        content = await file.read()

        logger.info(f"开始 AI 分析 TXT 文件: {file.filename}")

        # The service consumes the decoded text directly, so the upload does
        # not need to be spooled to a temporary file first.
        result = await template_fill_service.analyze_txt_with_ai(
            content=content.decode('utf-8', errors='replace'),
            filename=file.filename
        )

        if result:
            logger.info(f"TXT AI 分析成功: {file.filename}")
            return {
                "success": True,
                "filename": file.filename,
                "structured_data": result
            }

        logger.warning(f"TXT AI 分析返回空结果: {file.filename}")
        return {
            "success": False,
            "filename": file.filename,
            "error": "AI 分析未能提取到结构化数据",
            "structured_data": None
        }

    except HTTPException:
        # Let deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"TXT AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}") from e
|
||||
|
||||
|
||||
# ==================== Word 文档 AI 解析 ====================
|
||||
|
||||
@router.post("/analyze/word")
|
||||
@@ -373,98 +445,41 @@ async def analyze_word(
|
||||
)
|
||||
|
||||
try:
|
||||
# 保存上传的文件
|
||||
content = await file.read()
|
||||
|
||||
# 保存到临时文件
|
||||
with tempfile.NamedTemporaryFile(mode='wb', suffix='.docx', delete=False) as tmp:
|
||||
suffix = f".{file_ext}"
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
||||
tmp.write(content)
|
||||
tmp_path = tmp.name
|
||||
|
||||
try:
|
||||
logger.info(f"开始 AI 解析 Word 文件: {file.filename}")
|
||||
|
||||
# 使用 AI 解析 Word 文档
|
||||
result = await word_ai_service.parse_word_with_ai(
|
||||
file_path=tmp_path,
|
||||
user_hint=user_hint
|
||||
user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
|
||||
)
|
||||
|
||||
logger.info(f"Word AI 解析完成: {file.filename}, success={result.get('success')}")
|
||||
|
||||
return result
|
||||
if result.get("success"):
|
||||
return {
|
||||
"success": True,
|
||||
"filename": file.filename,
|
||||
"result": result
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"filename": file.filename,
|
||||
"error": result.get("error", "AI 解析失败"),
|
||||
"result": None
|
||||
}
|
||||
|
||||
finally:
|
||||
# 清理临时文件
|
||||
if os.path.exists(tmp_path):
|
||||
os.unlink(tmp_path)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Word AI 解析出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"AI 解析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/analyze/word/fill-template")
async def fill_template_with_word_ai(
    file: UploadFile = File(...),
    template_fields: str = Query("", description="模板字段,JSON字符串"),
    user_hint: str = Query("", description="用户提示词")
):
    """
    Parse a Word document with AI and fill a template from it in one call.

    The upload is written to a temporary ``.docx`` file, which is passed to
    ``word_ai_service.fill_template_with_ai`` and removed afterwards.

    Args:
        file: The uploaded Word file. Only the ``docx`` extension is accepted.
        template_fields: Template fields as a JSON array string. An empty
            string is treated as an empty list.
        user_hint: Free-form hint forwarded to the AI service.

    Returns:
        dict: The result returned by ``word_ai_service.fill_template_with_ai``.

    Raises:
        HTTPException: 400 when the filename is empty, the extension is not
            ``docx``, or ``template_fields`` is not a JSON array; 500 when
            reading, storing or processing the file raises.
    """
    # Imported before the try block so the except clause can always resolve it.
    import json as json_module

    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['docx']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .docx"
        )

    try:
        fields = json_module.loads(template_fields) if template_fields else []
    except json_module.JSONDecodeError:
        raise HTTPException(
            status_code=400,
            detail="template_fields 格式错误,应为 JSON 数组"
        ) from None

    # Valid JSON that is not an array (object, number, string, null) would
    # otherwise reach len()/the service and surface as a misleading 500.
    if not isinstance(fields, list):
        raise HTTPException(status_code=400, detail="template_fields 格式错误,应为 JSON 数组")

    try:
        content = await file.read()

        # delete=False because the service reopens the file by path; cleanup
        # is done explicitly in the finally block below.
        tmp = tempfile.NamedTemporaryFile(mode='wb', suffix='.docx', delete=False)
        tmp_path = tmp.name

        try:
            # Writing happens inside the guarded region so a failed write
            # does not leave the temporary file behind.
            with tmp:
                tmp.write(content)

            logger.info(f"开始 AI 填表(Word): {file.filename}, 字段数: {len(fields)}")

            result = await word_ai_service.fill_template_with_ai(
                file_path=tmp_path,
                template_fields=fields,
                user_hint=user_hint
            )

            logger.info(f"Word AI 填表完成: {file.filename}, success={result.get('success')}")

            return result

        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    except HTTPException:
        # Let deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"Word AI 填表出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"AI 填表失败: {str(e)}") from e
|
||||
logger.error(f"Word AI 分析过程中出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
"""
|
||||
FastAPI 应用主入口
|
||||
"""
|
||||
# ========== Silence the very chatty MongoDB driver logs ==========
import logging

# Raise the threshold of the noisy third-party loggers to WARNING.
for _noisy_logger_name in ("pymongo", "pymongo.topology", "urllib3"):
    logging.getLogger(_noisy_logger_name).setLevel(logging.WARNING)
# ==================================================================
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import sys
|
||||
|
||||
@@ -1239,6 +1239,48 @@ export const aiApi = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
 * Upload a TXT file and have the backend AI extract structured data from it.
 *
 * Sends the file as multipart form data to `/ai/analyze/txt`.
 *
 * @param file The text file to analyse.
 * @returns The parsed JSON response of the endpoint.
 * @throws Error when the request fails or the server answers with a non-OK
 *   status; the message is the server's `detail` string when one is provided.
 */
async analyzeTxt(
  file: File
): Promise<{
  success: boolean;
  filename?: string;
  structured_data?: {
    table?: {
      columns?: string[];
      rows?: string[][];
    };
    summary?: string;
    key_value_pairs?: Array<{ key: string; value: string }>;
    numeric_data?: Array<{ name: string; value: number; unit?: string }>;
  };
  error?: string;
}> {
  const formData = new FormData();
  formData.append('file', file);

  const url = `${BACKEND_BASE_URL}/ai/analyze/txt`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // Error bodies are not guaranteed to be JSON (e.g. an HTML page from a
      // proxy); a parse failure must not hide the fact that the request failed.
      const errorBody = await response.json().catch(() => null);
      const detail = errorBody && errorBody.detail;
      // `detail` may be a non-string (e.g. a validation error array); only a
      // string makes a readable message.
      throw new Error(
        typeof detail === 'string' && detail ? detail : 'TXT AI 分析失败'
      );
    }

    return await response.json();
  } catch (error) {
    console.error('TXT AI 分析失败:', error);
    throw error;
  }
},
|
||||
|
||||
/**
|
||||
* 生成统计信息和图表
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user