增强 Word 文档 AI 解析和模板填充功能

This commit is contained in:
zzz
2026-04-14 17:16:38 +08:00
3 changed files with 142 additions and 78 deletions

View File

@@ -10,6 +10,7 @@ import os
from app.services.excel_ai_service import excel_ai_service
from app.services.markdown_ai_service import markdown_ai_service
from app.services.template_fill_service import template_fill_service
from app.services.word_ai_service import word_ai_service
logger = logging.getLogger(__name__)
@@ -343,6 +344,77 @@ async def get_markdown_outline(
raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")
@router.post("/analyze/txt")
async def analyze_txt(
    file: UploadFile = File(...),
):
    """
    Upload a TXT file and use AI to extract structured data from it.

    The uploaded bytes are decoded as UTF-8 (undecodable bytes are replaced)
    and passed, together with the original filename, to
    ``template_fill_service.analyze_txt_with_ai``.

    Args:
        file: The uploaded text file; its extension must be ``txt`` or ``text``.

    Returns:
        dict: ``success``, ``filename`` and ``structured_data``. When the
        service returns a falsy result, ``success`` is False,
        ``structured_data`` is None and an ``error`` message is included.

    Raises:
        HTTPException: 400 when the filename is empty or the extension is not
            supported; 500 when reading or analyzing the file fails.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['txt', 'text']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
        )

    try:
        content = await file.read()

        logger.info(f"开始 AI 分析 TXT 文件: {file.filename}")

        # The service consumes the decoded text directly, so the upload does
        # not need to be spooled to a temporary file on disk.
        result = await template_fill_service.analyze_txt_with_ai(
            content=content.decode('utf-8', errors='replace'),
            filename=file.filename
        )

        if result:
            logger.info(f"TXT AI 分析成功: {file.filename}")
            return {
                "success": True,
                "filename": file.filename,
                "structured_data": result
            }

        logger.warning(f"TXT AI 分析返回空结果: {file.filename}")
        return {
            "success": False,
            "filename": file.filename,
            "error": "AI 分析未能提取到结构化数据",
            "structured_data": None
        }

    except HTTPException:
        # Let deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"TXT AI 分析过程中出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
# ==================== Word 文档 AI 解析 ====================
@router.post("/analyze/word")
@@ -373,98 +445,41 @@ async def analyze_word(
)
try:
# 保存上传的文件
content = await file.read()
# 保存到临时文件
with tempfile.NamedTemporaryFile(mode='wb', suffix='.docx', delete=False) as tmp:
suffix = f".{file_ext}"
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
tmp.write(content)
tmp_path = tmp.name
try:
logger.info(f"开始 AI 解析 Word 文件: {file.filename}")
# 使用 AI 解析 Word 文档
result = await word_ai_service.parse_word_with_ai(
file_path=tmp_path,
user_hint=user_hint
user_hint=user_hint or "请提取文档中的所有结构化数据,包括表格、键值对等"
)
logger.info(f"Word AI 解析完成: {file.filename}, success={result.get('success')}")
return result
if result.get("success"):
return {
"success": True,
"filename": file.filename,
"result": result
}
else:
return {
"success": False,
"filename": file.filename,
"error": result.get("error", "AI 解析失败"),
"result": None
}
finally:
# 清理临时文件
if os.path.exists(tmp_path):
os.unlink(tmp_path)
except HTTPException:
raise
except Exception as e:
logger.error(f"Word AI 解析出错: {str(e)}")
raise HTTPException(status_code=500, detail=f"AI 解析失败: {str(e)}")
@router.post("/analyze/word/fill-template")
async def fill_template_with_word_ai(
    file: UploadFile = File(...),
    template_fields: str = Query("", description="模板字段JSON字符串"),
    user_hint: str = Query("", description="用户提示词")
):
    """
    Parse a Word document with AI and fill a template from it.

    The upload is written to a temporary ``.docx`` file whose path is passed
    to ``word_ai_service.fill_template_with_ai``; the temporary file is
    removed afterwards whether or not the call succeeds.

    Args:
        file: The uploaded Word file; only the ``docx`` extension is accepted.
        template_fields: JSON string holding an array of template fields.
            An empty string is treated as an empty list.
        user_hint: Optional user prompt forwarded to the AI service.

    Returns:
        dict: The result returned by ``word_ai_service.fill_template_with_ai``.

    Raises:
        HTTPException: 400 when the filename is empty, the extension is not
            ``docx``, or ``template_fields`` is not a JSON array; 500 when
            reading the upload or the AI fill step fails.
    """
    import json as json_module

    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ['docx']:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .docx"
        )

    try:
        fields = json_module.loads(template_fields) if template_fields else []
    except json_module.JSONDecodeError:
        raise HTTPException(status_code=400, detail="template_fields 格式错误,应为 JSON 数组")

    # Valid JSON that is not an array (e.g. a number or an object) is still a
    # client error; reject it here instead of failing later with a 500.
    if not isinstance(fields, list):
        raise HTTPException(status_code=400, detail="template_fields 格式错误,应为 JSON 数组")

    tmp_path = None
    try:
        content = await file.read()

        try:
            # Record the path before writing so a failed write is still cleaned up.
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.docx', delete=False) as tmp:
                tmp_path = tmp.name
                tmp.write(content)

            logger.info(f"开始 AI 填表Word: {file.filename}, 字段数: {len(fields)}")

            result = await word_ai_service.fill_template_with_ai(
                file_path=tmp_path,
                template_fields=fields,
                user_hint=user_hint
            )

            logger.info(f"Word AI 填表完成: {file.filename}, success={result.get('success')}")

            return result

        finally:
            # Remove the temporary file regardless of the outcome.
            if tmp_path and os.path.exists(tmp_path):
                os.unlink(tmp_path)

    except HTTPException:
        # Let deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Word AI 填表出错: {str(e)}")
        raise HTTPException(status_code=500, detail=f"AI 填表失败: {str(e)}")
logger.error(f"Word AI 分析过程中出错: {str(e)}")
raise HTTPException(status_code=500, detail=f"析失败: {str(e)}")

View File

@@ -1,6 +1,13 @@
"""
FastAPI 应用主入口
"""
# ========== Suppress noisy MongoDB driver logging ==========
# Raise the "pymongo", "pymongo.topology" and "urllib3" loggers to WARNING so
# that their DEBUG/INFO records are filtered out.
# NOTE(review): presumably placed at the very top so it takes effect before
# other modules are imported — confirm before moving it.
import logging
logging.getLogger("pymongo").setLevel(logging.WARNING)
logging.getLogger("pymongo.topology").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
# ============================================================
import logging
import logging.handlers
import sys

View File

@@ -1239,6 +1239,48 @@ export const aiApi = {
}
},
/**
 * Upload a TXT file and have the backend AI extract structured data from it.
 *
 * The file is sent as multipart form data in a POST request to
 * `${BACKEND_BASE_URL}/ai/analyze/txt`.
 *
 * @param file - The text file to analyze.
 * @returns The parsed JSON body of the response.
 * @throws Error when the response status is not OK. The message is the
 *   `detail` field of the error body when one can be read, otherwise a
 *   generic fallback message.
 */
async analyzeTxt(
  file: File
): Promise<{
  success: boolean;
  filename?: string;
  structured_data?: {
    table?: {
      columns?: string[];
      rows?: string[][];
    };
    summary?: string;
    key_value_pairs?: Array<{ key: string; value: string }>;
    numeric_data?: Array<{ name: string; value: number; unit?: string }>;
  };
  error?: string;
}> {
  const formData = new FormData();
  formData.append('file', file);

  const url = `${BACKEND_BASE_URL}/ai/analyze/txt`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // The error body is not guaranteed to be JSON (e.g. a proxy error page),
      // so a parse failure must not hide the HTTP failure itself.
      const errorBody = await response.json().catch(() => null);
      const detail = errorBody && errorBody.detail;
      // `detail` may be a structured value rather than a string; serialize it
      // so the message does not collapse to "[object Object]".
      const message =
        typeof detail === 'string'
          ? detail
          : detail
            ? JSON.stringify(detail)
            : '';
      throw new Error(message || 'TXT AI 分析失败');
    }

    return await response.json();
  } catch (error) {
    console.error('TXT AI 分析失败:', error);
    throw error;
  }
},
/**
* 生成统计信息和图表
*/