优化联合模板上传逻辑，支持源文档内容解析

- 移除模板文件字段提取步骤，改为直接保存模板文件
- 新增源文档解析功能，提取文档内容、标题和表格数量信息
- 修改模板填充服务，支持传入源文档内容用于 AI 表头生成
- 更新 AI 表头生成逻辑，基于源文档内容智能生成合适的表头字段
- 增强日志记录，显示源文档数量和处理进度
2026-04-09 22:43:51 +08:00
parent 7f67fa89de
commit 6752c5c231
2 changed files with 59 additions and 14 deletions
--- a/backend/app/api/endpoints/templates.py
+++ b/backend/app/api/endpoints/templates.py
@@ -155,20 +155,17 @@ async def upload_joint_template(
                )

    try:
-        # 1. 保存模板文件并提取字段
+        # 1. 保存模板文件
        template_content = await template_file.read()
        template_path = file_service.save_uploaded_file(
            template_content,
            template_file.filename,
            subfolder="templates"
        )
-        template_fields = await template_fill_service.get_template_fields_from_file(
-            template_path,
-            template_ext
-        )

-        # 2. 处理源文档 - 保存文件
+        # 2. 保存并解析源文档 - 提取内容用于生成表头
        source_file_info = []
+        source_contents = []
        for sf in source_files:
            if sf.filename:
                sf_content = await sf.read()
@@ -183,6 +180,28 @@ async def upload_joint_template(
                    "filename": sf.filename,
                    "ext": sf_ext
                })
+                # 解析源文档获取内容（用于 AI 生成表头）
+                try:
+                    from app.core.document_parser import ParserFactory
+                    parser = ParserFactory.get_parser(sf_path)
+                    parse_result = parser.parse(sf_path)
+                    if parse_result.success and parse_result.data:
+                        source_contents.append({
+                            "filename": sf.filename,
+                            "doc_type": sf_ext,
+                            "content": parse_result.data.get("content", "")[:5000] if parse_result.data.get("content") else "",
+                            "titles": parse_result.data.get("titles", [])[:10] if parse_result.data.get("titles") else [],
+                            "tables_count": len(parse_result.data.get("tables", [])) if parse_result.data.get("tables") else 0
+                        })
+                except Exception as e:
+                    logger.warning(f"解析源文档失败 {sf.filename}: {e}")
+
+        # 3. 根据源文档内容生成表头
+        template_fields = await template_fill_service.get_template_fields_from_file(
+            template_path,
+            template_ext,
+            source_contents=source_contents  # 传递源文档内容
+        )

        # 3. 异步处理源文档到MongoDB
        task_id = str(uuid.uuid4())