【智能助手增强】

- 新增对话历史管理：MongoDB新增conversations集合，存储用户与AI的对话上下文，支持多轮对话意图延续
- 新增对话历史API（conversation.py）：GET/DELETE conversation历史、列出所有会话
- 意图解析增强：支持基于对话历史的意图识别，上下文理解更准确
- 字段提取优化：支持"提取文档中的医院数量"等自然语言模式，智能去除"文档中的"前缀
- 文档对比优化：从指令中提取文件名并精确匹配source_docs，支持"对比A和B两个文档"
- 文档摘要优化：使用LLM生成真实AI摘要而非返回原始文档预览

【Word模板填表核心功能】

- Word模板字段生成：空白Word上传后，自动从源文档（Excel/Word/TXT/MD）内容AI生成字段名
- Word模板填表（_fill_docx）：将提取数据写入Word模板表格，支持精确匹配、模糊匹配、追加新行
- 数据润色（_polish_word_filled_data）：LLM对多行Excel数据进行统计归纳（合计/平均/极值），转化为专业自然语言描述
- 段落格式输出：使用📌字段名+值段落+分隔线（灰色横线）格式，提升可读性
- 导出链打通：fill_template返回filled_file_path，export直接返回已填好的Word文件

【其他修复】

- 修复Word导出Windows文件锁问题：NamedTemporaryFile改为mkstemp+close
- 修复Word方框非法字符：扩展clean_text移除\uFFFD、□等Unicode替代符和零宽字符
- 修复文档对比"需要至少2个文档"：从指令提取具体文件名优先匹配而非取前2个
- 修复导出format硬编码：自动识别docx/xlsx格式
- Docx解析器增加备用解析方法和更完整的段落/表格/标题提取
- RAG服务新增MySQL数据源支持
2026-04-15 23:32:55 +08:00
parent 9e7f9df384
commit e5d4724e82
19 changed files with 2185 additions and 407 deletions
--- a/backend/app/api/endpoints/templates.py
+++ b/backend/app/api/endpoints/templates.py
@@ -87,6 +87,7 @@ class ExportRequest(BaseModel):
    template_id: str
    filled_data: dict
    format: str = "xlsx"  # xlsx 或 docx
+    filled_file_path: Optional[str] = None  # 已填写的 Word 文件路径（可选）


 # ==================== 接口实现 ====================
@@ -541,7 +542,7 @@ async def export_filled_template(
        if request.format == "xlsx":
            return await _export_to_excel(request.filled_data, request.template_id)
        elif request.format == "docx":
-            return await _export_to_word(request.filled_data, request.template_id)
+            return await _export_to_word(request.filled_data, request.template_id, request.filled_file_path)
        else:
            raise HTTPException(
                status_code=400,
@@ -608,11 +609,12 @@ async def _export_to_excel(filled_data: dict, template_id: str) -> StreamingResp
    )


-async def _export_to_word(filled_data: dict, template_id: str) -> StreamingResponse:
+async def _export_to_word(filled_data: dict, template_id: str, filled_file_path: Optional[str] = None) -> StreamingResponse:
    """导出为 Word 格式"""
    import re
    import tempfile
    import os
+    import urllib.parse
    from docx import Document
    from docx.shared import Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH
@@ -623,12 +625,32 @@ async def _export_to_word(filled_data: dict, template_id: str) -> StreamingRespo
            return ""
        # 移除控制字符
        text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
+        # 转义 XML 特殊字符以防破坏文档结构
+        text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        return text.strip()

+    tmp_path = None
    try:
-        # 先保存到临时文件，再读取到内存，确保文档完整性
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as tmp_file:
-            tmp_path = tmp_file.name
+        # 如果有已填写的文件（通过 _fill_docx 填写了模板单元格），直接返回该文件
+        if filled_file_path and os.path.exists(filled_file_path):
+            filename = os.path.basename(filled_file_path)
+            with open(filled_file_path, 'rb') as f:
+                file_content = f.read()
+            output = io.BytesIO(file_content)
+            encoded_filename = urllib.parse.quote(filename)
+            return StreamingResponse(
+                output,
+                media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+                headers={
+                    "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
+                    "Content-Length": str(len(file_content))
+                }
+            )
+
+        # 没有已填写文件，创建新的 Word 文档（表格形式）
+        # 创建临时文件（立即关闭句柄，避免 Windows 文件锁问题）
+        tmp_fd, tmp_path = tempfile.mkstemp(suffix='.docx')
+        os.close(tmp_fd)  # 关闭立即得到的 fd，让 docx 可以写入

        doc = Document()
        doc.add_heading('填写结果', level=1)
@@ -670,19 +692,23 @@ async def _export_to_word(filled_data: dict, template_id: str) -> StreamingRespo

    finally:
        # 清理临时文件
-        if os.path.exists(tmp_path):
+        if tmp_path and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
-            except:
+            except Exception:
                pass

    output = io.BytesIO(file_content)
    filename = "filled_template.docx"
+    encoded_filename = urllib.parse.quote(filename)

    return StreamingResponse(
        output,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-        headers={"Content-Disposition": f"attachment; filename*=UTF-8''{filename}"}
+        headers={
+            "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
+            "Content-Length": str(len(file_content))
+        }
    )