【智能助手增强】

- 新增对话历史管理：MongoDB新增conversations集合，存储用户与AI的对话上下文，支持多轮对话意图延续
- 新增对话历史API（conversation.py）：GET/DELETE conversation历史、列出所有会话
- 意图解析增强：支持基于对话历史的意图识别，上下文理解更准确
- 字段提取优化：支持"提取文档中的医院数量"等自然语言模式，智能去除"文档中的"前缀
- 文档对比优化：从指令中提取文件名并精确匹配source_docs，支持"对比A和B两个文档"
- 文档摘要优化：使用LLM生成真实AI摘要而非返回原始文档预览

【Word模板填表核心功能】

- Word模板字段生成：空白Word上传后，自动从源文档（Excel/Word/TXT/MD）内容AI生成字段名
- Word模板填表（_fill_docx）：将提取数据写入Word模板表格，支持精确匹配、模糊匹配、追加新行
- 数据润色（_polish_word_filled_data）：LLM对多行Excel数据进行统计归纳（合计/平均/极值），转化为专业自然语言描述
- 段落格式输出：使用📌字段名+值段落+分隔线（灰色横线）格式，提升可读性
- 导出链打通：fill_template返回filled_file_path，export直接返回已填好的Word文件

【其他修复】

- 修复Word导出Windows文件锁问题：NamedTemporaryFile改为mkstemp+close
- 修复Word方框非法字符：扩展clean_text移除\uFFFD、□等Unicode替代符和零宽字符
- 修复文档对比"需要至少2个文档"：从指令提取具体文件名优先匹配而非取前2个
- 修复导出format硬编码：自动识别docx/xlsx格式
- Docx解析器增加备用解析方法和更完整的段落/表格/标题提取
- RAG服务新增MySQL数据源支持
2026-04-15 23:32:55 +08:00
parent 9e7f9df384
commit e5d4724e82
19 changed files with 2185 additions and 407 deletions
--- a/backend/app/api/endpoints/instruction.py
+++ b/backend/app/api/endpoints/instruction.py
@@ -25,6 +25,7 @@ class InstructionRequest(BaseModel):
    instruction: str
    doc_ids: Optional[List[str]] = None  # 关联的文档 ID 列表
    context: Optional[Dict[str, Any]] = None  # 额外上下文
+    conversation_id: Optional[str] = None  # 对话会话ID，用于关联历史记录


 class IntentRecognitionResponse(BaseModel):
@@ -240,7 +241,8 @@ async def instruction_chat(
            task_id=task_id,
            instruction=request.instruction,
            doc_ids=request.doc_ids,
-            context=request.context
+            context=request.context,
+            conversation_id=request.conversation_id
        )

        return {
@@ -251,14 +253,15 @@ async def instruction_chat(
        }

    # 同步模式：等待执行完成
-    return await _execute_chat_task(task_id, request.instruction, request.doc_ids, request.context)
+    return await _execute_chat_task(task_id, request.instruction, request.doc_ids, request.context, request.conversation_id)


 async def _execute_chat_task(
    task_id: str,
    instruction: str,
    doc_ids: Optional[List[str]],
-    context: Optional[Dict[str, Any]]
+    context: Optional[Dict[str, Any]],
+    conversation_id: Optional[str] = None
 ):
    """执行指令对话的后台任务"""
    from app.core.database import mongodb as mongo_client
@@ -278,6 +281,13 @@ async def _execute_chat_task(
        # 构建上下文
        ctx: Dict[str, Any] = context or {}

+        # 获取对话历史
+        if conversation_id:
+            history = await mongo_client.get_conversation_history(conversation_id, limit=20)
+            if history:
+                ctx["conversation_history"] = history
+                logger.info(f"加载对话历史: conversation_id={conversation_id}, 消息数={len(history)}")
+
        # 获取关联文档
        if doc_ids:
            docs = []
@@ -291,6 +301,29 @@ async def _execute_chat_task(
        # 执行指令
        result = await instruction_executor.execute(instruction, ctx)

+        # 存储对话历史
+        if conversation_id:
+            try:
+                # 存储用户消息
+                await mongo_client.insert_conversation(
+                    conversation_id=conversation_id,
+                    role="user",
+                    content=instruction,
+                    intent=result.get("intent", "unknown")
+                )
+                # 存储助手回复
+                response_content = result.get("message", "")
+                if response_content:
+                    await mongo_client.insert_conversation(
+                        conversation_id=conversation_id,
+                        role="assistant",
+                        content=response_content,
+                        intent=result.get("intent", "unknown")
+                    )
+                logger.info(f"已存储对话历史: conversation_id={conversation_id}")
+            except Exception as e:
+                logger.error(f"存储对话历史失败: {e}")
+
        # 根据意图类型添加友好的响应消息
        response_messages = {
            "extract": f"已提取 {len(result.get('extracted_data', {}))} 个字段的数据",