From e5d4724e822004f6711e0aea69c870a72bd846b2 Mon Sep 17 00:00:00 2001
From: dj <431634905@qq.com>
Date: Wed, 15 Apr 2026 23:32:55 +0800
Subject: [PATCH] [Smart assistant enhancements]
 - Conversation history management: new MongoDB conversations collection
   storing user/AI dialogue context, supporting multi-turn intent continuation
 - Conversation history API (conversation.py): GET/DELETE a conversation's
   history, list all sessions
 - Intent parsing: history-aware intent recognition for more accurate
   context understanding
 - Field extraction: supports natural-language patterns such as
   "提取文档中的医院数量", intelligently stripping the "文档中的" prefix
 - Document comparison: extracts filenames from the instruction and matches
   them against source_docs exactly, supporting "对比A和B两个文档"
 - Document summarization: uses the LLM to generate a real AI summary instead
   of returning a raw document preview
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Word template form-filling core features]
- Word template field generation: after a blank Word template is uploaded, field names are AI-generated from the source documents' (Excel/Word/TXT/MD) content
- Word template filling (_fill_docx): writes extracted data into the Word template's tables, supporting exact matching, fuzzy matching, and appending new rows
- Data polishing (_polish_word_filled_data): the LLM aggregates multi-row Excel data (totals/averages/extremes) into professional natural-language descriptions
- Paragraph-style output: 📌 field name + value paragraph + separator line (grey rule), for better readability
- Export chain wired through: fill_template returns filled_file_path, and export returns the already-filled Word file directly

[Other fixes]
- Fix the Windows file-lock issue on Word export: NamedTemporaryFile replaced with mkstemp + close
- Fix illegal "box" characters in Word: clean_text extended to strip \uFFFD, □ and other Unicode replacement and zero-width characters
- Fix the document-comparison "at least 2 documents required" error: filenames named in the instruction are matched first instead of just taking the first 2 documents
- Fix the hard-coded export format: docx/xlsx is now auto-detected
- Docx parser gains a fallback parsing method and more complete paragraph/table/heading extraction
- RAG service adds MySQL data-source support
---
 backend/app/api/__init__.py                   |   2 +
 backend/app/api/endpoints/conversation.py     |  98 ++++
 backend/app/api/endpoints/documents.py        | 172 +++--
 backend/app/api/endpoints/instruction.py      |  39 +-
 backend/app/api/endpoints/templates.py        |  42 +-
 backend/app/core/database/mongodb.py          | 130 ++++-
 .../app/core/document_parser/docx_parser.py   | 334 ++++++++---
 backend/app/instruction/executor.py           | 313 ++++++++--
 backend/app/instruction/intent_parser.py      |  78 ++-
 backend/app/services/excel_storage_service.py |   5 +-
 backend/app/services/rag_service.py           |  70 ++-
backend/app/services/table_rag_service.py | 18 +- backend/app/services/template_fill_service.py | 534 +++++++++++++++++- backend/requirements.txt | 2 + frontend/src/db/backend-api.ts | 194 +++++-- frontend/src/pages/Dashboard.tsx | 34 +- frontend/src/pages/Documents.tsx | 407 ++++++++++--- frontend/src/pages/InstructionChat.tsx | 102 ++-- frontend/src/pages/TemplateFill.tsx | 18 +- 19 files changed, 2185 insertions(+), 407 deletions(-) create mode 100644 backend/app/api/endpoints/conversation.py diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index 1a7ced4..a0c7f7a 100644 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -14,6 +14,7 @@ from app.api.endpoints import ( analysis_charts, health, instruction, # 智能指令 + conversation, # 对话历史 ) # 创建主路由 @@ -31,3 +32,4 @@ api_router.include_router(ai_analyze.router) # AI分析 api_router.include_router(visualization.router) # 可视化 api_router.include_router(analysis_charts.router) # 分析图表 api_router.include_router(instruction.router) # 智能指令 +api_router.include_router(conversation.router) # 对话历史 diff --git a/backend/app/api/endpoints/conversation.py b/backend/app/api/endpoints/conversation.py new file mode 100644 index 0000000..ca84d05 --- /dev/null +++ b/backend/app/api/endpoints/conversation.py @@ -0,0 +1,98 @@ +""" +对话历史 API 接口 + +提供对话历史的存储和查询功能 +""" +import logging +from typing import Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +from app.core.database import mongodb + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/conversation", tags=["对话历史"]) + + +# ==================== 请求/响应模型 ==================== + +class ConversationMessage(BaseModel): + role: str + content: str + intent: Optional[str] = None + + +class ConversationHistoryResponse(BaseModel): + success: bool + messages: list + + +class ConversationListResponse(BaseModel): + success: bool + conversations: list + + +# ==================== 接口 ==================== + +@router.get("/{conversation_id}/history", response_model=ConversationHistoryResponse) +async def get_conversation_history(conversation_id: str, limit: int = 20): + """ + 获取对话历史 + + Args: + conversation_id: 对话会话ID + limit: 返回消息数量(默认20条) + """ + try: + messages = await mongodb.get_conversation_history(conversation_id, limit=limit) + return ConversationHistoryResponse( + success=True, + messages=messages + ) + except Exception as e: + logger.error(f"获取对话历史失败: {e}") + return ConversationHistoryResponse( + success=False, + messages=[] + ) + + +@router.delete("/{conversation_id}") +async def delete_conversation(conversation_id: str): + """ + 删除对话会话 + + Args: + conversation_id: 对话会话ID + """ + try: + success = await mongodb.delete_conversation(conversation_id) + return {"success": success} + except Exception as e: + logger.error(f"删除对话失败: {e}") + return {"success": False, "error": str(e)} + + +@router.get("/all", response_model=ConversationListResponse) +async def list_conversations(limit: int = 50, skip: int = 0): + """ + 获取会话列表 + + Args: + limit: 返回数量 + skip: 跳过数量 + """ + try: + conversations = await mongodb.list_conversations(limit=limit, skip=skip) + return ConversationListResponse( + success=True, + conversations=conversations + ) + except Exception as e: + logger.error(f"获取会话列表失败: {e}") + return ConversationListResponse( + success=False, + conversations=[] + ) \ No newline at end of file diff --git a/backend/app/api/endpoints/documents.py b/backend/app/api/endpoints/documents.py index e8e206a..221e059 100644 --- 
a/backend/app/api/endpoints/documents.py +++ b/backend/app/api/endpoints/documents.py @@ -4,6 +4,7 @@ 支持多格式文档(docx/xlsx/md/txt)上传、解析、存储和RAG索引 集成 Excel 存储和 AI 生成字段描述 """ +import asyncio import logging import uuid from typing import List, Optional @@ -258,6 +259,7 @@ async def process_document( ) # 如果是 Excel,存储到 MySQL + AI生成描述 + RAG索引 + mysql_table_name = None if doc_type in ["xlsx", "xls"]: await update_task_status( task_id, status="processing", @@ -265,17 +267,29 @@ async def process_document( ) try: - # 使用 TableRAG 服务完成建表和RAG索引 + # 使用 TableRAG 服务存储到 MySQL(跳过 RAG 索引以提升速度) logger.info(f"开始存储Excel到MySQL: {original_filename}, file_path: {file_path}") rag_result = await table_rag_service.build_table_rag_index( file_path=file_path, filename=original_filename, sheet_name=parse_options.get("sheet_name"), - header_row=parse_options.get("header_row", 0) + header_row=parse_options.get("header_row", 0), + skip_rag_index=True # 跳过 AI 字段描述生成和索引 ) if rag_result.get("success"): - logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {rag_result.get('table_name')}") + mysql_table_name = rag_result.get('table_name') + logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {mysql_table_name}") + # 更新 MongoDB 中的 metadata,记录 MySQL 表名 + try: + doc = await mongodb.get_document(doc_id) + if doc: + metadata = doc.get("metadata", {}) + metadata["mysql_table_name"] = mysql_table_name + await mongodb.update_document_metadata(doc_id, metadata) + logger.info(f"已更新 MongoDB 文档的 mysql_table_name: {mysql_table_name}") + except Exception as update_err: + logger.warning(f"更新 MongoDB mysql_table_name 失败: {update_err}") else: logger.error(f"RAG索引构建失败: {rag_result.get('error')}") except Exception as e: @@ -283,17 +297,16 @@ async def process_document( else: # 非结构化文档 - await update_task_status( - task_id, status="processing", - progress=60, message="正在建立索引" - ) - - # 如果文档中有表格数据,提取并存储到 MySQL + RAG structured_data = result.data.get("structured_data", {}) tables = structured_data.get("tables", []) + # 如果文档中有表格数据,提取并存储到 MySQL(不需要 RAG 索引) if tables: - # 对每个表格建立 MySQL 表和 RAG 索引 + await update_task_status( + task_id, status="processing", + progress=60, message="正在存储表格数据" + ) + # 对每个表格建立 MySQL 表(跳过 RAG 索引,速度更快) for table_info in tables: await table_rag_service.index_document_table( doc_id=doc_id, @@ -302,8 +315,14 @@ async def process_document( source_doc_type=doc_type ) - # 同时对文档内容建立 RAG 索引 - await index_document_to_rag(doc_id, original_filename, result, doc_type) + # 对文档内容建立 RAG 索引(非结构化文本需要语义搜索) + content = result.data.get("content", "") + if content and len(content) > 50: # 只有内容足够长才建立索引 + await update_task_status( + task_id, status="processing", + progress=80, message="正在建立语义索引" + ) + await index_document_to_rag(doc_id, original_filename, result, doc_type) # 完成 await update_task_status( @@ -328,72 +347,95 @@ async def process_document( async def process_documents_batch(task_id: str, files: List[dict]): - """批量处理文档""" + """批量并行处理文档""" try: await update_task_status( task_id, status="processing", - progress=0, message="开始批量处理" + progress=0, message=f"开始批量处理 {len(files)} 个文档", + result={"total": len(files), "files": []} ) - results = [] - for i, file_info in enumerate(files): + async def process_single_file(file_info: dict, index: int) -> dict: + """处理单个文件""" + filename = file_info["filename"] try: + # 解析文档 parser = ParserFactory.get_parser(file_info["path"]) result = parser.parse(file_info["path"]) - if result.success: - doc_id = await mongodb.insert_document( - doc_type=file_info["ext"], - content=result.data.get("content", 
""), - metadata={ - **result.metadata, - "original_filename": file_info["filename"], - "file_path": file_info["path"] - }, - structured_data=result.data.get("structured_data") + if not result.success: + return {"index": index, "filename": filename, "success": False, "error": result.error or "解析失败"} + + # 存储到 MongoDB + doc_id = await mongodb.insert_document( + doc_type=file_info["ext"], + content=result.data.get("content", ""), + metadata={ + **result.metadata, + "original_filename": filename, + "file_path": file_info["path"] + }, + structured_data=result.data.get("structured_data") + ) + + # Excel 处理 + if file_info["ext"] in ["xlsx", "xls"]: + await table_rag_service.build_table_rag_index( + file_path=file_info["path"], + filename=filename, + skip_rag_index=True # 跳过 AI 字段描述生成和索引 ) - - # Excel 处理 - if file_info["ext"] in ["xlsx", "xls"]: - await table_rag_service.build_table_rag_index( - file_path=file_info["path"], - filename=file_info["filename"] - ) - else: - # 非结构化文档:处理其中的表格 + 内容索引 - structured_data = result.data.get("structured_data", {}) - tables = structured_data.get("tables", []) - - if tables: - for table_info in tables: - await table_rag_service.index_document_table( - doc_id=doc_id, - filename=file_info["filename"], - table_data=table_info, - source_doc_type=file_info["ext"] - ) - - await index_document_to_rag(doc_id, file_info["filename"], result, file_info["ext"]) - - results.append({"filename": file_info["filename"], "doc_id": doc_id, "success": True}) else: - results.append({"filename": file_info["filename"], "success": False, "error": result.error}) + # 非结构化文档 + structured_data = result.data.get("structured_data", {}) + tables = structured_data.get("tables", []) + + # 表格数据直接存 MySQL(跳过 RAG 索引) + if tables: + for table_info in tables: + await table_rag_service.index_document_table( + doc_id=doc_id, + filename=filename, + table_data=table_info, + source_doc_type=file_info["ext"] + ) + + # 只有内容足够长才建立语义索引 + content = result.data.get("content", "") + if content and len(content) > 50: + await index_document_to_rag(doc_id, filename, result, file_info["ext"]) + + return {"index": index, "filename": filename, "doc_id": doc_id, "success": True} except Exception as e: - results.append({"filename": file_info["filename"], "success": False, "error": str(e)}) + logger.error(f"处理文件 {filename} 失败: {e}") + return {"index": index, "filename": filename, "success": False, "error": str(e)} - progress = int((i + 1) / len(files) * 100) - await update_task_status( - task_id, status="processing", - progress=progress, message=f"已处理 {i+1}/{len(files)}" - ) + # 并行处理所有文档 + tasks = [process_single_file(f, i) for i, f in enumerate(files)] + results = await asyncio.gather(*tasks) + # 按原始顺序排序 + results.sort(key=lambda x: x["index"]) + + # 统计成功/失败数量 + success_count = sum(1 for r in results if r["success"]) + fail_count = len(results) - success_count + + # 更新最终状态 await update_task_status( task_id, status="success", - progress=100, message="批量处理完成", - result={"results": results} + progress=100, message=f"批量处理完成: {success_count} 成功, {fail_count} 失败", + result={ + "total": len(files), + "success": success_count, + "failure": fail_count, + "results": results + } ) + logger.info(f"批量处理完成: {success_count}/{len(files)} 成功") + except Exception as e: logger.error(f"批量处理失败: {str(e)}") await update_task_status( @@ -404,20 +446,20 @@ async def process_documents_batch(task_id: str, files: List[dict]): async def index_document_to_rag(doc_id: str, filename: str, result: ParseResult, doc_type: str): - """将非结构化文档索引到 
RAG(使用分块索引)""" + """将非结构化文档索引到 RAG(使用分块索引,异步执行)""" try: content = result.data.get("content", "") if content: - # 将完整内容传递给 RAG 服务自动分块索引 - rag_service.index_document_content( + # 使用异步方法索引,避免阻塞事件循环 + await rag_service.index_document_content_async( doc_id=doc_id, - content=content, # 传递完整内容,由 RAG 服务自动分块 + content=content, metadata={ "filename": filename, "doc_type": doc_type }, - chunk_size=500, # 每块 500 字符 - chunk_overlap=50 # 块之间 50 字符重叠 + chunk_size=1000, # 每块 1000 字符,提升速度 + chunk_overlap=100 # 块之间 100 字符重叠 ) logger.info(f"RAG 索引完成: {filename}, doc_id={doc_id}") except Exception as e: diff --git a/backend/app/api/endpoints/instruction.py b/backend/app/api/endpoints/instruction.py index 751e518..52b3c42 100644 --- a/backend/app/api/endpoints/instruction.py +++ b/backend/app/api/endpoints/instruction.py @@ -25,6 +25,7 @@ class InstructionRequest(BaseModel): instruction: str doc_ids: Optional[List[str]] = None # 关联的文档 ID 列表 context: Optional[Dict[str, Any]] = None # 额外上下文 + conversation_id: Optional[str] = None # 对话会话ID,用于关联历史记录 class IntentRecognitionResponse(BaseModel): @@ -240,7 +241,8 @@ async def instruction_chat( task_id=task_id, instruction=request.instruction, doc_ids=request.doc_ids, - context=request.context + context=request.context, + conversation_id=request.conversation_id ) return { @@ -251,14 +253,15 @@ async def instruction_chat( } # 同步模式:等待执行完成 - return await _execute_chat_task(task_id, request.instruction, request.doc_ids, request.context) + return await _execute_chat_task(task_id, request.instruction, request.doc_ids, request.context, request.conversation_id) async def _execute_chat_task( task_id: str, instruction: str, doc_ids: Optional[List[str]], - context: Optional[Dict[str, Any]] + context: Optional[Dict[str, Any]], + conversation_id: Optional[str] = None ): """执行指令对话的后台任务""" from app.core.database import mongodb as mongo_client @@ -278,6 +281,13 @@ async def _execute_chat_task( # 构建上下文 ctx: Dict[str, Any] = context or {} + # 获取对话历史 + if conversation_id: + history = await mongo_client.get_conversation_history(conversation_id, limit=20) + if history: + ctx["conversation_history"] = history + logger.info(f"加载对话历史: conversation_id={conversation_id}, 消息数={len(history)}") + # 获取关联文档 if doc_ids: docs = [] @@ -291,6 +301,29 @@ async def _execute_chat_task( # 执行指令 result = await instruction_executor.execute(instruction, ctx) + # 存储对话历史 + if conversation_id: + try: + # 存储用户消息 + await mongo_client.insert_conversation( + conversation_id=conversation_id, + role="user", + content=instruction, + intent=result.get("intent", "unknown") + ) + # 存储助手回复 + response_content = result.get("message", "") + if response_content: + await mongo_client.insert_conversation( + conversation_id=conversation_id, + role="assistant", + content=response_content, + intent=result.get("intent", "unknown") + ) + logger.info(f"已存储对话历史: conversation_id={conversation_id}") + except Exception as e: + logger.error(f"存储对话历史失败: {e}") + # 根据意图类型添加友好的响应消息 response_messages = { "extract": f"已提取 {len(result.get('extracted_data', {}))} 个字段的数据", diff --git a/backend/app/api/endpoints/templates.py b/backend/app/api/endpoints/templates.py index 54b3a73..50cae04 100644 --- a/backend/app/api/endpoints/templates.py +++ b/backend/app/api/endpoints/templates.py @@ -87,6 +87,7 @@ class ExportRequest(BaseModel): template_id: str filled_data: dict format: str = "xlsx" # xlsx 或 docx + filled_file_path: Optional[str] = None # 已填写的 Word 文件路径(可选) # ==================== 接口实现 ==================== @@ -541,7 +542,7 @@ async def 
export_filled_template(
     if request.format == "xlsx":
         return await _export_to_excel(request.filled_data, request.template_id)
     elif request.format == "docx":
-        return await _export_to_word(request.filled_data, request.template_id)
+        return await _export_to_word(request.filled_data, request.template_id, request.filled_file_path)
     else:
         raise HTTPException(
             status_code=400,
@@ -608,11 +609,12 @@ async def _export_to_excel(filled_data: dict, template_id: str) -> StreamingResp
     )
 
 
-async def _export_to_word(filled_data: dict, template_id: str) -> StreamingResponse:
+async def _export_to_word(filled_data: dict, template_id: str, filled_file_path: Optional[str] = None) -> StreamingResponse:
     """导出为 Word 格式"""
     import re
     import tempfile
     import os
+    import urllib.parse
     from docx import Document
     from docx.shared import Pt, RGBColor
     from docx.enum.text import WD_ALIGN_PARAGRAPH
@@ -623,12 +625,32 @@
             return ""
         # 移除控制字符
         text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
+        # 转义 XML 特殊字符以防破坏文档结构
+        text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
         return text.strip()
 
+    tmp_path = None
     try:
-        # 先保存到临时文件,再读取到内存,确保文档完整性
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as tmp_file:
-            tmp_path = tmp_file.name
+        # 如果有已填写的文件(通过 _fill_docx 填写了模板单元格),直接返回该文件
+        if filled_file_path and os.path.exists(filled_file_path):
+            filename = os.path.basename(filled_file_path)
+            with open(filled_file_path, 'rb') as f:
+                file_content = f.read()
+            output = io.BytesIO(file_content)
+            encoded_filename = urllib.parse.quote(filename)
+            return StreamingResponse(
+                output,
+                media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+                headers={
+                    "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
+                    "Content-Length": str(len(file_content))
+                }
+            )
+
+        # 没有已填写文件,创建新的 Word 文档(表格形式)
+        # 创建临时文件(立即关闭句柄,避免 Windows 文件锁问题)
+        tmp_fd, tmp_path = tempfile.mkstemp(suffix='.docx')
+        os.close(tmp_fd)  # 立即关闭得到的 fd,让 docx 可以写入
 
         doc = Document()
         doc.add_heading('填写结果', level=1)
@@ -670,19 +692,23 @@
     finally:
         # 清理临时文件
-        if os.path.exists(tmp_path):
+        if tmp_path and os.path.exists(tmp_path):
             try:
                 os.unlink(tmp_path)
-            except:
+            except Exception:
                 pass
 
     output = io.BytesIO(file_content)
     filename = "filled_template.docx"
+    encoded_filename = urllib.parse.quote(filename)
 
     return StreamingResponse(
         output,
         media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-        headers={"Content-Disposition": f"attachment; filename*=UTF-8''{filename}"}
+        headers={
+            "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
+            "Content-Length": str(len(file_content))
+        }
     )
 
diff --git a/backend/app/core/database/mongodb.py b/backend/app/core/database/mongodb.py
index 90ddb44..ebef9e4 100644
--- a/backend/app/core/database/mongodb.py
+++ b/backend/app/core/database/mongodb.py
@@ -64,6 +64,11 @@ class MongoDB:
         """任务集合 - 存储任务历史记录"""
         return self.db["tasks"]
 
+    @property
+    def conversations(self):
+        """对话集合 - 存储对话历史记录"""
+        return self.db["conversations"]
+
     # ==================== 文档操作 ====================
 
     async def insert_document(
@@ -117,14 +122,20 @@ class MongoDB:
         搜索文档
 
         Args:
-            query: 搜索关键词
+            query: 搜索关键词(支持文件名和内容搜索)
             doc_type: 文档类型过滤
             limit: 返回数量
 
         Returns:
             文档列表
         """
-        filter_query = {"content": {"$regex": query}}
+        filter_query = {
+            "$or": [
+                {"content": {"$regex": query, "$options": "i"}},
+
{"metadata.original_filename": {"$regex": query, "$options": "i"}}, + {"metadata.filename": {"$regex": query, "$options": "i"}}, + ] + } if doc_type: filter_query["doc_type"] = doc_type @@ -141,6 +152,15 @@ class MongoDB: result = await self.documents.delete_one({"_id": ObjectId(doc_id)}) return result.deleted_count > 0 + async def update_document_metadata(self, doc_id: str, metadata: Dict[str, Any]) -> bool: + """更新文档 metadata 字段""" + from bson import ObjectId + result = await self.documents.update_one( + {"_id": ObjectId(doc_id)}, + {"$set": {"metadata": metadata}} + ) + return result.modified_count > 0 + # ==================== RAG 索引操作 ==================== async def insert_rag_entry( @@ -251,6 +271,10 @@ class MongoDB: await self.tasks.create_index("task_id", unique=True) await self.tasks.create_index("created_at") + # 对话集合索引 + await self.conversations.create_index("conversation_id") + await self.conversations.create_index("created_at") + logger.info("MongoDB 索引创建完成") # ==================== 任务历史操作 ==================== @@ -369,6 +393,108 @@ class MongoDB: result = await self.tasks.delete_one({"task_id": task_id}) return result.deleted_count > 0 + # ==================== 对话历史操作 ==================== + + async def insert_conversation( + self, + conversation_id: str, + role: str, + content: str, + intent: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> str: + """ + 插入对话记录 + + Args: + conversation_id: 对话会话ID + role: 角色 (user/assistant) + content: 对话内容 + intent: 意图类型 + metadata: 额外元数据 + + Returns: + 插入文档的ID + """ + message = { + "conversation_id": conversation_id, + "role": role, + "content": content, + "intent": intent, + "metadata": metadata or {}, + "created_at": datetime.utcnow(), + } + result = await self.conversations.insert_one(message) + return str(result.inserted_id) + + async def get_conversation_history( + self, + conversation_id: str, + limit: int = 20, + ) -> List[Dict[str, Any]]: + """ + 获取对话历史 + + Args: + conversation_id: 对话会话ID + limit: 返回消息数量 + + Returns: + 对话消息列表 + """ + cursor = self.conversations.find( + {"conversation_id": conversation_id} + ).sort("created_at", 1).limit(limit) + + messages = [] + async for msg in cursor: + msg["_id"] = str(msg["_id"]) + if msg.get("created_at"): + msg["created_at"] = msg["created_at"].isoformat() + messages.append(msg) + return messages + + async def delete_conversation(self, conversation_id: str) -> bool: + """删除对话会话""" + result = await self.conversations.delete_many({"conversation_id": conversation_id}) + return result.deleted_count > 0 + + async def list_conversations( + self, + limit: int = 50, + skip: int = 0, + ) -> List[Dict[str, Any]]: + """ + 获取会话列表(按最近一条消息排序) + + Args: + limit: 返回数量 + skip: 跳过数量 + + Returns: + 会话列表 + """ + # 使用 aggregation 获取每个会话的最新一条消息 + pipeline = [ + {"$sort": {"created_at": -1}}, + {"$group": { + "_id": "$conversation_id", + "last_message": {"$first": "$$ROOT"}, + }}, + {"$replaceRoot": {"newRoot": "$last_message"}}, + {"$sort": {"created_at": -1}}, + {"$skip": skip}, + {"$limit": limit}, + ] + + conversations = [] + async for doc in self.conversations.aggregate(pipeline): + doc["_id"] = str(doc["_id"]) + if doc.get("created_at"): + doc["created_at"] = doc["created_at"].isoformat() + conversations.append(doc) + return conversations + # ==================== 全局单例 ==================== diff --git a/backend/app/core/document_parser/docx_parser.py b/backend/app/core/document_parser/docx_parser.py index db79512..e2bcb0e 100644 --- a/backend/app/core/document_parser/docx_parser.py +++ 
b/backend/app/core/document_parser/docx_parser.py @@ -44,6 +44,22 @@ class DocxParser(BaseParser): error=f"文件不存在: {file_path}" ) + # 尝试使用 python-docx 解析,失败则使用备用方法 + try: + return self._parse_with_docx(path) + except Exception as e: + logger.warning(f"python-docx 解析失败,使用备用方法: {e}") + try: + return self._parse_fallback(path) + except Exception as fallback_error: + logger.error(f"备用解析方法也失败: {fallback_error}") + return ParseResult( + success=False, + error=f"解析 Word 文档失败: {str(e)}" + ) + + def _parse_with_docx(self, path: Path) -> ParseResult: + """使用 python-docx 解析文档""" # 检查文件扩展名 if path.suffix.lower() not in self.supported_extensions: return ParseResult( @@ -51,98 +67,177 @@ class DocxParser(BaseParser): error=f"不支持的文件类型: {path.suffix}" ) + # 读取 Word 文档 + doc = Document(path) + + # 提取文本内容 + paragraphs = [] + for para in doc.paragraphs: + if para.text.strip(): + paragraphs.append({ + "text": para.text, + "style": str(para.style.name) if para.style else "Normal" + }) + + # 提取段落纯文本(用于 AI 解析) + paragraphs_text = [p["text"] for p in paragraphs if p["text"].strip()] + + # 提取表格内容 + tables_data = [] + for i, table in enumerate(doc.tables): + table_rows = [] + for row in table.rows: + row_data = [cell.text.strip() for cell in row.cells] + table_rows.append(row_data) + + if table_rows: + tables_data.append({ + "table_index": i, + "rows": table_rows, + "row_count": len(table_rows), + "column_count": len(table_rows[0]) if table_rows else 0 + }) + + # 提取图片/嵌入式对象信息 + images_info = self._extract_images_info(doc, path) + + # 合并所有文本(包括图片描述) + full_text_parts = [] + full_text_parts.append("【文档正文】") + full_text_parts.extend(paragraphs_text) + + if tables_data: + full_text_parts.append("\n【文档表格】") + for idx, table in enumerate(tables_data): + full_text_parts.append(f"--- 表格 {idx + 1} ---") + for row in table["rows"]: + full_text_parts.append(" | ".join(str(cell) for cell in row)) + + if images_info.get("image_count", 0) > 0: + full_text_parts.append(f"\n【文档图片】文档包含 {images_info['image_count']} 张图片/图表") + + full_text = "\n".join(full_text_parts) + + # 构建元数据 + metadata = { + "filename": path.name, + "extension": path.suffix.lower(), + "paragraph_count": len(paragraphs), + "table_count": len(tables_data), + "image_count": images_info.get("image_count", 0) + } + + return ParseResult( + success=True, + data={ + "content": full_text, + "paragraphs": paragraphs, + "paragraphs_with_style": paragraphs, + "tables": tables_data, + "images": images_info + }, + metadata=metadata + ) + + def _parse_fallback(self, path: Path) -> ParseResult: + """备用解析方法:直接解析 docx 的 XML 结构""" + import zipfile + from xml.etree import ElementTree as ET + try: - # 读取 Word 文档 - doc = Document(file_path) + with zipfile.ZipFile(path, 'r') as zf: + # 读取 document.xml + if 'word/document.xml' not in zf.namelist(): + return ParseResult(success=False, error="无效的 docx 文件格式") - # 提取文本内容 - paragraphs = [] - for para in doc.paragraphs: - if para.text.strip(): - paragraphs.append({ - "text": para.text, - "style": str(para.style.name) if para.style else "Normal" + xml_content = zf.read('word/document.xml') + root = ET.fromstring(xml_content) + + # 命名空间 + namespaces = { + 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' + } + + paragraphs = [] + tables = [] + current_table = [] + + for elem in root.iter(): + if elem.tag.endswith('}p'): # 段落 + text_parts = [] + for t in elem.iter(): + if t.tag.endswith('}t') and t.text: + text_parts.append(t.text) + text = ''.join(text_parts).strip() + if text: + paragraphs.append({'text': text, 'style': 
'Normal'}) + elif elem.tag.endswith('}tr'): # 表格行 + row_data = [] + for tc in elem.iter(): + if tc.tag.endswith('}tc'): # 单元格 + cell_text = [] + for t in tc.iter(): + if t.tag.endswith('}t') and t.text: + cell_text.append(t.text) + row_data.append(''.join(cell_text).strip()) + if row_data: + current_table.append(row_data) + else: + # 表格结束,保存 + if current_table: + tables.append({ + 'table_index': len(tables), + 'rows': current_table, + 'row_count': len(current_table), + 'column_count': len(current_table[0]) if current_table else 0 + }) + current_table = [] + + # 保存最后一张表格 + if current_table: + tables.append({ + 'table_index': len(tables), + 'rows': current_table, + 'row_count': len(current_table), + 'column_count': len(current_table[0]) if current_table else 0 }) - # 提取段落纯文本(用于 AI 解析) - paragraphs_text = [p["text"] for p in paragraphs if p["text"].strip()] + # 构建文本 + paragraphs_text = [p["text"] for p in paragraphs] + full_text_parts = ["【文档正文】"] + paragraphs_text - # 提取表格内容 - tables_data = [] - for i, table in enumerate(doc.tables): - table_rows = [] - for row in table.rows: - row_data = [cell.text.strip() for cell in row.cells] - table_rows.append(row_data) + if tables: + full_text_parts.append("\n【文档表格】") + for idx, table in enumerate(tables): + full_text_parts.append(f"--- 表格 {idx + 1} ---") + for row in table["rows"]: + full_text_parts.append(" | ".join(str(cell) for cell in row)) - if table_rows: - tables_data.append({ - "table_index": i, - "rows": table_rows, - "row_count": len(table_rows), - "column_count": len(table_rows[0]) if table_rows else 0 - }) + full_text = "\n".join(full_text_parts) - # 提取图片/嵌入式对象信息 - images_info = self._extract_images_info(doc, path) - - # 合并所有文本(包括图片描述) - full_text_parts = [] - full_text_parts.append("【文档正文】") - full_text_parts.extend(paragraphs_text) - - if tables_data: - full_text_parts.append("\n【文档表格】") - for idx, table in enumerate(tables_data): - full_text_parts.append(f"--- 表格 {idx + 1} ---") - for row in table["rows"]: - full_text_parts.append(" | ".join(str(cell) for cell in row)) - - if images_info.get("image_count", 0) > 0: - full_text_parts.append(f"\n【文档图片】文档包含 {images_info['image_count']} 张图片/图表") - - full_text = "\n".join(full_text_parts) - - # 构建元数据 - metadata = { - "filename": path.name, - "extension": path.suffix.lower(), - "file_size": path.stat().st_size, - "paragraph_count": len(paragraphs), - "table_count": len(tables_data), - "word_count": len(full_text), - "char_count": len(full_text.replace("\n", "")), - "has_tables": len(tables_data) > 0, - "has_images": images_info.get("image_count", 0) > 0, - "image_count": images_info.get("image_count", 0) - } - - # 返回结果 - return ParseResult( - success=True, - data={ - "content": full_text, - "paragraphs": paragraphs_text, - "paragraphs_with_style": paragraphs, - "tables": tables_data, - "images": images_info, - "word_count": len(full_text), - "structured_data": { + return ParseResult( + success=True, + data={ + "content": full_text, "paragraphs": paragraphs, - "paragraphs_text": paragraphs_text, - "tables": tables_data, - "images": images_info + "paragraphs_with_style": paragraphs, + "tables": tables, + "images": {"image_count": 0, "descriptions": []} + }, + metadata={ + "filename": path.name, + "extension": path.suffix.lower(), + "paragraph_count": len(paragraphs), + "table_count": len(tables), + "image_count": 0, + "parse_method": "fallback_xml" } - }, - metadata=metadata - ) + ) + except zipfile.BadZipFile: + return ParseResult(success=False, error="无效的 ZIP/文档文件") except Exception as e: - 
logger.error(f"解析 Word 文档失败: {str(e)}") - return ParseResult( - success=False, - error=f"解析 Word 文档失败: {str(e)}" - ) + return ParseResult(success=False, error=f"备用解析失败: {str(e)}") def extract_images_as_base64(self, file_path: str) -> List[Dict[str, str]]: """ @@ -197,6 +292,83 @@ class DocxParser(BaseParser): logger.info(f"共提取 {len(images)} 张图片") return images + def extract_text_from_images(self, file_path: str, lang: str = 'chi_sim+eng') -> Dict[str, Any]: + """ + 对 Word 文档中的图片进行 OCR 文字识别 + + Args: + file_path: Word 文件路径 + lang: Tesseract 语言代码,默认简体中文+英文 (chi_sim+eng) + + Returns: + 包含识别结果的字典 + """ + import zipfile + from io import BytesIO + from PIL import Image + + try: + import pytesseract + except ImportError: + logger.warning("pytesseract 未安装,OCR 功能不可用") + return { + "success": False, + "error": "pytesseract 未安装,请运行: pip install pytesseract", + "image_count": 0, + "extracted_text": [] + } + + results = { + "success": True, + "image_count": 0, + "extracted_text": [], + "total_chars": 0 + } + + try: + with zipfile.ZipFile(file_path, 'r') as zf: + # 查找 word/media 目录下的图片文件 + media_files = [f for f in zf.namelist() if f.startswith('word/media/')] + + for idx, filename in enumerate(media_files): + ext = filename.split('.')[-1].lower() + if ext not in ['png', 'jpg', 'jpeg', 'gif', 'bmp']: + continue + + try: + # 读取图片数据 + image_data = zf.read(filename) + image = Image.open(BytesIO(image_data)) + + # 使用 Tesseract OCR 提取文字 + text = pytesseract.image_to_string(image, lang=lang) + text = text.strip() + + if text: + results["extracted_text"].append({ + "image_index": idx, + "filename": filename, + "text": text, + "char_count": len(text) + }) + results["total_chars"] += len(text) + + logger.info(f"图片 {filename} OCR 识别完成,提取 {len(text)} 字符") + + except Exception as e: + logger.warning(f"图片 {filename} OCR 识别失败: {str(e)}") + + results["image_count"] = len(results["extracted_text"]) + + except zipfile.BadZipFile: + results["success"] = False + results["error"] = "无效的 Word 文档文件" + except Exception as e: + results["success"] = False + results["error"] = f"OCR 处理失败: {str(e)}" + + return results + def extract_key_sentences(self, text: str, max_sentences: int = 10) -> List[str]: """ 从文本中提取关键句子 diff --git a/backend/app/instruction/executor.py b/backend/app/instruction/executor.py index c7a05c7..1f3d84a 100644 --- a/backend/app/instruction/executor.py +++ b/backend/app/instruction/executor.py @@ -5,9 +5,10 @@ """ import logging import json +import re from typing import Any, Dict, List, Optional -from app.services.template_fill_service import template_fill_service +from app.services.template_fill_service import template_fill_service, TemplateField from app.services.rag_service import rag_service from app.services.markdown_ai_service import markdown_ai_service from app.core.database import mongodb @@ -15,6 +16,31 @@ from app.core.database import mongodb logger = logging.getLogger(__name__) +def _extract_filenames_from_text(text: str) -> List[str]: + """ + 从指令文本中提取文件名列表。 + + 智能处理用'和'/'与'/'、分隔的多个文件名(尤其是带年号的统计公报)。 + """ + # 先去掉"对比这两个文档"等引导语,只保留文件名部分 + text = re.sub(r'^(?:对比|比较)这两个?文档[的差异]?[::]?', '', text).strip() + text = re.sub(r'两个文档.*$', '', text).strip() + if not text: + return [] + + # 直接查找所有带扩展名的文件名模式 + results = [] + for m in re.finditer(r'[^\s,。!?、和与]+(?=\.(?:docx|xlsx|md|txt))', text): + start = m.start() + ext_match = re.search(r'\.(?:docx|xlsx|md|txt)', text[m.end():]) + if ext_match: + fn = text[start:m.end() + ext_match.end()] + if fn: + results.append(fn) + + return results + + class 
InstructionExecutor: """指令执行器""" @@ -41,9 +67,10 @@ class InstructionExecutor: self.intent_parser = intent_parser context = context or {} + context["instruction"] = instruction # 保存原始指令以便后续使用 - # 解析意图 - intent, params = await self.intent_parser.parse(instruction) + # 解析意图(传递对话历史上下文) + intent, params = await self.intent_parser.parse(instruction, context) # 根据意图类型执行相应操作 if intent == "extract": @@ -72,18 +99,48 @@ class InstructionExecutor: async def _execute_extract(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: """执行信息提取""" try: - target_fields = params.get("field_refs", []) + # target_fields 来自意图解析,field_refs 来自引号/字段关键词匹配 + target_fields = params.get("target_fields", []) or params.get("field_refs", []) doc_ids = params.get("document_refs", []) + instruction_text = context.get("instruction", "") + + # 如果没有指定文档,尝试按文件名精确搜索 + if not doc_ids or "all_docs" in doc_ids: + if instruction_text: + import re + # 提取引号内的内容或文件名 + filename_match = re.search(r'["""]([^"""]+)["""]', instruction_text) + if filename_match: + search_term = filename_match.group(1) + else: + match = re.search(r'([^\s]+\.(?:docx|xlsx|md|txt))', instruction_text) + search_term = match.group(1) if match else None + + if search_term: + logger.info(f"提取时搜索文档: {search_term}") + searched_docs = await mongodb.search_documents(search_term, limit=5) + if searched_docs: + # 优先选择文件名完全匹配的文档 + best_docs = [ + d for d in searched_docs + if search_term.lower() in d.get("metadata", {}).get("original_filename", "").lower() + ] + if not best_docs: + best_docs = [searched_docs[0]] + context["source_docs"] = best_docs + doc_ids = [doc.get("_id", "") for doc in best_docs] + logger.info(f"找到 {len(best_docs)} 个文档用于提取,最佳: {best_docs[0].get('metadata', {}).get('original_filename', '?')}") if not target_fields: return { "success": False, + "intent": "extract", "error": "未指定要提取的字段", "message": "请明确说明要提取哪些字段,如:'提取医院数量和床位数'" } - # 如果指定了文档,验证文档存在 - if doc_ids and "all_docs" not in doc_ids: + # 如果指定了文档且还没有加载 source_docs,则验证并加载 + if doc_ids and "all_docs" not in doc_ids and not context.get("source_docs"): valid_docs = [] for doc_ref in doc_ids: doc_id = doc_ref.replace("doc_", "") @@ -93,20 +150,22 @@ class InstructionExecutor: if not valid_docs: return { "success": False, + "intent": "extract", "error": "指定的文档不存在", "message": "请检查文档编号是否正确" } context["source_docs"] = valid_docs - # 构建字段列表 - fields = [] - for i, field_name in enumerate(target_fields): - fields.append({ - "name": field_name, - "cell": f"A{i+1}", - "field_type": "text", - "required": False - }) + # 构建字段列表(使用 TemplateField dataclass) + fields = [ + TemplateField( + name=field_name, + cell=f"A{i+1}", + field_type="text", + required=False + ) + for i, field_name in enumerate(target_fields) + ] # 调用填表服务 result = await template_fill_service.fill_template( @@ -143,7 +202,7 @@ class InstructionExecutor: } # 获取源文档 - source_docs = context.get("source_docs", []) + source_docs = context.get("source_docs", []) or [] source_doc_ids = [doc.get("_id") for doc in source_docs if doc.get("_id")] # 获取字段 @@ -175,36 +234,103 @@ class InstructionExecutor: } async def _execute_summarize(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: - """执行摘要总结""" + """执行摘要总结 - 使用 LLM 生成真实摘要""" try: - docs = context.get("source_docs", []) + import re + docs = context.get("source_docs", []) or [] + instruction_text = context.get("instruction", "") + + # 从指令中提取文件名/关键词,优先搜索精确文档 + search_term = None + if instruction_text: + filename_match = re.search(r'["""]([^"""]+)["""]', 
instruction_text) + if filename_match: + search_term = filename_match.group(1) + else: + file_match = re.search(r'([^\s,。!?,]+\.(?:docx|xlsx|md|txt))', instruction_text) + if file_match: + search_term = file_match.group(1) + + # 如果没有文档或有更精确的搜索词,尝试重新搜索 + if not docs or search_term: + if search_term: + logger.info(f"按关键词搜索文档: {search_term}") + searched_docs = await mongodb.search_documents(search_term, limit=5) + if searched_docs: + # 优先使用文件名最匹配的文档 + docs = sorted( + searched_docs, + key=lambda d: 1 if search_term.lower() in d.get("metadata", {}).get("original_filename", "").lower() else 0, + reverse=True + ) + logger.info(f"找到 {len(docs)} 个文档,最佳匹配: {docs[0].get('metadata', {}).get('original_filename', '?')}") + if not docs: return { - "success": False, - "error": "没有可用的文档", - "message": "请先上传要总结的文档" + "success": True, + "intent": "summarize", + "action_needed": "provide_document", + "message": "我理解了,您想分析文档内容。", + "suggestion": "请提供已上传文档的名称(可以是文件名或部分名称),或者上传您想要分析的文档。\n\n支持的格式:docx、xlsx、md、txt\n\n例如:'分析2021年民政事业发展统计公报' 或 '总结卫生健康数据'" } - summaries = [] - for doc in docs[:5]: # 最多处理5个文档 - content = doc.get("content", "")[:5000] # 限制内容长度 - if content: - summaries.append({ - "filename": doc.get("metadata", {}).get("original_filename", "未知"), - "content_preview": content[:500] + "..." if len(content) > 500 else content - }) + # 对第一个(最佳匹配)文档生成 AI 摘要 + primary_doc = docs[0] + content = primary_doc.get("content", "") + filename = primary_doc.get("metadata", {}).get("original_filename", "未知文档") + + if not content: + return { + "success": False, + "intent": "summarize", + "error": "文档内容为空", + "message": f"文档 {filename} 没有可供分析的文本内容" + } + + # 使用 LLM 生成摘要 + content_for_summary = content[:12000] # 最多取前 12000 字 + user_request = instruction_text or "请总结这份文档" + + prompt = f"""请对以下文档进行全面、有条理的摘要分析。 + +文档名称:{filename} +用户要求:{user_request} + +文档内容: +{content_for_summary} + +请按以下格式输出摘要: +1. **文档概述**:简述文档主题和背景(2-3句) +2. **主要内容**:列出文档的核心数据和关键信息(用要点列出) +3. **重要数据**:提取文档中的重要数字、统计数据 +4. 
**主要结论**:归纳文档的主要结论或趋势 + +要求:条理清晰,数据准确,不要遗漏关键信息。""" + + from app.services.llm_service import llm_service + messages = [ + {"role": "system", "content": "你是一个专业的文档分析助手,擅长提取关键信息并生成结构化摘要。"}, + {"role": "user", "content": prompt} + ] + + response = await llm_service.chat(messages=messages, temperature=0.3, max_tokens=2000) + ai_summary = llm_service.extract_message_content(response) return { "success": True, "intent": "summarize", - "summaries": summaries, - "message": f"找到 {len(summaries)} 个文档可供参考" + "ai_summary": ai_summary, + "filename": filename, + "doc_id": primary_doc.get("_id", ""), + "total_docs_found": len(docs), + "message": f"已生成文档摘要" } except Exception as e: logger.error(f"摘要执行失败: {e}") return { "success": False, + "intent": "summarize", "error": str(e), "message": f"摘要生成失败: {str(e)}" } @@ -213,17 +339,39 @@ class InstructionExecutor: """执行问答""" try: question = params.get("question", "") + instruction_text = context.get("instruction", "") + if not question: return { "success": False, + "intent": "question", "error": "未提供问题", "message": "请输入要回答的问题" } - # 使用 RAG 检索相关文档 - docs = context.get("source_docs", []) - rag_results = [] + docs = context.get("source_docs", []) or [] + # 如果没有文档,尝试从指令中提取文件名搜索 + if not docs: + filename_match = re.search(r'["""]([^"""]+\.(?:docx|xlsx|md|txt))["""]', instruction_text) + if not filename_match: + filename_match = re.search(r'([^\s,。!?]+\.(?:docx|xlsx|md|txt))', instruction_text) + if filename_match: + found = await mongodb.search_documents(filename_match.group(1), limit=5) + if found: + docs = found + + if not docs: + return { + "success": True, + "intent": "question", + "question": question, + "answer": None, + "message": "请先上传文档,我才能回答您的问题" + } + + # 使用 RAG 检索相关文档 + rag_results = [] for doc in docs: doc_id = doc.get("_id", "") if doc_id: @@ -241,12 +389,42 @@ class InstructionExecutor: doc.get("content", "")[:3000] for doc in docs[:3] if doc.get("content") ]) + if not context_text: + return { + "success": True, + "intent": "question", + "question": question, + "answer": None, + "message": "文档内容为空,无法回答问题" + } + + # 使用 LLM 生成答案 + filename = docs[0].get("metadata", {}).get("original_filename", "文档") + prompt = f"""基于以下文档内容,回答用户的问题。 + +文档名称:{filename} +用户问题:{question} + +文档内容: +{context_text[:8000]} + +请根据文档内容准确回答问题。如果文档中没有相关信息,请明确说明。""" + + from app.services.llm_service import llm_service + messages = [ + {"role": "system", "content": "你是一个专业的文档问答助手,根据提供的内容准确回答用户问题。"}, + {"role": "user", "content": prompt} + ] + response = await llm_service.chat(messages=messages, temperature=0.3, max_tokens=1500) + answer = llm_service.extract_message_content(response) + return { "success": True, "intent": "question", "question": question, - "context_preview": context_text[:500] + "..." 
if len(context_text) > 500 else context_text, - "message": "已找到相关上下文,可进行问答" + "answer": answer, + "filename": filename, + "message": "已生成回答" } except Exception as e: @@ -299,12 +477,53 @@ class InstructionExecutor: async def _execute_compare(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: """执行对比分析""" try: - docs = context.get("source_docs", []) + docs = context.get("source_docs", []) or [] + instruction_text = context.get("instruction", "") + + # 优先从指令中提取具体的文件名 + filenames = _extract_filenames_from_text(instruction_text) + + if filenames: + # 只选择文件名匹配的那些文档 + matched_docs = [] + for doc in docs: + fname = doc.get("metadata", {}).get("original_filename", "").lower() + for fn in filenames: + if fn.lower() in fname or fname in fn.lower(): + matched_docs.append(doc) + break + # 如果匹配到足够文档,用匹配的 + if len(matched_docs) >= 2: + docs = matched_docs + else: + # 匹配不够,尝试按文件名搜索 MongoDB + all_found = [] + for fn in filenames: + found = await mongodb.search_documents(fn, limit=5) + all_found.extend(found) + seen = set() + unique_docs = [] + for d in all_found: + did = d.get("_id", "") + if did and did not in seen: + seen.add(did) + unique_docs.append(d) + if len(unique_docs) >= 2: + docs = unique_docs + elif len(unique_docs) == 1 and len(docs) >= 1: + # 找到一个指定的 + 用一个通用的 + docs = unique_docs + docs[:1] + elif docs and len(filenames) == 1: + # 找到一个指定文件名但只有一个匹配,尝试补充 + docs = unique_docs + [d for d in docs if d not in unique_docs] + docs = docs[:2] + if len(docs) < 2: return { "success": False, + "intent": "compare", "error": "对比需要至少2个文档", - "message": "请上传至少2个文档进行对比" + "message": "请上传至少2个文档进行对比,或明确说出要对比的文档名称" } # 提取文档基本信息 @@ -329,6 +548,7 @@ class InstructionExecutor: logger.error(f"对比执行失败: {e}") return { "success": False, + "intent": "compare", "error": str(e), "message": f"对比分析失败: {str(e)}" } @@ -336,10 +556,23 @@ class InstructionExecutor: async def _execute_edit(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: """执行文档编辑操作""" try: - docs = context.get("source_docs", []) + docs = context.get("source_docs", []) or [] + instruction_text = context.get("instruction", "") + + # 如果没有文档,尝试从指令中提取文件名搜索 + if not docs: + filename_match = re.search(r'["""]([^"""]+\.(?:docx|xlsx|md|txt))["""]', instruction_text) + if not filename_match: + filename_match = re.search(r'([^\s,。!?]+\.(?:docx|xlsx|md|txt))', instruction_text) + if filename_match: + found = await mongodb.search_documents(filename_match.group(1), limit=3) + if found: + docs = found + if not docs: return { "success": False, + "intent": "edit", "error": "没有可用的文档", "message": "请先上传要编辑的文档" } @@ -405,7 +638,7 @@ class InstructionExecutor: - Word -> Markdown """ try: - docs = context.get("source_docs", []) + docs = context.get("source_docs", []) or [] if not docs: return { "success": False, diff --git a/backend/app/instruction/intent_parser.py b/backend/app/instruction/intent_parser.py index b53c034..71c6090 100644 --- a/backend/app/instruction/intent_parser.py +++ b/backend/app/instruction/intent_parser.py @@ -28,7 +28,7 @@ class IntentParser: INTENT_KEYWORDS = { INTENT_EXTRACT: ["提取", "抽取", "获取", "找出", "查找", "识别", "找到"], INTENT_FILL_TABLE: ["填表", "填写", "填充", "录入", "导入到表格", "填写到"], - INTENT_SUMMARIZE: ["总结", "摘要", "概括", "概述", "归纳", "提炼"], + INTENT_SUMMARIZE: ["总结", "摘要", "概括", "概述", "归纳", "提炼", "分析", "聊聊"], INTENT_QUESTION: ["问答", "回答", "解释", "什么是", "为什么", "如何", "怎样", "多少", "几个"], INTENT_SEARCH: ["搜索", "查找", "检索", "查询", "找"], INTENT_COMPARE: ["对比", "比较", "差异", "区别", "不同"], @@ -47,12 +47,13 @@ class IntentParser: 
def __init__(self):
         self.intent_history: List[Dict[str, Any]] = []
 
-    async def parse(self, text: str) -> Tuple[str, Dict[str, Any]]:
+    async def parse(self, text: str, context: Dict[str, Any] = None) -> Tuple[str, Dict[str, Any]]:
         """
         解析自然语言指令
 
         Args:
             text: 用户输入的自然语言
+            context: 执行上下文(包含对话历史等)
 
         Returns:
             (意图类型, 参数字典)
         """
@@ -61,11 +62,17 @@
         if not text:
             return self.INTENT_UNKNOWN, {}
 
+        # 检查对话历史中的上下文
+        conversation_history = []
+        if context and context.get("conversation_history"):
+            conversation_history = context.get("conversation_history", [])
+            logger.info(f"解析时使用对话历史: {len(conversation_history)} 条消息")
+
         # 记录历史
         self.intent_history.append({"text": text, "intent": None})
 
-        # 识别意图
-        intent = self._recognize_intent(text)
+        # 识别意图(考虑对话上下文)
+        intent = self._recognize_intent_with_context(text, conversation_history)
 
         # 提取参数
         params = self._extract_params(text, intent)
@@ -78,6 +85,42 @@
 
         return intent, params
 
+    def _recognize_intent_with_context(self, text: str, conversation_history: List[Dict[str, Any]]) -> str:
+        """
+        基于对话历史识别意图
+
+        Args:
+            text: 当前用户输入
+            conversation_history: 对话历史
+
+        Returns:
+            意图类型
+        """
+        # 如果对话历史为空,使用基础意图识别
+        if not conversation_history:
+            return self._recognize_intent(text)
+
+        # 基于历史上下文进行意图识别
+        # 分析最近的对话了解用户意图的延续性
+        last_intent = None
+        last_topic = None
+
+        for msg in conversation_history[-5:]:  # 最多看最近5条消息
+            if msg.get("role") == "assistant":
+                last_intent = msg.get("intent")
+                if msg.get("intent") and msg.get("intent") != "unknown":
+                    last_topic = msg.get("intent")
+
+        # 如果当前消息很短(如"继续"、"是的"),可能延续之前的意图
+        short_confirmation = ["是", "是的", "好", "继续", "ok", "接着", "然后", "还有吗"]
+        if text.strip() in short_confirmation or len(text.strip()) <= 3:
+            if last_topic:
+                logger.info(f"简短确认,延续之前的意图: {last_topic}")
+                return last_topic
+
+        # 否则使用标准意图识别
+        return self._recognize_intent(text)
+
     def _recognize_intent(self, text: str) -> str:
         """识别意图类型"""
         intent_scores: Dict[str, float] = {}
@@ -214,18 +257,27 @@
         return template_info if template_info else None
 
     def _extract_target_fields(self, text: str) -> List[str]:
-        """提取目标字段"""
+        """提取目标字段 - 按分隔符切分再逐段清理"""
         fields = []
 
-        # 匹配 "提取XXX和YYY"、"抽取XXX、YYY"
-        patterns = [
-            r"提取([^(and|,|,)+]+?)(?:和|与|、|,|plus)",
-            r"抽取([^(and|,|,)+]+?)(?:和|与|、|,|plus)",
-        ]
+        # 去除提取/抽取前缀
+        cleaned_text = re.sub(r"^(?:提取|抽取)", "", text).strip()
 
-        for pattern in patterns:
-            matches = re.findall(pattern, text)
-            fields.extend([m.strip() for m in matches if m.strip()])
+        # 按'和'、'与'、'、'分割成多段
+        segments = re.split(r"[和与、]", cleaned_text)
+
+        # 常见前缀(这些不是字段名,需要去除)
+        prefixes = ["文档中的", "文档中", "文件中的", "文件中", "内容中的", "内容中"]
+
+        for seg in segments:
+            seg = seg.strip()
+            # 去除常见前缀
+            for p in prefixes:
+                if seg.startswith(p):
+                    seg = seg[len(p):]
+                    break
+            if seg and 2 <= len(seg) <= 20:
+                fields.append(seg)
 
         return list(set(fields))
 
diff --git a/backend/app/services/excel_storage_service.py b/backend/app/services/excel_storage_service.py
index d324a5f..f608ab9 100644
--- a/backend/app/services/excel_storage_service.py
+++ b/backend/app/services/excel_storage_service.py
@@ -526,9 +526,10 @@ class ExcelStorageService:
         # 创建表
         model_class = self._create_table_model(table_name, columns, column_types)
 
-        # 创建表结构
+        # 创建表结构 (使用异步方式)
         async with self.mysql_db.get_session() as session:
-            model_class.__table__.create(session.bind, checkfirst=True)
+            async with session.bind.begin() as conn:
+                await conn.run_sync(lambda sync_conn: model_class.__table__.create(sync_conn, checkfirst=True))
 
         # 插入数据
         records = []
diff --git a/backend/app/services/rag_service.py
b/backend/app/services/rag_service.py index 50c2607..230800c 100644 --- a/backend/app/services/rag_service.py +++ b/backend/app/services/rag_service.py @@ -165,9 +165,9 @@ class BM25: class RAGService: """RAG 检索增强服务""" - # 默认分块参数 - DEFAULT_CHUNK_SIZE = 500 # 每个文本块的大小(字符数) - DEFAULT_CHUNK_OVERLAP = 50 # 块之间的重叠(字符数) + # 默认分块参数 - 增大块大小减少embedding次数 + DEFAULT_CHUNK_SIZE = 1000 # 每个文本块的大小(字符数),增大以提升速度 + DEFAULT_CHUNK_OVERLAP = 100 # 块之间的重叠(字符数) def __init__(self): self.embedding_model = None @@ -389,6 +389,70 @@ class RAGService: self._add_documents(documents, chunk_ids) logger.info(f"已索引文档 {doc_id},共 {len(chunks)} 个块") + async def index_document_content_async( + self, + doc_id: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + chunk_size: int = None, + chunk_overlap: int = None + ): + """ + 异步将文档内容索引到向量数据库(自动分块) + + 使用 asyncio.to_thread 避免阻塞事件循环 + """ + import asyncio + + if self._disabled: + logger.info(f"[RAG DISABLED] 文档索引操作已跳过: {doc_id}") + return + + if not self._initialized: + self._init_vector_store() + + if self.embedding_model is None: + logger.debug(f"文档跳过索引 (无嵌入模型): {doc_id}") + return + + # 分割文档为小块 + if chunk_size is None: + chunk_size = self.DEFAULT_CHUNK_SIZE + if chunk_overlap is None: + chunk_overlap = self.DEFAULT_CHUNK_OVERLAP + + chunks = self._split_into_chunks(content, chunk_size, chunk_overlap) + + if not chunks: + logger.warning(f"文档内容为空,跳过索引: {doc_id}") + return + + # 为每个块创建文档对象 + documents = [] + chunk_ids = [] + + for i, chunk in enumerate(chunks): + chunk_id = f"{doc_id}_chunk_{i}" + chunk_metadata = metadata.copy() if metadata else {} + chunk_metadata.update({ + "chunk_index": i, + "total_chunks": len(chunks), + "doc_id": doc_id + }) + + documents.append(SimpleDocument( + page_content=chunk, + metadata=chunk_metadata + )) + chunk_ids.append(chunk_id) + + # 使用线程池执行 CPU 密集型的 embedding 计算 + def _sync_add(): + self._add_documents(documents, chunk_ids) + + await asyncio.to_thread(_sync_add) + logger.info(f"已异步索引文档 {doc_id},共 {len(chunks)} 个块") + def _add_documents(self, documents: List[SimpleDocument], doc_ids: List[str]): """批量添加文档到向量索引""" if not documents: diff --git a/backend/app/services/table_rag_service.py b/backend/app/services/table_rag_service.py index d2a9dd0..4130e10 100644 --- a/backend/app/services/table_rag_service.py +++ b/backend/app/services/table_rag_service.py @@ -300,13 +300,15 @@ class TableRAGService: filename: str, sheet_name: Optional[str] = None, header_row: int = 0, - sample_size: int = 10 + sample_size: int = 10, + skip_rag_index: bool = False ) -> Dict[str, Any]: """ 为 Excel 表构建完整的 RAG 索引 流程: 1. 读取 Excel 获取字段信息 + 2. 如果 skip_rag_index=True,跳过 RAG 索引,直接存 MySQL 2. AI 生成每个字段的语义描述 3. 将字段描述存入向量数据库 @@ -367,6 +369,20 @@ class TableRAGService: results["field_count"] = len(df.columns) logger.info(f"表名: {table_name}, 字段数: {len(df.columns)}") + # 跳过 RAG 索引时直接存 MySQL + if skip_rag_index: + logger.info(f"跳过 RAG 索引,直接存储到 MySQL") + store_result = await self.excel_storage.store_excel( + file_path=file_path, + filename=filename, + sheet_name=sheet_name, + header_row=header_row + ) + results["mysql_table"] = store_result.get("table_name") if store_result.get("success") else None + results["row_count"] = store_result.get("row_count", len(df)) + results["indexed_count"] = 0 + return results + # 3. 
初始化 RAG (如果需要) if not self.rag._initialized: self.rag._init_vector_store() diff --git a/backend/app/services/template_fill_service.py b/backend/app/services/template_fill_service.py index 9465d35..aeadf2e 100644 --- a/backend/app/services/template_fill_service.py +++ b/backend/app/services/template_fill_service.py @@ -5,6 +5,7 @@ """ import asyncio import logging +import re from dataclasses import dataclass, field from typing import Any, Dict, List, Optional @@ -13,6 +14,7 @@ from app.services.llm_service import llm_service from app.core.document_parser import ParserFactory from app.services.markdown_ai_service import markdown_ai_service from app.services.rag_service import rag_service +from app.services.excel_storage_service import excel_storage_service logger = logging.getLogger(__name__) @@ -105,12 +107,60 @@ class TemplateFillService: # 3. 检查是否需要使用源文档重新生成表头 # 条件:源文档已加载 AND 现有字段看起来是自动生成的(如"字段1"、"字段2") + # 注意:Word 模板(docx)不自动重新生成表头,因为 Word 模板的表结构由用户定义,必须保留 needs_regenerate_headers = ( + template_file_type != "docx" and len(source_docs) > 0 and len(template_fields) > 0 and all(self._is_auto_generated_field(f.name) for f in template_fields) ) + # 4. Word 模板特殊处理:表头为空时,从源文档生成字段 + # 仅当有源文档、模板字段为空、模板文件类型为 docx 时触发 + if not needs_regenerate_headers and template_file_type == "docx" and len(source_docs) > 0 and len(template_fields) == 0: + logger.info(f"Word 模板表头为空,从源文档生成字段... (source_docs={len(source_docs)})") + source_contents = [] + for doc in source_docs: + structured = doc.structured_data if doc.structured_data else {} + titles = structured.get("titles", []) + tables = structured.get("tables", []) + tables_count = len(tables) if tables else 0 + tables_summary = "" + if tables: + tables_summary = "\n【文档中的表格】:\n" + for idx, table in enumerate(tables[:5]): + if isinstance(table, dict): + headers = table.get("headers", []) + rows = table.get("rows", []) + if headers: + tables_summary += f"表格{idx+1}表头: {', '.join(str(h) for h in headers)}\n" + if rows: + tables_summary += f"表格{idx+1}前3行: " + for row_idx, row in enumerate(rows[:3]): + if isinstance(row, list): + tables_summary += " | ".join(str(c) for c in row) + "; " + elif isinstance(row, dict): + tables_summary += " | ".join(str(row.get(h, "")) for h in headers if headers) + "; " + tables_summary += "\n" + source_contents.append({ + "filename": doc.filename, + "doc_type": doc.doc_type, + "content": doc.content[:5000] if doc.content else "", + "titles": titles[:10] if titles else [], + "tables_count": tables_count, + "tables_summary": tables_summary + }) + if template_id: + generated_fields = await self.get_template_fields_from_file( + template_id, + template_file_type, + source_contents=source_contents, + source_docs=source_docs + ) + if generated_fields: + template_fields = generated_fields + logger.info(f"Word 模板字段生成成功: {[f.name for f in template_fields]}") + if needs_regenerate_headers: logger.info(f"检测到自动生成表头,尝试使用源文档重新生成... 
(当前字段: {[f.name for f in template_fields]})") @@ -162,7 +212,8 @@ class TemplateFillService: new_fields = await self.get_template_fields_from_file( template_id, template_file_type, - source_contents=source_contents + source_contents=source_contents, + source_docs=source_docs ) if new_fields and len(new_fields) > 0: logger.info(f"成功重新生成表头: {[f.name for f in new_fields]}") @@ -224,14 +275,357 @@ class TemplateFillService: max_rows = max(len(v) for v in filled_data.values()) if filled_data else 1 logger.info(f"填表完成: {len(filled_data)} 个字段, 最大行数: {max_rows}") + # 如果是 Word 模板,将数据填入模板文件 + filled_file_path = None + if template_file_type == "docx" and template_id and filled_data: + filled_file_path = await self._fill_docx(template_id, filled_data) + if filled_file_path: + logger.info(f"Word 模板已填写,输出文件: {filled_file_path}") + return { "success": True, "filled_data": filled_data, "fill_details": fill_details, "source_doc_count": len(source_docs), - "max_rows": max_rows + "max_rows": max_rows, + "filled_file_path": filled_file_path } + async def _polish_word_filled_data( + self, + filled_data: Dict[str, Any] + ) -> Dict[str, str]: + """ + 将提取的结构化数据(尤其是多行Excel数据)进行统计归纳, + 然后润色为自然语言文本 + + Args: + filled_data: {字段名: [原始值列表]} + + Returns: + {字段名: 润色后的文本} + """ + if not filled_data: + return {} + + try: + import json + + # 第一步:对数值型多行数据进行统计分析 + data_summary = [] + for field_name, values in filled_data.items(): + if not isinstance(values, list) or not values: + continue + + # 过滤掉无效值 + raw_values = [] + for v in values: + if v and str(v).strip() and not str(v).startswith('[提取失败'): + raw_values.append(str(v).strip()) + + if not raw_values: + continue + + # 尝试解析为数值进行统计 + numeric_values = [] + for v in raw_values: + # 提取数值(处理 "123个"、"78.5%"、"1,234" 等格式) + num_str = re.sub(r'[^\d.\-]', '', str(v)) + try: + if num_str and num_str != '-' and num_str != '.': + numeric_values.append(float(num_str)) + except ValueError: + pass + + # 根据字段名判断类型 + field_lower = field_name.lower() + is_count_field = any(kw in field_lower for kw in ['数量', '总数', '次数', '条数', '订单数', '记录数', '条目']) + is_amount_field = any(kw in field_lower for kw in ['金额', '总额', '合计', '总计', '销售额', '收入', '支出', '成本']) + is_ratio_field = any(kw in field_lower for kw in ['比率', '比例', '占比', '率', '使用率', '增长', '增幅']) + is_name_field = any(kw in field_lower for kw in ['名称', '机构', '医院', '公司', '单位', '部门', '区域', '类别']) + + if len(numeric_values) >= 2 and len(numeric_values) == len(raw_values): + # 多行数值数据,进行统计归纳 + total = sum(numeric_values) + avg = total / len(numeric_values) + max_val = max(numeric_values) + min_val = min(numeric_values) + + stats_lines = [ + f"【{field_name}】(共 {len(raw_values)} 条数据):", + f" - 合计: {self._format_number(total)}" if is_amount_field else f" - 合计: {total:.2f}", + f" - 平均: {avg:.2f}", + f" - 最大: {max_val:.2f}", + f" - 最小: {min_val:.2f}", + ] + + # 对原始值去重计数(如果是名称类字段) + if is_name_field: + unique_values = list(set(raw_values)) + if len(unique_values) <= 10: + stats_lines.append(f" - 涉及类别(共 {len(unique_values)} 种): {'、'.join(unique_values[:8])}") + else: + stats_lines.append(f" - 涉及 {len(unique_values)} 个不同类别") + + # 取前5个原始示例 + stats_lines.append(f" - 示例值: {'、'.join(raw_values[:5])}") + data_summary.append('\n'.join(stats_lines)) + + elif is_ratio_field and len(numeric_values) == 1: + # 单值百分比 + pct = numeric_values[0] + data_summary.append(f"【{field_name}】: {pct:.1f}%,表示相关指标的相对水平") + + elif is_amount_field and len(numeric_values) >= 1: + # 金额类(单位通常是万元/亿元) + total = sum(numeric_values) + unit = "" + if total >= 10000: + unit = f"(约 
{total/10000:.2f} 万元)" + elif total >= 1: + unit = f"(约 {total:.2f} 元)" + data_summary.append(f"【{field_name}】: 合计 {self._format_number(total)}{unit},基于 {len(raw_values)} 条记录汇总") + + elif is_count_field and len(numeric_values) >= 1: + # 数量类 + total = sum(numeric_values) + data_summary.append(f"【{field_name}】: 共 {self._format_number(total)},基于 {len(raw_values)} 条记录汇总") + + else: + # 无法归类的多值数据,做去重归纳 + unique_values = list(set(raw_values)) + if len(unique_values) <= 8: + data_summary.append(f"【{field_name}】(共 {len(raw_values)} 条,去重后 {len(unique_values)} 项): {'、'.join(unique_values[:8])}") + elif len(raw_values) > 8: + data_summary.append(f"【{field_name}】(共 {len(raw_values)} 条记录): {'、'.join(raw_values[:5])} 等") + else: + data_summary.append(f"【{field_name}】: {'、'.join(raw_values)}") + + if not data_summary: + return {k: (', '.join(str(v) for v in vals[:5]) if isinstance(vals, list) else str(vals)) + for k, vals in filled_data.items()} + + # 第二步:调用 LLM 将统计分析结果转化为专业自然语言描述 + prompt = f"""你是一个专业的数据分析报告助手。请根据以下从文档中提取并统计的数据,生成专业、简洁的自然语言描述。 + +【数据统计结果】: +{chr(10).join(data_summary)} + +【润色要求】: +1. 每个字段生成一段专业的描述性文本(20-60字) +2. 数值类字段要明确标注单位和含义,如"销售总额达1,234.5万元,共涵盖56个订单" +3. 分类/名称类字段要归纳总结类别,如"涉及医疗器械、药品采购、设备维修等5个业务类别" +4. 多值数据不要简单罗列,要做总结,如"覆盖华东地区(上海、江苏、浙江)、华南地区(广东)等6个省市的销售网络" +5. 百分比/比率类要加背景说明,如"综合毛利率为23.5%,处于行业正常水平" +6. 保持文本通顺、专业,符合正式报告风格 +7. 每段控制在60字以内 + +【输出格式】(严格按JSON格式,只返回JSON,不要任何其他内容): +{{ + "字段名1": "润色后的描述文本1", + "字段名2": "润色后的描述文本2" +}} +""" + messages = [ + {"role": "system", "content": "你是一个专业的数据分析报告助手。请严格按JSON格式输出,只返回纯JSON,不要任何其他内容。"}, + {"role": "user", "content": prompt} + ] + + response = await self.llm.chat( + messages=messages, + temperature=0.3, + max_tokens=3000 + ) + content = self.llm.extract_message_content(response) + logger.info(f"LLM 润色 Word 数据返回: {content[:500]}") + + # 尝试解析 JSON + json_match = re.search(r'\{[\s\S]*\}', content) + if json_match: + polished = json.loads(json_match.group()) + logger.info(f"LLM 润色成功: {len(polished)} 个字段") + return polished + else: + logger.warning(f"LLM 返回无法解析为 JSON: {content[:200]}") + # 回退到原始统计摘要 + return {k: (', '.join(str(v) for v in vals[:5]) if isinstance(vals, list) else str(vals)) + for k, vals in filled_data.items()} + + except Exception as e: + logger.error(f"LLM 润色失败: {str(e)}") + # 润色失败时回退到原始值 + return {k: (', '.join(str(v) for v in vals[:5]) if isinstance(vals, list) else str(vals)) + for k, vals in filled_data.items()} + + def _format_number(self, num: float) -> str: + """格式化数字,添加千分位""" + if abs(num) >= 10000: + return f"{num:,.2f}" + elif abs(num) >= 1: + return f"{num:,.2f}" + else: + return f"{num:.4f}" + + async def _fill_docx( + self, + template_path: str, + filled_data: Dict[str, Any] + ) -> Optional[str]: + """ + 将提取的数据填入 Word 模板 + + Args: + template_path: Word 模板文件路径 + filled_data: 字段值字典 {field_name: [values]} + + Returns: + 填写后的文件路径,失败返回 None + """ + import re + import os + import tempfile + import shutil + from docx import Document + from docx.shared import RGBColor + + def clean_text(text: str) -> str: + """清理文本,移除非法字符""" + if not text: + return "" + # 移除控制字符 + text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text) + # 移除 Word 中常见的非法替代字符(显示为方框) + text = re.sub(r'[\ufffd\u25a1\u25a9\u2610\u2611\u25cb\u25c9]', '', text) + # 移除其他无效 Unicode 字符 + text = re.sub(r'[\ufeff\u200b-\u200f\u2028-\u202e]', '', text) + return text.strip() + + def set_cell_text(cell, text: str): + """设置单元格文本(保留原有格式)""" + cell.text = text + # 确保文本颜色为黑色 + for para in cell.paragraphs: + for run in para.runs: + run.font.color.rgb = RGBColor(0, 0, 0) + + 
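
Note: the three regex passes in clean_text above are easy to sanity-check in isolation. A minimal standalone sketch follows (it re-declares the same patterns so it runs on its own; the sample string is illustrative only):

    import re

    def clean_text(text: str) -> str:
        """Same three passes as the helper in _fill_docx above."""
        if not text:
            return ""
        text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)              # C0/DEL control chars
        text = re.sub(r'[\ufffd\u25a1\u25a9\u2610\u2611\u25cb\u25c9]', '', text)  # replacement char + box glyphs
        text = re.sub(r'[\ufeff\u200b-\u200f\u2028-\u202e]', '', text)            # BOM, zero-width, bidi marks
        return text.strip()

    # U+FFFD, a hollow box, a zero-width space and a NUL byte all disappear:
    assert clean_text("报\ufffd告□\u200b 2024\x00") == "报告 2024"

Worth noting: the box-glyph class also removes legitimate checkbox characters (U+2610/U+2611), so a template that uses them as form controls would lose those marks.
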
try: + # 先对数据进行 LLM 润色(非结构化文本补充和润色) + logger.info(f"Word 填写前开始 LLM 润色 {len(filled_data)} 个字段...") + polished_data = await self._polish_word_filled_data(filled_data) + logger.info(f"LLM 润色完成,使用润色后文本写入 Word") + + # 创建临时目录存放修改后的文件 + temp_dir = tempfile.mkdtemp() + output_path = os.path.join(temp_dir, "filled_template.docx") + + # 复制模板到临时文件 + shutil.copy2(template_path, output_path) + + # 打开复制的模板 + doc = Document(output_path) + + matched_fields = set() + + # 遍历表格,找到字段名所在的行,填写对应值 + for table in doc.tables: + for row in table.rows: + cells = row.cells + if not cells: + continue + + first_cell_text = cells[0].text.strip() + if not first_cell_text: + continue + + # 精确匹配字段名 + if first_cell_text in polished_data: + display_text = polished_data[first_cell_text] + if display_text: + if len(cells) > 1: + set_cell_text(cells[1], clean_text(display_text)) + matched_fields.add(first_cell_text) + logger.info(f"Word 填写(精确): {first_cell_text} = {display_text[:50] if display_text else ''}") + continue + + # 前缀/后缀匹配 + for field_name, display_text in polished_data.items(): + if field_name and first_cell_text and ( + field_name.startswith(first_cell_text) or first_cell_text.startswith(field_name) + ): + if display_text: + if len(cells) > 1: + set_cell_text(cells[1], clean_text(display_text)) + matched_fields.add(field_name) + logger.info(f"Word 填写(模糊): {first_cell_text} ≈ {field_name} = {display_text[:50] if display_text else ''}") + break + + # 如果有未匹配的字段(模板第一列为空),使用段落格式写入(带分隔线,更清晰) + unmatched_fields = [f for f in polished_data if f not in matched_fields] + if unmatched_fields: + logger.info(f"使用段落格式写入 {len(unmatched_fields)} 个字段(带分隔线)") + + from docx.oxml.ns import qn + from docx.oxml import OxmlElement + from docx.shared import Pt, RGBColor + + def add_horizontal_separator(doc, before_para=None): + """添加水平分隔线(通过段落下边框实现)""" + sep_para = OxmlElement('w:p') + pPr = OxmlElement('w:pPr') + pBdr = OxmlElement('w:pBdr') + bottom = OxmlElement('w:bottom') + bottom.set(qn('w:val'), 'single') + bottom.set(qn('w:sz'), '6') + bottom.set(qn('w:space'), '1') + bottom.set(qn('w:color'), 'CCCCCC') + pBdr.append(bottom) + pPr.append(pBdr) + sep_para.append(pPr) + if before_para is not None: + before_para._element.addprevious(sep_para) + else: + doc._body.append(sep_para) + + def add_field_section(doc, field_name: str, display_text: str): + """添加一个字段区域:字段名(加粗)+ 值段落 + 分隔线""" + from docx.shared import Pt + + # 字段名段落(加粗) + name_para = doc.add_paragraph() + name_run = name_para.add_run(f"📌 {field_name}") + name_run.bold = True + name_run.font.size = Pt(11) + name_run.font.color.rgb = RGBColor(0, 51, 102) + name_para.paragraph_format.space_before = Pt(12) + name_para.paragraph_format.space_after = Pt(3) + + # 值段落 + value_para = doc.add_paragraph() + value_run = value_para.add_run(display_text) + value_run.font.size = Pt(10.5) + value_run.font.color.rgb = RGBColor(51, 51, 51) + value_para.paragraph_format.space_before = Pt(0) + value_para.paragraph_format.space_after = Pt(6) + + # 分隔线 + add_horizontal_separator(doc, value_para) + + # 在文档末尾添加各字段段落 + for field_name in unmatched_fields: + display_text = polished_data[field_name] + if display_text: + add_field_section(doc, field_name, clean_text(display_text)) + logger.info(f"Word 段落写入: {field_name} = {display_text[:60]}") + + # 保存修改后的文档 + doc.save(output_path) + logger.info(f"Word 模板填写完成: {output_path}, 匹配字段: {len(matched_fields)}, 追加字段: {len(unmatched_fields)}") + return output_path + + except Exception as e: + logger.error(f"Word 模板填写失败: {str(e)}") + return None + async def 
_load_source_documents(
        self,
        source_doc_ids: Optional[List[str]] = None,
@@ -257,10 +651,38 @@ class TemplateFillService:
             if doc:
                 sd = doc.get("structured_data", {})
                 sd_keys = list(sd.keys()) if sd else []
-                logger.info(f"从MongoDB加载文档: {doc_id}, doc_type={doc.get('doc_type')}, structured_data keys={sd_keys}")
+                doc_type = doc.get("doc_type", "")
+                mysql_table_name = doc.get("metadata", {}).get("mysql_table_name")
+                logger.info(f"从MongoDB加载文档: {doc_id}, doc_type={doc_type}, structured_data keys={sd_keys}, mysql_table={mysql_table_name}")

-                # 如果 structured_data 为空,但有 file_path,尝试重新解析文件
                 doc_content = doc.get("content", "")
+
+                # 如果是 Excel 类型且有 MySQL 表名,直接从 MySQL 加载数据
+                if doc_type in ["xlsx", "xls"] and mysql_table_name:
+                    try:
+                        logger.info(f"  从 MySQL 表 {mysql_table_name} 加载 Excel 数据")
+                        mysql_data = await excel_storage_service.query_table(mysql_table_name, limit=1000)
+                        if mysql_data:
+                            # 转换为 SourceDocument 格式
+                            if mysql_data and len(mysql_data) > 0:
+                                columns = list(mysql_data[0].keys()) if mysql_data else []
+                                rows = [[row.get(col) for col in columns] for row in mysql_data]
+                                sd = {
+                                    "headers": columns,
+                                    "rows": rows,
+                                    "row_count": len(mysql_data),
+                                    "column_count": len(columns),
+                                    "source": "mysql"
+                                }
+                                logger.info(f"  MySQL 数据加载成功: {len(mysql_data)} 行, {len(columns)} 列")
+                            else:
+                                logger.warning(f"  MySQL 表 {mysql_table_name} 无数据")
+                        else:
+                            logger.warning(f"  MySQL 表 {mysql_table_name} 查询无结果")
+                    except Exception as mysql_err:
+                        logger.error(f"  MySQL 加载失败: {str(mysql_err)}")
+
+                # 如果 structured_data 仍然为空,尝试重新解析文件
                 if not sd or (not sd.get("tables") and not sd.get("headers") and not sd.get("rows")):
                     file_path = doc.get("metadata", {}).get("file_path")
                     if file_path:
@@ -294,7 +716,7 @@ class TemplateFillService:
                 source_docs.append(SourceDocument(
                     doc_id=doc_id,
                     filename=doc.get("metadata", {}).get("original_filename", "unknown"),
-                    doc_type=doc.get("doc_type", "unknown"),
+                    doc_type=doc_type,
                     content=doc_content,
                     structured_data=sd
                 ))
@@ -1047,7 +1469,8 @@ class TemplateFillService:
         self,
         file_path: str,
         file_type: str = "xlsx",
-        source_contents: List[dict] = None
+        source_contents: List[dict] = None,
+        source_docs: List["SourceDocument"] = None
     ) -> List[TemplateField]:
         """
         从模板文件提取字段定义
@@ -1071,15 +1494,18 @@
             fields = await self._get_template_fields_from_docx(file_path)

         # 检查是否需要 AI 生成表头
-        # 条件:没有字段 OR 所有字段都是自动命名的(如"字段1"、"列1"、"Unnamed"开头)
+        # 条件:没有字段 OR 所有字段都是自动命名的
+        # 对于 docx:仅当有源文档时才允许 AI 生成(避免覆盖用户定义的表头)
         needs_ai_generation = (
-            len(fields) == 0 or
-            all(self._is_auto_generated_field(f.name) for f in fields)
+            (len(fields) == 0 or
+             all(self._is_auto_generated_field(f.name) for f in fields))
+        ) and (
+            file_type != "docx" or bool(source_contents)
         )

         if needs_ai_generation:
             logger.info(f"模板表头为空或自动生成,尝试 AI 生成表头... (fields={len(fields)}, source_docs={len(source_contents) if source_contents else 0})")
-            ai_fields = await self._generate_fields_with_ai(file_path, file_type, source_contents)
+            ai_fields = await self._generate_fields_with_ai(file_path, file_type, source_contents, source_docs)
             if ai_fields:
                 fields = ai_fields
                 logger.info(f"AI 生成表头成功: {len(fields)} 个字段")
@@ -2134,7 +2560,8 @@ class TemplateFillService:
         self,
         file_path: str,
         file_type: str,
-        source_contents: List[dict] = None
+        source_contents: List[dict] = None,
+        source_docs: List["SourceDocument"] = None
     ) -> Optional[List[TemplateField]]:
         """
         使用 AI 为空表生成表头字段
@@ -2148,6 +2575,8 @@
         Returns:
             生成的字段列表,如果失败返回 None
         """
+        import random
+
         try:
             import pandas as pd
@@ -2182,24 +2611,21 @@
             else:
                 content_sample = ""

-            # 调用 AI 生成表头
-            # 根据源文档内容生成表头
-            source_info = ""
-            logger.info(f"[DEBUG] _generate_fields_with_ai received source_contents: {len(source_contents) if source_contents else 0} items")
+            # 优先从源文档的表格表头中随机选取
             if source_contents:
-                for sc in source_contents:
-                    logger.info(f"[DEBUG] source doc: filename={sc.get('filename')}, content_len={len(sc.get('content', ''))}, titles={len(sc.get('titles', []))}, tables_count={sc.get('tables_count', 0)}, has_tables_summary={bool(sc.get('tables_summary'))}")
-                source_info = "\n\n【源文档内容摘要】(根据以下文档内容生成表头):\n"
+                import re
+                all_headers = []
+                source_info = ""
+
                 for idx, src in enumerate(source_contents[:5]):  # 最多5个源文档
                     filename = src.get("filename", f"文档{idx+1}")
                     doc_type = src.get("doc_type", "unknown")
-                    content = src.get("content", "")[:3000]  # 限制内容长度
-                    titles = src.get("titles", [])[:10]  # 最多10个标题
+                    content = src.get("content", "")[:3000]
+                    titles = src.get("titles", [])[:10]
                     tables_count = src.get("tables_count", 0)
                     tables_summary = src.get("tables_summary", "")

                     source_info += f"\n--- 文档 {idx+1}: {filename} ({doc_type}) ---\n"
-                    # 处理 titles(可能是字符串列表或字典列表)
                     if titles:
                         title_texts = []
                         for t in titles[:5]:
@@ -2216,6 +2642,72 @@
                     if content:
                         source_info += f"【文档内容】(前3000字符):{content[:3000]}\n"

+                    # 从 tables_summary 中提取表头
+                    # 表格摘要格式如: "表格1表头: 姓名, 年龄, 性别"
+                    if tables_summary:
+                        header_matches = re.findall(r'表头:\s*([^\n]+)', tables_summary)
+                        for match in header_matches:
+                            # 分割表头字符串
+                            headers = [h.strip() for h in match.split(',') if h.strip()]
+                            all_headers.extend(headers)
+                            logger.info(f"从表格摘要提取到表头: {headers}")
+
+                # 从源文档的 structured_data 中直接提取表头(Excel 等数据源)
+                for doc in (source_docs or []):
+                    if doc.structured_data:
+                        sd = doc.structured_data
+                        # Excel 格式: {columns: [...], rows: [...]}
+                        if sd.get("columns"):
+                            cols = sd.get("columns", [])
+                            if isinstance(cols, list) and cols:
+                                all_headers.extend([str(c) for c in cols if str(c).strip()])
+                                logger.info(f"从 structured_data.columns 提取到表头: {cols}")
+                        # 多 sheet 格式: {sheets: {sheet_name: {columns, rows}}}
+                        if sd.get("sheets"):
+                            for sheet_name, sheet_data in sd.get("sheets", {}).items():
+                                if isinstance(sheet_data, dict) and sheet_data.get("columns"):
+                                    cols = sheet_data.get("columns", [])
+                                    if isinstance(cols, list) and cols:
+                                        all_headers.extend([str(c) for c in cols if str(c).strip()])
+                                        logger.info(f"从 sheets.{sheet_name} 提取到表头: {cols}")
+                        # Markdown/表格格式: {tables: [{headers, rows}]}
+                        if sd.get("tables") and isinstance(sd.get("tables"), list):
+                            for table in sd.get("tables", []):
+                                if isinstance(table, dict) and table.get("headers"):
+                                    headers = table.get("headers", [])
+                                    if isinstance(headers, list) and headers:
+                                        all_headers.extend([str(h) for h in headers if str(h).strip()])
+                                        logger.info(f"从 tables 提取到表头:
{headers}") + # 另一种格式: {headers, rows} + if sd.get("headers") and sd.get("rows"): + headers = sd.get("headers", []) + if isinstance(headers, list) and headers: + all_headers.extend([str(h) for h in headers if str(h).strip()]) + logger.info(f"从 headers/rows 提取到表头: {headers}") + + # 如果从表格摘要中获取到了表头,随机选取一部分 + if all_headers: + logger.info(f"共有 {len(all_headers)} 个表头可用") + # 随机选取 5-7 个表头 + num_fields = min(random.randint(5, 7), len(all_headers)) + selected_headers = random.sample(all_headers, num_fields) + logger.info(f"随机选取的表头: {selected_headers}") + + fields = [] + for idx, header in enumerate(selected_headers): + fields.append(TemplateField( + cell=self._column_to_cell(idx), + name=header, + field_type="text", + required=False, + hint="" + )) + return fields + else: + source_info = "" + + # 如果无法从表格表头获取,才调用 AI 生成 + prompt = f"""你是一个专业的数据分析助手。请分析源文档中的所有数据,生成表格表头字段。 任务:分析源文档,找出所有具体的数据指标及其分类。 diff --git a/backend/requirements.txt b/backend/requirements.txt index c1700bd..c586179 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -39,6 +39,8 @@ openpyxl==3.1.2 python-docx==0.8.11 markdown-it-py==3.0.0 chardet==5.2.0 +Pillow>=10.0.0 +pytesseract>=0.3.10 # ==================== AI / LLM ==================== httpx==0.25.2 diff --git a/frontend/src/db/backend-api.ts b/frontend/src/db/backend-api.ts index 24973be..7d43424 100644 --- a/frontend/src/db/backend-api.ts +++ b/frontend/src/db/backend-api.ts @@ -781,7 +781,8 @@ export const backendApi = { async exportFilledTemplate( templateId: string, filledData: Record, - format: 'xlsx' | 'docx' = 'xlsx' + format: 'xlsx' | 'docx' = 'xlsx', + filledFilePath?: string ): Promise { const url = `${BACKEND_BASE_URL}/templates/export`; @@ -793,6 +794,7 @@ export const backendApi = { template_id: templateId, filled_data: filledData, format, + ...(filledFilePath && { filled_file_path: filledFilePath }), }), }); @@ -964,6 +966,101 @@ export const backendApi = { throw error; } }, + + // ==================== 智能指令 API ==================== + + /** + * 智能对话(支持多轮对话的指令执行) + */ + async instructionChat( + instruction: string, + docIds?: string[], + context?: Record + ): Promise<{ + success: boolean; + intent: string; + result: Record; + message: string; + hint?: string; + }> { + const url = `${BACKEND_BASE_URL}/instruction/chat`; + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ instruction, doc_ids: docIds, context }), + }); + + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail || '对话处理失败'); + } + + return await response.json(); + } catch (error) { + console.error('对话处理失败:', error); + throw error; + } + }, + + /** + * 获取支持的指令类型列表 + */ + async getSupportedIntents(): Promise<{ + intents: Array<{ + intent: string; + name: string; + examples: string[]; + params: string[]; + }>; + }> { + const url = `${BACKEND_BASE_URL}/instruction/intents`; + + try { + const response = await fetch(url); + if (!response.ok) throw new Error('获取指令列表失败'); + return await response.json(); + } catch (error) { + console.error('获取指令列表失败:', error); + throw error; + } + }, + + /** + * 执行指令(同步模式) + */ + async executeInstruction( + instruction: string, + docIds?: string[], + context?: Record + ): Promise<{ + success: boolean; + intent: string; + result: Record; + message: string; + }> { + const url = `${BACKEND_BASE_URL}/instruction/execute`; + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, 
+ body: JSON.stringify({ instruction, doc_ids: docIds, context }), + }); + + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail || '指令执行失败'); + } + + return await response.json(); + } catch (error) { + console.error('指令执行失败:', error); + throw error; + } + }, + }; // ==================== AI 分析 API ==================== @@ -1529,61 +1626,66 @@ export const aiApi = { } }, + // ==================== 对话历史 API ==================== + /** - * 智能对话(支持多轮对话的指令执行) + * 获取对话历史 */ - async instructionChat( - instruction: string, - docIds?: string[], - context?: Record - ): Promise<{ + async getConversationHistory(conversationId: string, limit: number = 20): Promise<{ success: boolean; - intent: string; - result: Record; - message: string; - hint?: string; - }> { - const url = `${BACKEND_BASE_URL}/instruction/chat`; - - try { - const response = await fetch(url, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ instruction, doc_ids: docIds, context }), - }); - - if (!response.ok) { - const error = await response.json(); - throw new Error(error.detail || '对话处理失败'); - } - - return await response.json(); - } catch (error) { - console.error('对话处理失败:', error); - throw error; - } - }, - - /** - * 获取支持的指令类型列表 - */ - async getSupportedIntents(): Promise<{ - intents: Array<{ - intent: string; - name: string; - examples: string[]; - params: string[]; + messages: Array<{ + role: string; + content: string; + intent?: string; + created_at: string; }>; }> { - const url = `${BACKEND_BASE_URL}/instruction/intents`; + const url = `${BACKEND_BASE_URL}/conversation/${conversationId}/history?limit=${limit}`; try { const response = await fetch(url); - if (!response.ok) throw new Error('获取指令列表失败'); + if (!response.ok) throw new Error('获取对话历史失败'); return await response.json(); } catch (error) { - console.error('获取指令列表失败:', error); - throw error; + console.error('获取对话历史失败:', error); + return { success: false, messages: [] }; } }, + + /** + * 删除对话历史 + */ + async deleteConversation(conversationId: string): Promise<{ + success: boolean; + }> { + const url = `${BACKEND_BASE_URL}/conversation/${conversationId}`; + + try { + const response = await fetch(url, { method: 'DELETE' }); + if (!response.ok) throw new Error('删除对话历史失败'); + return await response.json(); + } catch (error) { + console.error('删除对话历史失败:', error); + return { success: false }; + } + }, + + /** + * 获取会话列表 + */ + async listConversations(limit: number = 50): Promise<{ + success: boolean; + conversations: Array; + }> { + const url = `${BACKEND_BASE_URL}/conversation/all?limit=${limit}`; + + try { + const response = await fetch(url); + if (!response.ok) throw new Error('获取会话列表失败'); + return await response.json(); + } catch (error) { + console.error('获取会话列表失败:', error); + return { success: false, conversations: [] }; + } + } }; diff --git a/frontend/src/pages/Dashboard.tsx b/frontend/src/pages/Dashboard.tsx index 7563304..888d81f 100644 --- a/frontend/src/pages/Dashboard.tsx +++ b/frontend/src/pages/Dashboard.tsx @@ -15,12 +15,14 @@ import { Sparkles, Database, FileSpreadsheet, - RefreshCcw + RefreshCcw, + Trash2 } from 'lucide-react'; import { backendApi } from '@/db/backend-api'; import { formatDistanceToNow } from 'date-fns'; import { zhCN } from 'date-fns/locale'; import { cn } from '@/lib/utils'; +import { toast } from 'sonner'; type DocumentItem = { doc_id: string; @@ -108,7 +110,7 @@ const Dashboard: React.FC = () => {
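
The conversation helpers added to backend-api.ts just above (getConversationHistory / deleteConversation / listConversations) only pass through to the backend; the mongodb.py side of those calls is not part of this excerpt. A minimal motor-style sketch of the read they imply follows — the collection name, field names, and connection string are assumptions, not taken from the real module:

    from motor.motor_asyncio import AsyncIOMotorClient

    client = AsyncIOMotorClient("mongodb://localhost:27017")  # assumed URI
    db = client["app_db"]                                     # assumed database name

    async def get_conversation_history(conversation_id: str, limit: int = 20) -> list:
        # Fetch the newest `limit` messages, then reverse so callers
        # receive them in chronological order.
        cursor = (
            db["conversations"]                               # assumed collection
            .find({"conversation_id": conversation_id}, {"_id": 0})
            .sort("created_at", -1)
            .limit(limit)
        )
        messages = await cursor.to_list(length=limit)
        return list(reversed(messages))
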
{[ { label: '已上传文档', value: stats.docs, icon: FileText, color: 'bg-blue-500', trend: '非结构化文档', link: '/documents' }, - { label: 'Excel 文件', value: stats.excelFiles, icon: FileSpreadsheet, color: 'bg-emerald-500', trend: '结构化数据', link: '/excel-parse' }, + { label: 'Excel 文件', value: stats.excelFiles, icon: FileSpreadsheet, color: 'bg-emerald-500', trend: '结构化数据', link: '/documents' }, { label: '填表任务', value: stats.tasks, icon: TableProperties, color: 'bg-indigo-500', trend: '待实现', link: '/form-fill' } ].map((stat, i) => ( @@ -164,8 +166,30 @@ const Dashboard: React.FC = () => { {doc.doc_type.toUpperCase()} • {formatDistanceToNow(new Date(doc.created_at), { addSuffix: true, locale: zhCN })}

-
- {doc.doc_type} +
+
+ {doc.doc_type} +
+
))} @@ -197,7 +221,7 @@ const Dashboard: React.FC = () => {
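
The recent-documents list above now imports Trash2 and toast for a per-document delete action (the button markup is garbled in this hunk), but no delete route is visible anywhere in this patch. The handler below is therefore a hypothetical sketch of the shape such an endpoint usually takes in a FastAPI service like this one — the path, stub helper, and response keys are all assumptions:

    import logging
    from fastapi import APIRouter

    logger = logging.getLogger(__name__)
    router = APIRouter(prefix="/documents", tags=["文档"])

    async def remove_document_from_store(doc_id: str) -> bool:
        """Stub standing in for the real persistence delete, which this patch does not show."""
        return True

    @router.delete("/{doc_id}")  # hypothetical route mirrored by the Dashboard's Trash2 button
    async def delete_document(doc_id: str):
        try:
            deleted = await remove_document_from_store(doc_id)
            return {"success": bool(deleted)}
        except Exception as e:
            logger.error(f"删除文档失败: {e}")
            return {"success": False, "error": str(e)}
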
{[ { title: '上传文档', desc: '支持 docx/md/txt', icon: FileText, link: '/documents', color: 'bg-blue-500' }, - { title: '解析 Excel', desc: '上传并分析数据', icon: FileSpreadsheet, link: '/excel-parse', color: 'bg-emerald-500' }, + { title: '解析 Excel', desc: '上传并分析数据', icon: FileSpreadsheet, link: '/documents', color: 'bg-emerald-500' }, { title: '智能填表', desc: '自动填写表格模板', icon: TableProperties, link: '/form-fill', color: 'bg-indigo-500' }, { title: 'AI 助手', desc: '自然语言交互', icon: MessageSquareCode, link: '/assistant', color: 'bg-amber-500' } ].map((item, i) => ( diff --git a/frontend/src/pages/Documents.tsx b/frontend/src/pages/Documents.tsx index aa666d9..79af9f5 100644 --- a/frontend/src/pages/Documents.tsx +++ b/frontend/src/pages/Documents.tsx @@ -78,6 +78,19 @@ const Documents: React.FC = () => { const [expandedSheet, setExpandedSheet] = useState(null); const [uploadExpanded, setUploadExpanded] = useState(false); + // 批量上传状态跟踪 + type FileUploadStatus = 'pending' | 'uploading' | 'processing' | 'success' | 'failed'; + interface UploadFileState { + file: File; + status: FileUploadStatus; + progress: number; + taskId?: string; + error?: string; + docId?: string; + } + const [uploadStates, setUploadStates] = useState([]); + const [batchTaskId, setBatchTaskId] = useState(null); + // AI 分析相关状态 const [analyzing, setAnalyzing] = useState(false); const [analyzingForCharts, setAnalyzingForCharts] = useState(false); @@ -211,21 +224,119 @@ const Documents: React.FC = () => { } }; - // 文件上传处理 + // 文件上传处理 - 批量上传 const onDrop = async (acceptedFiles: File[]) => { if (acceptedFiles.length === 0) return; + // 初始化上传状态 + const initialStates: UploadFileState[] = acceptedFiles.map(file => ({ + file, + status: 'pending', + progress: 0 + })); + setUploadStates(initialStates); + setUploadExpanded(true); setUploading(true); + + try { + // 使用批量上传接口 + const result = await backendApi.uploadDocuments(acceptedFiles); + + if (result.task_id) { + setBatchTaskId(result.task_id); + + // 更新所有文件状态为上传中 + setUploadStates(prev => prev.map(s => ({ ...s, status: 'uploading', progress: 30 }))); + + // 轮询任务状态 + let attempts = 0; + const maxAttempts = 150; // 最多5分钟 + + const checkBatchStatus = async () => { + while (attempts < maxAttempts) { + try { + const status = await backendApi.getTaskStatus(result.task_id); + + if (status.status === 'success' && status.result) { + // 更新每个文件的状态 + const fileResults = status.result.results || []; + setUploadStates(prev => prev.map((s, idx) => { + const fileResult = fileResults[idx]; + if (fileResult?.success) { + return { ...s, status: 'success', progress: 100, docId: fileResult.doc_id }; + } else { + return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' }; + } + })); + loadDocuments(); + return; + } else if (status.status === 'failure') { + setUploadStates(prev => prev.map(s => ({ + ...s, + status: 'failed', + error: status.error || '批量处理失败' + }))); + return; + } else { + // 处理中 - 更新进度 + const progress = status.progress || Math.min(30 + attempts * 2, 90); + setUploadStates(prev => prev.map(s => ({ + ...s, + status: s.status === 'uploading' ? 
'processing' : s.status, + progress + }))); + } + } catch (e) { + console.error('检查批量状态失败', e); + } + await new Promise(resolve => setTimeout(resolve, 2000)); + attempts++; + } + + // 超时 + setUploadStates(prev => prev.map(s => { + if (s.status !== 'success') { + return { ...s, status: 'failed', error: '处理超时' }; + } + return s; + })); + }; + + checkBatchStatus(); + } else { + // 单文件直接上传(旧逻辑作为后备) + await handleSingleFileUploads(acceptedFiles); + } + } catch (error: any) { + toast.error(error.message || '上传失败'); + setUploadStates(prev => prev.map(s => ({ + ...s, + status: 'failed', + error: error.message || '上传失败' + }))); + } finally { + setUploading(false); + } + }; + + // 单文件上传后备逻辑 + const handleSingleFileUploads = async (files: File[]) => { let successCount = 0; - let failCount = 0; const successfulFiles: File[] = []; - // 逐个上传文件 - for (const file of acceptedFiles) { + for (let i = 0; i < files.length; i++) { + const file = files[i]; const ext = file.name.split('.').pop()?.toLowerCase(); + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'uploading' } : s + )); + try { if (ext === 'xlsx' || ext === 'xls') { + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'processing', progress: 50 } : s + )); const result = await backendApi.uploadExcel(file, { parseAllSheets: parseOptions.parseAllSheets, headerRow: parseOptions.headerRow @@ -233,99 +344,60 @@ const Documents: React.FC = () => { if (result.success) { successCount++; successfulFiles.push(file); - // 第一个Excel文件设置解析结果供预览 + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'success', progress: 100 } : s + )); if (successCount === 1) { setUploadedFile(file); setParseResult(result); - if (result.metadata?.sheet_count === 1) { - setExpandedSheet(Object.keys(result.data?.sheets || {})[0] || null); - } } loadDocuments(); } else { - failCount++; - toast.error(`${file.name}: ${result.error || '解析失败'}`); - } - } else if (ext === 'md' || ext === 'markdown') { - const result = await backendApi.uploadDocument(file); - if (result.task_id) { - successCount++; - successfulFiles.push(file); - if (successCount === 1) { - setUploadedFile(file); - } - // 轮询任务状态 - let attempts = 0; - const checkStatus = async () => { - while (attempts < 30) { - try { - const status = await backendApi.getTaskStatus(result.task_id); - if (status.status === 'success') { - loadDocuments(); - return; - } else if (status.status === 'failure') { - return; - } - } catch (e) { - console.error('检查状态失败', e); - } - await new Promise(resolve => setTimeout(resolve, 2000)); - attempts++; - } - }; - checkStatus(); - } else { - failCount++; + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'failed', error: result.error || '解析失败' } : s + )); } } else { - // 其他文档使用通用上传接口 + setUploadStates(prev => prev.map((s, idx) => + idx === i ? 
{ ...s, status: 'processing', progress: 50 } : s + )); const result = await backendApi.uploadDocument(file); if (result.task_id) { - successCount++; - successfulFiles.push(file); - if (successCount === 1) { - setUploadedFile(file); - } - // 轮询任务状态 + // 等待任务完成 let attempts = 0; - const checkStatus = async () => { - while (attempts < 30) { - try { - const status = await backendApi.getTaskStatus(result.task_id); - if (status.status === 'success') { - loadDocuments(); - return; - } else if (status.status === 'failure') { - return; - } - } catch (e) { - console.error('检查状态失败', e); + while (attempts < 60) { + const status = await backendApi.getTaskStatus(result.task_id); + if (status.status === 'success') { + successCount++; + successfulFiles.push(file); + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'success', progress: 100, docId: status.result?.doc_id } : s + )); + if (successCount === 1) { + setUploadedFile(file); } - await new Promise(resolve => setTimeout(resolve, 2000)); - attempts++; + loadDocuments(); + break; + } else if (status.status === 'failure') { + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'failed', error: status.error || '处理失败' } : s + )); + break; } - }; - checkStatus(); - } else { - failCount++; + await new Promise(resolve => setTimeout(resolve, 2000)); + attempts++; + } } } } catch (error: any) { - failCount++; - toast.error(`${file.name}: ${error.message || '上传失败'}`); + setUploadStates(prev => prev.map((s, idx) => + idx === i ? { ...s, status: 'failed', error: error.message || '上传失败' } : s + )); } } - setUploading(false); - loadDocuments(); - if (successCount > 0) { - toast.success(`成功上传 ${successCount} 个文件`); setUploadedFiles(prev => [...prev, ...successfulFiles]); - setUploadExpanded(true); - } - if (failCount > 0) { - toast.error(`${failCount} 个文件上传失败`); } }; @@ -699,7 +771,110 @@ const Documents: React.FC = () => { {uploadPanelOpen && ( - {uploadedFiles.length > 0 || uploadedFile ? ( + {/* 优先显示正在上传的状态 */} + {uploadStates.length > 0 && ( +
+ {/* 上传状态头部 */} +
setUploadExpanded(!uploadExpanded)} + > +
+
+ {uploading ? : } +
+
+

+ {uploading ? '正在上传' : '上传完成'} {uploadStates.length} 个文件 +

+

+ {uploading ? '上传中,请稍候...' : uploadStates.filter(s => s.status === 'failed').length > 0 ? '部分失败' : '点击查看详情'} +

+
+
+
+ {!uploading && ( + + )} + {uploadExpanded ? : } +
+
+ + {/* 上传进度列表(展开时显示) */} + {uploadExpanded && ( +
+ {uploadStates.map((state, index) => ( +
+
+ {state.status === 'pending' && } + {state.status === 'uploading' && } + {state.status === 'processing' && } + {state.status === 'success' && } + {state.status === 'failed' && } +
+
+

{state.file.name}

+
+ {state.status === 'pending' &&

等待上传...

} + {state.status === 'uploading' &&

上传中...

} + {state.status === 'processing' &&

处理中...

} + {state.status === 'failed' && state.error && ( +

{state.error}

+ )} + {state.status === 'success' && ( +

已完成

+ )} +
+ {/* 进度条 */} + {(state.status === 'uploading' || state.status === 'processing') && ( +
+
+
+ )} +
+ {state.status === 'success' && ( + + )} + {state.status === 'failed' && ( + + )} +
+ ))} +
+ )} +
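
The checkBatchStatus loop above settles on one polling contract for every task in this page: hit getTaskStatus every 2 seconds, give up after 150 attempts (about 5 minutes), and treat 'success'/'failure' as terminal. The same loop as a backend-agnostic Python sketch — get_task_status is an injected coroutine assumed to return the {status, result, progress, error} shape used here:

    import asyncio
    from typing import Any, Awaitable, Callable, Dict

    async def poll_task(
        task_id: str,
        get_task_status: Callable[[str], Awaitable[Dict[str, Any]]],
        interval: float = 2.0,
        max_attempts: int = 150,  # 150 x 2 s = ~5 minutes, matching the frontend
    ) -> Dict[str, Any]:
        for _ in range(max_attempts):
            status = await get_task_status(task_id)
            if status.get("status") == "success":
                return status.get("result", {})
            if status.get("status") == "failure":
                raise RuntimeError(status.get("error", "批量处理失败"))
            await asyncio.sleep(interval)  # still pending/processing
        raise TimeoutError(f"task {task_id} did not finish within {max_attempts * interval:.0f}s")
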
+ )} + + {/* 已上传文件列表(没有正在上传时显示) */} + {uploadStates.length === 0 && (uploadedFiles.length > 0 || uploadedFile) ? (
{/* 文件列表头部 */}
{ {/* 展开的文件列表 */} {uploadExpanded && (
+ {/* 显示已上传文件列表 */} + {(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).filter(Boolean).map((file, index) => ( +
+
+ {isExcelFile(file?.name || '') ? : } +
+
+

{file?.name}

+

{formatFileSize(file?.size || 0)}

+
+ +
+ ))} + + {/* 继续添加按钮 */} +
e.stopPropagation()} + > + + + 继续添加更多文件 +
+
+ )} +
+ ) : (uploadedFiles.length > 0 || uploadedFile) ? ( +
+ {/* 文件列表头部 */} +
setUploadExpanded(!uploadExpanded)} + > +
+
+ +
+
+

+ 已上传 {(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).length} 个文件 +

+

+ {uploadExpanded ? '点击收起' : '点击展开查看'} +

+
+
+
+ + {uploadExpanded ? : } +
+
+ + {/* 展开的文件列表 */} + {uploadExpanded && ( +
+ {/* 显示已上传文件列表 */} {(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).filter(Boolean).map((file, index) => (
{ const [input, setInput] = useState(''); const [loading, setLoading] = useState(false); const [currentDocIds, setCurrentDocIds] = useState([]); + const [conversationId, setConversationId] = useState(''); const scrollAreaRef = useRef(null); + // 初始化会话ID + useEffect(() => { + const storedId = localStorage.getItem('chat_conversation_id'); + if (storedId) { + setConversationId(storedId); + } else { + const newId = `conv_${Date.now()}_${Math.random().toString(36).substring(7)}`; + setConversationId(newId); + localStorage.setItem('chat_conversation_id', newId); + } + }, []); + useEffect(() => { // Initial welcome message if (messages.length === 0) { @@ -119,7 +116,8 @@ const InstructionChat: React.FC = () => { // 使用真实的智能指令 API const response = await backendApi.instructionChat( input.trim(), - currentDocIds.length > 0 ? currentDocIds : undefined + currentDocIds.length > 0 ? currentDocIds : undefined, + { conversation_id: conversationId } ); // 根据意图类型生成友好响应 @@ -135,11 +133,12 @@ const InstructionChat: React.FC = () => { responseContent = `✅ 已提取到 ${keys.length} 个字段的数据:\n\n`; for (const [key, value] of Object.entries(extracted)) { const values = Array.isArray(value) ? value : [value]; - responseContent += `**${key}**: ${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}\n`; + const displayValues = values.length > 10 ? values.slice(0, 10).join(', ') + ` ...(共${values.length}条)` : values.join(', '); + responseContent += `**${key}**: ${displayValues}\n`; } - responseContent += `\n💡 您可以将这些数据填入表格。`; + responseContent += `\n💡 可直接使用以上数据,或说"填入表格"继续填表操作。`; } else { - responseContent = '未能从文档中提取到相关数据。请尝试更明确的字段名称。'; + responseContent = resultData?.message || '未能从文档中提取到相关数据。请尝试更明确的字段名称。'; } break; @@ -151,24 +150,24 @@ const InstructionChat: React.FC = () => { responseContent = `✅ 填表完成!成功填写 ${filledKeys.length} 个字段:\n\n`; for (const [key, value] of Object.entries(filled)) { const values = Array.isArray(value) ? value : [value]; - responseContent += `**${key}**: ${values.slice(0, 3).join(', ')}\n`; + const displayValues = values.length > 10 ? values.slice(0, 10).join(', ') + ` ...(共${values.length}条)` : values.join(', '); + responseContent += `**${key}**: ${displayValues}\n`; } responseContent += `\n📋 请到【智能填表】页面查看或导出结果。`; } else { - responseContent = '填表未能提取到数据。请检查模板表头和数据源内容。'; + responseContent = resultData?.message || '填表未能提取到数据。请检查模板表头和数据源内容。'; } break; case 'summarize': // 摘要结果 - const summaries = resultData?.summaries || []; - if (summaries.length > 0) { - responseContent = `📄 找到 ${summaries.length} 个文档的摘要:\n\n`; - summaries.forEach((s: any, idx: number) => { - responseContent += `**${idx + 1}. 
${s.filename}**\n${s.content_preview}\n\n`; - }); + if (resultData?.action_needed === 'provide_document' || resultData?.action_needed === 'upload_document') { + responseContent = `📋 ${resultData.message}\n\n${resultData.suggestion || ''}`; + } else if (resultData?.ai_summary) { + // AI 生成的摘要 + responseContent = `📄 **${resultData.filename}** 摘要分析:\n\n${resultData.ai_summary}`; } else { - responseContent = '未能生成摘要。请确保已上传文档。'; + responseContent = resultData?.message || '未能生成摘要。请确保已上传文档。'; } break; @@ -176,8 +175,10 @@ const InstructionChat: React.FC = () => { // 问答结果 if (resultData?.answer) { responseContent = `**问题**: ${resultData.question}\n\n**答案**: ${resultData.answer}`; + } else if (resultData?.context_preview) { + responseContent = `**问题**: ${resultData.question}\n\n**相关上下文**:\n${resultData.context_preview}`; } else { - responseContent = resultData?.message || '我找到了相关信息,请查看上文。'; + responseContent = resultData?.message || '请先上传文档,我才能回答您的问题。'; } break; @@ -207,8 +208,35 @@ const InstructionChat: React.FC = () => { } break; + case 'edit': + // 文档编辑结果 + if (resultData?.edited_content) { + responseContent = `✏️ **${resultData.original_filename}** 编辑完成:\n\n${resultData.edited_content.substring(0, 500)}${resultData.edited_content.length > 500 ? '\n\n...(内容已截断)' : ''}`; + } else { + responseContent = resultData?.message || '编辑完成。'; + } + break; + + case 'transform': + // 格式转换结果 + if (resultData?.excel_data) { + responseContent = `🔄 格式转换完成!\n\n已转换为 **Excel** 格式,共 **${resultData.excel_data.length}** 行数据。\n\n${resultData.message || ''}`; + } else if (resultData?.content) { + responseContent = `🔄 格式转换完成!\n\n目标格式: **${resultData.target_format?.toUpperCase()}**\n\n${resultData.message || ''}`; + } else { + responseContent = resultData?.message || '格式转换完成。'; + } + break; + case 'unknown': - responseContent = `我理解您想要: "${input.trim()}"\n\n但我目前无法完成此操作。您可以尝试:\n\n1. **提取数据**: "提取医院数量和床位数"\n2. **填表**: "根据这些数据填表"\n3. **总结**: "总结这份文档"\n4. **问答**: "文档里说了什么?"\n5. **搜索**: "搜索相关内容"`; + // 检查是否需要用户上传文档 + if (resultData?.suggestion) { + responseContent = resultData.suggestion; + } else if (resultData?.message && resultData.message !== '无法理解该指令,请尝试更明确的描述') { + responseContent = resultData.message; + } else { + responseContent = `我理解您想要: "${input.trim()}"\n\n请尝试以下操作:\n\n1. **提取数据**: "提取医院数量和床位数"\n2. **填表**: "根据这些数据填表"\n3. **总结**: "总结这份文档"\n4. **问答**: "文档里说了什么?"\n5. **搜索**: "搜索相关内容"`; + } break; default: @@ -299,9 +327,11 @@ const InstructionChat: React.FC = () => { ? "bg-primary text-primary-foreground shadow-xl shadow-primary/20 rounded-tr-none" : "bg-white border border-border/50 shadow-md rounded-tl-none" )}> -

- {m.content} -

+ {m.role === 'assistant' ? ( + + ) : ( +

{m.content}

+ )} { if (!templateFile || !filledResult) return; try { + const ext = templateFile.name.split('.').pop()?.toLowerCase(); + const exportFormat = (ext === 'docx') ? 'docx' : 'xlsx'; + // 对于 Word 模板,如果已有填写后的文件(已填入表格单元格),传递其路径以便直接下载 + const filledFilePath = (ext === 'docx' && filledResult.filled_file_path) + ? filledResult.filled_file_path + : undefined; const blob = await backendApi.exportFilledTemplate( templateId || 'temp', filledResult.filled_data || {}, - 'xlsx' + exportFormat, + filledFilePath ); + const ext_match = templateFile.name.match(/\.([^.])+$/); + const baseName = ext_match ? templateFile.name.replace(ext_match[0], '') : templateFile.name; + const downloadName = `filled_${baseName}.${exportFormat}`; const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; - a.download = `filled_${templateFile.name}`; + a.download = downloadName; a.click(); URL.revokeObjectURL(url); toast.success('导出成功'); @@ -546,7 +556,7 @@ const TemplateFill: React.FC = () => {
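
handleExport above now infers the export format from the template's extension and forwards filled_file_path whenever _fill_docx already produced a finished Word file. For that to work, /templates/export has to short-circuit to the pre-filled file; the endpoint body is not in this hunk, so the FastAPI sketch below is an assumption about its shape rather than the actual implementation:

    import os
    from typing import Optional
    from fastapi import APIRouter, HTTPException
    from fastapi.responses import FileResponse
    from pydantic import BaseModel

    router = APIRouter(prefix="/templates", tags=["模板"])

    class ExportRequest(BaseModel):
        template_id: str
        filled_data: dict
        format: str = "xlsx"
        filled_file_path: Optional[str] = None  # mirrors the TS payload above

    @router.post("/export")
    async def export_filled_template(req: ExportRequest):
        # Pre-filled Word file from _fill_docx: stream it back untouched.
        if req.filled_file_path and os.path.exists(req.filled_file_path):
            return FileResponse(req.filled_file_path, filename=f"filled_{req.template_id}.{req.format}")
        # Otherwise an xlsx/docx would be built from filled_data (omitted in this sketch).
        raise HTTPException(status_code=400, detail="无可导出的内容(示意代码)")
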

AI 正在智能分析并填表

- 系统正在从 {sourceFiles.length || sourceFilePaths.length} 份文档中检索相关信息... + 系统正在从 {sourceFiles.length || sourceFilePaths.length || sourceDocIds.length || 0} 份文档中检索相关信息...

@@ -562,7 +572,7 @@ const TemplateFill: React.FC = () => { 填表完成 - 系统已根据 {sourceFiles.length || sourceFilePaths.length} 份文档自动完成表格填写 + 系统已根据 {filledResult.source_doc_count || sourceFiles.length || sourceFilePaths.length || sourceDocIds.length} 份文档自动完成表格填写
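
One last reference point: the numeric roll-up that _polish_word_filled_data performs before its LLM pass (per-field sum/mean/max/min plus keyword-based field typing) reduces to the standalone sketch below. It is simplified from the template_fill_service.py hunk above, with the keyword handling abbreviated, and is not a drop-in replacement:

    import re

    def summarize_numeric_field(field_name: str, values: list) -> str:
        """Condense a multi-row field the way _polish_word_filled_data does before the LLM step."""
        nums = []
        for v in values:
            s = re.sub(r'[^\d.\-]', '', str(v))  # strip units/separators: "800个" -> "800", "1,200" -> "1200"
            try:
                if s and s not in ('-', '.'):
                    nums.append(float(s))
            except ValueError:
                pass
        if len(nums) >= 2 and len(nums) == len(values):
            total, avg = sum(nums), sum(nums) / len(nums)
            return (f"【{field_name}】(共 {len(values)} 条): 合计 {total:.2f}, 平均 {avg:.2f}, "
                    f"最大 {max(nums):.2f}, 最小 {min(nums):.2f}")
        # Non-numeric or mixed fields fall back to a short sample, as in the original.
        return f"【{field_name}】: {'、'.join(str(v) for v in values[:5])}"

    print(summarize_numeric_field("销售金额", ["1,200", "800个", "2,000"]))
    # 【销售金额】(共 3 条): 合计 4000.00, 平均 1333.33, 最大 2000.00, 最小 800.00
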