From e5711b3f05b318da0930c2d690a733a3b4939ba0 Mon Sep 17 00:00:00 2001
From: KiriAky 107 <KiriAky107@qq.com>
Date: Thu, 9 Apr 2026 20:35:41 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E8=81=94=E5=90=88?=
 =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=A8=A1=E6=9D=BF=E5=92=8C=E6=BA=90=E6=96=87?=
 =?UTF-8?q?=E6=A1=A3=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

新增 upload-joint 接口支持模板文件和源文档的一键式联合上传处理，
包括异步文档解析和MongoDB存储功能；前端新增对应API调用方法和UI界
面，优化表格填写流程，支持拖拽上传和实时预览功能。
---
 backend/app/api/endpoints/templates.py | 171 ++++++-
 frontend/src/db/backend-api.ts         |  40 ++
 frontend/src/pages/TemplateFill.tsx    | 615 ++++++++++++++-----------
 logs/rag_disable_note.txt              |  59 +++
 4 files changed, 604 insertions(+), 281 deletions(-)
 create mode 100644 logs/rag_disable_note.txt

diff --git a/backend/app/api/endpoints/templates.py b/backend/app/api/endpoints/templates.py
index 8aaa296..8d2ebee 100644
--- a/backend/app/api/endpoints/templates.py
+++ b/backend/app/api/endpoints/templates.py
@@ -5,15 +5,18 @@
 """
 import io
 import logging
+import uuid
 from typing import List, Optional
 
-from fastapi import APIRouter, File, HTTPException, Query, UploadFile
+from fastapi import APIRouter, File, HTTPException, Query, UploadFile, BackgroundTasks
 from fastapi.responses import StreamingResponse
 import pandas as pd
 from pydantic import BaseModel
 
 from app.services.template_fill_service import template_fill_service, TemplateField
 from app.services.file_service import file_service
+from app.core.database import mongodb
+from app.core.document_parser import ParserFactory
 
 logger = logging.getLogger(__name__)
 
@@ -109,6 +112,172 @@ async def upload_template(
         raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}")
 
 
+@router.post("/upload-joint")
+async def upload_joint_template(
+    background_tasks: BackgroundTasks,
+    template_file: UploadFile = File(..., description="模板文件"),
+    source_files: List[UploadFile] = File(..., description="源文档文件列表"),
+):
+    """
+    联合上传模板和源文档，一键完成解析和存储
+
+    1. 保存模板文件并提取字段
+    2. 异步处理源文档（解析+存MongoDB）
+    3. 返回模板信息和源文档ID列表
+
+    Args:
+        template_file: 模板文件 (xlsx/xls/docx)
+        source_files: 源文档列表 (docx/xlsx/md/txt)
+
+    Returns:
+        模板ID、字段列表、源文档ID列表
+    """
+    if not template_file.filename:
+        raise HTTPException(status_code=400, detail="模板文件名为空")
+
+    # 验证模板格式
+    template_ext = template_file.filename.split('.')[-1].lower()
+    if template_ext not in ['xlsx', 'xls', 'docx']:
+        raise HTTPException(
+            status_code=400,
+            detail=f"不支持的模板格式: {template_ext}，仅支持 xlsx/xls/docx"
+        )
+
+    # 验证源文档格式
+    valid_exts = ['docx', 'xlsx', 'xls', 'md', 'txt']
+    for sf in source_files:
+        if sf.filename:
+            sf_ext = sf.filename.split('.')[-1].lower()
+            if sf_ext not in valid_exts:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"不支持的源文档格式: {sf_ext}，仅支持 docx/xlsx/xls/md/txt"
+                )
+
+    try:
+        # 1. 保存模板文件并提取字段
+        template_content = await template_file.read()
+        template_path = file_service.save_uploaded_file(
+            template_content,
+            template_file.filename,
+            subfolder="templates"
+        )
+        template_fields = await template_fill_service.get_template_fields_from_file(
+            template_path,
+            template_ext
+        )
+
+        # 2. 处理源文档 - 保存文件
+        source_file_info = []
+        for sf in source_files:
+            if sf.filename:
+                sf_content = await sf.read()
+                sf_ext = sf.filename.split('.')[-1].lower()
+                sf_path = file_service.save_uploaded_file(
+                    sf_content,
+                    sf.filename,
+                    subfolder=sf_ext
+                )
+                source_file_info.append({
+                    "path": sf_path,
+                    "filename": sf.filename,
+                    "ext": sf_ext
+                })
+
+        # 3. 异步处理源文档到MongoDB
+        task_id = str(uuid.uuid4())
+        if source_file_info:
+            background_tasks.add_task(
+                process_source_documents,
+                task_id=task_id,
+                files=source_file_info
+            )
+
+        logger.info(f"联合上传完成: 模板={template_file.filename}, 源文档={len(source_file_info)}个")
+
+        return {
+            "success": True,
+            "template_id": template_path,
+            "filename": template_file.filename,
+            "file_type": template_ext,
+            "fields": [
+                {
+                    "cell": f.cell,
+                    "name": f.name,
+                    "field_type": f.field_type,
+                    "required": f.required,
+                    "hint": f.hint
+                }
+                for f in template_fields
+            ],
+            "field_count": len(template_fields),
+            "source_file_paths": [f["path"] for f in source_file_info],
+            "source_filenames": [f["filename"] for f in source_file_info],
+            "task_id": task_id
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"联合上传失败: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"联合上传失败: {str(e)}")
+
+
+async def process_source_documents(task_id: str, files: List[dict]):
+    """异步处理源文档，存入MongoDB"""
+    from app.core.database import redis_db
+
+    try:
+        await redis_db.set_task_status(
+            task_id, status="processing",
+            meta={"progress": 0, "message": "开始处理源文档"}
+        )
+
+        doc_ids = []
+        for i, file_info in enumerate(files):
+            try:
+                parser = ParserFactory.get_parser(file_info["path"])
+                result = parser.parse(file_info["path"])
+
+                if result.success:
+                    doc_id = await mongodb.insert_document(
+                        doc_type=file_info["ext"],
+                        content=result.data.get("content", ""),
+                        metadata={
+                            **result.metadata,
+                            "original_filename": file_info["filename"],
+                            "file_path": file_info["path"]
+                        },
+                        structured_data=result.data.get("structured_data")
+                    )
+                    doc_ids.append(doc_id)
+                    logger.info(f"源文档处理成功: {file_info['filename']}, doc_id: {doc_id}")
+                else:
+                    logger.error(f"源文档解析失败: {file_info['filename']}, error: {result.error}")
+
+            except Exception as e:
+                logger.error(f"源文档处理异常: {file_info['filename']}, error: {str(e)}")
+
+            progress = int((i + 1) / len(files) * 100)
+            await redis_db.set_task_status(
+                task_id, status="processing",
+                meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"}
+            )
+
+        await redis_db.set_task_status(
+            task_id, status="success",
+            meta={"progress": 100, "message": "源文档处理完成", "doc_ids": doc_ids}
+        )
+        logger.info(f"所有源文档处理完成: {len(doc_ids)}个")
+
+    except Exception as e:
+        logger.error(f"源文档批量处理失败: {str(e)}")
+        await redis_db.set_task_status(
+            task_id, status="failure",
+            meta={"error": str(e)}
+        )
+
+
 @router.post("/fields")
 async def extract_template_fields(
     template_id: str = Query(..., description="模板ID/文件路径"),
diff --git a/frontend/src/db/backend-api.ts b/frontend/src/db/backend-api.ts
index 998fe62..d26e1a8 100644
--- a/frontend/src/db/backend-api.ts
+++ b/frontend/src/db/backend-api.ts
@@ -656,6 +656,46 @@ export const backendApi = {
     }
   },
 
+  /**
+   * 联合上传模板和源文档
+   */
+  async uploadTemplateAndSources(
+    templateFile: File,
+    sourceFiles: File[]
+  ): Promise<{
+    success: boolean;
+    template_id: string;
+    filename: string;
+    file_type: string;
+    fields: TemplateField[];
+    field_count: number;
+    source_file_paths: string[];
+    source_filenames: string[];
+    task_id: string;
+  }> {
+    const formData = new FormData();
+    formData.append('template_file', templateFile);
+    sourceFiles.forEach(file => formData.append('source_files', file));
+
+    const url = `${BACKEND_BASE_URL}/templates/upload-joint`;
+
+    try {
+      const response = await fetch(url, {
+        method: 'POST',
+        body: formData,
+      });
+
+      if (!response.ok) {
+        const error = await response.json();
+        throw new Error(error.detail || '联合上传失败');
+      }
+      return await response.json();
+    } catch (error) {
+      console.error('联合上传失败:', error);
+      throw error;
+    }
+  },
+
   /**
    * 执行表格填写
    */
diff --git a/frontend/src/pages/TemplateFill.tsx b/frontend/src/pages/TemplateFill.tsx
index 573d3f7..1fa7c99 100644
--- a/frontend/src/pages/TemplateFill.tsx
+++ b/frontend/src/pages/TemplateFill.tsx
@@ -1,4 +1,4 @@
-import React, { useState, useEffect } from 'react';
+import React, { useState, useEffect, useCallback } from 'react';
 import { useDropzone } from 'react-dropzone';
 import {
   TableProperties,
@@ -14,7 +14,11 @@ import {
   RefreshCcw,
   ChevronDown,
   ChevronUp,
-  Loader2
+  Loader2,
+  Files,
+  Trash2,
+  Eye,
+  File
 } from 'lucide-react';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
@@ -26,6 +30,13 @@ import { format } from 'date-fns';
 import { toast } from 'sonner';
 import { cn } from '@/lib/utils';
 import { Skeleton } from '@/components/ui/skeleton';
+import {
+  Dialog,
+  DialogContent,
+  DialogHeader,
+  DialogTitle,
+} from "@/components/ui/dialog";
+import { ScrollArea } from '@/components/ui/scroll-area';
 
 type DocumentItem = {
   doc_id: string;
@@ -41,6 +52,11 @@ type DocumentItem = {
   };
 };
 
+type SourceFile = {
+  file: File;
+  preview?: string;
+};
+
 type TemplateField = {
   cell: string;
   name: string;
@@ -50,64 +66,25 @@ type TemplateField = {
 };
 
 const TemplateFill: React.FC = () => {
-  const [step, setStep] = useState<'upload-template' | 'select-source' | 'preview' | 'filling'>('upload-template');
+  const [step, setStep] = useState<'upload' | 'filling' | 'preview'>('upload');
   const [templateFile, setTemplateFile] = useState<File | null>(null);
   const [templateFields, setTemplateFields] = useState<TemplateField[]>([]);
-  const [sourceDocs, setSourceDocs] = useState<DocumentItem[]>([]);
-  const [selectedDocs, setSelectedDocs] = useState<string[]>([]);
+  const [sourceFiles, setSourceFiles] = useState<SourceFile[]>([]);
+  const [sourceFilePaths, setSourceFilePaths] = useState<string[]>([]);
+  const [templateId, setTemplateId] = useState<string>('');
   const [loading, setLoading] = useState(false);
   const [filling, setFilling] = useState(false);
   const [filledResult, setFilledResult] = useState<any>(null);
+  const [previewDoc, setPreviewDoc] = useState<{ name: string; content: string } | null>(null);
+  const [previewOpen, setPreviewOpen] = useState(false);
 
-  // Load available source documents
-  useEffect(() => {
-    loadSourceDocuments();
-  }, []);
-
-  const loadSourceDocuments = async () => {
-    setLoading(true);
-    try {
-      const result = await backendApi.getDocuments(undefined, 100);
-      if (result.success) {
-        // Filter to only non-Excel documents that can be used as data sources
-        const docs = (result.documents || []).filter((d: DocumentItem) =>
-          ['docx', 'md', 'txt', 'xlsx'].includes(d.doc_type)
-        );
-        setSourceDocs(docs);
-      }
-    } catch (err: any) {
-      toast.error('加载数据源失败');
-    } finally {
-      setLoading(false);
-    }
-  };
-
-  const onTemplateDrop = async (acceptedFiles: File[]) => {
+  // 模板拖拽
+  const onTemplateDrop = useCallback((acceptedFiles: File[]) => {
     const file = acceptedFiles[0];
-    if (!file) return;
-
-    const ext = file.name.split('.').pop()?.toLowerCase();
-    if (!['xlsx', 'xls', 'docx'].includes(ext || '')) {
-      toast.error('仅支持 xlsx/xls/docx 格式的模板文件');
-      return;
+    if (file) {
+      setTemplateFile(file);
     }
-
-    setTemplateFile(file);
-    setLoading(true);
-
-    try {
-      const result = await backendApi.uploadTemplate(file);
-      if (result.success) {
-        setTemplateFields(result.fields || []);
-        setStep('select-source');
-        toast.success('模板上传成功');
-      }
-    } catch (err: any) {
-      toast.error('模板上传失败: ' + (err.message || '未知错误'));
-    } finally {
-      setLoading(false);
-    }
-  };
+  }, []);
 
   const { getRootProps: getTemplateProps, getInputProps: getTemplateInputProps, isDragActive: isTemplateDragActive } = useDropzone({
     onDrop: onTemplateDrop,
@@ -116,33 +93,108 @@ const TemplateFill: React.FC = () => {
       'application/vnd.ms-excel': ['.xls'],
       'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx']
     },
-    maxFiles: 1
+    maxFiles: 1,
+    multiple: false
   });
 
-  const handleFillTemplate = async () => {
-    if (!templateFile || selectedDocs.length === 0) {
+  // 源文档拖拽
+  const onSourceDrop = useCallback((acceptedFiles: File[]) => {
+    const newFiles = acceptedFiles.map(f => ({
+      file: f,
+      preview: f.type.startsWith('text/') || f.name.endsWith('.md') ? undefined : undefined
+    }));
+    setSourceFiles(prev => [...prev, ...newFiles]);
+  }, []);
+
+  const { getRootProps: getSourceProps, getInputProps: getSourceInputProps, isDragActive: isSourceDragActive } = useDropzone({
+    onDrop: onSourceDrop,
+    accept: {
+      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
+      'application/vnd.ms-excel': ['.xls'],
+      'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
+      'text/plain': ['.txt'],
+      'text/markdown': ['.md']
+    },
+    multiple: true
+  });
+
+  const removeSourceFile = (index: number) => {
+    setSourceFiles(prev => prev.filter((_, i) => i !== index));
+  };
+
+  const handleJointUploadAndFill = async () => {
+    if (!templateFile) {
+      toast.error('请先上传模板文件');
+      return;
+    }
+
+    setLoading(true);
+
+    try {
+      // 使用联合上传API
+      const result = await backendApi.uploadTemplateAndSources(
+        templateFile,
+        sourceFiles.map(sf => sf.file)
+      );
+
+      if (result.success) {
+        setTemplateFields(result.fields || []);
+        setTemplateId(result.template_id);
+        setSourceFilePaths(result.source_file_paths || []);
+        toast.success('文档上传成功，开始智能填表');
+        setStep('filling');
+
+        // 自动开始填表
+        const fillResult = await backendApi.fillTemplate(
+          result.template_id,
+          result.fields || [],
+          [],  // 使用 source_file_paths 而非 source_doc_ids
+          result.source_file_paths || [],
+          '请从以下文档中提取相关信息填写表格'
+        );
+
+        setFilledResult(fillResult);
+        setStep('preview');
+        toast.success('表格填写完成');
+      }
+    } catch (err: any) {
+      toast.error('处理失败: ' + (err.message || '未知错误'));
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  // 传统方式：先上传源文档再填表（兼容已有文档库的场景）
+  const handleFillWithExistingDocs = async (selectedDocIds: string[]) => {
+    if (!templateFile || selectedDocIds.length === 0) {
       toast.error('请选择数据源文档');
       return;
     }
 
-    setFilling(true);
+    setLoading(true);
     setStep('filling');
 
     try {
-      // 调用后端填表接口，传递选中的文档ID
-      const result = await backendApi.fillTemplate(
-        'temp-template-id',
-        templateFields,
-        selectedDocs  // 传递源文档ID列表
+      // 先上传模板获取template_id
+      const uploadResult = await backendApi.uploadTemplate(templateFile);
+
+      const fillResult = await backendApi.fillTemplate(
+        uploadResult.template_id,
+        uploadResult.fields || [],
+        selectedDocIds,
+        [],
+        '请从以下文档中提取相关信息填写表格'
       );
-      setFilledResult(result);
+
+      setTemplateFields(uploadResult.fields || []);
+      setTemplateId(uploadResult.template_id);
+      setFilledResult(fillResult);
       setStep('preview');
       toast.success('表格填写完成');
     } catch (err: any) {
       toast.error('填表失败: ' + (err.message || '未知错误'));
-      setStep('select-source');
     } finally {
-      setFilling(false);
+      setLoading(false);
     }
   };
 
@@ -150,7 +202,11 @@ const TemplateFill: React.FC = () => {
     if (!templateFile || !filledResult) return;
 
     try {
-      const blob = await backendApi.exportFilledTemplate('temp', filledResult.filled_data || {}, 'xlsx');
+      const blob = await backendApi.exportFilledTemplate(
+        templateId || 'temp',
+        filledResult.filled_data || {},
+        'xlsx'
+      );
       const url = URL.createObjectURL(blob);
       const a = document.createElement('a');
       a.href = url;
@@ -164,13 +220,29 @@ const TemplateFill: React.FC = () => {
   };
 
   const resetFlow = () => {
-    setStep('upload-template');
+    setStep('upload');
     setTemplateFile(null);
     setTemplateFields([]);
-    setSelectedDocs([]);
+    setSourceFiles([]);
+    setSourceFilePaths([]);
+    setTemplateId('');
     setFilledResult(null);
   };
 
+  const getFileIcon = (filename: string) => {
+    const ext = filename.split('.').pop()?.toLowerCase();
+    if (['xlsx', 'xls'].includes(ext || '')) {
+      return <FileSpreadsheet size={20} className="text-emerald-500" />;
+    }
+    if (ext === 'docx') {
+      return <FileText size={20} className="text-blue-500" />;
+    }
+    if (['md', 'txt'].includes(ext || '')) {
+      return <FileText size={20} className="text-orange-500" />;
+    }
+    return <File size={20} className="text-gray-500" />;
+  };
+
   return (
     <div className="space-y-8 pb-10">
       <section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
@@ -180,7 +252,7 @@ const TemplateFill: React.FC = () => {
             根据您的表格模板，自动聚合多源文档信息进行精准填充
           </p>
         </div>
-        {step !== 'upload-template' && (
+        {step !== 'upload' && (
           <Button variant="outline" className="rounded-xl gap-2" onClick={resetFlow}>
             <RefreshCcw size={18} />
             <span>重新开始</span>
@@ -188,200 +260,129 @@ const TemplateFill: React.FC = () => {
         )}
       </section>
 
-      {/* Progress Steps */}
-      <div className="flex items-center justify-center gap-4">
-        {['上传模板', '选择数据源', '填写预览'].map((label, idx) => {
-          const stepIndex = ['upload-template', 'select-source', 'preview'].indexOf(step);
-          const isActive = idx <= stepIndex;
-          const isCurrent = idx === stepIndex;
-
-          return (
-            <React.Fragment key={idx}>
-              <div className={cn(
-                "flex items-center gap-2 px-4 py-2 rounded-full transition-all",
-                isActive ? "bg-primary text-primary-foreground" : "bg-muted text-muted-foreground"
-              )}>
-                <div className={cn(
-                  "w-6 h-6 rounded-full flex items-center justify-center text-xs font-bold",
-                  isCurrent ? "bg-white/20" : ""
-                )}>
-                  {idx + 1}
-                </div>
-                <span className="text-sm font-medium">{label}</span>
-              </div>
-              {idx < 2 && (
-                <div className={cn(
-                  "w-12 h-0.5",
-                  idx < stepIndex ? "bg-primary" : "bg-muted"
-                )} />
-              )}
-            </React.Fragment>
-          );
-        })}
-      </div>
-
-      {/* Step 1: Upload Template */}
-      {step === 'upload-template' && (
-        <div
-          {...getTemplateProps()}
-          className={cn(
-            "border-2 border-dashed rounded-3xl p-16 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group",
-            isTemplateDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5"
-          )}
-        >
-          <input {...getTemplateInputProps()} />
-          <div className="w-20 h-20 rounded-2xl bg-primary/10 text-primary flex items-center justify-center mb-6 group-hover:scale-110 transition-transform">
-            {loading ? <Loader2 className="animate-spin" size={40} /> : <Upload size={40} />}
-          </div>
-          <div className="space-y-2 max-w-md">
-            <p className="text-xl font-bold tracking-tight">
-              {isTemplateDragActive ? '释放以开始上传' : '点击或拖拽上传表格模板'}
-            </p>
-            <p className="text-sm text-muted-foreground">
-              支持 Excel (.xlsx, .xls) 或 Word (.docx) 格式的表格模板
-            </p>
-          </div>
-          <div className="mt-6 flex gap-3">
-            <Badge variant="outline" className="bg-emerald-500/10 text-emerald-600 border-emerald-200">
-              <FileSpreadsheet size={14} className="mr-1" /> Excel 模板
-            </Badge>
-            <Badge variant="outline" className="bg-blue-500/10 text-blue-600 border-blue-200">
-              <FileText size={14} className="mr-1" /> Word 模板
-            </Badge>
-          </div>
-        </div>
-      )}
-
-      {/* Step 2: Select Source Documents */}
-      {step === 'select-source' && (
-        <div className="space-y-6">
-          {/* Template Info */}
+      {/* Step 1: Upload - Joint Upload of Template + Source Docs */}
+      {step === 'upload' && (
+        <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
+          {/* Template Upload */}
           <Card className="border-none shadow-md">
             <CardHeader className="pb-4">
               <CardTitle className="text-lg flex items-center gap-2">
                 <FileSpreadsheet className="text-primary" size={20} />
-                已上传模板
-              </CardTitle>
-            </CardHeader>
-            <CardContent>
-              <div className="flex items-center gap-4">
-                <div className="w-12 h-12 rounded-xl bg-emerald-500/10 text-emerald-500 flex items-center justify-center">
-                  <FileSpreadsheet size={24} />
-                </div>
-                <div className="flex-1">
-                  <p className="font-bold">{templateFile?.name}</p>
-                  <p className="text-sm text-muted-foreground">
-                    {templateFields.length} 个字段待填写
-                  </p>
-                </div>
-                <Button variant="ghost" size="sm" onClick={() => setStep('upload-template')}>
-                  重新选择
-                </Button>
-              </div>
-
-              {/* Template Fields Preview */}
-              <div className="mt-4 p-4 bg-muted/30 rounded-xl">
-                <p className="text-xs font-bold uppercase tracking-widest text-muted-foreground mb-3">待填写字段</p>
-                <div className="flex flex-wrap gap-2">
-                  {templateFields.map((field, idx) => (
-                    <Badge key={idx} variant="outline" className="bg-background">
-                      {field.name}
-                    </Badge>
-                  ))}
-                </div>
-              </div>
-            </CardContent>
-          </Card>
-
-          {/* Source Documents Selection */}
-          <Card className="border-none shadow-md">
-            <CardHeader className="pb-4">
-              <CardTitle className="text-lg flex items-center gap-2">
-                <FileText className="text-primary" size={20} />
-                选择数据源文档
+                表格模板
               </CardTitle>
               <CardDescription>
-                从已上传的文档中选择作为填表的数据来源，支持 Excel 和非结构化文档
+                上传需要填写的 Excel/Word 模板文件
               </CardDescription>
             </CardHeader>
             <CardContent>
-              {loading ? (
-                <div className="space-y-3">
-                  {[1, 2, 3].map(i => <Skeleton key={i} className="h-16 w-full rounded-xl" />)}
-                </div>
-              ) : sourceDocs.length > 0 ? (
-                <div className="space-y-3">
-                  {sourceDocs.map(doc => (
-                    <div
-                      key={doc.doc_id}
-                      className={cn(
-                        "flex items-center gap-4 p-4 rounded-xl border-2 transition-all cursor-pointer",
-                        selectedDocs.includes(doc.doc_id)
-                          ? "border-primary bg-primary/5"
-                          : "border-border hover:bg-muted/30"
-                      )}
-                      onClick={() => {
-                        setSelectedDocs(prev =>
-                          prev.includes(doc.doc_id)
-                            ? prev.filter(id => id !== doc.doc_id)
-                            : [...prev, doc.doc_id]
-                        );
-                      }}
-                    >
-                      <div className={cn(
-                        "w-6 h-6 rounded-md border-2 flex items-center justify-center transition-all",
-                        selectedDocs.includes(doc.doc_id)
-                          ? "border-primary bg-primary text-white"
-                          : "border-muted-foreground/30"
-                      )}>
-                        {selectedDocs.includes(doc.doc_id) && <CheckCircle2 size={14} />}
-                      </div>
-                      <div className={cn(
-                        "w-10 h-10 rounded-lg flex items-center justify-center",
-                        doc.doc_type === 'xlsx' ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
-                      )}>
-                        {doc.doc_type === 'xlsx' ? <FileSpreadsheet size={20} /> : <FileText size={20} />}
-                      </div>
-                      <div className="flex-1 min-w-0">
-                        <p className="font-semibold truncate">{doc.original_filename}</p>
-                        <p className="text-xs text-muted-foreground">
-                          {doc.doc_type.toUpperCase()} • {format(new Date(doc.created_at), 'yyyy-MM-dd')}
-                        </p>
-                      </div>
-                      {doc.metadata?.columns && (
-                        <Badge variant="outline" className="text-xs">
-                          {doc.metadata.columns.length} 列
-                        </Badge>
-                      )}
-                    </div>
-                  ))}
+              {!templateFile ? (
+                <div
+                  {...getTemplateProps()}
+                  className={cn(
+                    "border-2 border-dashed rounded-2xl p-8 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group min-h-[200px]",
+                    isTemplateDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5"
+                  )}
+                >
+                  <input {...getTemplateInputProps()} />
+                  <div className="w-14 h-14 rounded-xl bg-primary/10 text-primary flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
+                    {loading ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
+                  </div>
+                  <p className="font-medium">
+                    {isTemplateDragActive ? '释放以上传' : '点击或拖拽上传模板'}
+                  </p>
+                  <p className="text-xs text-muted-foreground mt-1">
+                    支持 .xlsx .xls .docx
+                  </p>
                 </div>
               ) : (
-                <div className="text-center py-12 text-muted-foreground">
-                  <FileText size={48} className="mx-auto mb-4 opacity-30" />
-                  <p>暂无数据源文档，请先上传文档</p>
+                <div className="flex items-center gap-3 p-4 bg-emerald-500/5 rounded-xl border border-emerald-200">
+                  <div className="w-10 h-10 rounded-lg bg-emerald-500/10 text-emerald-500 flex items-center justify-center">
+                    <FileSpreadsheet size={20} />
+                  </div>
+                  <div className="flex-1 min-w-0">
+                    <p className="font-medium truncate">{templateFile.name}</p>
+                    <p className="text-xs text-muted-foreground">
+                      {(templateFile.size / 1024).toFixed(1)} KB
+                    </p>
+                  </div>
+                  <Button variant="ghost" size="sm" onClick={() => setTemplateFile(null)}>
+                    <X size={16} />
+                  </Button>
+                </div>
+              )}
+            </CardContent>
+          </Card>
+
+          {/* Source Documents Upload */}
+          <Card className="border-none shadow-md">
+            <CardHeader className="pb-4">
+              <CardTitle className="text-lg flex items-center gap-2">
+                <Files className="text-primary" size={20} />
+                源文档
+              </CardTitle>
+              <CardDescription>
+                上传包含数据的源文档（支持多选），可同时上传多个文件
+              </CardDescription>
+            </CardHeader>
+            <CardContent>
+              <div
+                {...getSourceProps()}
+                className={cn(
+                  "border-2 border-dashed rounded-2xl p-8 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group min-h-[200px]",
+                  isSourceDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5"
+                )}
+              >
+                <input {...getSourceInputProps()} />
+                <div className="w-14 h-14 rounded-xl bg-blue-500/10 text-blue-500 flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
+                  {loading ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
+                </div>
+                <p className="font-medium">
+                  {isSourceDragActive ? '释放以上传' : '点击或拖拽上传源文档'}
+                </p>
+                <p className="text-xs text-muted-foreground mt-1">
+                  支持 .xlsx .xls .docx .md .txt
+                </p>
+              </div>
+
+              {/* Selected Source Files */}
+              {sourceFiles.length > 0 && (
+                <div className="mt-4 space-y-2">
+                  {sourceFiles.map((sf, idx) => (
+                    <div key={idx} className="flex items-center gap-3 p-3 bg-muted/50 rounded-xl">
+                      {getFileIcon(sf.file.name)}
+                      <div className="flex-1 min-w-0">
+                        <p className="text-sm font-medium truncate">{sf.file.name}</p>
+                        <p className="text-xs text-muted-foreground">
+                          {(sf.file.size / 1024).toFixed(1)} KB
+                        </p>
+                      </div>
+                      <Button variant="ghost" size="sm" onClick={() => removeSourceFile(idx)}>
+                        <Trash2 size={14} className="text-red-500" />
+                      </Button>
+                    </div>
+                  ))}
                 </div>
               )}
             </CardContent>
           </Card>
 
           {/* Action Button */}
-          <div className="flex justify-center">
+          <div className="col-span-1 lg:col-span-2 flex justify-center">
             <Button
               size="lg"
-              className="rounded-xl px-8 shadow-lg shadow-primary/20 gap-2"
-              disabled={selectedDocs.length === 0 || filling}
-              onClick={handleFillTemplate}
+              className="rounded-xl px-12 shadow-lg shadow-primary/20 gap-2"
+              disabled={!templateFile || loading}
+              onClick={handleJointUploadAndFill}
             >
-              {filling ? (
+              {loading ? (
                 <>
                   <Loader2 className="animate-spin" size={20} />
-                  <span>AI 正在分析并填表...</span>
+                  <span>正在处理...</span>
                 </>
               ) : (
                 <>
                   <Sparkles size={20} />
-                  <span>开始智能填表</span>
+                  <span>上传并智能填表</span>
                 </>
               )}
             </Button>
@@ -389,49 +390,7 @@ const TemplateFill: React.FC = () => {
         </div>
       )}
 
-      {/* Step 3: Preview Results */}
-      {step === 'preview' && filledResult && (
-        <Card className="border-none shadow-md">
-          <CardHeader>
-            <CardTitle className="text-lg flex items-center gap-2">
-              <CheckCircle2 className="text-emerald-500" size={20} />
-              填表完成
-            </CardTitle>
-            <CardDescription>
-              系统已根据 {selectedDocs.length} 份文档自动完成表格填写
-            </CardDescription>
-          </CardHeader>
-          <CardContent className="space-y-6">
-            {/* Filled Data Preview */}
-            <div className="p-6 bg-muted/30 rounded-2xl">
-              <div className="space-y-4">
-                {templateFields.map((field, idx) => (
-                  <div key={idx} className="flex items-center gap-4">
-                    <div className="w-32 text-sm font-medium text-muted-foreground">{field.name}</div>
-                    <div className="flex-1 p-3 bg-background rounded-xl border">
-                      {(filledResult.filled_data || {})[field.name] || '-'}
-                    </div>
-                  </div>
-                ))}
-              </div>
-            </div>
-
-            {/* Action Buttons */}
-            <div className="flex justify-center gap-4">
-              <Button variant="outline" className="rounded-xl gap-2" onClick={resetFlow}>
-                <RefreshCcw size={18} />
-                <span>继续填表</span>
-              </Button>
-              <Button className="rounded-xl gap-2 shadow-lg shadow-primary/20" onClick={handleExport}>
-                <Download size={18} />
-                <span>导出结果</span>
-              </Button>
-            </div>
-          </CardContent>
-        </Card>
-      )}
-
-      {/* Filling State */}
+      {/* Step 2: Filling State */}
       {step === 'filling' && (
         <Card className="border-none shadow-md">
           <CardContent className="py-16 flex flex-col items-center justify-center">
@@ -440,11 +399,107 @@ const TemplateFill: React.FC = () => {
             </div>
             <h3 className="text-xl font-bold mb-2">AI 正在智能分析并填表</h3>
             <p className="text-muted-foreground text-center max-w-md">
-              系统正在从 {selectedDocs.length} 份文档中检索相关信息，生成字段描述，并使用 RAG 增强填写准确性...
+              系统正在从 {sourceFiles.length || sourceFilePaths.length} 份文档中检索相关信息...
             </p>
           </CardContent>
         </Card>
       )}
+
+      {/* Step 3: Preview Results */}
+      {step === 'preview' && filledResult && (
+        <div className="space-y-6">
+          <Card className="border-none shadow-md">
+            <CardHeader>
+              <CardTitle className="text-lg flex items-center gap-2">
+                <CheckCircle2 className="text-emerald-500" size={20} />
+                填表完成
+              </CardTitle>
+              <CardDescription>
+                系统已根据 {sourceFiles.length || sourceFilePaths.length} 份文档自动完成表格填写
+              </CardDescription>
+            </CardHeader>
+            <CardContent>
+              {/* Filled Data Preview */}
+              <div className="p-6 bg-muted/30 rounded-2xl">
+                <div className="space-y-4">
+                  {templateFields.map((field, idx) => {
+                    const value = filledResult.filled_data?.[field.name];
+                    const displayValue = Array.isArray(value)
+                      ? value.filter(v => v && String(v).trim()).join(', ') || '-'
+                      : value || '-';
+                    return (
+                      <div key={idx} className="flex items-center gap-4">
+                        <div className="w-40 text-sm font-medium text-muted-foreground">{field.name}</div>
+                        <div className="flex-1 p-3 bg-background rounded-xl border">
+                          {displayValue}
+                        </div>
+                      </div>
+                    );
+                  })}
+                </div>
+              </div>
+
+              {/* Source Files Info */}
+              <div className="mt-4 flex flex-wrap gap-2">
+                {sourceFiles.map((sf, idx) => (
+                  <Badge key={idx} variant="outline" className="bg-blue-500/5">
+                    {getFileIcon(sf.file.name)}
+                    <span className="ml-1">{sf.file.name}</span>
+                  </Badge>
+                ))}
+              </div>
+
+              {/* Action Buttons */}
+              <div className="flex justify-center gap-4 mt-6">
+                <Button variant="outline" className="rounded-xl gap-2" onClick={resetFlow}>
+                  <RefreshCcw size={18} />
+                  <span>继续填表</span>
+                </Button>
+                <Button className="rounded-xl gap-2 shadow-lg shadow-primary/20" onClick={handleExport}>
+                  <Download size={18} />
+                  <span>导出结果</span>
+                </Button>
+              </div>
+            </CardContent>
+          </Card>
+
+          {/* Fill Details */}
+          {filledResult.fill_details && filledResult.fill_details.length > 0 && (
+            <Card className="border-none shadow-md">
+              <CardHeader>
+                <CardTitle className="text-lg">填写详情</CardTitle>
+              </CardHeader>
+              <CardContent>
+                <div className="space-y-3">
+                  {filledResult.fill_details.map((detail: any, idx: number) => (
+                    <div key={idx} className="flex items-start gap-3 p-3 bg-muted/30 rounded-xl text-sm">
+                      <div className="w-1 h-1 rounded-full bg-primary mt-2" />
+                      <div className="flex-1">
+                        <div className="font-medium">{detail.field}</div>
+                        <div className="text-muted-foreground text-xs mt-1">
+                          来源: {detail.source} | 置信度: {detail.confidence ? (detail.confidence * 100).toFixed(0) + '%' : 'N/A'}
+                        </div>
+                      </div>
+                    </div>
+                  ))}
+                </div>
+              </CardContent>
+            </Card>
+          )}
+        </div>
+      )}
+
+      {/* Preview Dialog */}
+      <Dialog open={previewOpen} onOpenChange={setPreviewOpen}>
+        <DialogContent className="max-w-2xl">
+          <DialogHeader>
+            <DialogTitle>{previewDoc?.name || '文档预览'}</DialogTitle>
+          </DialogHeader>
+          <ScrollArea className="max-h-[60vh]">
+            <pre className="text-sm whitespace-pre-wrap">{previewDoc?.content}</pre>
+          </ScrollArea>
+        </DialogContent>
+      </Dialog>
     </div>
   );
 };
diff --git a/logs/rag_disable_note.txt b/logs/rag_disable_note.txt
new file mode 100644
index 0000000..cf75308
--- /dev/null
+++ b/logs/rag_disable_note.txt
@@ -0,0 +1,59 @@
+RAG 服务临时禁用说明
+========================
+日期: 2026-04-08
+
+修改内容:
+----------
+应需求，RAG 向量检索功能已临时禁用，具体如下:
+
+1. 修改文件: backend/app/services/rag_service.py
+
+2. 关键变更:
+   - 在 RAGService.__init__ 中添加 self._disabled = True 标志
+   - index_field() - 添加 _disabled 检查，跳过实际索引操作并记录日志
+   - index_document_content() - 添加 _disabled 检查，跳过实际索引操作并记录日志
+   - retrieve() - 添加 _disabled 检查，返回空列表并记录日志
+   - get_vector_count() - 添加 _disabled 检查，返回 0 并记录日志
+   - clear() - 添加 _disabled 检查，跳过实际清空操作并记录日志
+
+3. 行为变更:
+   - 所有 RAG 索引构建操作会被记录到日志 ([RAG DISABLED] 前缀)
+   - 所有 RAG 检索操作返回空结果
+   - 向量计数始终返回 0
+   - 实际向量数据库操作被跳过
+
+4. 恢复方式:
+   - 将 RAGService.__init__ 中的 self._disabled = True 改为 self._disabled = False
+   - 重新启动服务即可恢复 RAG 功能
+
+目的:
+------
+保留 RAG 索引构建功能的前端界面和代码结构，暂不实际调用向量数据库 API，
+待后续需要时再启用。
+
+影响范围:
+---------
+- /api/v1/rag/search - RAG 搜索接口 (返回空结果)
+- /api/v1/rag/status - RAG 状态接口 (返回 vector_count=0)
+- /api/v1/rag/rebuild - RAG 重建接口 (仅记录日志)
+- Excel/文档上传时的 RAG 索引构建 (仅记录日志)
+
+========================
+后续补充 (2026-04-08):
+========================
+修改文件: backend/app/services/table_rag_service.py
+
+关键变更:
+- 在 TableRAGService.__init__ 中添加 self._disabled = True 标志
+- build_table_rag_index() - RAG 索引部分被跳过，仅记录日志
+- index_document_table() - RAG 索引部分被跳过，仅记录日志
+
+行为变更:
+- Excel 上传时，MySQL 存储仍然正常进行
+- AI 字段描述仍然正常生成（调用 LLM）
+- 只有向量数据库索引操作被跳过
+
+恢复方式:
+- 将 TableRAGService.__init__ 中的 self._disabled = True 改为 self._disabled = False
+- 或将 rag_service.py 中的 self._disabled = True 改为 self._disabled = False
+- 两者需同时改为 False 才能完全恢复 RAG 功能

From d5df5b8283b0e7697b6f829836a157dbe7a44331 Mon Sep 17 00:00:00 2001
From: KiriAky 107 <KiriAky107@qq.com>
Date: Thu, 9 Apr 2026 21:00:31 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E6=A8=A1=E6=9D=BF?=
 =?UTF-8?q?=E5=A1=AB=E5=85=85=E6=9C=8D=E5=8A=A1=E6=94=AF=E6=8C=81=E9=9D=9E?=
 =?UTF-8?q?=E7=BB=93=E6=9E=84=E5=8C=96=E6=96=87=E6=A1=A3AI=E5=88=86?=
 =?UTF-8?q?=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 引入markdown_ai_service服务支持Markdown文档处理
- 实现_nonstructured_docs_for_fields方法对非结构化文档进行AI分析
- 优化LLM提示词，改进数据提取的准确性和格式规范
- 支持从Markdown表格格式{tables: [{headers: [...], rows: [...]}]}中提取数据
- 添加文档章节结构解析，提升上下文理解能力
- 增加JSON响应格式修复功能，提高数据解析成功率
---
 backend/app/services/template_fill_service.py | 200 +++++++++++++++++-
 1 file changed, 193 insertions(+), 7 deletions(-)

diff --git a/backend/app/services/template_fill_service.py b/backend/app/services/template_fill_service.py
index 71976a6..dfea7f8 100644
--- a/backend/app/services/template_fill_service.py
+++ b/backend/app/services/template_fill_service.py
@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Optional
 from app.core.database import mongodb
 from app.services.llm_service import llm_service
 from app.core.document_parser import ParserFactory
+from app.services.markdown_ai_service import markdown_ai_service
 
 logger = logging.getLogger(__name__)
 
@@ -233,6 +234,12 @@ class TemplateFillService:
                 confidence=1.0
             )
 
+        # 无法直接从结构化数据提取，尝试 AI 分析非结构化文档
+        ai_structured = await self._analyze_unstructured_docs_for_fields(source_docs, field, user_hint)
+        if ai_structured:
+            logger.info(f"✅ 字段 {field.name} 通过 AI 分析结构化提取到数据")
+            return ai_structured
+
         # 无法从结构化数据提取，使用 LLM
         logger.info(f"字段 {field.name} 无法直接从结构化数据提取，使用 LLM...")
 
@@ -244,18 +251,20 @@ class TemplateFillService:
         if user_hint:
             hint_text = f"{user_hint}。{hint_text}"
 
-        prompt = f"""你是一个专业的数据提取专家。请从以下文档内容中提取"{field.name}"字段的所有行数据。
+        prompt = f"""你是一个专业的数据提取专家。请从以下文档内容中提取与"{field.name}"相关的所有信息。
 
-参考文档内容（已提取" {field.name}"列的数据）：
+提示词: {hint_text}
+
+文档内容：
 {context_text}
 
-请提取上述所有行的" {field.name}"值，存入数组。每一行对应数组中的一个元素。
-如果某行该字段为空，请用空字符串""占位。
+请分析文档结构（可能包含表格、标题段落等），找出所有与"{field.name}"相关的数据。
+如果找到表格数据，返回多行值；如果是非表格段落，提取关键信息。
 
-请严格按照以下 JSON 格式输出，不要添加任何解释：
+请严格按照以下 JSON 格式输出：
 {{
-    "values": ["第1行的值", "第2行的值", "第3行的值", ...],
-    "source": "数据来源的文档描述",
+    "values": ["第1行的值", "第2行的值", ...],
+    "source": "数据来源描述",
     "confidence": 0.0到1.0之间的置信度
 }}
 """
@@ -473,6 +482,29 @@ class TemplateFillService:
                             elif isinstance(row, list):
                                 doc_content += " | ".join(str(cell) for cell in row) + "\n"
                             row_count += 1
+            elif doc.structured_data and doc.structured_data.get("tables"):
+                # Markdown 表格格式: {tables: [{headers: [...], rows: [...]}]}
+                tables = doc.structured_data.get("tables", [])
+                for table in tables:
+                    if isinstance(table, dict):
+                        headers = table.get("headers", [])
+                        rows = table.get("rows", [])
+                        if rows and headers:
+                            doc_content += f"\n【文档: {doc.filename} - 表格】\n"
+                            doc_content += " | ".join(str(h) for h in headers) + "\n"
+                            for row in rows:
+                                if isinstance(row, list):
+                                    doc_content += " | ".join(str(cell) for cell in row) + "\n"
+                                row_count += 1
+                # 如果有标题结构，也添加上下文
+                if doc.structured_data.get("titles"):
+                    titles = doc.structured_data.get("titles", [])
+                    doc_content += f"\n【文档章节结构】\n"
+                    for title in titles[:20]:  # 限制前20个标题
+                        doc_content += f"{'#' * title.get('level', 1)} {title.get('text', '')}\n"
+                # 如果没有提取到表格内容，使用纯文本
+                if not doc_content.strip():
+                    doc_content = doc.content[:5000] if doc.content else ""
             elif doc.content:
                 doc_content = doc.content[:5000]
 
@@ -720,6 +752,21 @@ class TemplateFillService:
                     logger.info(f"从文档 {doc.filename} 提取到 {len(values)} 个值")
                     break
 
+            # 处理 Markdown 表格格式: {tables: [{headers: [...], rows: [...]}]}
+            elif structured.get("tables"):
+                tables = structured.get("tables", [])
+                for table in tables:
+                    if isinstance(table, dict):
+                        headers = table.get("headers", [])
+                        rows = table.get("rows", [])
+                        values = self._extract_column_values(rows, headers, field_name)
+                        if values:
+                            all_values.extend(values)
+                            logger.info(f"从 Markdown 表格提取到 {len(values)} 个值")
+                            break
+                if all_values:
+                    break
+
         return all_values
 
     def _extract_column_values(self, rows: List, columns: List, field_name: str) -> List[str]:
@@ -1005,6 +1052,145 @@ class TemplateFillService:
         content = text.strip()[:500] if text.strip() else ""
         return [content] if content else []
 
+    async def _analyze_unstructured_docs_for_fields(
+        self,
+        source_docs: List[SourceDocument],
+        field: TemplateField,
+        user_hint: Optional[str] = None
+    ) -> Optional[FillResult]:
+        """
+        对非结构化文档进行 AI 分析，尝试提取结构化数据
+
+        适用于 Markdown 等没有表格格式的文档，通过 AI 分析提取结构化信息
+
+        Args:
+            source_docs: 源文档列表
+            field: 字段定义
+            user_hint: 用户提示
+
+        Returns:
+            FillResult 如果提取成功，否则返回 None
+        """
+        # 找出非结构化的 Markdown/TXT 文档（没有表格的）
+        unstructured_docs = []
+        for doc in source_docs:
+            if doc.doc_type in ["md", "txt", "markdown"]:
+                # 检查是否有表格
+                has_tables = (
+                    doc.structured_data and
+                    doc.structured_data.get("tables") and
+                    len(doc.structured_data.get("tables", [])) > 0
+                )
+                if not has_tables:
+                    unstructured_docs.append(doc)
+
+        if not unstructured_docs:
+            return None
+
+        logger.info(f"发现 {len(unstructured_docs)} 个非结构化文档，尝试 AI 分析...")
+
+        # 对每个非结构化文档进行 AI 分析
+        for doc in unstructured_docs:
+            try:
+                # 使用 markdown_ai_service 的 statistics 分析类型
+                # 这种类型专门用于政府统计公报等包含数据的文档
+                hint_text = field.hint if field.hint else f"请提取{field.name}的信息"
+                if user_hint:
+                    hint_text = f"{user_hint}。{hint_text}"
+
+                # 构建针对字段提取的提示词
+                prompt = f"""你是一个专业的数据提取专家。请从以下文档内容中提取与"{field.name}"相关的所有数据。
+
+字段提示: {hint_text}
+
+文档内容：
+{doc.content[:8000] if doc.content else ""}
+
+请完成以下任务：
+1. 仔细阅读文档，找出所有与"{field.name}"相关的数据
+2. 如果文档中有表格数据，提取表格中的对应列值
+3. 如果文档中是段落描述，提取其中的关键数值或结论
+4. 返回提取的所有值（可能多个，用数组存储）
+
+请用严格的 JSON 格式返回：
+{{
+    "values": ["值1", "值2", ...],
+    "source": "数据来源说明",
+    "confidence": 0.0到1.0之间的置信度
+}}
+
+如果没有找到相关数据，返回空数组 values: []"""
+
+                messages = [
+                    {"role": "system", "content": "你是一个专业的数据提取助手，擅长从政府统计公报等文档中提取数据。请严格按JSON格式输出。"},
+                    {"role": "user", "content": prompt}
+                ]
+
+                response = await self.llm.chat(
+                    messages=messages,
+                    temperature=0.1,
+                    max_tokens=5000
+                )
+
+                content = self.llm.extract_message_content(response)
+                logger.info(f"AI 分析返回: {content[:500]}")
+
+                # 解析 JSON
+                import json
+                import re
+
+                # 清理 markdown 格式
+                cleaned = content.strip()
+                cleaned = re.sub(r'^```json\s*', '', cleaned, flags=re.MULTILINE)
+                cleaned = re.sub(r'^```\s*', '', cleaned, flags=re.MULTILINE)
+                cleaned = cleaned.strip()
+
+                # 查找 JSON
+                json_start = -1
+                for i, c in enumerate(cleaned):
+                    if c == '{' or c == '[':
+                        json_start = i
+                        break
+
+                if json_start == -1:
+                    continue
+
+                json_text = cleaned[json_start:]
+                try:
+                    result = json.loads(json_text)
+                    values = self._extract_values_from_json(result)
+                    if values:
+                        return FillResult(
+                            field=field.name,
+                            values=values,
+                            value=values[0] if values else "",
+                            source=f"AI分析: {doc.filename}",
+                            confidence=result.get("confidence", 0.8)
+                        )
+                except json.JSONDecodeError:
+                    # 尝试修复 JSON
+                    fixed = self._fix_json(json_text)
+                    if fixed:
+                        try:
+                            result = json.loads(fixed)
+                            values = self._extract_values_from_json(result)
+                            if values:
+                                return FillResult(
+                                    field=field.name,
+                                    values=values,
+                                    value=values[0] if values else "",
+                                    source=f"AI分析: {doc.filename}",
+                                    confidence=result.get("confidence", 0.8)
+                                )
+                        except json.JSONDecodeError:
+                            pass
+
+            except Exception as e:
+                logger.warning(f"AI 分析文档 {doc.filename} 失败: {str(e)}")
+                continue
+
+        return None
+
 
 # ==================== 全局单例 ====================