Compare commits
3 Commits
47c89d888f
...
827371cb90
| Author | SHA1 | Date | |
|---|---|---|---|
| 827371cb90 | |||
| e5d4724e82 | |||
| 9e7f9df384 |
@@ -14,6 +14,7 @@ from app.api.endpoints import (
|
||||
analysis_charts,
|
||||
health,
|
||||
instruction, # 智能指令
|
||||
conversation, # 对话历史
|
||||
)
|
||||
|
||||
# 创建主路由
|
||||
@@ -31,3 +32,4 @@ api_router.include_router(ai_analyze.router) # AI分析
|
||||
api_router.include_router(visualization.router) # 可视化
|
||||
api_router.include_router(analysis_charts.router) # 分析图表
|
||||
api_router.include_router(instruction.router) # 智能指令
|
||||
api_router.include_router(conversation.router) # 对话历史
|
||||
|
||||
98
backend/app/api/endpoints/conversation.py
Normal file
98
backend/app/api/endpoints/conversation.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
对话历史 API 接口
|
||||
|
||||
提供对话历史的存储和查询功能
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.database import mongodb
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/conversation", tags=["对话历史"])
|
||||
|
||||
|
||||
# ==================== 请求/响应模型 ====================
|
||||
|
||||
class ConversationMessage(BaseModel):
    """Schema describing a single conversation message.

    NOTE(review): none of the endpoints visible in this module reference this
    model — confirm whether it is used elsewhere.
    """

    # Speaker of the message; presumably "user" or "assistant" — TODO confirm.
    role: str
    # Text of the message.
    content: str
    # Optional intent label associated with the message; defaults to None.
    intent: Optional[str] = None
|
||||
|
||||
|
||||
class ConversationHistoryResponse(BaseModel):
    """Response body of the conversation-history endpoint."""

    # True when the lookup succeeded; the endpoint sets False when it raised.
    success: bool
    # Messages of the conversation (untyped list; empty on failure).
    messages: list
|
||||
|
||||
|
||||
class ConversationListResponse(BaseModel):
    """Response body of the conversation-list endpoint."""

    # True when the listing succeeded; the endpoint sets False when it raised.
    success: bool
    # Conversation entries (untyped list; empty on failure).
    conversations: list
|
||||
|
||||
|
||||
# ==================== 接口 ====================
|
||||
|
||||
@router.get("/{conversation_id}/history", response_model=ConversationHistoryResponse)
async def get_conversation_history(conversation_id: str, limit: int = 20):
    """
    Return the stored messages of one conversation.

    Args:
        conversation_id: Conversation session ID.
        limit: Maximum number of messages to return (default 20).

    Returns:
        A ConversationHistoryResponse; on any error the failure is logged and
        a response with ``success=False`` and no messages is returned instead
        of raising.
    """
    try:
        history = await mongodb.get_conversation_history(conversation_id, limit=limit)
        payload = ConversationHistoryResponse(success=True, messages=history)
    except Exception as exc:
        # Best-effort endpoint: report the failure in the payload.
        logger.error(f"获取对话历史失败: {exc}")
        payload = ConversationHistoryResponse(success=False, messages=[])
    return payload
|
||||
|
||||
|
||||
@router.delete("/{conversation_id}")
async def delete_conversation(conversation_id: str):
    """
    Delete a conversation session.

    Args:
        conversation_id: Conversation session ID.

    Returns:
        ``{"success": <bool>}`` with the database layer's result, or
        ``{"success": False, "error": <message>}`` if the deletion raised.
    """
    try:
        deleted = await mongodb.delete_conversation(conversation_id)
    except Exception as exc:
        logger.error(f"删除对话失败: {exc}")
        return {"success": False, "error": str(exc)}
    return {"success": deleted}
|
||||
|
||||
|
||||
@router.get("/all", response_model=ConversationListResponse)
async def list_conversations(limit: int = 50, skip: int = 0):
    """
    Return a page of conversation sessions.

    Args:
        limit: Number of entries to return.
        skip: Number of entries to skip.

    Returns:
        A ConversationListResponse; on any error the failure is logged and a
        response with ``success=False`` and an empty list is returned.
    """
    try:
        sessions = await mongodb.list_conversations(limit=limit, skip=skip)
        payload = ConversationListResponse(success=True, conversations=sessions)
    except Exception as exc:
        # Best-effort endpoint: report the failure in the payload.
        logger.error(f"获取会话列表失败: {exc}")
        payload = ConversationListResponse(success=False, conversations=[])
    return payload
|
||||
@@ -4,6 +4,7 @@
|
||||
支持多格式文档(docx/xlsx/md/txt)上传、解析、存储和RAG索引
|
||||
集成 Excel 存储和 AI 生成字段描述
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
@@ -258,6 +259,7 @@ async def process_document(
|
||||
)
|
||||
|
||||
# 如果是 Excel,存储到 MySQL + AI生成描述 + RAG索引
|
||||
mysql_table_name = None
|
||||
if doc_type in ["xlsx", "xls"]:
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
@@ -265,17 +267,29 @@ async def process_document(
|
||||
)
|
||||
|
||||
try:
|
||||
# 使用 TableRAG 服务完成建表和RAG索引
|
||||
# 使用 TableRAG 服务存储到 MySQL(跳过 RAG 索引以提升速度)
|
||||
logger.info(f"开始存储Excel到MySQL: {original_filename}, file_path: {file_path}")
|
||||
rag_result = await table_rag_service.build_table_rag_index(
|
||||
file_path=file_path,
|
||||
filename=original_filename,
|
||||
sheet_name=parse_options.get("sheet_name"),
|
||||
header_row=parse_options.get("header_row", 0)
|
||||
header_row=parse_options.get("header_row", 0),
|
||||
skip_rag_index=True # 跳过 AI 字段描述生成和索引
|
||||
)
|
||||
|
||||
if rag_result.get("success"):
|
||||
logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {rag_result.get('table_name')}")
|
||||
mysql_table_name = rag_result.get('table_name')
|
||||
logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {mysql_table_name}")
|
||||
# 更新 MongoDB 中的 metadata,记录 MySQL 表名
|
||||
try:
|
||||
doc = await mongodb.get_document(doc_id)
|
||||
if doc:
|
||||
metadata = doc.get("metadata", {})
|
||||
metadata["mysql_table_name"] = mysql_table_name
|
||||
await mongodb.update_document_metadata(doc_id, metadata)
|
||||
logger.info(f"已更新 MongoDB 文档的 mysql_table_name: {mysql_table_name}")
|
||||
except Exception as update_err:
|
||||
logger.warning(f"更新 MongoDB mysql_table_name 失败: {update_err}")
|
||||
else:
|
||||
logger.error(f"RAG索引构建失败: {rag_result.get('error')}")
|
||||
except Exception as e:
|
||||
@@ -283,17 +297,16 @@ async def process_document(
|
||||
|
||||
else:
|
||||
# 非结构化文档
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
progress=60, message="正在建立索引"
|
||||
)
|
||||
|
||||
# 如果文档中有表格数据,提取并存储到 MySQL + RAG
|
||||
structured_data = result.data.get("structured_data", {})
|
||||
tables = structured_data.get("tables", [])
|
||||
|
||||
# 如果文档中有表格数据,提取并存储到 MySQL(不需要 RAG 索引)
|
||||
if tables:
|
||||
# 对每个表格建立 MySQL 表和 RAG 索引
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
progress=60, message="正在存储表格数据"
|
||||
)
|
||||
# 对每个表格建立 MySQL 表(跳过 RAG 索引,速度更快)
|
||||
for table_info in tables:
|
||||
await table_rag_service.index_document_table(
|
||||
doc_id=doc_id,
|
||||
@@ -302,7 +315,13 @@ async def process_document(
|
||||
source_doc_type=doc_type
|
||||
)
|
||||
|
||||
# 同时对文档内容建立 RAG 索引
|
||||
# 对文档内容建立 RAG 索引(非结构化文本需要语义搜索)
|
||||
content = result.data.get("content", "")
|
||||
if content and len(content) > 50: # 只有内容足够长才建立索引
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
progress=80, message="正在建立语义索引"
|
||||
)
|
||||
await index_document_to_rag(doc_id, original_filename, result, doc_type)
|
||||
|
||||
# 完成
|
||||
@@ -328,26 +347,32 @@ async def process_document(
|
||||
|
||||
|
||||
async def process_documents_batch(task_id: str, files: List[dict]):
|
||||
"""批量处理文档"""
|
||||
"""批量并行处理文档"""
|
||||
try:
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
progress=0, message="开始批量处理"
|
||||
progress=0, message=f"开始批量处理 {len(files)} 个文档",
|
||||
result={"total": len(files), "files": []}
|
||||
)
|
||||
|
||||
results = []
|
||||
for i, file_info in enumerate(files):
|
||||
async def process_single_file(file_info: dict, index: int) -> dict:
|
||||
"""处理单个文件"""
|
||||
filename = file_info["filename"]
|
||||
try:
|
||||
# 解析文档
|
||||
parser = ParserFactory.get_parser(file_info["path"])
|
||||
result = parser.parse(file_info["path"])
|
||||
|
||||
if result.success:
|
||||
if not result.success:
|
||||
return {"index": index, "filename": filename, "success": False, "error": result.error or "解析失败"}
|
||||
|
||||
# 存储到 MongoDB
|
||||
doc_id = await mongodb.insert_document(
|
||||
doc_type=file_info["ext"],
|
||||
content=result.data.get("content", ""),
|
||||
metadata={
|
||||
**result.metadata,
|
||||
"original_filename": file_info["filename"],
|
||||
"original_filename": filename,
|
||||
"file_path": file_info["path"]
|
||||
},
|
||||
structured_data=result.data.get("structured_data")
|
||||
@@ -357,43 +382,60 @@ async def process_documents_batch(task_id: str, files: List[dict]):
|
||||
if file_info["ext"] in ["xlsx", "xls"]:
|
||||
await table_rag_service.build_table_rag_index(
|
||||
file_path=file_info["path"],
|
||||
filename=file_info["filename"]
|
||||
filename=filename,
|
||||
skip_rag_index=True # 跳过 AI 字段描述生成和索引
|
||||
)
|
||||
else:
|
||||
# 非结构化文档:处理其中的表格 + 内容索引
|
||||
# 非结构化文档
|
||||
structured_data = result.data.get("structured_data", {})
|
||||
tables = structured_data.get("tables", [])
|
||||
|
||||
# 表格数据直接存 MySQL(跳过 RAG 索引)
|
||||
if tables:
|
||||
for table_info in tables:
|
||||
await table_rag_service.index_document_table(
|
||||
doc_id=doc_id,
|
||||
filename=file_info["filename"],
|
||||
filename=filename,
|
||||
table_data=table_info,
|
||||
source_doc_type=file_info["ext"]
|
||||
)
|
||||
|
||||
await index_document_to_rag(doc_id, file_info["filename"], result, file_info["ext"])
|
||||
# 只有内容足够长才建立语义索引
|
||||
content = result.data.get("content", "")
|
||||
if content and len(content) > 50:
|
||||
await index_document_to_rag(doc_id, filename, result, file_info["ext"])
|
||||
|
||||
results.append({"filename": file_info["filename"], "doc_id": doc_id, "success": True})
|
||||
else:
|
||||
results.append({"filename": file_info["filename"], "success": False, "error": result.error})
|
||||
return {"index": index, "filename": filename, "doc_id": doc_id, "success": True}
|
||||
|
||||
except Exception as e:
|
||||
results.append({"filename": file_info["filename"], "success": False, "error": str(e)})
|
||||
logger.error(f"处理文件 {filename} 失败: {e}")
|
||||
return {"index": index, "filename": filename, "success": False, "error": str(e)}
|
||||
|
||||
progress = int((i + 1) / len(files) * 100)
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
progress=progress, message=f"已处理 {i+1}/{len(files)}"
|
||||
)
|
||||
# 并行处理所有文档
|
||||
tasks = [process_single_file(f, i) for i, f in enumerate(files)]
|
||||
results = await asyncio.gather(*tasks)
|
||||
|
||||
# 按原始顺序排序
|
||||
results.sort(key=lambda x: x["index"])
|
||||
|
||||
# 统计成功/失败数量
|
||||
success_count = sum(1 for r in results if r["success"])
|
||||
fail_count = len(results) - success_count
|
||||
|
||||
# 更新最终状态
|
||||
await update_task_status(
|
||||
task_id, status="success",
|
||||
progress=100, message="批量处理完成",
|
||||
result={"results": results}
|
||||
progress=100, message=f"批量处理完成: {success_count} 成功, {fail_count} 失败",
|
||||
result={
|
||||
"total": len(files),
|
||||
"success": success_count,
|
||||
"failure": fail_count,
|
||||
"results": results
|
||||
}
|
||||
)
|
||||
|
||||
logger.info(f"批量处理完成: {success_count}/{len(files)} 成功")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"批量处理失败: {str(e)}")
|
||||
await update_task_status(
|
||||
@@ -404,20 +446,20 @@ async def process_documents_batch(task_id: str, files: List[dict]):
|
||||
|
||||
|
||||
async def index_document_to_rag(doc_id: str, filename: str, result: ParseResult, doc_type: str):
|
||||
"""将非结构化文档索引到 RAG(使用分块索引)"""
|
||||
"""将非结构化文档索引到 RAG(使用分块索引,异步执行)"""
|
||||
try:
|
||||
content = result.data.get("content", "")
|
||||
if content:
|
||||
# 将完整内容传递给 RAG 服务自动分块索引
|
||||
rag_service.index_document_content(
|
||||
# 使用异步方法索引,避免阻塞事件循环
|
||||
await rag_service.index_document_content_async(
|
||||
doc_id=doc_id,
|
||||
content=content, # 传递完整内容,由 RAG 服务自动分块
|
||||
content=content,
|
||||
metadata={
|
||||
"filename": filename,
|
||||
"doc_type": doc_type
|
||||
},
|
||||
chunk_size=500, # 每块 500 字符
|
||||
chunk_overlap=50 # 块之间 50 字符重叠
|
||||
chunk_size=1000, # 每块 1000 字符,提升速度
|
||||
chunk_overlap=100 # 块之间 100 字符重叠
|
||||
)
|
||||
logger.info(f"RAG 索引完成: {filename}, doc_id={doc_id}")
|
||||
except Exception as e:
|
||||
|
||||
@@ -25,6 +25,7 @@ class InstructionRequest(BaseModel):
|
||||
instruction: str
|
||||
doc_ids: Optional[List[str]] = None # 关联的文档 ID 列表
|
||||
context: Optional[Dict[str, Any]] = None # 额外上下文
|
||||
conversation_id: Optional[str] = None # 对话会话ID,用于关联历史记录
|
||||
|
||||
|
||||
class IntentRecognitionResponse(BaseModel):
|
||||
@@ -240,7 +241,8 @@ async def instruction_chat(
|
||||
task_id=task_id,
|
||||
instruction=request.instruction,
|
||||
doc_ids=request.doc_ids,
|
||||
context=request.context
|
||||
context=request.context,
|
||||
conversation_id=request.conversation_id
|
||||
)
|
||||
|
||||
return {
|
||||
@@ -251,14 +253,15 @@ async def instruction_chat(
|
||||
}
|
||||
|
||||
# 同步模式:等待执行完成
|
||||
return await _execute_chat_task(task_id, request.instruction, request.doc_ids, request.context)
|
||||
return await _execute_chat_task(task_id, request.instruction, request.doc_ids, request.context, request.conversation_id)
|
||||
|
||||
|
||||
async def _execute_chat_task(
|
||||
task_id: str,
|
||||
instruction: str,
|
||||
doc_ids: Optional[List[str]],
|
||||
context: Optional[Dict[str, Any]]
|
||||
context: Optional[Dict[str, Any]],
|
||||
conversation_id: Optional[str] = None
|
||||
):
|
||||
"""执行指令对话的后台任务"""
|
||||
from app.core.database import mongodb as mongo_client
|
||||
@@ -278,6 +281,13 @@ async def _execute_chat_task(
|
||||
# 构建上下文
|
||||
ctx: Dict[str, Any] = context or {}
|
||||
|
||||
# 获取对话历史
|
||||
if conversation_id:
|
||||
history = await mongo_client.get_conversation_history(conversation_id, limit=20)
|
||||
if history:
|
||||
ctx["conversation_history"] = history
|
||||
logger.info(f"加载对话历史: conversation_id={conversation_id}, 消息数={len(history)}")
|
||||
|
||||
# 获取关联文档
|
||||
if doc_ids:
|
||||
docs = []
|
||||
@@ -291,6 +301,29 @@ async def _execute_chat_task(
|
||||
# 执行指令
|
||||
result = await instruction_executor.execute(instruction, ctx)
|
||||
|
||||
# 存储对话历史
|
||||
if conversation_id:
|
||||
try:
|
||||
# 存储用户消息
|
||||
await mongo_client.insert_conversation(
|
||||
conversation_id=conversation_id,
|
||||
role="user",
|
||||
content=instruction,
|
||||
intent=result.get("intent", "unknown")
|
||||
)
|
||||
# 存储助手回复
|
||||
response_content = result.get("message", "")
|
||||
if response_content:
|
||||
await mongo_client.insert_conversation(
|
||||
conversation_id=conversation_id,
|
||||
role="assistant",
|
||||
content=response_content,
|
||||
intent=result.get("intent", "unknown")
|
||||
)
|
||||
logger.info(f"已存储对话历史: conversation_id={conversation_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"存储对话历史失败: {e}")
|
||||
|
||||
# 根据意图类型添加友好的响应消息
|
||||
response_messages = {
|
||||
"extract": f"已提取 {len(result.get('extracted_data', {}))} 个字段的数据",
|
||||
|
||||
@@ -87,6 +87,7 @@ class ExportRequest(BaseModel):
|
||||
template_id: str
|
||||
filled_data: dict
|
||||
format: str = "xlsx" # xlsx 或 docx
|
||||
filled_file_path: Optional[str] = None # 已填写的 Word 文件路径(可选)
|
||||
|
||||
|
||||
# ==================== 接口实现 ====================
|
||||
@@ -541,7 +542,7 @@ async def export_filled_template(
|
||||
if request.format == "xlsx":
|
||||
return await _export_to_excel(request.filled_data, request.template_id)
|
||||
elif request.format == "docx":
|
||||
return await _export_to_word(request.filled_data, request.template_id)
|
||||
return await _export_to_word(request.filled_data, request.template_id, request.filled_file_path)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
@@ -608,11 +609,12 @@ async def _export_to_excel(filled_data: dict, template_id: str) -> StreamingResp
|
||||
)
|
||||
|
||||
|
||||
async def _export_to_word(filled_data: dict, template_id: str) -> StreamingResponse:
|
||||
async def _export_to_word(filled_data: dict, template_id: str, filled_file_path: Optional[str] = None) -> StreamingResponse:
|
||||
"""导出为 Word 格式"""
|
||||
import re
|
||||
import tempfile
|
||||
import os
|
||||
import urllib.parse
|
||||
from docx import Document
|
||||
from docx.shared import Pt, RGBColor
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
@@ -623,12 +625,32 @@ async def _export_to_word(filled_data: dict, template_id: str) -> StreamingRespo
|
||||
return ""
|
||||
# 移除控制字符
|
||||
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', text)
|
||||
# 转义 XML 特殊字符以防破坏文档结构
|
||||
text = text.replace('&', '&').replace('<', '<').replace('>', '>')
|
||||
return text.strip()
|
||||
|
||||
tmp_path = None
|
||||
try:
|
||||
# 先保存到临时文件,再读取到内存,确保文档完整性
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as tmp_file:
|
||||
tmp_path = tmp_file.name
|
||||
# 如果有已填写的文件(通过 _fill_docx 填写了模板单元格),直接返回该文件
|
||||
if filled_file_path and os.path.exists(filled_file_path):
|
||||
filename = os.path.basename(filled_file_path)
|
||||
with open(filled_file_path, 'rb') as f:
|
||||
file_content = f.read()
|
||||
output = io.BytesIO(file_content)
|
||||
encoded_filename = urllib.parse.quote(filename)
|
||||
return StreamingResponse(
|
||||
output,
|
||||
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
|
||||
"Content-Length": str(len(file_content))
|
||||
}
|
||||
)
|
||||
|
||||
# 没有已填写文件,创建新的 Word 文档(表格形式)
|
||||
# 创建临时文件(立即关闭句柄,避免 Windows 文件锁问题)
|
||||
tmp_fd, tmp_path = tempfile.mkstemp(suffix='.docx')
|
||||
os.close(tmp_fd) # 关闭立即得到的 fd,让 docx 可以写入
|
||||
|
||||
doc = Document()
|
||||
doc.add_heading('填写结果', level=1)
|
||||
@@ -670,19 +692,23 @@ async def _export_to_word(filled_data: dict, template_id: str) -> StreamingRespo
|
||||
|
||||
finally:
|
||||
# 清理临时文件
|
||||
if os.path.exists(tmp_path):
|
||||
if tmp_path and os.path.exists(tmp_path):
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
output = io.BytesIO(file_content)
|
||||
filename = "filled_template.docx"
|
||||
encoded_filename = urllib.parse.quote(filename)
|
||||
|
||||
return StreamingResponse(
|
||||
output,
|
||||
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
headers={"Content-Disposition": f"attachment; filename*=UTF-8''{filename}"}
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
|
||||
"Content-Length": str(len(file_content))
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -64,6 +64,11 @@ class MongoDB:
|
||||
"""任务集合 - 存储任务历史记录"""
|
||||
return self.db["tasks"]
|
||||
|
||||
@property
def conversations(self):
    """Conversation collection - holds the stored chat history records."""
    collection_name = "conversations"
    return self.db[collection_name]
|
||||
|
||||
# ==================== 文档操作 ====================
|
||||
|
||||
async def insert_document(
|
||||
@@ -117,14 +122,20 @@ class MongoDB:
|
||||
搜索文档
|
||||
|
||||
Args:
|
||||
query: 搜索关键词
|
||||
query: 搜索关键词(支持文件名和内容搜索)
|
||||
doc_type: 文档类型过滤
|
||||
limit: 返回数量
|
||||
|
||||
Returns:
|
||||
文档列表
|
||||
"""
|
||||
filter_query = {"content": {"$regex": query}}
|
||||
filter_query = {
|
||||
"$or": [
|
||||
{"content": {"$regex": query, "$options": "i"}},
|
||||
{"metadata.original_filename": {"$regex": query, "$options": "i"}},
|
||||
{"metadata.filename": {"$regex": query, "$options": "i"}},
|
||||
]
|
||||
}
|
||||
if doc_type:
|
||||
filter_query["doc_type"] = doc_type
|
||||
|
||||
@@ -141,6 +152,15 @@ class MongoDB:
|
||||
result = await self.documents.delete_one({"_id": ObjectId(doc_id)})
|
||||
return result.deleted_count > 0
|
||||
|
||||
async def update_document_metadata(self, doc_id: str, metadata: Dict[str, Any]) -> bool:
    """Replace the ``metadata`` field of one document.

    Args:
        doc_id: String form of the document's ObjectId.
        metadata: New metadata dictionary; it overwrites the stored one.

    Returns:
        True if the update modified a document, otherwise False.
    """
    from bson import ObjectId

    selector = {"_id": ObjectId(doc_id)}
    change = {"$set": {"metadata": metadata}}
    outcome = await self.documents.update_one(selector, change)
    return outcome.modified_count > 0
|
||||
|
||||
# ==================== RAG 索引操作 ====================
|
||||
|
||||
async def insert_rag_entry(
|
||||
@@ -251,6 +271,10 @@ class MongoDB:
|
||||
await self.tasks.create_index("task_id", unique=True)
|
||||
await self.tasks.create_index("created_at")
|
||||
|
||||
# 对话集合索引
|
||||
await self.conversations.create_index("conversation_id")
|
||||
await self.conversations.create_index("created_at")
|
||||
|
||||
logger.info("MongoDB 索引创建完成")
|
||||
|
||||
# ==================== 任务历史操作 ====================
|
||||
@@ -369,6 +393,108 @@ class MongoDB:
|
||||
result = await self.tasks.delete_one({"task_id": task_id})
|
||||
return result.deleted_count > 0
|
||||
|
||||
# ==================== 对话历史操作 ====================
|
||||
|
||||
async def insert_conversation(
    self,
    conversation_id: str,
    role: str,
    content: str,
    intent: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Store one conversation message.

    Args:
        conversation_id: Conversation session ID.
        role: Speaker role (user/assistant).
        content: Message text.
        intent: Intent type, if any.
        metadata: Extra metadata; an empty dict is stored when omitted.

    Returns:
        The ID of the inserted record, as a string.
    """
    record: Dict[str, Any] = {}
    record["conversation_id"] = conversation_id
    record["role"] = role
    record["content"] = content
    record["intent"] = intent
    record["metadata"] = metadata if metadata else {}
    # Timestamp is taken at insertion time (naive UTC).
    record["created_at"] = datetime.utcnow()

    outcome = await self.conversations.insert_one(record)
    return str(outcome.inserted_id)
|
||||
|
||||
async def get_conversation_history(
    self,
    conversation_id: str,
    limit: int = 20,
) -> List[Dict[str, Any]]:
    """
    Return the most recent messages of a conversation in chronological order.

    The query selects the newest ``limit`` messages (descending sort) and the
    result is then reversed, so callers always receive the latest turns,
    oldest first. Sorting ascending before applying the limit would instead
    return the first ``limit`` messages ever stored, and long conversations
    would never expose their recent history.

    Args:
        conversation_id: Conversation session ID.
        limit: Maximum number of messages to return.

    Returns:
        List of message dicts with ``_id`` converted to ``str`` and
        ``created_at`` (when present) converted to an ISO-8601 string.
    """
    # "_id" is a secondary key so messages sharing a timestamp keep a stable order.
    cursor = (
        self.conversations.find({"conversation_id": conversation_id})
        .sort([("created_at", -1), ("_id", -1)])
        .limit(limit)
    )

    messages = []
    async for msg in cursor:
        msg["_id"] = str(msg["_id"])
        if msg.get("created_at"):
            msg["created_at"] = msg["created_at"].isoformat()
        messages.append(msg)

    # Fetched newest-first; hand back oldest-first.
    messages.reverse()
    return messages
|
||||
|
||||
async def delete_conversation(self, conversation_id: str) -> bool:
    """Delete every stored message of a conversation.

    Returns:
        True if at least one record was removed, otherwise False.
    """
    selector = {"conversation_id": conversation_id}
    outcome = await self.conversations.delete_many(selector)
    removed_any = outcome.deleted_count > 0
    return removed_any
|
||||
|
||||
async def list_conversations(
    self,
    limit: int = 50,
    skip: int = 0,
) -> List[Dict[str, Any]]:
    """
    List conversations, ordered by their most recent message.

    Each returned entry is the latest message document of one conversation.

    Args:
        limit: Number of entries to return.
        skip: Number of entries to skip.

    Returns:
        List of message dicts with ``_id`` converted to ``str`` and
        ``created_at`` (when present) converted to an ISO-8601 string.
    """
    # Newest message first, so "$first" in the group stage picks the latest one.
    newest_first = {"$sort": {"created_at": -1}}
    latest_per_conversation = {
        "$group": {
            "_id": "$conversation_id",
            "last_message": {"$first": "$$ROOT"},
        }
    }
    unwrap_message = {"$replaceRoot": {"newRoot": "$last_message"}}
    stages = [
        newest_first,
        latest_per_conversation,
        unwrap_message,
        {"$sort": {"created_at": -1}},
        {"$skip": skip},
        {"$limit": limit},
    ]

    summaries = []
    async for entry in self.conversations.aggregate(stages):
        entry["_id"] = str(entry["_id"])
        stamp = entry.get("created_at")
        if stamp:
            entry["created_at"] = stamp.isoformat()
        summaries.append(entry)
    return summaries
|
||||
|
||||
|
||||
# ==================== 全局单例 ====================
|
||||
|
||||
|
||||
@@ -44,6 +44,22 @@ class DocxParser(BaseParser):
|
||||
error=f"文件不存在: {file_path}"
|
||||
)
|
||||
|
||||
# 尝试使用 python-docx 解析,失败则使用备用方法
|
||||
try:
|
||||
return self._parse_with_docx(path)
|
||||
except Exception as e:
|
||||
logger.warning(f"python-docx 解析失败,使用备用方法: {e}")
|
||||
try:
|
||||
return self._parse_fallback(path)
|
||||
except Exception as fallback_error:
|
||||
logger.error(f"备用解析方法也失败: {fallback_error}")
|
||||
return ParseResult(
|
||||
success=False,
|
||||
error=f"解析 Word 文档失败: {str(e)}"
|
||||
)
|
||||
|
||||
def _parse_with_docx(self, path: Path) -> ParseResult:
|
||||
"""使用 python-docx 解析文档"""
|
||||
# 检查文件扩展名
|
||||
if path.suffix.lower() not in self.supported_extensions:
|
||||
return ParseResult(
|
||||
@@ -51,9 +67,8 @@ class DocxParser(BaseParser):
|
||||
error=f"不支持的文件类型: {path.suffix}"
|
||||
)
|
||||
|
||||
try:
|
||||
# 读取 Word 文档
|
||||
doc = Document(file_path)
|
||||
doc = Document(path)
|
||||
|
||||
# 提取文本内容
|
||||
paragraphs = []
|
||||
@@ -107,43 +122,123 @@ class DocxParser(BaseParser):
|
||||
metadata = {
|
||||
"filename": path.name,
|
||||
"extension": path.suffix.lower(),
|
||||
"file_size": path.stat().st_size,
|
||||
"paragraph_count": len(paragraphs),
|
||||
"table_count": len(tables_data),
|
||||
"word_count": len(full_text),
|
||||
"char_count": len(full_text.replace("\n", "")),
|
||||
"has_tables": len(tables_data) > 0,
|
||||
"has_images": images_info.get("image_count", 0) > 0,
|
||||
"image_count": images_info.get("image_count", 0)
|
||||
}
|
||||
|
||||
# 返回结果
|
||||
return ParseResult(
|
||||
success=True,
|
||||
data={
|
||||
"content": full_text,
|
||||
"paragraphs": paragraphs_text,
|
||||
"paragraphs": paragraphs,
|
||||
"paragraphs_with_style": paragraphs,
|
||||
"tables": tables_data,
|
||||
"images": images_info,
|
||||
"word_count": len(full_text),
|
||||
"structured_data": {
|
||||
"paragraphs": paragraphs,
|
||||
"paragraphs_text": paragraphs_text,
|
||||
"tables": tables_data,
|
||||
"images": images_info
|
||||
}
|
||||
},
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"解析 Word 文档失败: {str(e)}")
|
||||
def _parse_fallback(self, path: Path) -> ParseResult:
|
||||
"""备用解析方法:直接解析 docx 的 XML 结构"""
|
||||
import zipfile
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
try:
|
||||
with zipfile.ZipFile(path, 'r') as zf:
|
||||
# 读取 document.xml
|
||||
if 'word/document.xml' not in zf.namelist():
|
||||
return ParseResult(success=False, error="无效的 docx 文件格式")
|
||||
|
||||
xml_content = zf.read('word/document.xml')
|
||||
root = ET.fromstring(xml_content)
|
||||
|
||||
# 命名空间
|
||||
namespaces = {
|
||||
'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
|
||||
}
|
||||
|
||||
paragraphs = []
|
||||
tables = []
|
||||
current_table = []
|
||||
|
||||
for elem in root.iter():
|
||||
if elem.tag.endswith('}p'): # 段落
|
||||
text_parts = []
|
||||
for t in elem.iter():
|
||||
if t.tag.endswith('}t') and t.text:
|
||||
text_parts.append(t.text)
|
||||
text = ''.join(text_parts).strip()
|
||||
if text:
|
||||
paragraphs.append({'text': text, 'style': 'Normal'})
|
||||
elif elem.tag.endswith('}tr'): # 表格行
|
||||
row_data = []
|
||||
for tc in elem.iter():
|
||||
if tc.tag.endswith('}tc'): # 单元格
|
||||
cell_text = []
|
||||
for t in tc.iter():
|
||||
if t.tag.endswith('}t') and t.text:
|
||||
cell_text.append(t.text)
|
||||
row_data.append(''.join(cell_text).strip())
|
||||
if row_data:
|
||||
current_table.append(row_data)
|
||||
else:
|
||||
# 表格结束,保存
|
||||
if current_table:
|
||||
tables.append({
|
||||
'table_index': len(tables),
|
||||
'rows': current_table,
|
||||
'row_count': len(current_table),
|
||||
'column_count': len(current_table[0]) if current_table else 0
|
||||
})
|
||||
current_table = []
|
||||
|
||||
# 保存最后一张表格
|
||||
if current_table:
|
||||
tables.append({
|
||||
'table_index': len(tables),
|
||||
'rows': current_table,
|
||||
'row_count': len(current_table),
|
||||
'column_count': len(current_table[0]) if current_table else 0
|
||||
})
|
||||
|
||||
# 构建文本
|
||||
paragraphs_text = [p["text"] for p in paragraphs]
|
||||
full_text_parts = ["【文档正文】"] + paragraphs_text
|
||||
|
||||
if tables:
|
||||
full_text_parts.append("\n【文档表格】")
|
||||
for idx, table in enumerate(tables):
|
||||
full_text_parts.append(f"--- 表格 {idx + 1} ---")
|
||||
for row in table["rows"]:
|
||||
full_text_parts.append(" | ".join(str(cell) for cell in row))
|
||||
|
||||
full_text = "\n".join(full_text_parts)
|
||||
|
||||
return ParseResult(
|
||||
success=False,
|
||||
error=f"解析 Word 文档失败: {str(e)}"
|
||||
success=True,
|
||||
data={
|
||||
"content": full_text,
|
||||
"paragraphs": paragraphs,
|
||||
"paragraphs_with_style": paragraphs,
|
||||
"tables": tables,
|
||||
"images": {"image_count": 0, "descriptions": []}
|
||||
},
|
||||
metadata={
|
||||
"filename": path.name,
|
||||
"extension": path.suffix.lower(),
|
||||
"paragraph_count": len(paragraphs),
|
||||
"table_count": len(tables),
|
||||
"image_count": 0,
|
||||
"parse_method": "fallback_xml"
|
||||
}
|
||||
)
|
||||
|
||||
except zipfile.BadZipFile:
|
||||
return ParseResult(success=False, error="无效的 ZIP/文档文件")
|
||||
except Exception as e:
|
||||
return ParseResult(success=False, error=f"备用解析失败: {str(e)}")
|
||||
|
||||
def extract_images_as_base64(self, file_path: str) -> List[Dict[str, str]]:
|
||||
"""
|
||||
提取 Word 文档中的所有图片,返回 base64 编码列表
|
||||
@@ -197,6 +292,83 @@ class DocxParser(BaseParser):
|
||||
logger.info(f"共提取 {len(images)} 张图片")
|
||||
return images
|
||||
|
||||
def extract_text_from_images(self, file_path: str, lang: str = 'chi_sim+eng') -> Dict[str, Any]:
    """
    Run OCR on the images embedded in a Word document.

    The .docx file is opened as a ZIP archive and every entry under
    ``word/media/`` with a png/jpg/jpeg/gif/bmp extension is passed to
    Tesseract. A failure on one image is logged and does not stop the others.

    Args:
        file_path: Path to the Word file.
        lang: Tesseract language code; defaults to Simplified Chinese plus
            English (chi_sim+eng).

    Returns:
        Dict with ``success``, ``image_count``, ``extracted_text`` (one entry
        per image that yielded non-empty text) and ``total_chars``; an
        ``error`` key is added on failure. ``image_count`` is the number of
        images that produced text, not the number of images in the document.
        If pytesseract is not installed, a failure dict is returned
        immediately.
    """
    import zipfile
    from io import BytesIO
    from PIL import Image

    # pytesseract is optional: degrade to an error result instead of raising.
    try:
        import pytesseract
    except ImportError:
        logger.warning("pytesseract 未安装,OCR 功能不可用")
        return {
            "success": False,
            "error": "pytesseract 未安装,请运行: pip install pytesseract",
            "image_count": 0,
            "extracted_text": []
        }

    results = {
        "success": True,
        "image_count": 0,
        "extracted_text": [],
        "total_chars": 0
    }

    try:
        with zipfile.ZipFile(file_path, 'r') as zf:
            # Collect the image files stored under word/media.
            media_files = [f for f in zf.namelist() if f.startswith('word/media/')]

            for idx, filename in enumerate(media_files):
                ext = filename.split('.')[-1].lower()
                if ext not in ['png', 'jpg', 'jpeg', 'gif', 'bmp']:
                    continue

                try:
                    # Read the raw image bytes from the archive.
                    image_data = zf.read(filename)
                    image = Image.open(BytesIO(image_data))

                    # Extract text with Tesseract OCR.
                    text = pytesseract.image_to_string(image, lang=lang)
                    text = text.strip()

                    # Only images that yielded text are recorded.
                    if text:
                        results["extracted_text"].append({
                            "image_index": idx,
                            "filename": filename,
                            "text": text,
                            "char_count": len(text)
                        })
                        results["total_chars"] += len(text)

                    logger.info(f"图片 {filename} OCR 识别完成,提取 {len(text)} 字符")

                except Exception as e:
                    # Per-image failure: log it and continue with the next image.
                    logger.warning(f"图片 {filename} OCR 识别失败: {str(e)}")

            results["image_count"] = len(results["extracted_text"])

    except zipfile.BadZipFile:
        results["success"] = False
        results["error"] = "无效的 Word 文档文件"
    except Exception as e:
        results["success"] = False
        results["error"] = f"OCR 处理失败: {str(e)}"

    return results
|
||||
|
||||
def extract_key_sentences(self, text: str, max_sentences: int = 10) -> List[str]:
|
||||
"""
|
||||
从文本中提取关键句子
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
"""
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from app.services.template_fill_service import template_fill_service
|
||||
from app.services.template_fill_service import template_fill_service, TemplateField
|
||||
from app.services.rag_service import rag_service
|
||||
from app.services.markdown_ai_service import markdown_ai_service
|
||||
from app.core.database import mongodb
|
||||
@@ -15,6 +16,31 @@ from app.core.database import mongodb
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_filenames_from_text(text: str) -> List[str]:
|
||||
"""
|
||||
从指令文本中提取文件名列表。
|
||||
|
||||
智能处理用'和'/'与'/'、分隔的多个文件名(尤其是带年号的统计公报)。
|
||||
"""
|
||||
# 先去掉"对比这两个文档"等引导语,只保留文件名部分
|
||||
text = re.sub(r'^(?:对比|比较)这两个?文档[的差异]?[::]?', '', text).strip()
|
||||
text = re.sub(r'两个文档.*$', '', text).strip()
|
||||
if not text:
|
||||
return []
|
||||
|
||||
# 直接查找所有带扩展名的文件名模式
|
||||
results = []
|
||||
for m in re.finditer(r'[^\s,。!?、和与]+(?=\.(?:docx|xlsx|md|txt))', text):
|
||||
start = m.start()
|
||||
ext_match = re.search(r'\.(?:docx|xlsx|md|txt)', text[m.end():])
|
||||
if ext_match:
|
||||
fn = text[start:m.end() + ext_match.end()]
|
||||
if fn:
|
||||
results.append(fn)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
class InstructionExecutor:
|
||||
"""指令执行器"""
|
||||
|
||||
@@ -41,9 +67,10 @@ class InstructionExecutor:
|
||||
self.intent_parser = intent_parser
|
||||
|
||||
context = context or {}
|
||||
context["instruction"] = instruction # 保存原始指令以便后续使用
|
||||
|
||||
# 解析意图
|
||||
intent, params = await self.intent_parser.parse(instruction)
|
||||
# 解析意图(传递对话历史上下文)
|
||||
intent, params = await self.intent_parser.parse(instruction, context)
|
||||
|
||||
# 根据意图类型执行相应操作
|
||||
if intent == "extract":
|
||||
@@ -72,18 +99,48 @@ class InstructionExecutor:
|
||||
async def _execute_extract(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""执行信息提取"""
|
||||
try:
|
||||
target_fields = params.get("field_refs", [])
|
||||
# target_fields 来自意图解析,field_refs 来自引号/字段关键词匹配
|
||||
target_fields = params.get("target_fields", []) or params.get("field_refs", [])
|
||||
doc_ids = params.get("document_refs", [])
|
||||
instruction_text = context.get("instruction", "")
|
||||
|
||||
# 如果没有指定文档,尝试按文件名精确搜索
|
||||
if not doc_ids or "all_docs" in doc_ids:
|
||||
if instruction_text:
|
||||
import re
|
||||
# 提取引号内的内容或文件名
|
||||
filename_match = re.search(r'["""]([^"""]+)["""]', instruction_text)
|
||||
if filename_match:
|
||||
search_term = filename_match.group(1)
|
||||
else:
|
||||
match = re.search(r'([^\s]+\.(?:docx|xlsx|md|txt))', instruction_text)
|
||||
search_term = match.group(1) if match else None
|
||||
|
||||
if search_term:
|
||||
logger.info(f"提取时搜索文档: {search_term}")
|
||||
searched_docs = await mongodb.search_documents(search_term, limit=5)
|
||||
if searched_docs:
|
||||
# 优先选择文件名完全匹配的文档
|
||||
best_docs = [
|
||||
d for d in searched_docs
|
||||
if search_term.lower() in d.get("metadata", {}).get("original_filename", "").lower()
|
||||
]
|
||||
if not best_docs:
|
||||
best_docs = [searched_docs[0]]
|
||||
context["source_docs"] = best_docs
|
||||
doc_ids = [doc.get("_id", "") for doc in best_docs]
|
||||
logger.info(f"找到 {len(best_docs)} 个文档用于提取,最佳: {best_docs[0].get('metadata', {}).get('original_filename', '?')}")
|
||||
|
||||
if not target_fields:
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "extract",
|
||||
"error": "未指定要提取的字段",
|
||||
"message": "请明确说明要提取哪些字段,如:'提取医院数量和床位数'"
|
||||
}
|
||||
|
||||
# 如果指定了文档,验证文档存在
|
||||
if doc_ids and "all_docs" not in doc_ids:
|
||||
# 如果指定了文档且还没有加载 source_docs,则验证并加载
|
||||
if doc_ids and "all_docs" not in doc_ids and not context.get("source_docs"):
|
||||
valid_docs = []
|
||||
for doc_ref in doc_ids:
|
||||
doc_id = doc_ref.replace("doc_", "")
|
||||
@@ -93,20 +150,22 @@ class InstructionExecutor:
|
||||
if not valid_docs:
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "extract",
|
||||
"error": "指定的文档不存在",
|
||||
"message": "请检查文档编号是否正确"
|
||||
}
|
||||
context["source_docs"] = valid_docs
|
||||
|
||||
# 构建字段列表
|
||||
fields = []
|
||||
for i, field_name in enumerate(target_fields):
|
||||
fields.append({
|
||||
"name": field_name,
|
||||
"cell": f"A{i+1}",
|
||||
"field_type": "text",
|
||||
"required": False
|
||||
})
|
||||
# 构建字段列表(使用 TemplateField dataclass)
|
||||
fields = [
|
||||
TemplateField(
|
||||
name=field_name,
|
||||
cell=f"A{i+1}",
|
||||
field_type="text",
|
||||
required=False
|
||||
)
|
||||
for i, field_name in enumerate(target_fields)
|
||||
]
|
||||
|
||||
# 调用填表服务
|
||||
result = await template_fill_service.fill_template(
|
||||
@@ -143,7 +202,7 @@ class InstructionExecutor:
|
||||
}
|
||||
|
||||
# 获取源文档
|
||||
source_docs = context.get("source_docs", [])
|
||||
source_docs = context.get("source_docs", []) or []
|
||||
source_doc_ids = [doc.get("_id") for doc in source_docs if doc.get("_id")]
|
||||
|
||||
# 获取字段
|
||||
@@ -175,36 +234,103 @@ class InstructionExecutor:
|
||||
}
|
||||
|
||||
async def _execute_summarize(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""执行摘要总结"""
|
||||
"""执行摘要总结 - 使用 LLM 生成真实摘要"""
|
||||
try:
|
||||
docs = context.get("source_docs", [])
|
||||
import re
|
||||
docs = context.get("source_docs", []) or []
|
||||
instruction_text = context.get("instruction", "")
|
||||
|
||||
# 从指令中提取文件名/关键词,优先搜索精确文档
|
||||
search_term = None
|
||||
if instruction_text:
|
||||
filename_match = re.search(r'["""]([^"""]+)["""]', instruction_text)
|
||||
if filename_match:
|
||||
search_term = filename_match.group(1)
|
||||
else:
|
||||
file_match = re.search(r'([^\s,。!?,]+\.(?:docx|xlsx|md|txt))', instruction_text)
|
||||
if file_match:
|
||||
search_term = file_match.group(1)
|
||||
|
||||
# 如果没有文档或有更精确的搜索词,尝试重新搜索
|
||||
if not docs or search_term:
|
||||
if search_term:
|
||||
logger.info(f"按关键词搜索文档: {search_term}")
|
||||
searched_docs = await mongodb.search_documents(search_term, limit=5)
|
||||
if searched_docs:
|
||||
# 优先使用文件名最匹配的文档
|
||||
docs = sorted(
|
||||
searched_docs,
|
||||
key=lambda d: 1 if search_term.lower() in d.get("metadata", {}).get("original_filename", "").lower() else 0,
|
||||
reverse=True
|
||||
)
|
||||
logger.info(f"找到 {len(docs)} 个文档,最佳匹配: {docs[0].get('metadata', {}).get('original_filename', '?')}")
|
||||
|
||||
if not docs:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "没有可用的文档",
|
||||
"message": "请先上传要总结的文档"
|
||||
"success": True,
|
||||
"intent": "summarize",
|
||||
"action_needed": "provide_document",
|
||||
"message": "我理解了,您想分析文档内容。",
|
||||
"suggestion": "请提供已上传文档的名称(可以是文件名或部分名称),或者上传您想要分析的文档。\n\n支持的格式:docx、xlsx、md、txt\n\n例如:'分析2021年民政事业发展统计公报' 或 '总结卫生健康数据'"
|
||||
}
|
||||
|
||||
summaries = []
|
||||
for doc in docs[:5]: # 最多处理5个文档
|
||||
content = doc.get("content", "")[:5000] # 限制内容长度
|
||||
if content:
|
||||
summaries.append({
|
||||
"filename": doc.get("metadata", {}).get("original_filename", "未知"),
|
||||
"content_preview": content[:500] + "..." if len(content) > 500 else content
|
||||
})
|
||||
# 对第一个(最佳匹配)文档生成 AI 摘要
|
||||
primary_doc = docs[0]
|
||||
content = primary_doc.get("content", "")
|
||||
filename = primary_doc.get("metadata", {}).get("original_filename", "未知文档")
|
||||
|
||||
if not content:
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "summarize",
|
||||
"error": "文档内容为空",
|
||||
"message": f"文档 {filename} 没有可供分析的文本内容"
|
||||
}
|
||||
|
||||
# 使用 LLM 生成摘要
|
||||
content_for_summary = content[:12000] # 最多取前 12000 字
|
||||
user_request = instruction_text or "请总结这份文档"
|
||||
|
||||
prompt = f"""请对以下文档进行全面、有条理的摘要分析。
|
||||
|
||||
文档名称:{filename}
|
||||
用户要求:{user_request}
|
||||
|
||||
文档内容:
|
||||
{content_for_summary}
|
||||
|
||||
请按以下格式输出摘要:
|
||||
1. **文档概述**:简述文档主题和背景(2-3句)
|
||||
2. **主要内容**:列出文档的核心数据和关键信息(用要点列出)
|
||||
3. **重要数据**:提取文档中的重要数字、统计数据
|
||||
4. **主要结论**:归纳文档的主要结论或趋势
|
||||
|
||||
要求:条理清晰,数据准确,不要遗漏关键信息。"""
|
||||
|
||||
from app.services.llm_service import llm_service
|
||||
messages = [
|
||||
{"role": "system", "content": "你是一个专业的文档分析助手,擅长提取关键信息并生成结构化摘要。"},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
response = await llm_service.chat(messages=messages, temperature=0.3, max_tokens=2000)
|
||||
ai_summary = llm_service.extract_message_content(response)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"intent": "summarize",
|
||||
"summaries": summaries,
|
||||
"message": f"找到 {len(summaries)} 个文档可供参考"
|
||||
"ai_summary": ai_summary,
|
||||
"filename": filename,
|
||||
"doc_id": primary_doc.get("_id", ""),
|
||||
"total_docs_found": len(docs),
|
||||
"message": f"已生成文档摘要"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"摘要执行失败: {e}")
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "summarize",
|
||||
"error": str(e),
|
||||
"message": f"摘要生成失败: {str(e)}"
|
||||
}
|
||||
@@ -213,17 +339,39 @@ class InstructionExecutor:
|
||||
"""执行问答"""
|
||||
try:
|
||||
question = params.get("question", "")
|
||||
instruction_text = context.get("instruction", "")
|
||||
|
||||
if not question:
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "question",
|
||||
"error": "未提供问题",
|
||||
"message": "请输入要回答的问题"
|
||||
}
|
||||
|
||||
# 使用 RAG 检索相关文档
|
||||
docs = context.get("source_docs", [])
|
||||
rag_results = []
|
||||
docs = context.get("source_docs", []) or []
|
||||
|
||||
# 如果没有文档,尝试从指令中提取文件名搜索
|
||||
if not docs:
|
||||
filename_match = re.search(r'["""]([^"""]+\.(?:docx|xlsx|md|txt))["""]', instruction_text)
|
||||
if not filename_match:
|
||||
filename_match = re.search(r'([^\s,。!?]+\.(?:docx|xlsx|md|txt))', instruction_text)
|
||||
if filename_match:
|
||||
found = await mongodb.search_documents(filename_match.group(1), limit=5)
|
||||
if found:
|
||||
docs = found
|
||||
|
||||
if not docs:
|
||||
return {
|
||||
"success": True,
|
||||
"intent": "question",
|
||||
"question": question,
|
||||
"answer": None,
|
||||
"message": "请先上传文档,我才能回答您的问题"
|
||||
}
|
||||
|
||||
# 使用 RAG 检索相关文档
|
||||
rag_results = []
|
||||
for doc in docs:
|
||||
doc_id = doc.get("_id", "")
|
||||
if doc_id:
|
||||
@@ -241,12 +389,42 @@ class InstructionExecutor:
|
||||
doc.get("content", "")[:3000] for doc in docs[:3] if doc.get("content")
|
||||
])
|
||||
|
||||
if not context_text:
|
||||
return {
|
||||
"success": True,
|
||||
"intent": "question",
|
||||
"question": question,
|
||||
"context_preview": context_text[:500] + "..." if len(context_text) > 500 else context_text,
|
||||
"message": "已找到相关上下文,可进行问答"
|
||||
"answer": None,
|
||||
"message": "文档内容为空,无法回答问题"
|
||||
}
|
||||
|
||||
# 使用 LLM 生成答案
|
||||
filename = docs[0].get("metadata", {}).get("original_filename", "文档")
|
||||
prompt = f"""基于以下文档内容,回答用户的问题。
|
||||
|
||||
文档名称:{filename}
|
||||
用户问题:{question}
|
||||
|
||||
文档内容:
|
||||
{context_text[:8000]}
|
||||
|
||||
请根据文档内容准确回答问题。如果文档中没有相关信息,请明确说明。"""
|
||||
|
||||
from app.services.llm_service import llm_service
|
||||
messages = [
|
||||
{"role": "system", "content": "你是一个专业的文档问答助手,根据提供的内容准确回答用户问题。"},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
response = await llm_service.chat(messages=messages, temperature=0.3, max_tokens=1500)
|
||||
answer = llm_service.extract_message_content(response)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"intent": "question",
|
||||
"question": question,
|
||||
"answer": answer,
|
||||
"filename": filename,
|
||||
"message": "已生成回答"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
@@ -299,12 +477,53 @@ class InstructionExecutor:
|
||||
async def _execute_compare(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""执行对比分析"""
|
||||
try:
|
||||
docs = context.get("source_docs", [])
|
||||
docs = context.get("source_docs", []) or []
|
||||
instruction_text = context.get("instruction", "")
|
||||
|
||||
# 优先从指令中提取具体的文件名
|
||||
filenames = _extract_filenames_from_text(instruction_text)
|
||||
|
||||
if filenames:
|
||||
# 只选择文件名匹配的那些文档
|
||||
matched_docs = []
|
||||
for doc in docs:
|
||||
fname = doc.get("metadata", {}).get("original_filename", "").lower()
|
||||
for fn in filenames:
|
||||
if fn.lower() in fname or fname in fn.lower():
|
||||
matched_docs.append(doc)
|
||||
break
|
||||
# 如果匹配到足够文档,用匹配的
|
||||
if len(matched_docs) >= 2:
|
||||
docs = matched_docs
|
||||
else:
|
||||
# 匹配不够,尝试按文件名搜索 MongoDB
|
||||
all_found = []
|
||||
for fn in filenames:
|
||||
found = await mongodb.search_documents(fn, limit=5)
|
||||
all_found.extend(found)
|
||||
seen = set()
|
||||
unique_docs = []
|
||||
for d in all_found:
|
||||
did = d.get("_id", "")
|
||||
if did and did not in seen:
|
||||
seen.add(did)
|
||||
unique_docs.append(d)
|
||||
if len(unique_docs) >= 2:
|
||||
docs = unique_docs
|
||||
elif len(unique_docs) == 1 and len(docs) >= 1:
|
||||
# 找到一个指定的 + 用一个通用的
|
||||
docs = unique_docs + docs[:1]
|
||||
elif docs and len(filenames) == 1:
|
||||
# 找到一个指定文件名但只有一个匹配,尝试补充
|
||||
docs = unique_docs + [d for d in docs if d not in unique_docs]
|
||||
docs = docs[:2]
|
||||
|
||||
if len(docs) < 2:
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "compare",
|
||||
"error": "对比需要至少2个文档",
|
||||
"message": "请上传至少2个文档进行对比"
|
||||
"message": "请上传至少2个文档进行对比,或明确说出要对比的文档名称"
|
||||
}
|
||||
|
||||
# 提取文档基本信息
|
||||
@@ -329,6 +548,7 @@ class InstructionExecutor:
|
||||
logger.error(f"对比执行失败: {e}")
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "compare",
|
||||
"error": str(e),
|
||||
"message": f"对比分析失败: {str(e)}"
|
||||
}
|
||||
@@ -336,10 +556,23 @@ class InstructionExecutor:
|
||||
async def _execute_edit(self, params: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""执行文档编辑操作"""
|
||||
try:
|
||||
docs = context.get("source_docs", [])
|
||||
docs = context.get("source_docs", []) or []
|
||||
instruction_text = context.get("instruction", "")
|
||||
|
||||
# 如果没有文档,尝试从指令中提取文件名搜索
|
||||
if not docs:
|
||||
filename_match = re.search(r'["""]([^"""]+\.(?:docx|xlsx|md|txt))["""]', instruction_text)
|
||||
if not filename_match:
|
||||
filename_match = re.search(r'([^\s,。!?]+\.(?:docx|xlsx|md|txt))', instruction_text)
|
||||
if filename_match:
|
||||
found = await mongodb.search_documents(filename_match.group(1), limit=3)
|
||||
if found:
|
||||
docs = found
|
||||
|
||||
if not docs:
|
||||
return {
|
||||
"success": False,
|
||||
"intent": "edit",
|
||||
"error": "没有可用的文档",
|
||||
"message": "请先上传要编辑的文档"
|
||||
}
|
||||
@@ -405,7 +638,7 @@ class InstructionExecutor:
|
||||
- Word -> Markdown
|
||||
"""
|
||||
try:
|
||||
docs = context.get("source_docs", [])
|
||||
docs = context.get("source_docs", []) or []
|
||||
if not docs:
|
||||
return {
|
||||
"success": False,
|
||||
|
||||
@@ -28,7 +28,7 @@ class IntentParser:
|
||||
INTENT_KEYWORDS = {
|
||||
INTENT_EXTRACT: ["提取", "抽取", "获取", "找出", "查找", "识别", "找到"],
|
||||
INTENT_FILL_TABLE: ["填表", "填写", "填充", "录入", "导入到表格", "填写到"],
|
||||
INTENT_SUMMARIZE: ["总结", "摘要", "概括", "概述", "归纳", "提炼"],
|
||||
INTENT_SUMMARIZE: ["总结", "摘要", "概括", "概述", "归纳", "提炼", "分析", "聊聊"],
|
||||
INTENT_QUESTION: ["问答", "回答", "解释", "什么是", "为什么", "如何", "怎样", "多少", "几个"],
|
||||
INTENT_SEARCH: ["搜索", "查找", "检索", "查询", "找"],
|
||||
INTENT_COMPARE: ["对比", "比较", "差异", "区别", "不同"],
|
||||
@@ -47,12 +47,13 @@ class IntentParser:
|
||||
def __init__(self):
|
||||
self.intent_history: List[Dict[str, Any]] = []
|
||||
|
||||
async def parse(self, text: str) -> Tuple[str, Dict[str, Any]]:
|
||||
async def parse(self, text: str, context: Dict[str, Any] = None) -> Tuple[str, Dict[str, Any]]:
|
||||
"""
|
||||
解析自然语言指令
|
||||
|
||||
Args:
|
||||
text: 用户输入的自然语言
|
||||
context: 执行上下文(包含对话历史等)
|
||||
|
||||
Returns:
|
||||
(意图类型, 参数字典)
|
||||
@@ -61,11 +62,17 @@ class IntentParser:
|
||||
if not text:
|
||||
return self.INTENT_UNKNOWN, {}
|
||||
|
||||
# 检查对话历史中的上下文
|
||||
conversation_history = []
|
||||
if context and context.get("conversation_history"):
|
||||
conversation_history = context.get("conversation_history", [])
|
||||
logger.info(f"解析时使用对话历史: {len(conversation_history)} 条消息")
|
||||
|
||||
# 记录历史
|
||||
self.intent_history.append({"text": text, "intent": None})
|
||||
|
||||
# 识别意图
|
||||
intent = self._recognize_intent(text)
|
||||
# 识别意图(考虑对话上下文)
|
||||
intent = self._recognize_intent_with_context(text, conversation_history)
|
||||
|
||||
# 提取参数
|
||||
params = self._extract_params(text, intent)
|
||||
@@ -78,6 +85,42 @@ class IntentParser:
|
||||
|
||||
return intent, params
|
||||
|
||||
def _recognize_intent_with_context(self, text: str, conversation_history: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
基于对话历史识别意图
|
||||
|
||||
Args:
|
||||
text: 当前用户输入
|
||||
conversation_history: 对话历史
|
||||
|
||||
Returns:
|
||||
意图类型
|
||||
"""
|
||||
# 如果对话历史为空,使用基础意图识别
|
||||
if not conversation_history:
|
||||
return self._recognize_intent(text)
|
||||
|
||||
# 基于历史上下文进行意图识别
|
||||
# 分析最近的对话了解用户意图的延续性
|
||||
last_intent = None
|
||||
last_topic = None
|
||||
|
||||
for msg in conversation_history[-5:]: # 最多看最近5条消息
|
||||
if msg.get("role") == "assistant":
|
||||
last_intent = msg.get("intent")
|
||||
if msg.get("intent") and msg.get("intent") != "unknown":
|
||||
last_topic = msg.get("intent")
|
||||
|
||||
# 如果当前消息很短(如"继续"、"是的"),可能延续之前的意图
|
||||
short_confirmation = ["是", "是的", "好", "继续", "ok", "好", "接着", "然后", "还有吗"]
|
||||
if text.strip() in short_confirmation or len(text.strip()) <= 3:
|
||||
if last_topic:
|
||||
logger.info(f"简短确认,延续之前的意图: {last_topic}")
|
||||
return last_topic
|
||||
|
||||
# 否则使用标准意图识别
|
||||
return self._recognize_intent(text)
|
||||
|
||||
def _recognize_intent(self, text: str) -> str:
|
||||
"""识别意图类型"""
|
||||
intent_scores: Dict[str, float] = {}
|
||||
@@ -214,18 +257,27 @@ class IntentParser:
|
||||
return template_info if template_info else None
|
||||
|
||||
def _extract_target_fields(self, text: str) -> List[str]:
|
||||
"""提取目标字段"""
|
||||
"""提取目标字段 - 按分隔符切分再逐段清理"""
|
||||
fields = []
|
||||
|
||||
# 匹配 "提取XXX和YYY"、"抽取XXX、YYY"
|
||||
patterns = [
|
||||
r"提取([^(and|,|,)+]+?)(?:和|与|、|,|plus)",
|
||||
r"抽取([^(and|,|,)+]+?)(?:和|与|、|,|plus)",
|
||||
]
|
||||
# 去除提取/抽取前缀
|
||||
cleaned_text = re.sub(r"^(?:提取|抽取)", "", text).strip()
|
||||
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, text)
|
||||
fields.extend([m.strip() for m in matches if m.strip()])
|
||||
# 按'和'、'与'、'、'分割成多段
|
||||
segments = re.split(r"[和与、]", cleaned_text)
|
||||
|
||||
# 常见前缀(这些不是字段名,需要去除)
|
||||
prefixes = ["文档中的", "文档中", "文件中的", "文件中", "内容中的", "内容中"]
|
||||
|
||||
for seg in segments:
|
||||
seg = seg.strip()
|
||||
# 去除常见前缀
|
||||
for p in prefixes:
|
||||
if seg.startswith(p):
|
||||
seg = seg[len(p):]
|
||||
break
|
||||
if seg and 2 <= len(seg) <= 20:
|
||||
fields.append(seg)
|
||||
|
||||
return list(set(fields))
|
||||
|
||||
|
||||
@@ -526,9 +526,10 @@ class ExcelStorageService:
|
||||
# 创建表
|
||||
model_class = self._create_table_model(table_name, columns, column_types)
|
||||
|
||||
# 创建表结构
|
||||
# 创建表结构 (使用异步方式)
|
||||
async with self.mysql_db.get_session() as session:
|
||||
model_class.__table__.create(session.bind, checkfirst=True)
|
||||
async with session.bind.begin() as conn:
|
||||
await conn.run_sync(lambda: model_class.__table__.create(checkfirst=True))
|
||||
|
||||
# 插入数据
|
||||
records = []
|
||||
|
||||
@@ -165,9 +165,9 @@ class BM25:
|
||||
class RAGService:
|
||||
"""RAG 检索增强服务"""
|
||||
|
||||
# 默认分块参数
|
||||
DEFAULT_CHUNK_SIZE = 500 # 每个文本块的大小(字符数)
|
||||
DEFAULT_CHUNK_OVERLAP = 50 # 块之间的重叠(字符数)
|
||||
# 默认分块参数 - 增大块大小减少embedding次数
|
||||
DEFAULT_CHUNK_SIZE = 1000 # 每个文本块的大小(字符数),增大以提升速度
|
||||
DEFAULT_CHUNK_OVERLAP = 100 # 块之间的重叠(字符数)
|
||||
|
||||
def __init__(self):
|
||||
self.embedding_model = None
|
||||
@@ -389,6 +389,70 @@ class RAGService:
|
||||
self._add_documents(documents, chunk_ids)
|
||||
logger.info(f"已索引文档 {doc_id},共 {len(chunks)} 个块")
|
||||
|
||||
async def index_document_content_async(
    self,
    doc_id: str,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
    chunk_size: int = None,
    chunk_overlap: int = None
):
    """
    Asynchronously index a document's content into the vector store (auto-chunked).

    The content is split into chunks, each chunk is wrapped in a
    ``SimpleDocument`` carrying the caller's metadata plus ``chunk_index``,
    ``total_chunks`` and ``doc_id``, and the batch is handed to
    ``self._add_documents`` through ``asyncio.to_thread``.

    Args:
        doc_id: Identifier of the document; also the prefix of every chunk id.
        content: Full text to index.
        metadata: Optional metadata copied onto every chunk.
        chunk_size: Chunk size; ``DEFAULT_CHUNK_SIZE`` when None.
        chunk_overlap: Overlap between chunks; ``DEFAULT_CHUNK_OVERLAP`` when None.
    """
    import asyncio

    # Guard clauses: nothing to do when RAG is switched off.
    if self._disabled:
        logger.info(f"[RAG DISABLED] 文档索引操作已跳过: {doc_id}")
        return

    if not self._initialized:
        self._init_vector_store()

    if self.embedding_model is None:
        logger.debug(f"文档跳过索引 (无嵌入模型): {doc_id}")
        return

    # Fall back to the class-level chunking defaults.
    size = self.DEFAULT_CHUNK_SIZE if chunk_size is None else chunk_size
    overlap = self.DEFAULT_CHUNK_OVERLAP if chunk_overlap is None else chunk_overlap

    pieces = self._split_into_chunks(content, size, overlap)
    if not pieces:
        logger.warning(f"文档内容为空,跳过索引: {doc_id}")
        return

    total = len(pieces)

    def _chunk_metadata(position: int) -> Dict[str, Any]:
        # Each chunk gets its own copy so the caller's dict is never mutated.
        merged = metadata.copy() if metadata else {}
        merged.update({
            "chunk_index": position,
            "total_chunks": total,
            "doc_id": doc_id
        })
        return merged

    documents = [
        SimpleDocument(page_content=piece, metadata=_chunk_metadata(position))
        for position, piece in enumerate(pieces)
    ]
    chunk_ids = [f"{doc_id}_chunk_{position}" for position in range(total)]

    # Run the add step in a worker thread instead of on the event loop.
    await asyncio.to_thread(self._add_documents, documents, chunk_ids)
    logger.info(f"已异步索引文档 {doc_id},共 {total} 个块")
|
||||
|
||||
def _add_documents(self, documents: List[SimpleDocument], doc_ids: List[str]):
|
||||
"""批量添加文档到向量索引"""
|
||||
if not documents:
|
||||
|
||||
@@ -300,13 +300,15 @@ class TableRAGService:
|
||||
filename: str,
|
||||
sheet_name: Optional[str] = None,
|
||||
header_row: int = 0,
|
||||
sample_size: int = 10
|
||||
sample_size: int = 10,
|
||||
skip_rag_index: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
为 Excel 表构建完整的 RAG 索引
|
||||
|
||||
流程:
|
||||
1. 读取 Excel 获取字段信息
|
||||
2. 如果 skip_rag_index=True,跳过 RAG 索引,直接存 MySQL
|
||||
2. AI 生成每个字段的语义描述
|
||||
3. 将字段描述存入向量数据库
|
||||
|
||||
@@ -367,6 +369,20 @@ class TableRAGService:
|
||||
results["field_count"] = len(df.columns)
|
||||
logger.info(f"表名: {table_name}, 字段数: {len(df.columns)}")
|
||||
|
||||
# 跳过 RAG 索引时直接存 MySQL
|
||||
if skip_rag_index:
|
||||
logger.info(f"跳过 RAG 索引,直接存储到 MySQL")
|
||||
store_result = await self.excel_storage.store_excel(
|
||||
file_path=file_path,
|
||||
filename=filename,
|
||||
sheet_name=sheet_name,
|
||||
header_row=header_row
|
||||
)
|
||||
results["mysql_table"] = store_result.get("table_name") if store_result.get("success") else None
|
||||
results["row_count"] = store_result.get("row_count", len(df))
|
||||
results["indexed_count"] = 0
|
||||
return results
|
||||
|
||||
# 3. 初始化 RAG (如果需要)
|
||||
if not self.rag._initialized:
|
||||
self.rag._init_vector_store()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -39,6 +39,8 @@ openpyxl==3.1.2
|
||||
python-docx==0.8.11
|
||||
markdown-it-py==3.0.0
|
||||
chardet==5.2.0
|
||||
Pillow>=10.0.0
|
||||
pytesseract>=0.3.10
|
||||
|
||||
# ==================== AI / LLM ====================
|
||||
httpx==0.25.2
|
||||
|
||||
@@ -781,7 +781,8 @@ export const backendApi = {
|
||||
async exportFilledTemplate(
|
||||
templateId: string,
|
||||
filledData: Record<string, any>,
|
||||
format: 'xlsx' | 'docx' = 'xlsx'
|
||||
format: 'xlsx' | 'docx' = 'xlsx',
|
||||
filledFilePath?: string
|
||||
): Promise<Blob> {
|
||||
const url = `${BACKEND_BASE_URL}/templates/export`;
|
||||
|
||||
@@ -793,6 +794,7 @@ export const backendApi = {
|
||||
template_id: templateId,
|
||||
filled_data: filledData,
|
||||
format,
|
||||
...(filledFilePath && { filled_file_path: filledFilePath }),
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -964,6 +966,101 @@ export const backendApi = {
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
// ==================== 智能指令 API ====================
|
||||
|
||||
/**
|
||||
* 智能对话(支持多轮对话的指令执行)
|
||||
*/
|
||||
async instructionChat(
  instruction: string,
  docIds?: string[],
  context?: Record<string, any>
): Promise<{
  success: boolean;
  intent: string;
  result: Record<string, any>;
  message: string;
  hint?: string;
}> {
  // POSTs the instruction (plus optional document ids and context) to
  // /instruction/chat and resolves with the parsed JSON response.
  // Logs and rethrows on any failure.
  const url = `${BACKEND_BASE_URL}/instruction/chat`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ instruction, doc_ids: docIds, context }),
    });

    if (!response.ok) {
      // The error body may not be JSON (e.g. an HTML page from a proxy), and
      // `detail` may be a list rather than a string. Fall back to the generic
      // message instead of surfacing a SyntaxError or "[object Object]".
      const error = await response.json().catch(() => null);
      const detail = error && typeof error.detail === 'string' ? error.detail : '';
      throw new Error(detail || '对话处理失败');
    }

    return await response.json();
  } catch (error) {
    console.error('对话处理失败:', error);
    throw error;
  }
},
|
||||
|
||||
/**
|
||||
* 获取支持的指令类型列表
|
||||
*/
|
||||
async getSupportedIntents(): Promise<{
  intents: Array<{
    intent: string;
    name: string;
    examples: string[];
    params: string[];
  }>;
}> {
  // GETs /instruction/intents and resolves with the parsed JSON response.
  // Logs and rethrows on any failure.
  const endpoint = `${BACKEND_BASE_URL}/instruction/intents`;

  try {
    const res = await fetch(endpoint);
    if (res.ok) {
      return await res.json();
    }
    throw new Error('获取指令列表失败');
  } catch (err) {
    console.error('获取指令列表失败:', err);
    throw err;
  }
},
|
||||
|
||||
/**
|
||||
* 执行指令(同步模式)
|
||||
*/
|
||||
async executeInstruction(
  instruction: string,
  docIds?: string[],
  context?: Record<string, any>
): Promise<{
  success: boolean;
  intent: string;
  result: Record<string, any>;
  message: string;
}> {
  // POSTs the instruction (plus optional document ids and context) to
  // /instruction/execute and resolves with the parsed JSON response.
  // Logs and rethrows on any failure.
  const url = `${BACKEND_BASE_URL}/instruction/execute`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ instruction, doc_ids: docIds, context }),
    });

    if (!response.ok) {
      // The error body may not be JSON (e.g. an HTML page from a proxy), and
      // `detail` may be a list rather than a string. Fall back to the generic
      // message instead of surfacing a SyntaxError or "[object Object]".
      const error = await response.json().catch(() => null);
      const detail = error && typeof error.detail === 'string' ? error.detail : '';
      throw new Error(detail || '指令执行失败');
    }

    return await response.json();
  } catch (error) {
    console.error('指令执行失败:', error);
    throw error;
  }
},
|
||||
|
||||
};
|
||||
|
||||
// ==================== AI 分析 API ====================
|
||||
@@ -1529,61 +1626,66 @@ export const aiApi = {
|
||||
}
|
||||
},
|
||||
|
||||
// ==================== 对话历史 API ====================
|
||||
|
||||
/**
|
||||
* 智能对话(支持多轮对话的指令执行)
|
||||
* 获取对话历史
|
||||
*/
|
||||
async instructionChat(
|
||||
instruction: string,
|
||||
docIds?: string[],
|
||||
context?: Record<string, any>
|
||||
): Promise<{
|
||||
async getConversationHistory(conversationId: string, limit: number = 20): Promise<{
|
||||
success: boolean;
|
||||
intent: string;
|
||||
result: Record<string, any>;
|
||||
message: string;
|
||||
hint?: string;
|
||||
}> {
|
||||
const url = `${BACKEND_BASE_URL}/instruction/chat`;
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ instruction, doc_ids: docIds, context }),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json();
|
||||
throw new Error(error.detail || '对话处理失败');
|
||||
}
|
||||
|
||||
return await response.json();
|
||||
} catch (error) {
|
||||
console.error('对话处理失败:', error);
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* 获取支持的指令类型列表
|
||||
*/
|
||||
async getSupportedIntents(): Promise<{
|
||||
intents: Array<{
|
||||
intent: string;
|
||||
name: string;
|
||||
examples: string[];
|
||||
params: string[];
|
||||
messages: Array<{
|
||||
role: string;
|
||||
content: string;
|
||||
intent?: string;
|
||||
created_at: string;
|
||||
}>;
|
||||
}> {
|
||||
const url = `${BACKEND_BASE_URL}/instruction/intents`;
|
||||
const url = `${BACKEND_BASE_URL}/conversation/${conversationId}/history?limit=${limit}`;
|
||||
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) throw new Error('获取指令列表失败');
|
||||
if (!response.ok) throw new Error('获取对话历史失败');
|
||||
return await response.json();
|
||||
} catch (error) {
|
||||
console.error('获取指令列表失败:', error);
|
||||
throw error;
|
||||
console.error('获取对话历史失败:', error);
|
||||
return { success: false, messages: [] };
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* 删除对话历史
|
||||
*/
|
||||
async deleteConversation(conversationId: string): Promise<{
  success: boolean;
}> {
  // Sends DELETE /conversation/{id} and resolves with the parsed JSON
  // response. Never rejects: any failure is logged and reported as
  // { success: false }.
  // The id is a path segment, so it is percent-encoded to keep characters
  // such as '/', '?' or '#' from changing the request URL.
  const url = `${BACKEND_BASE_URL}/conversation/${encodeURIComponent(conversationId)}`;

  try {
    const response = await fetch(url, { method: 'DELETE' });
    if (!response.ok) throw new Error('删除对话历史失败');
    return await response.json();
  } catch (error) {
    console.error('删除对话历史失败:', error);
    return { success: false };
  }
},
|
||||
|
||||
/**
|
||||
* 获取会话列表
|
||||
*/
|
||||
async listConversations(limit: number = 50): Promise<{
  success: boolean;
  conversations: Array<any>;
}> {
  // GETs /conversation/all with the given limit and resolves with the parsed
  // JSON response. Never rejects: any failure is logged and reported as an
  // empty, unsuccessful result.
  const endpoint = `${BACKEND_BASE_URL}/conversation/all?limit=${limit}`;

  try {
    const res = await fetch(endpoint);
    if (res.ok) {
      return await res.json();
    }
    throw new Error('获取会话列表失败');
  } catch (err) {
    console.error('获取会话列表失败:', err);
    return { success: false, conversations: [] };
  }
}
|
||||
};
|
||||
|
||||
@@ -15,12 +15,14 @@ import {
|
||||
Sparkles,
|
||||
Database,
|
||||
FileSpreadsheet,
|
||||
RefreshCcw
|
||||
RefreshCcw,
|
||||
Trash2
|
||||
} from 'lucide-react';
|
||||
import { backendApi } from '@/db/backend-api';
|
||||
import { formatDistanceToNow } from 'date-fns';
|
||||
import { zhCN } from 'date-fns/locale';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
type DocumentItem = {
|
||||
doc_id: string;
|
||||
@@ -108,7 +110,7 @@ const Dashboard: React.FC = () => {
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
{[
|
||||
{ label: '已上传文档', value: stats.docs, icon: FileText, color: 'bg-blue-500', trend: '非结构化文档', link: '/documents' },
|
||||
{ label: 'Excel 文件', value: stats.excelFiles, icon: FileSpreadsheet, color: 'bg-emerald-500', trend: '结构化数据', link: '/excel-parse' },
|
||||
{ label: 'Excel 文件', value: stats.excelFiles, icon: FileSpreadsheet, color: 'bg-emerald-500', trend: '结构化数据', link: '/documents' },
|
||||
{ label: '填表任务', value: stats.tasks, icon: TableProperties, color: 'bg-indigo-500', trend: '待实现', link: '/form-fill' }
|
||||
].map((stat, i) => (
|
||||
<Card key={i} className="border-none shadow-md overflow-hidden group hover:shadow-xl transition-all duration-300">
|
||||
@@ -164,9 +166,31 @@ const Dashboard: React.FC = () => {
|
||||
{doc.doc_type.toUpperCase()} • {formatDistanceToNow(new Date(doc.created_at), { addSuffix: true, locale: zhCN })}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="px-2 py-1 rounded-full text-[10px] font-bold uppercase tracking-wider bg-muted">
|
||||
{doc.doc_type}
|
||||
</div>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="opacity-0 group-hover:opacity-100 text-destructive hover:bg-destructive/10 transition-opacity"
|
||||
onClick={async (e) => {
|
||||
e.stopPropagation();
|
||||
if (!confirm(`确定要删除 "${doc.original_filename}" 吗?`)) return;
|
||||
try {
|
||||
const result = await backendApi.deleteDocument(doc.doc_id);
|
||||
if (result.success) {
|
||||
setRecentDocs(prev => prev.filter(d => d.doc_id !== doc.doc_id));
|
||||
toast.success('文档已删除');
|
||||
}
|
||||
} catch (err: any) {
|
||||
toast.error(err.message || '删除失败');
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Trash2 size={16} />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
@@ -197,7 +221,7 @@ const Dashboard: React.FC = () => {
|
||||
<div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
||||
{[
|
||||
{ title: '上传文档', desc: '支持 docx/md/txt', icon: FileText, link: '/documents', color: 'bg-blue-500' },
|
||||
{ title: '解析 Excel', desc: '上传并分析数据', icon: FileSpreadsheet, link: '/excel-parse', color: 'bg-emerald-500' },
|
||||
{ title: '解析 Excel', desc: '上传并分析数据', icon: FileSpreadsheet, link: '/documents', color: 'bg-emerald-500' },
|
||||
{ title: '智能填表', desc: '自动填写表格模板', icon: TableProperties, link: '/form-fill', color: 'bg-indigo-500' },
|
||||
{ title: 'AI 助手', desc: '自然语言交互', icon: MessageSquareCode, link: '/assistant', color: 'bg-amber-500' }
|
||||
].map((item, i) => (
|
||||
|
||||
@@ -78,6 +78,19 @@ const Documents: React.FC = () => {
|
||||
const [expandedSheet, setExpandedSheet] = useState<string | null>(null);
|
||||
const [uploadExpanded, setUploadExpanded] = useState(false);
|
||||
|
||||
// 批量上传状态跟踪
|
||||
// Lifecycle stage of a single file shown in the upload progress panel.
type FileUploadStatus = 'pending' | 'uploading' | 'processing' | 'success' | 'failed';
|
||||
// Per-file tracking record; one entry is created for every file dropped onto the upload area.
interface UploadFileState {
|
||||
// The browser File object being uploaded.
file: File;
|
||||
// Current lifecycle stage of this file.
status: FileUploadStatus;
|
||||
// Progress value rendered as the progress-bar width in percent.
progress: number;
|
||||
// NOTE(review): presumably a per-file backend task id; no visible code assigns it - confirm before relying on it.
taskId?: string;
|
||||
// Failure message displayed under the file name when status is 'failed'.
error?: string;
|
||||
// Document id reported by the backend once the file has been processed successfully.
docId?: string;
|
||||
}
|
||||
// Rows of the upload progress panel; an empty array means no upload session is being displayed.
const [uploadStates, setUploadStates] = useState<UploadFileState[]>([]);
|
||||
// Task id returned by the batch upload endpoint for the most recent batch (null until one is started).
const [batchTaskId, setBatchTaskId] = useState<string | null>(null);
|
||||
|
||||
// AI 分析相关状态
|
||||
const [analyzing, setAnalyzing] = useState(false);
|
||||
const [analyzingForCharts, setAnalyzingForCharts] = useState(false);
|
||||
@@ -211,21 +224,119 @@ const Documents: React.FC = () => {
|
||||
}
|
||||
};
|
||||
|
||||
// 文件上传处理
|
||||
// 文件上传处理 - 批量上传
|
||||
/**
 * Dropzone handler: uploads the dropped files as one batch and tracks per-file progress.
 *
 * Flow:
 *  1. Seed one 'pending' row per file so the progress panel renders immediately.
 *  2. Send all files to the batch upload endpoint.
 *  3. If the backend returns a task id, poll that task until it succeeds, fails or
 *     times out, mirroring the outcome into `uploadStates`.
 *  4. If no task id is returned, fall back to uploading the files one by one.
 *
 * The `uploading` flag is cleared only after polling (or the fallback) has finished,
 * so the panel keeps showing the in-progress state while the batch is still running.
 *
 * @param acceptedFiles Files accepted by the dropzone; an empty list is ignored.
 */
const onDrop = async (acceptedFiles: File[]) => {
  if (acceptedFiles.length === 0) return;

  // Initialise the per-file upload rows.
  const initialStates: UploadFileState[] = acceptedFiles.map(file => ({
    file,
    status: 'pending',
    progress: 0
  }));
  setUploadStates(initialStates);
  setUploadExpanded(true);
  setUploading(true);

  try {
    // Use the batch upload endpoint.
    const result = await backendApi.uploadDocuments(acceptedFiles);

    if (result.task_id) {
      setBatchTaskId(result.task_id);

      // Mark every file as uploading.
      setUploadStates(prev => prev.map(s => ({ ...s, status: 'uploading', progress: 30 })));

      // Poll the batch task: 150 polls x 2 s = at most 5 minutes.
      const maxAttempts = 150;
      const pollIntervalMs = 2000;

      const checkBatchStatus = async () => {
        for (let attempts = 0; attempts < maxAttempts; attempts++) {
          try {
            const status = await backendApi.getTaskStatus(result.task_id);

            if (status.status === 'success' && status.result) {
              // Map each per-file result back onto its row; results are matched by index.
              const fileResults = status.result.results || [];
              setUploadStates(prev => prev.map((s, idx) => {
                const fileResult = fileResults[idx];
                if (fileResult?.success) {
                  return { ...s, status: 'success', progress: 100, docId: fileResult.doc_id };
                } else {
                  return { ...s, status: 'failed', progress: 0, error: fileResult?.error || '处理失败' };
                }
              }));
              loadDocuments();
              return;
            } else if (status.status === 'failure') {
              // The whole batch failed: flag every row with the task-level error.
              setUploadStates(prev => prev.map(s => ({
                ...s,
                status: 'failed',
                error: status.error || '批量处理失败'
              })));
              return;
            } else {
              // Still running: use the reported progress, or an estimate capped at 90%.
              const progress = status.progress || Math.min(30 + attempts * 2, 90);
              setUploadStates(prev => prev.map(s => ({
                ...s,
                status: s.status === 'uploading' ? 'processing' : s.status,
                progress
              })));
            }
          } catch (e) {
            // Best effort: a failed status request is logged and polling continues.
            console.error('检查批量状态失败', e);
          }
          await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
        }

        // Timed out: every row that has not succeeded is marked as failed.
        setUploadStates(prev => prev.map(s => {
          if (s.status !== 'success') {
            return { ...s, status: 'failed', error: '处理超时' };
          }
          return s;
        }));
      };

      // Await the poller so `finally` does not clear `uploading` while the batch is
      // still being processed, and so any rejection reaches the catch block below.
      await checkBatchStatus();
    } else {
      // No batch task id: fall back to the legacy one-by-one upload path.
      await handleSingleFileUploads(acceptedFiles);
    }
  } catch (error: any) {
    toast.error(error.message || '上传失败');
    setUploadStates(prev => prev.map(s => ({
      ...s,
      status: 'failed',
      error: error.message || '上传失败'
    })));
  } finally {
    setUploading(false);
  }
};
|
||||
|
||||
// 单文件上传后备逻辑
|
||||
const handleSingleFileUploads = async (files: File[]) => {
|
||||
let successCount = 0;
|
||||
let failCount = 0;
|
||||
const successfulFiles: File[] = [];
|
||||
|
||||
// 逐个上传文件
|
||||
for (const file of acceptedFiles) {
|
||||
for (let i = 0; i < files.length; i++) {
|
||||
const file = files[i];
|
||||
const ext = file.name.split('.').pop()?.toLowerCase();
|
||||
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'uploading' } : s
|
||||
));
|
||||
|
||||
try {
|
||||
if (ext === 'xlsx' || ext === 'xls') {
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'processing', progress: 50 } : s
|
||||
));
|
||||
const result = await backendApi.uploadExcel(file, {
|
||||
parseAllSheets: parseOptions.parseAllSheets,
|
||||
headerRow: parseOptions.headerRow
|
||||
@@ -233,99 +344,60 @@ const Documents: React.FC = () => {
|
||||
if (result.success) {
|
||||
successCount++;
|
||||
successfulFiles.push(file);
|
||||
// 第一个Excel文件设置解析结果供预览
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'success', progress: 100 } : s
|
||||
));
|
||||
if (successCount === 1) {
|
||||
setUploadedFile(file);
|
||||
setParseResult(result);
|
||||
if (result.metadata?.sheet_count === 1) {
|
||||
setExpandedSheet(Object.keys(result.data?.sheets || {})[0] || null);
|
||||
}
|
||||
}
|
||||
loadDocuments();
|
||||
} else {
|
||||
failCount++;
|
||||
toast.error(`${file.name}: ${result.error || '解析失败'}`);
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'failed', error: result.error || '解析失败' } : s
|
||||
));
|
||||
}
|
||||
} else if (ext === 'md' || ext === 'markdown') {
|
||||
} else {
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'processing', progress: 50 } : s
|
||||
));
|
||||
const result = await backendApi.uploadDocument(file);
|
||||
if (result.task_id) {
|
||||
// 等待任务完成
|
||||
let attempts = 0;
|
||||
while (attempts < 60) {
|
||||
const status = await backendApi.getTaskStatus(result.task_id);
|
||||
if (status.status === 'success') {
|
||||
successCount++;
|
||||
successfulFiles.push(file);
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'success', progress: 100, docId: status.result?.doc_id } : s
|
||||
));
|
||||
if (successCount === 1) {
|
||||
setUploadedFile(file);
|
||||
}
|
||||
// 轮询任务状态
|
||||
let attempts = 0;
|
||||
const checkStatus = async () => {
|
||||
while (attempts < 30) {
|
||||
try {
|
||||
const status = await backendApi.getTaskStatus(result.task_id);
|
||||
if (status.status === 'success') {
|
||||
loadDocuments();
|
||||
return;
|
||||
break;
|
||||
} else if (status.status === 'failure') {
|
||||
return;
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('检查状态失败', e);
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'failed', error: status.error || '处理失败' } : s
|
||||
));
|
||||
break;
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
attempts++;
|
||||
}
|
||||
};
|
||||
checkStatus();
|
||||
} else {
|
||||
failCount++;
|
||||
}
|
||||
} else {
|
||||
// 其他文档使用通用上传接口
|
||||
const result = await backendApi.uploadDocument(file);
|
||||
if (result.task_id) {
|
||||
successCount++;
|
||||
successfulFiles.push(file);
|
||||
if (successCount === 1) {
|
||||
setUploadedFile(file);
|
||||
}
|
||||
// 轮询任务状态
|
||||
let attempts = 0;
|
||||
const checkStatus = async () => {
|
||||
while (attempts < 30) {
|
||||
try {
|
||||
const status = await backendApi.getTaskStatus(result.task_id);
|
||||
if (status.status === 'success') {
|
||||
loadDocuments();
|
||||
return;
|
||||
} else if (status.status === 'failure') {
|
||||
return;
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('检查状态失败', e);
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
attempts++;
|
||||
}
|
||||
};
|
||||
checkStatus();
|
||||
} else {
|
||||
failCount++;
|
||||
}
|
||||
}
|
||||
} catch (error: any) {
|
||||
failCount++;
|
||||
toast.error(`${file.name}: ${error.message || '上传失败'}`);
|
||||
setUploadStates(prev => prev.map((s, idx) =>
|
||||
idx === i ? { ...s, status: 'failed', error: error.message || '上传失败' } : s
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
setUploading(false);
|
||||
loadDocuments();
|
||||
|
||||
if (successCount > 0) {
|
||||
toast.success(`成功上传 ${successCount} 个文件`);
|
||||
setUploadedFiles(prev => [...prev, ...successfulFiles]);
|
||||
setUploadExpanded(true);
|
||||
}
|
||||
if (failCount > 0) {
|
||||
toast.error(`${failCount} 个文件上传失败`);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -699,7 +771,110 @@ const Documents: React.FC = () => {
|
||||
</CardHeader>
|
||||
{uploadPanelOpen && (
|
||||
<CardContent className="space-y-4">
|
||||
{uploadedFiles.length > 0 || uploadedFile ? (
|
||||
{/* 优先显示正在上传的状态 */}
|
||||
{uploadStates.length > 0 && (
|
||||
<div className="space-y-3">
|
||||
{/* 上传状态头部 */}
|
||||
<div
|
||||
className="flex items-center justify-between p-3 bg-primary/5 rounded-xl cursor-pointer hover:bg-primary/10 transition-colors"
|
||||
onClick={() => setUploadExpanded(!uploadExpanded)}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="w-10 h-10 rounded-lg bg-primary/10 text-primary flex items-center justify-center">
|
||||
{uploading ? <Loader2 size={20} className="animate-spin" /> : <Upload size={20} />}
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-semibold text-sm">
|
||||
{uploading ? '正在上传' : '上传完成'} {uploadStates.length} 个文件
|
||||
</p>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{uploading ? '上传中,请稍候...' : uploadStates.filter(s => s.status === 'failed').length > 0 ? '部分失败' : '点击查看详情'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{!uploading && (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
setUploadStates([]);
|
||||
setUploadedFiles([]);
|
||||
setUploadedFile(null);
|
||||
}}
|
||||
className="text-destructive hover:text-destructive"
|
||||
>
|
||||
<Trash2 size={14} className="mr-1" />
|
||||
清空
|
||||
</Button>
|
||||
)}
|
||||
{uploadExpanded ? <ChevronUp size={16} /> : <ChevronDown size={16} />}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 上传进度列表(总是展开显示) */}
|
||||
{uploadExpanded && (
|
||||
<div className="space-y-2 border rounded-xl p-3 bg-background">
|
||||
{uploadStates.map((state, index) => (
|
||||
<div key={index} className="flex items-center gap-3 p-2 rounded-lg hover:bg-muted/30 transition-colors">
|
||||
<div className={cn(
|
||||
"w-8 h-8 rounded flex items-center justify-center shrink-0",
|
||||
isExcelFile(state.file.name) ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
||||
)}>
|
||||
{state.status === 'pending' && <Clock size={16} />}
|
||||
{state.status === 'uploading' && <Upload size={16} className="animate-pulse" />}
|
||||
{state.status === 'processing' && <Loader2 size={16} className="animate-spin" />}
|
||||
{state.status === 'success' && <CheckCircle size={16} className="text-green-500" />}
|
||||
{state.status === 'failed' && <AlertCircle size={16} className="text-red-500" />}
|
||||
</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="text-sm truncate">{state.file.name}</p>
|
||||
<div className="flex items-center gap-2">
|
||||
{state.status === 'pending' && <p className="text-xs text-muted-foreground">等待上传...</p>}
|
||||
{state.status === 'uploading' && <p className="text-xs text-primary">上传中...</p>}
|
||||
{state.status === 'processing' && <p className="text-xs text-primary">处理中...</p>}
|
||||
{state.status === 'failed' && state.error && (
|
||||
<p className="text-xs text-red-500 truncate">{state.error}</p>
|
||||
)}
|
||||
{state.status === 'success' && (
|
||||
<p className="text-xs text-green-500">已完成</p>
|
||||
)}
|
||||
</div>
|
||||
{/* 进度条 */}
|
||||
{(state.status === 'uploading' || state.status === 'processing') && (
|
||||
<div className="mt-1 h-1 bg-muted rounded-full overflow-hidden">
|
||||
<div
|
||||
className="h-full bg-primary transition-all duration-300"
|
||||
style={{ width: `${state.progress}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{state.status === 'success' && (
|
||||
<CheckCircle size={16} className="text-green-500 shrink-0" />
|
||||
)}
|
||||
{state.status === 'failed' && (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="text-destructive hover:bg-destructive/10 shrink-0"
|
||||
onClick={() => {
|
||||
setUploadStates(prev => prev.filter((_, i) => i !== index));
|
||||
}}
|
||||
>
|
||||
<Trash2 size={14} />
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 已上传文件列表(没有正在上传时显示) */}
|
||||
{uploadStates.length === 0 && (uploadedFiles.length > 0 || uploadedFile) ? (
|
||||
<div className="space-y-3">
|
||||
{/* 文件列表头部 */}
|
||||
<div
|
||||
@@ -739,6 +914,84 @@ const Documents: React.FC = () => {
|
||||
{/* 展开的文件列表 */}
|
||||
{uploadExpanded && (
|
||||
<div className="space-y-2 border rounded-xl p-3">
|
||||
{/* 显示已上传文件列表 */}
|
||||
{(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).filter(Boolean).map((file, index) => (
|
||||
<div key={index} className="flex items-center gap-3 p-2 bg-background rounded-lg">
|
||||
<div className={cn(
|
||||
"w-8 h-8 rounded flex items-center justify-center",
|
||||
isExcelFile(file?.name || '') ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
||||
)}>
|
||||
{isExcelFile(file?.name || '') ? <FileSpreadsheet size={16} /> : <FileText size={16} />}
|
||||
</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="text-sm truncate">{file?.name}</p>
|
||||
<p className="text-xs text-muted-foreground">{formatFileSize(file?.size || 0)}</p>
|
||||
</div>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className="text-destructive hover:bg-destructive/10"
|
||||
onClick={() => handleRemoveUploadedFile(index)}
|
||||
>
|
||||
<Trash2 size={14} />
|
||||
</Button>
|
||||
</div>
|
||||
))}
|
||||
|
||||
{/* 继续添加按钮 */}
|
||||
<div
|
||||
{...getRootProps()}
|
||||
className="flex items-center justify-center gap-2 p-3 border-2 border-dashed rounded-lg cursor-pointer hover:border-primary/50 hover:bg-primary/5 transition-colors"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<input {...getInputProps()} multiple={true} />
|
||||
<Plus size={16} className="text-muted-foreground" />
|
||||
<span className="text-sm text-muted-foreground">继续添加更多文件</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
) : (uploadedFiles.length > 0 || uploadedFile) ? (
|
||||
<div className="space-y-3">
|
||||
{/* 文件列表头部 */}
|
||||
<div
|
||||
className="flex items-center justify-between p-3 bg-muted/50 rounded-xl cursor-pointer hover:bg-muted/70 transition-colors"
|
||||
onClick={() => setUploadExpanded(!uploadExpanded)}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="w-10 h-10 rounded-lg bg-primary/10 text-primary flex items-center justify-center">
|
||||
<Upload size={20} />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-semibold text-sm">
|
||||
已上传 {(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).length} 个文件
|
||||
</p>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{uploadExpanded ? '点击收起' : '点击展开查看'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
handleDeleteFile();
|
||||
}}
|
||||
className="text-destructive hover:text-destructive"
|
||||
>
|
||||
<Trash2 size={14} className="mr-1" />
|
||||
清空
|
||||
</Button>
|
||||
{uploadExpanded ? <ChevronUp size={16} /> : <ChevronDown size={16} />}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 展开的文件列表 */}
|
||||
{uploadExpanded && (
|
||||
<div className="space-y-2 border rounded-xl p-3">
|
||||
{/* 显示已上传文件列表 */}
|
||||
{(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).filter(Boolean).map((file, index) => (
|
||||
<div key={index} className="flex items-center gap-3 p-2 bg-background rounded-lg">
|
||||
<div className={cn(
|
||||
|
||||
@@ -1,26 +1,10 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import {
|
||||
Send,
|
||||
Bot,
|
||||
User,
|
||||
Sparkles,
|
||||
Trash2,
|
||||
RefreshCcw,
|
||||
FileText,
|
||||
TableProperties,
|
||||
ChevronRight,
|
||||
ArrowRight,
|
||||
Loader2,
|
||||
Download,
|
||||
Search,
|
||||
MessageSquare,
|
||||
CheckCircle
|
||||
} from 'lucide-react';
|
||||
import { Send, Bot, User, Sparkles, Trash2, FileText, TableProperties, ArrowRight, Search, MessageSquare } from 'lucide-react';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
|
||||
import { ScrollArea } from '@/components/ui/scroll-area';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
import { Markdown } from '@/components/ui/markdown';
|
||||
import { backendApi } from '@/db/backend-api';
|
||||
import { toast } from 'sonner';
|
||||
import { cn } from '@/lib/utils';
|
||||
@@ -39,8 +23,21 @@ const InstructionChat: React.FC = () => {
|
||||
const [input, setInput] = useState('');
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [currentDocIds, setCurrentDocIds] = useState<string[]>([]);
|
||||
const [conversationId, setConversationId] = useState<string>('');
|
||||
const scrollAreaRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// 初始化会话ID
|
||||
useEffect(() => {
  // Runs once on mount: reuse the conversation id persisted in localStorage,
  // or mint a fresh one and persist it for later visits.
  const persistedId = localStorage.getItem('chat_conversation_id');
  const activeId = persistedId
    || `conv_${Date.now()}_${Math.random().toString(36).substring(7)}`;

  setConversationId(activeId);

  // Only a newly generated id needs to be written back.
  if (!persistedId) {
    localStorage.setItem('chat_conversation_id', activeId);
  }
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
// Initial welcome message
|
||||
if (messages.length === 0) {
|
||||
@@ -119,7 +116,8 @@ const InstructionChat: React.FC = () => {
|
||||
// 使用真实的智能指令 API
|
||||
const response = await backendApi.instructionChat(
|
||||
input.trim(),
|
||||
currentDocIds.length > 0 ? currentDocIds : undefined
|
||||
currentDocIds.length > 0 ? currentDocIds : undefined,
|
||||
{ conversation_id: conversationId }
|
||||
);
|
||||
|
||||
// 根据意图类型生成友好响应
|
||||
@@ -135,11 +133,12 @@ const InstructionChat: React.FC = () => {
|
||||
responseContent = `✅ 已提取到 ${keys.length} 个字段的数据:\n\n`;
|
||||
for (const [key, value] of Object.entries(extracted)) {
|
||||
const values = Array.isArray(value) ? value : [value];
|
||||
responseContent += `**${key}**: ${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}\n`;
|
||||
const displayValues = values.length > 10 ? values.slice(0, 10).join(', ') + ` ...(共${values.length}条)` : values.join(', ');
|
||||
responseContent += `**${key}**: ${displayValues}\n`;
|
||||
}
|
||||
responseContent += `\n💡 您可以将这些数据填入表格。`;
|
||||
responseContent += `\n💡 可直接使用以上数据,或说"填入表格"继续填表操作。`;
|
||||
} else {
|
||||
responseContent = '未能从文档中提取到相关数据。请尝试更明确的字段名称。';
|
||||
responseContent = resultData?.message || '未能从文档中提取到相关数据。请尝试更明确的字段名称。';
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -151,24 +150,24 @@ const InstructionChat: React.FC = () => {
|
||||
responseContent = `✅ 填表完成!成功填写 ${filledKeys.length} 个字段:\n\n`;
|
||||
for (const [key, value] of Object.entries(filled)) {
|
||||
const values = Array.isArray(value) ? value : [value];
|
||||
responseContent += `**${key}**: ${values.slice(0, 3).join(', ')}\n`;
|
||||
const displayValues = values.length > 10 ? values.slice(0, 10).join(', ') + ` ...(共${values.length}条)` : values.join(', ');
|
||||
responseContent += `**${key}**: ${displayValues}\n`;
|
||||
}
|
||||
responseContent += `\n📋 请到【智能填表】页面查看或导出结果。`;
|
||||
} else {
|
||||
responseContent = '填表未能提取到数据。请检查模板表头和数据源内容。';
|
||||
responseContent = resultData?.message || '填表未能提取到数据。请检查模板表头和数据源内容。';
|
||||
}
|
||||
break;
|
||||
|
||||
case 'summarize':
|
||||
// 摘要结果
|
||||
const summaries = resultData?.summaries || [];
|
||||
if (summaries.length > 0) {
|
||||
responseContent = `📄 找到 ${summaries.length} 个文档的摘要:\n\n`;
|
||||
summaries.forEach((s: any, idx: number) => {
|
||||
responseContent += `**${idx + 1}. ${s.filename}**\n${s.content_preview}\n\n`;
|
||||
});
|
||||
if (resultData?.action_needed === 'provide_document' || resultData?.action_needed === 'upload_document') {
|
||||
responseContent = `📋 ${resultData.message}\n\n${resultData.suggestion || ''}`;
|
||||
} else if (resultData?.ai_summary) {
|
||||
// AI 生成的摘要
|
||||
responseContent = `📄 **${resultData.filename}** 摘要分析:\n\n${resultData.ai_summary}`;
|
||||
} else {
|
||||
responseContent = '未能生成摘要。请确保已上传文档。';
|
||||
responseContent = resultData?.message || '未能生成摘要。请确保已上传文档。';
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -176,8 +175,10 @@ const InstructionChat: React.FC = () => {
|
||||
// 问答结果
|
||||
if (resultData?.answer) {
|
||||
responseContent = `**问题**: ${resultData.question}\n\n**答案**: ${resultData.answer}`;
|
||||
} else if (resultData?.context_preview) {
|
||||
responseContent = `**问题**: ${resultData.question}\n\n**相关上下文**:\n${resultData.context_preview}`;
|
||||
} else {
|
||||
responseContent = resultData?.message || '我找到了相关信息,请查看上文。';
|
||||
responseContent = resultData?.message || '请先上传文档,我才能回答您的问题。';
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -207,8 +208,35 @@ const InstructionChat: React.FC = () => {
|
||||
}
|
||||
break;
|
||||
|
||||
case 'edit':
|
||||
// 文档编辑结果
|
||||
if (resultData?.edited_content) {
|
||||
responseContent = `✏️ **${resultData.original_filename}** 编辑完成:\n\n${resultData.edited_content.substring(0, 500)}${resultData.edited_content.length > 500 ? '\n\n...(内容已截断)' : ''}`;
|
||||
} else {
|
||||
responseContent = resultData?.message || '编辑完成。';
|
||||
}
|
||||
break;
|
||||
|
||||
case 'transform':
|
||||
// 格式转换结果
|
||||
if (resultData?.excel_data) {
|
||||
responseContent = `🔄 格式转换完成!\n\n已转换为 **Excel** 格式,共 **${resultData.excel_data.length}** 行数据。\n\n${resultData.message || ''}`;
|
||||
} else if (resultData?.content) {
|
||||
responseContent = `🔄 格式转换完成!\n\n目标格式: **${resultData.target_format?.toUpperCase()}**\n\n${resultData.message || ''}`;
|
||||
} else {
|
||||
responseContent = resultData?.message || '格式转换完成。';
|
||||
}
|
||||
break;
|
||||
|
||||
case 'unknown':
|
||||
responseContent = `我理解您想要: "${input.trim()}"\n\n但我目前无法完成此操作。您可以尝试:\n\n1. **提取数据**: "提取医院数量和床位数"\n2. **填表**: "根据这些数据填表"\n3. **总结**: "总结这份文档"\n4. **问答**: "文档里说了什么?"\n5. **搜索**: "搜索相关内容"`;
|
||||
// 检查是否需要用户上传文档
|
||||
if (resultData?.suggestion) {
|
||||
responseContent = resultData.suggestion;
|
||||
} else if (resultData?.message && resultData.message !== '无法理解该指令,请尝试更明确的描述') {
|
||||
responseContent = resultData.message;
|
||||
} else {
|
||||
responseContent = `我理解您想要: "${input.trim()}"\n\n请尝试以下操作:\n\n1. **提取数据**: "提取医院数量和床位数"\n2. **填表**: "根据这些数据填表"\n3. **总结**: "总结这份文档"\n4. **问答**: "文档里说了什么?"\n5. **搜索**: "搜索相关内容"`;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -299,9 +327,11 @@ const InstructionChat: React.FC = () => {
|
||||
? "bg-primary text-primary-foreground shadow-xl shadow-primary/20 rounded-tr-none"
|
||||
: "bg-white border border-border/50 shadow-md rounded-tl-none"
|
||||
)}>
|
||||
<p className="text-sm leading-relaxed whitespace-pre-wrap font-medium">
|
||||
{m.content}
|
||||
</p>
|
||||
{m.role === 'assistant' ? (
|
||||
<Markdown content={m.content} className="text-sm leading-relaxed prose prose-sm max-w-none" />
|
||||
) : (
|
||||
<p className="text-sm leading-relaxed whitespace-pre-wrap font-medium">{m.content}</p>
|
||||
)}
|
||||
<span className={cn(
|
||||
"text-[10px] block opacity-50 font-bold tracking-widest",
|
||||
m.role === 'user' ? "text-right" : "text-left"
|
||||
|
||||
@@ -248,15 +248,25 @@ const TemplateFill: React.FC = () => {
|
||||
if (!templateFile || !filledResult) return;
|
||||
|
||||
try {
|
||||
const ext = templateFile.name.split('.').pop()?.toLowerCase();
|
||||
const exportFormat = (ext === 'docx') ? 'docx' : 'xlsx';
|
||||
// 对于 Word 模板,如果已有填写后的文件(已填入表格单元格),传递其路径以便直接下载
|
||||
const filledFilePath = (ext === 'docx' && filledResult.filled_file_path)
|
||||
? filledResult.filled_file_path
|
||||
: undefined;
|
||||
const blob = await backendApi.exportFilledTemplate(
|
||||
templateId || 'temp',
|
||||
filledResult.filled_data || {},
|
||||
'xlsx'
|
||||
exportFormat,
|
||||
filledFilePath
|
||||
);
|
||||
const ext_match = templateFile.name.match(/\.([^.])+$/);
|
||||
const baseName = ext_match ? templateFile.name.replace(ext_match[0], '') : templateFile.name;
|
||||
const downloadName = `filled_${baseName}.${exportFormat}`;
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = `filled_${templateFile.name}`;
|
||||
a.download = downloadName;
|
||||
a.click();
|
||||
URL.revokeObjectURL(url);
|
||||
toast.success('导出成功');
|
||||
@@ -546,7 +556,7 @@ const TemplateFill: React.FC = () => {
|
||||
</div>
|
||||
<h3 className="text-xl font-bold mb-2">AI 正在智能分析并填表</h3>
|
||||
<p className="text-muted-foreground text-center max-w-md">
|
||||
系统正在从 {sourceFiles.length || sourceFilePaths.length} 份文档中检索相关信息...
|
||||
系统正在从 {sourceFiles.length || sourceFilePaths.length || sourceDocIds.length || 0} 份文档中检索相关信息...
|
||||
</p>
|
||||
</CardContent>
|
||||
</Card>
|
||||
@@ -562,7 +572,7 @@ const TemplateFill: React.FC = () => {
|
||||
填表完成
|
||||
</CardTitle>
|
||||
<CardDescription>
|
||||
系统已根据 {sourceFiles.length || sourceFilePaths.length} 份文档自动完成表格填写
|
||||
系统已根据 {filledResult.source_doc_count || sourceFiles.length || sourceFilePaths.length || sourceDocIds.length} 份文档自动完成表格填写
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
|
||||
Reference in New Issue
Block a user