【智能助手增强】
- 新增对话历史管理:MongoDB新增conversations集合,存储用户与AI的对话上下文,支持多轮对话意图延续
- 新增对话历史API(conversation.py):GET/DELETE conversation历史、列出所有会话
- 意图解析增强:支持基于对话历史的意图识别,上下文理解更准确
- 字段提取优化:支持"提取文档中的医院数量"等自然语言模式,智能去除"文档中的"前缀
- 文档对比优化:从指令中提取文件名并精确匹配source_docs,支持"对比A和B两个文档"
- 文档摘要优化:使用LLM生成真实AI摘要而非返回原始文档预览
【Word模板填表核心功能】
- Word模板字段生成:空白Word上传后,自动从源文档(Excel/Word/TXT/MD)内容AI生成字段名
- Word模板填表(_fill_docx):将提取数据写入Word模板表格,支持精确匹配、模糊匹配、追加新行
- 数据润色(_polish_word_filled_data):LLM对多行Excel数据进行统计归纳(合计/平均/极值),转化为专业自然语言描述
- 段落格式输出:使用📌字段名+值段落+分隔线(灰色横线)格式,提升可读性
- 导出链打通:fill_template返回filled_file_path,export直接返回已填好的Word文件
【其他修复】
- 修复Word导出Windows文件锁问题:NamedTemporaryFile改为mkstemp+close
- 修复Word方框非法字符:扩展clean_text移除\uFFFD、□等Unicode替代符和零宽字符
- 修复文档对比"需要至少2个文档":从指令提取具体文件名优先匹配而非取前2个
- 修复导出format硬编码:自动识别docx/xlsx格式
- Docx解析器增加备用解析方法和更完整的段落/表格/标题提取
- RAG服务新增MySQL数据源支持
This commit is contained in:
@@ -64,6 +64,11 @@ class MongoDB:
|
||||
"""任务集合 - 存储任务历史记录"""
|
||||
return self.db["tasks"]
|
||||
|
||||
@property
def conversations(self):
    """Conversation collection - stores the chat history records."""
    collection_name = "conversations"
    return self.db[collection_name]
|
||||
|
||||
# ==================== 文档操作 ====================
|
||||
|
||||
async def insert_document(
|
||||
@@ -117,14 +122,20 @@ class MongoDB:
|
||||
搜索文档
|
||||
|
||||
Args:
|
||||
query: 搜索关键词
|
||||
query: 搜索关键词(支持文件名和内容搜索)
|
||||
doc_type: 文档类型过滤
|
||||
limit: 返回数量
|
||||
|
||||
Returns:
|
||||
文档列表
|
||||
"""
|
||||
filter_query = {"content": {"$regex": query}}
|
||||
filter_query = {
|
||||
"$or": [
|
||||
{"content": {"$regex": query, "$options": "i"}},
|
||||
{"metadata.original_filename": {"$regex": query, "$options": "i"}},
|
||||
{"metadata.filename": {"$regex": query, "$options": "i"}},
|
||||
]
|
||||
}
|
||||
if doc_type:
|
||||
filter_query["doc_type"] = doc_type
|
||||
|
||||
@@ -141,6 +152,15 @@ class MongoDB:
|
||||
result = await self.documents.delete_one({"_id": ObjectId(doc_id)})
|
||||
return result.deleted_count > 0
|
||||
|
||||
async def update_document_metadata(self, doc_id: str, metadata: Dict[str, Any]) -> bool:
    """
    Overwrite the ``metadata`` field of one document.

    Args:
        doc_id: String form of the document's ObjectId. It is passed to
            ``ObjectId()`` unchanged; a malformed id is not caught here.
        metadata: New metadata mapping. It replaces the stored
            ``metadata`` value as a whole (``$set`` on the field itself).

    Returns:
        True when a document with this id was found, including the case
        where the stored metadata already equalled ``metadata``;
        False when no document matched.
    """
    from bson import ObjectId

    result = await self.documents.update_one(
        {"_id": ObjectId(doc_id)},
        {"$set": {"metadata": metadata}},
    )
    # Use matched_count rather than modified_count: a no-op update (same
    # metadata written again) leaves modified_count at 0 even though the
    # document exists, which would wrongly be reported as a failure.
    return result.matched_count > 0
|
||||
|
||||
# ==================== RAG 索引操作 ====================
|
||||
|
||||
async def insert_rag_entry(
|
||||
@@ -251,6 +271,10 @@ class MongoDB:
|
||||
await self.tasks.create_index("task_id", unique=True)
|
||||
await self.tasks.create_index("created_at")
|
||||
|
||||
# 对话集合索引
|
||||
await self.conversations.create_index("conversation_id")
|
||||
await self.conversations.create_index("created_at")
|
||||
|
||||
logger.info("MongoDB 索引创建完成")
|
||||
|
||||
# ==================== 任务历史操作 ====================
|
||||
@@ -369,6 +393,108 @@ class MongoDB:
|
||||
result = await self.tasks.delete_one({"task_id": task_id})
|
||||
return result.deleted_count > 0
|
||||
|
||||
# ==================== 对话历史操作 ====================
|
||||
|
||||
async def insert_conversation(
    self,
    conversation_id: str,
    role: str,
    content: str,
    intent: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Store one message of a conversation.

    Args:
        conversation_id: Session id the message belongs to.
        role: Speaker role (user/assistant).
        content: Message text.
        intent: Intent type, stored as given (``None`` when omitted).
        metadata: Extra metadata; an empty dict is stored when falsy.

    Returns:
        The inserted document's id converted to ``str``.
    """
    extra = metadata if metadata else {}
    record = {
        "conversation_id": conversation_id,
        "role": role,
        "content": content,
        "intent": intent,
        "metadata": extra,
        "created_at": datetime.utcnow(),
    }
    outcome = await self.conversations.insert_one(record)
    inserted_id = outcome.inserted_id
    return str(inserted_id)
|
||||
|
||||
async def get_conversation_history(
    self,
    conversation_id: str,
    limit: int = 20,
) -> List[Dict[str, Any]]:
    """
    Fetch the most recent messages of a conversation, oldest first.

    Args:
        conversation_id: Session id whose messages are requested.
        limit: Maximum number of messages to return; when the
            conversation is longer, the newest ``limit`` messages are kept.

    Returns:
        Message documents in chronological order, with ``_id`` converted
        to ``str`` and a truthy ``created_at`` converted via ``isoformat()``.
    """
    # Sort newest-first BEFORE limiting so the limit keeps the tail of the
    # conversation. Sorting ascending and then limiting would freeze the
    # result at the first `limit` messages once a session grows beyond it.
    # `_id` is a tie-breaker for messages sharing the same timestamp.
    cursor = (
        self.conversations.find({"conversation_id": conversation_id})
        .sort([("created_at", -1), ("_id", -1)])
        .limit(limit)
    )

    messages = []
    async for msg in cursor:
        msg["_id"] = str(msg["_id"])
        if msg.get("created_at"):
            msg["created_at"] = msg["created_at"].isoformat()
        messages.append(msg)

    # Restore chronological order for callers that replay the dialogue.
    messages.reverse()
    return messages
|
||||
|
||||
async def delete_conversation(self, conversation_id: str) -> bool:
    """Delete every message of a conversation; True if anything was removed."""
    selector = {"conversation_id": conversation_id}
    outcome = await self.conversations.delete_many(selector)
    removed = outcome.deleted_count
    return removed > 0
|
||||
|
||||
async def list_conversations(
    self,
    limit: int = 50,
    skip: int = 0,
) -> List[Dict[str, Any]]:
    """
    List sessions, ordered by their most recent message (newest first).

    Each returned entry is the latest message document of one conversation.

    Args:
        limit: Maximum number of sessions to return.
        skip: Number of sessions to skip before collecting results.

    Returns:
        Latest-message documents with ``_id`` converted to ``str`` and a
        truthy ``created_at`` converted via ``isoformat()``.
    """
    # Stage 1-3: order all messages newest-first, keep the first (newest)
    # message per conversation_id, then promote that message to the root.
    latest_per_session = {
        "$group": {
            "_id": "$conversation_id",
            "last_message": {"$first": "$$ROOT"},
        }
    }
    unwrap_message = {"$replaceRoot": {"newRoot": "$last_message"}}
    pipeline = [
        {"$sort": {"created_at": -1}},
        latest_per_session,
        unwrap_message,
        # Grouping does not preserve order, so sort the sessions again.
        {"$sort": {"created_at": -1}},
        {"$skip": skip},
        {"$limit": limit},
    ]

    sessions = []
    async for entry in self.conversations.aggregate(pipeline):
        entry["_id"] = str(entry["_id"])
        stamp = entry.get("created_at")
        if stamp:
            entry["created_at"] = stamp.isoformat()
        sessions.append(entry)
    return sessions
|
||||
|
||||
|
||||
# ==================== 全局单例 ====================
|
||||
|
||||
|
||||
Reference in New Issue
Block a user