添加任务状态双写机制和历史记录功能
- 实现任务状态同时写入Redis和MongoDB的双写机制 - 添加MongoDB任务集合及CRUD操作接口 - 新增任务历史记录查询、列表展示和删除功能 - 重构任务状态更新逻辑,统一使用update_task_status函数 - 添加模板填充服务中AI审核字段值的功能 - 优化前端任务历史页面显示和交互体验
This commit is contained in:
@@ -23,6 +23,52 @@ logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/upload", tags=["文档上传"])
|
||||
|
||||
|
||||
# ==================== 辅助函数 ====================
|
||||
|
||||
async def update_task_status(
    task_id: str,
    status: str,
    progress: int = 0,
    message: str = "",
    result: dict = None,
    error: str = None
):
    """
    Record a task's state in both Redis and MongoDB (best-effort dual write).

    The two stores are written independently: a failure in either one is
    logged as a warning and never propagated, so this helper does not raise
    for storage errors.

    Args:
        task_id: Task identifier.
        status: New status string (callers in this module pass
            "processing", "success" or "failure").
        progress: Progress value. It is placed in the Redis meta only;
            ``mongodb.update_task`` is not passed a progress value.
        message: Human-readable status message.
        result: Optional result payload; forwarded whenever it is not None.
        error: Optional error text; forwarded whenever it is not None.
    """
    meta = {"progress": progress, "message": message}
    # Test against None, not truthiness: an empty result ({}) or an empty
    # error string (str(e) of a message-less exception) must still reach
    # Redis, otherwise Redis and MongoDB end up holding different data.
    if result is not None:
        meta["result"] = result
    if error is not None:
        meta["error"] = error

    # Redis write (live status store)
    try:
        await redis_db.set_task_status(task_id, status, meta)
    except Exception as e:
        logger.warning("Redis 任务状态更新失败: %s", e)

    # MongoDB write (fallback / history store)
    try:
        updated = await mongodb.update_task(
            task_id=task_id,
            status=status,
            message=message,
            result=result,
            error=error
        )
        # A falsy return means no task record was updated (for instance the
        # initial insert failed); surface it instead of dropping it silently.
        if not updated:
            logger.warning("MongoDB 任务状态未更新(未找到任务记录): %s", task_id)
    except Exception as e:
        logger.warning("MongoDB 任务状态更新失败: %s", e)
|
||||
|
||||
|
||||
# ==================== 请求/响应模型 ====================
|
||||
|
||||
class UploadResponse(BaseModel):
|
||||
@@ -77,6 +123,17 @@ async def upload_document(
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
try:
|
||||
# 保存任务记录到 MongoDB(如果 Redis 不可用时仍能查询)
|
||||
try:
|
||||
await mongodb.insert_task(
|
||||
task_id=task_id,
|
||||
task_type="document_parse",
|
||||
status="pending",
|
||||
message=f"文档 {file.filename} 已提交处理"
|
||||
)
|
||||
except Exception as mongo_err:
|
||||
logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}")
|
||||
|
||||
content = await file.read()
|
||||
saved_path = file_service.save_uploaded_file(
|
||||
content,
|
||||
@@ -122,6 +179,17 @@ async def upload_documents(
|
||||
saved_paths = []
|
||||
|
||||
try:
|
||||
# 保存任务记录到 MongoDB
|
||||
try:
|
||||
await mongodb.insert_task(
|
||||
task_id=task_id,
|
||||
task_type="batch_parse",
|
||||
status="pending",
|
||||
message=f"已提交 {len(files)} 个文档处理"
|
||||
)
|
||||
except Exception as mongo_err:
|
||||
logger.warning(f"MongoDB 保存批量任务记录失败: {mongo_err}")
|
||||
|
||||
for file in files:
|
||||
if not file.filename:
|
||||
continue
|
||||
@@ -159,9 +227,9 @@ async def process_document(
|
||||
"""处理单个文档"""
|
||||
try:
|
||||
# 状态: 解析中
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": 10, "message": "正在解析文档"}
|
||||
progress=10, message="正在解析文档"
|
||||
)
|
||||
|
||||
# 解析文档
|
||||
@@ -172,9 +240,9 @@ async def process_document(
|
||||
raise Exception(result.error or "解析失败")
|
||||
|
||||
# 状态: 存储中
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": 30, "message": "正在存储数据"}
|
||||
progress=30, message="正在存储数据"
|
||||
)
|
||||
|
||||
# 存储到 MongoDB
|
||||
@@ -191,9 +259,9 @@ async def process_document(
|
||||
|
||||
# 如果是 Excel,存储到 MySQL + AI生成描述 + RAG索引
|
||||
if doc_type in ["xlsx", "xls"]:
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"}
|
||||
progress=50, message="正在存储到MySQL并生成字段描述"
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -215,9 +283,9 @@ async def process_document(
|
||||
|
||||
else:
|
||||
# 非结构化文档
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": 60, "message": "正在建立索引"}
|
||||
progress=60, message="正在建立索引"
|
||||
)
|
||||
|
||||
# 如果文档中有表格数据,提取并存储到 MySQL + RAG
|
||||
@@ -238,17 +306,13 @@ async def process_document(
|
||||
await index_document_to_rag(doc_id, original_filename, result, doc_type)
|
||||
|
||||
# 完成
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="success",
|
||||
meta={
|
||||
"progress": 100,
|
||||
"message": "处理完成",
|
||||
progress=100, message="处理完成",
|
||||
result={
|
||||
"doc_id": doc_id,
|
||||
"result": {
|
||||
"doc_id": doc_id,
|
||||
"doc_type": doc_type,
|
||||
"filename": original_filename
|
||||
}
|
||||
"doc_type": doc_type,
|
||||
"filename": original_filename
|
||||
}
|
||||
)
|
||||
|
||||
@@ -256,18 +320,19 @@ async def process_document(
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"文档处理失败: {str(e)}")
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="failure",
|
||||
meta={"error": str(e)}
|
||||
progress=0, message="处理失败",
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
|
||||
async def process_documents_batch(task_id: str, files: List[dict]):
|
||||
"""批量处理文档"""
|
||||
try:
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": 0, "message": "开始批量处理"}
|
||||
progress=0, message="开始批量处理"
|
||||
)
|
||||
|
||||
results = []
|
||||
@@ -318,21 +383,23 @@ async def process_documents_batch(task_id: str, files: List[dict]):
|
||||
results.append({"filename": file_info["filename"], "success": False, "error": str(e)})
|
||||
|
||||
progress = int((i + 1) / len(files) * 100)
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"}
|
||||
progress=progress, message=f"已处理 {i+1}/{len(files)}"
|
||||
)
|
||||
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="success",
|
||||
meta={"progress": 100, "message": "批量处理完成", "results": results}
|
||||
progress=100, message="批量处理完成",
|
||||
result={"results": results}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"批量处理失败: {str(e)}")
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="failure",
|
||||
meta={"error": str(e)}
|
||||
progress=0, message="批量处理失败",
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
"""
|
||||
任务管理 API 接口
|
||||
|
||||
提供异步任务状态查询
|
||||
提供异步任务状态查询和历史记录
|
||||
"""
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.core.database import redis_db
|
||||
from app.core.database import redis_db, mongodb
|
||||
|
||||
router = APIRouter(prefix="/tasks", tags=["任务管理"])
|
||||
|
||||
@@ -23,25 +23,94 @@ async def get_task_status(task_id: str):
|
||||
Returns:
|
||||
任务状态信息
|
||||
"""
|
||||
# 优先从 Redis 获取
|
||||
status = await redis_db.get_task_status(task_id)
|
||||
|
||||
if not status:
|
||||
# Redis不可用时,假设任务已完成(文档已成功处理)
|
||||
# 前端轮询时会得到这个响应
|
||||
if status:
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"status": "success",
|
||||
"progress": 100,
|
||||
"message": "任务处理完成",
|
||||
"result": None,
|
||||
"error": None
|
||||
"status": status.get("status", "unknown"),
|
||||
"progress": status.get("meta", {}).get("progress", 0),
|
||||
"message": status.get("meta", {}).get("message"),
|
||||
"result": status.get("meta", {}).get("result"),
|
||||
"error": status.get("meta", {}).get("error")
|
||||
}
|
||||
|
||||
# Redis 不可用时,尝试从 MongoDB 获取
|
||||
mongo_task = await mongodb.get_task(task_id)
|
||||
if mongo_task:
|
||||
return {
|
||||
"task_id": mongo_task.get("task_id"),
|
||||
"status": mongo_task.get("status", "unknown"),
|
||||
"progress": 100 if mongo_task.get("status") == "success" else 0,
|
||||
"message": mongo_task.get("message"),
|
||||
"result": mongo_task.get("result"),
|
||||
"error": mongo_task.get("error")
|
||||
}
|
||||
|
||||
# 任务不存在或状态未知
|
||||
return {
|
||||
"task_id": task_id,
|
||||
"status": status.get("status", "unknown"),
|
||||
"progress": status.get("meta", {}).get("progress", 0),
|
||||
"message": status.get("meta", {}).get("message"),
|
||||
"result": status.get("meta", {}).get("result"),
|
||||
"error": status.get("meta", {}).get("error")
|
||||
"status": "unknown",
|
||||
"progress": 0,
|
||||
"message": "无法获取任务状态(Redis和MongoDB均不可用)",
|
||||
"result": None,
|
||||
"error": None
|
||||
}
|
||||
|
||||
|
||||
@router.get("/")
async def list_tasks(limit: int = 50, skip: int = 0):
    """
    Return a page of the task history stored in MongoDB.

    Args:
        limit: Maximum number of tasks to return; clamped to 1..200.
        skip: Number of tasks to skip; negative values are treated as 0.

    Returns:
        ``{"success": True, "tasks": [...], "count": n}`` on success, or
        ``{"success": False, "tasks": [], "count": 0, "error": "..."}`` when
        the MongoDB query raises.
    """
    # Guard the query parameters before they reach the cursor: a limit of 0
    # is interpreted by MongoDB as "no limit" (the whole collection would be
    # returned) and a negative skip is rejected by the driver.
    max_page_size = 200
    limit = max(1, min(limit, max_page_size))
    skip = max(0, skip)

    try:
        tasks = await mongodb.list_tasks(limit=limit, skip=skip)
    except Exception as e:
        # MongoDB unavailable: answer with an empty list rather than a 500
        return {
            "success": False,
            "tasks": [],
            "count": 0,
            "error": str(e)
        }

    return {
        "success": True,
        "tasks": tasks,
        "count": len(tasks)
    }
|
||||
|
||||
|
||||
@router.delete("/{task_id}")
async def delete_task(task_id: str):
    """
    Delete a task from Redis (best effort) and from MongoDB.

    Args:
        task_id: Task identifier.

    Returns:
        ``{"success": True, "deleted": <bool>}`` where ``deleted`` is the
        value returned by ``mongodb.delete_task``.

    Raises:
        HTTPException: 500 when the MongoDB deletion raises.
    """
    import logging

    # Redis removal is best effort: a Redis outage must not prevent the
    # history record from being deleted. If this step fails, the Redis entry
    # stays in place.
    try:
        if redis_db._connected and redis_db.client:
            key = f"task:{task_id}"
            await redis_db.client.delete(key)
    except Exception as redis_err:
        logging.getLogger(__name__).warning("Redis 删除任务失败: %s", redis_err)

    # MongoDB removal decides the outcome reported to the caller
    try:
        deleted = await mongodb.delete_task(task_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"删除任务失败: {str(e)}") from e

    return {
        "success": True,
        "deleted": deleted
    }
|
||||
|
||||
@@ -23,6 +23,44 @@ logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/templates", tags=["表格模板"])
|
||||
|
||||
|
||||
# ==================== 辅助函数 ====================
|
||||
|
||||
async def update_task_status(
    task_id: str,
    status: str,
    progress: int = 0,
    message: str = "",
    result: dict = None,
    error: str = None
):
    """
    Record a task's state in both Redis and MongoDB (best-effort dual write).

    The two stores are written independently: a failure in either one is
    logged as a warning and never propagated, so this helper does not raise
    for storage errors.

    Args:
        task_id: Task identifier.
        status: New status string (callers in this module pass
            "processing", "success" or "failure").
        progress: Progress value. It is placed in the Redis meta only;
            ``mongodb.update_task`` is not passed a progress value.
        message: Human-readable status message.
        result: Optional result payload; forwarded whenever it is not None.
        error: Optional error text; forwarded whenever it is not None.
    """
    from app.core.database import redis_db

    meta = {"progress": progress, "message": message}
    # Test against None, not truthiness: an empty result ({}) or an empty
    # error string (str(e) of a message-less exception) must still reach
    # Redis, otherwise Redis and MongoDB end up holding different data.
    if result is not None:
        meta["result"] = result
    if error is not None:
        meta["error"] = error

    # Redis write (live status store)
    try:
        await redis_db.set_task_status(task_id, status, meta)
    except Exception as e:
        logger.warning("Redis 任务状态更新失败: %s", e)

    # MongoDB write (fallback / history store)
    try:
        updated = await mongodb.update_task(
            task_id=task_id,
            status=status,
            message=message,
            result=result,
            error=error
        )
        # A falsy return means no task record was updated (for instance the
        # initial insert failed); surface it instead of dropping it silently.
        if not updated:
            logger.warning("MongoDB 任务状态未更新(未找到任务记录): %s", task_id)
    except Exception as e:
        logger.warning("MongoDB 任务状态更新失败: %s", e)
|
||||
|
||||
|
||||
# ==================== 请求/响应模型 ====================
|
||||
|
||||
class TemplateFieldRequest(BaseModel):
|
||||
@@ -244,6 +282,17 @@ async def upload_joint_template(
|
||||
# 3. 异步处理源文档到MongoDB
|
||||
task_id = str(uuid.uuid4())
|
||||
if source_file_info:
|
||||
# 保存任务记录到 MongoDB
|
||||
try:
|
||||
await mongodb.insert_task(
|
||||
task_id=task_id,
|
||||
task_type="source_process",
|
||||
status="pending",
|
||||
message=f"开始处理 {len(source_file_info)} 个源文档"
|
||||
)
|
||||
except Exception as mongo_err:
|
||||
logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}")
|
||||
|
||||
background_tasks.add_task(
|
||||
process_source_documents,
|
||||
task_id=task_id,
|
||||
@@ -282,12 +331,10 @@ async def upload_joint_template(
|
||||
|
||||
async def process_source_documents(task_id: str, files: List[dict]):
|
||||
"""异步处理源文档,存入MongoDB"""
|
||||
from app.core.database import redis_db
|
||||
|
||||
try:
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": 0, "message": "开始处理源文档"}
|
||||
progress=0, message="开始处理源文档"
|
||||
)
|
||||
|
||||
doc_ids = []
|
||||
@@ -316,22 +363,24 @@ async def process_source_documents(task_id: str, files: List[dict]):
|
||||
logger.error(f"源文档处理异常: {file_info['filename']}, error: {str(e)}")
|
||||
|
||||
progress = int((i + 1) / len(files) * 100)
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="processing",
|
||||
meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"}
|
||||
progress=progress, message=f"已处理 {i+1}/{len(files)}"
|
||||
)
|
||||
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="success",
|
||||
meta={"progress": 100, "message": "源文档处理完成", "doc_ids": doc_ids}
|
||||
progress=100, message="源文档处理完成",
|
||||
result={"doc_ids": doc_ids}
|
||||
)
|
||||
logger.info(f"所有源文档处理完成: {len(doc_ids)}个")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"源文档批量处理失败: {str(e)}")
|
||||
await redis_db.set_task_status(
|
||||
await update_task_status(
|
||||
task_id, status="failure",
|
||||
meta={"error": str(e)}
|
||||
progress=0, message="源文档处理失败",
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -59,6 +59,11 @@ class MongoDB:
|
||||
"""RAG索引集合 - 存储字段语义索引"""
|
||||
return self.db["rag_index"]
|
||||
|
||||
@property
def tasks(self):
    """Task collection - holds the task history records."""
    collection = self.db["tasks"]
    return collection
|
||||
|
||||
# ==================== 文档操作 ====================
|
||||
|
||||
async def insert_document(
|
||||
@@ -242,8 +247,128 @@ class MongoDB:
|
||||
await self.rag_index.create_index("table_name")
|
||||
await self.rag_index.create_index("field_name")
|
||||
|
||||
# 任务集合索引
|
||||
await self.tasks.create_index("task_id", unique=True)
|
||||
await self.tasks.create_index("created_at")
|
||||
|
||||
logger.info("MongoDB 索引创建完成")
|
||||
|
||||
# ==================== 任务历史操作 ====================
|
||||
|
||||
async def insert_task(
    self,
    task_id: str,
    task_type: str,
    status: str = "pending",
    message: str = "",
    result: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> str:
    """
    Insert a new task record into the tasks collection.

    Args:
        task_id: Task identifier.
        task_type: Task type label (e.g. "document_parse").
        status: Initial task status.
        message: Task message.
        result: Task result payload, if any.
        error: Error text, if any.

    Returns:
        The inserted document's ``_id`` as a string.
    """
    # Take the timestamp once so a freshly created record has identical
    # created_at and updated_at values.
    now = datetime.utcnow()
    task = {
        "task_id": task_id,
        "task_type": task_type,
        "status": status,
        "message": message,
        "result": result,
        "error": error,
        "created_at": now,
        "updated_at": now,
    }
    inserted = await self.tasks.insert_one(task)
    return str(inserted.inserted_id)
|
||||
|
||||
async def update_task(
    self,
    task_id: str,
    status: Optional[str] = None,
    message: Optional[str] = None,
    result: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> bool:
    """
    Update the stored state of a task.

    Only the fields passed as non-None are written; ``updated_at`` is always
    refreshed.

    Args:
        task_id: Task identifier.
        status: New task status.
        message: New task message.
        result: New task result payload.
        error: New error text.

    Returns:
        True when a record with this ``task_id`` was found, False otherwise.
    """
    update_data = {"updated_at": datetime.utcnow()}
    candidates = {
        "status": status,
        "message": message,
        "result": result,
        "error": error,
    }
    update_data.update(
        {key: value for key, value in candidates.items() if value is not None}
    )

    update_result = await self.tasks.update_one(
        {"task_id": task_id},
        {"$set": update_data}
    )
    # matched_count tells whether the task exists; modified_count would also
    # be 0 for a matched record whose stored values happened to be identical.
    return update_result.matched_count > 0
|
||||
|
||||
async def get_task(self, task_id: str) -> Optional[Dict[str, Any]]:
    """Look up a single task by its task_id; the ``_id`` is returned as a string."""
    record = await self.tasks.find_one({"task_id": task_id})
    if not record:
        # Nothing found: hand back whatever find_one returned
        return record
    record["_id"] = str(record["_id"])
    return record
|
||||
|
||||
async def list_tasks(
    self,
    limit: int = 50,
    skip: int = 0,
) -> List[Dict[str, Any]]:
    """
    Return task records, newest first (sorted by ``created_at`` descending).

    Each record's ``_id`` is converted to a string, and ``created_at`` /
    ``updated_at`` datetimes are converted to ISO-8601 strings carrying an
    explicit UTC offset.

    Args:
        limit: Maximum number of records to return.
        skip: Number of records to skip.

    Returns:
        List of task dictionaries.
    """
    from datetime import datetime as _datetime, timezone as _timezone

    def _iso_utc(value):
        """Serialise a datetime as ISO-8601 with an offset; pass other values through."""
        if not isinstance(value, _datetime):
            return value
        if value.tzinfo is None:
            # Timestamps are written with datetime.utcnow(), so a naive value
            # is UTC. Without an offset, JavaScript's Date parses the string
            # as local time and the UI shows shifted times.
            value = value.replace(tzinfo=_timezone.utc)
        return value.isoformat()

    cursor = self.tasks.find().sort("created_at", -1).skip(skip).limit(limit)
    tasks = []
    async for task in cursor:
        task["_id"] = str(task["_id"])
        for key in ("created_at", "updated_at"):
            if task.get(key):
                task[key] = _iso_utc(task[key])
        tasks.append(task)
    return tasks
|
||||
|
||||
async def delete_task(self, task_id: str) -> bool:
    """Delete the task with the given task_id; True when a record was removed."""
    outcome = await self.tasks.delete_one({"task_id": task_id})
    removed = outcome.deleted_count
    return removed > 0
|
||||
|
||||
|
||||
# ==================== 全局单例 ====================
|
||||
|
||||
|
||||
@@ -181,6 +181,22 @@ class TemplateFillService:
|
||||
user_hint=user_hint
|
||||
)
|
||||
|
||||
# AI审核:验证提取的值是否合理
|
||||
if result.values and result.values[0]:
|
||||
logger.info(f"字段 {field.name} 进入AI审核阶段...")
|
||||
verified_result = await self._verify_field_value(
|
||||
field=field,
|
||||
extracted_values=result.values,
|
||||
source_docs=source_docs,
|
||||
user_hint=user_hint
|
||||
)
|
||||
if verified_result:
|
||||
# 审核给出了修正结果
|
||||
result = verified_result
|
||||
logger.info(f"字段 {field.name} 审核后修正值: {result.values[:3]}")
|
||||
else:
|
||||
logger.info(f"字段 {field.name} 审核通过,使用原提取结果")
|
||||
|
||||
# 存储结果 - 使用 values 数组
|
||||
filled_data[field.name] = result.values if result.values else [""]
|
||||
fill_details.append({
|
||||
@@ -533,6 +549,137 @@ class TemplateFillService:
|
||||
confidence=0.0
|
||||
)
|
||||
|
||||
async def _verify_field_value(
    self,
    field: TemplateField,
    extracted_values: List[str],
    source_docs: List[SourceDocument],
    user_hint: Optional[str] = None
) -> Optional[FillResult]:
    """
    Ask the LLM to review extracted values for a field and optionally correct them.

    Args:
        field: Field definition (its ``name`` and ``hint`` are used).
        extracted_values: Values already extracted for the field; at most the
            first 10 are shown to the model.
        source_docs: Source documents used to build the review context.
        user_hint: Optional user hint, prepended to the field hint.

    Returns:
        A ``FillResult`` when the review flags the values as invalid: with the
        corrected values (confidence 0.7) if the model supplied any, otherwise
        with the original values and the model's note (confidence 0.5).
        ``None`` when the values pass review, when there is nothing to review,
        or when the review itself fails; the caller then keeps its original
        result.
    """
    import json

    if not extracted_values or not extracted_values[0]:
        return None

    if not source_docs:
        return None

    try:
        # Build the review context
        context_text = self._build_context_text(source_docs, field_name=field.name, max_length=15000)

        hint_text = field.hint if field.hint else f"请理解{field.name}字段的含义"
        if user_hint:
            hint_text = f"{user_hint}。{hint_text}"

        prompt = f"""你是一个数据质量审核专家。请审核以下提取的数据是否合理。

【待审核字段】
字段名:{field.name}
字段说明:{hint_text}

【已提取的值】
{extracted_values[:10]} # 最多审核前10个值

【源文档上下文】
{context_text[:8000]}

【审核要求】
1. 这些值是否符合字段的含义?
2. 值在原文中的原始含义是什么?检查是否有误解或误提取
3. 是否存在明显错误、空值或不合理的数据?
4. 如果表格有多个列,请确认提取的是正确的列

请严格按照以下 JSON 格式输出(只需输出 JSON,不要其他内容):
{{
"is_valid": true或false,
"corrected_values": ["修正后的值列表"] 或 null(如果无需修正),
"reason": "审核说明,解释判断理由",
"original_meaning": "值在原文中的原始含义描述"
}}
"""

        messages = [
            {"role": "system", "content": "你是一个严格的数据质量审核专家。请仔细核对原文和提取的值是否匹配。"},
            {"role": "user", "content": prompt}
        ]

        response = await self.llm.chat(
            messages=messages,
            temperature=0.2,
            max_tokens=3000
        )

        content = self.llm.extract_message_content(response) or ""
        logger.info(f"字段 {field.name} 审核返回: {content[:300]}")

        # Locate the first JSON object in the reply. Markdown fences and any
        # preamble before the opening brace are skipped by the search itself.
        json_start = content.find("{")
        if json_start == -1:
            logger.warning(f"字段 {field.name} 审核:无法找到 JSON")
            return None

        # raw_decode parses exactly one JSON value and ignores whatever
        # follows it (closing fence, trailing commentary), which json.loads
        # on the remaining text would reject.
        verdict, _ = json.JSONDecoder().raw_decode(content, json_start)
        if not isinstance(verdict, dict):
            logger.warning(f"字段 {field.name} 审核:无法找到 JSON")
            return None

        # Normalise the model's answer; every field may be missing, null or
        # of an unexpected type.
        raw_valid = verdict.get("is_valid", True)
        if isinstance(raw_valid, str):
            is_valid = raw_valid.strip().lower() not in ("false", "0", "no")
        else:
            is_valid = bool(raw_valid)

        raw_corrected = verdict.get("corrected_values")
        if isinstance(raw_corrected, str):
            # A bare string is taken as a single corrected value
            raw_corrected = [raw_corrected]
        if isinstance(raw_corrected, list):
            corrected_values = ["" if item is None else str(item) for item in raw_corrected]
        else:
            corrected_values = []

        reason = str(verdict.get("reason") or "")
        original_meaning = str(verdict.get("original_meaning") or "")

        logger.info(f"字段 {field.name} 审核结果: is_valid={is_valid}, reason={reason[:100]}")

        if not is_valid and corrected_values:
            # Values are wrong and the model proposed replacements: use them
            logger.info(f"字段 {field.name} 使用修正后的值: {corrected_values[:5]}")
            return FillResult(
                field=field.name,
                values=corrected_values,
                value=corrected_values[0],
                source=f"AI审核修正: {reason[:100]}",
                confidence=0.7
            )

        if not is_valid and original_meaning:
            # Values are questionable but no replacement was offered: keep
            # them and attach the model's note for the user
            logger.info(f"字段 {field.name} 审核发现问题: {original_meaning}")
            return FillResult(
                field=field.name,
                values=extracted_values,
                value=extracted_values[0] if extracted_values else "",
                source=f"AI审核疑问: {original_meaning[:100]}",
                confidence=0.5
            )

        # Review passed: None tells the caller to keep its original result
        return None

    except Exception as e:
        # Review is advisory; any failure falls back to the original result
        logger.error(f"字段 {field.name} 审核失败: {str(e)}")
        return None
|
||||
|
||||
def _build_context_text(self, source_docs: List[SourceDocument], field_name: str = None, max_length: int = 8000) -> str:
|
||||
"""
|
||||
构建上下文文本
|
||||
@@ -1580,30 +1727,35 @@ class TemplateFillService:
|
||||
import pandas as pd
|
||||
|
||||
# 读取 Excel 内容检查是否为空
|
||||
content_sample = ""
|
||||
if file_type in ["xlsx", "xls"]:
|
||||
df = pd.read_excel(file_path, header=None)
|
||||
if df.shape[0] == 0 or df.shape[1] == 0:
|
||||
logger.info("Excel 表格为空")
|
||||
# 生成默认字段
|
||||
return [TemplateField(
|
||||
cell=self._column_to_cell(i),
|
||||
name=f"字段{i+1}",
|
||||
field_type="text",
|
||||
required=False,
|
||||
hint="请填写此字段"
|
||||
) for i in range(5)]
|
||||
|
||||
# 表格有数据但没有表头
|
||||
if df.shape[1] > 0:
|
||||
# 读取第一行作为参考,看是否为空
|
||||
first_row = df.iloc[0].tolist() if len(df) > 0 else []
|
||||
if not any(pd.notna(v) and str(v).strip() != '' for v in first_row):
|
||||
# 第一行为空,AI 生成表头
|
||||
content_sample = df.iloc[:10].to_string() if len(df) >= 10 else df.to_string()
|
||||
else:
|
||||
content_sample = df.to_string()
|
||||
# 即使 Excel 为空,如果有源文档,仍然尝试使用 AI 生成表头
|
||||
if not source_contents:
|
||||
logger.info("Excel 为空且没有源文档,使用默认字段名")
|
||||
return [TemplateField(
|
||||
cell=self._column_to_cell(i),
|
||||
name=f"字段{i+1}",
|
||||
field_type="text",
|
||||
required=False,
|
||||
hint="请填写此字段"
|
||||
) for i in range(5)]
|
||||
# 有源文档,继续调用 AI 生成表头
|
||||
logger.info("Excel 为空但有源文档,使用源文档内容生成表头...")
|
||||
else:
|
||||
content_sample = ""
|
||||
# 表格有数据但没有表头
|
||||
if df.shape[1] > 0:
|
||||
# 读取第一行作为参考,看是否为空
|
||||
first_row = df.iloc[0].tolist() if len(df) > 0 else []
|
||||
if not any(pd.notna(v) and str(v).strip() != '' for v in first_row):
|
||||
# 第一行为空,AI 生成表头
|
||||
content_sample = df.iloc[:10].to_string() if len(df) >= 10 else df.to_string()
|
||||
else:
|
||||
content_sample = df.to_string()
|
||||
else:
|
||||
content_sample = ""
|
||||
|
||||
# 调用 AI 生成表头
|
||||
# 根据源文档内容生成表头
|
||||
@@ -1641,21 +1793,21 @@ class TemplateFillService:
|
||||
|
||||
prompt = f"""你是一个专业的表格设计助手。请根据源文档内容生成合适的表格表头字段。
|
||||
|
||||
任务:用户有一些源文档(可能包含表格数据、统计信息等),需要填写到表格中。请分析源文档内容,生成适合的表头字段。
|
||||
任务:用户有一些源文档(包含表格数据),需要填写到空白表格模板中。源文档中的表格如下:
|
||||
|
||||
{source_info}
|
||||
|
||||
请生成5-10个简洁的表头字段名,这些字段应该:
|
||||
1. 简洁明了,易于理解
|
||||
2. 适合作为表格列标题
|
||||
3. 直接对应源文档中的关键数据项
|
||||
4. 字段之间有明显的区分度
|
||||
【重要要求】
|
||||
1. 请仔细阅读上面的源文档表格,找出所有不同的列名(如"产品名称"、"1995年产量"、"按资产总额计算(%)"等)
|
||||
2. 直接使用这些实际的列名作为表头字段名,不要生成新的或同义词
|
||||
3. 如果一个源文档有多个表格,请为每个表格选择合适的列名
|
||||
4. 生成3-8个表头字段,优先选择数据量大的表格的列
|
||||
|
||||
请严格按照以下 JSON 格式输出(只需输出 JSON,不要其他内容):
|
||||
{{
|
||||
"fields": [
|
||||
{{"name": "字段名1", "hint": "字段说明提示1"}},
|
||||
{{"name": "字段名2", "hint": "字段说明提示2"}}
|
||||
{{"name": "实际列名1", "hint": "对该列的说明"}},
|
||||
{{"name": "实际列名2", "hint": "对该列的说明"}}
|
||||
]
|
||||
}}
|
||||
"""
|
||||
|
||||
@@ -400,6 +400,49 @@ export const backendApi = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
 * Fetch the task history list.
 *
 * @param limit Maximum number of tasks to request (default 50).
 * @param skip Number of tasks to skip (default 0).
 * @returns The backend payload; `error` is present when the backend reports
 *          `success: false`.
 * @throws Error when the HTTP response is not OK or the request fails.
 */
async getTasks(
  limit: number = 50,
  skip: number = 0
): Promise<{ success: boolean; tasks: any[]; count: number; error?: string }> {
  const url = `${BACKEND_BASE_URL}/tasks?limit=${limit}&skip=${skip}`;

  try {
    const response = await fetch(url);
    if (!response.ok) {
      // The error body is not guaranteed to be JSON (e.g. a proxy error
      // page); a parse failure must not mask the HTTP failure itself.
      const error = await response.json().catch(() => null);
      throw new Error((error && error.detail) || '获取任务列表失败');
    }
    return await response.json();
  } catch (error) {
    console.error('获取任务列表失败:', error);
    throw error;
  }
},
|
||||
|
||||
/**
 * Delete a task.
 *
 * @param taskId Identifier of the task to delete.
 * @returns The backend payload (`success`, `deleted`).
 * @throws Error when the HTTP response is not OK or the request fails.
 */
async deleteTask(taskId: string): Promise<{ success: boolean; deleted: boolean }> {
  // Encode the id so characters such as "/" or "?" cannot alter the path
  const url = `${BACKEND_BASE_URL}/tasks/${encodeURIComponent(taskId)}`;

  try {
    const response = await fetch(url, {
      method: 'DELETE'
    });
    if (!response.ok) {
      // The error body is not guaranteed to be JSON; a parse failure must
      // not mask the HTTP failure itself.
      const error = await response.json().catch(() => null);
      throw new Error((error && error.detail) || '删除任务失败');
    }
    return await response.json();
  } catch (error) {
    console.error('删除任务失败:', error);
    throw error;
  }
},
|
||||
|
||||
/**
|
||||
* 轮询任务状态直到完成
|
||||
*/
|
||||
|
||||
@@ -11,7 +11,8 @@ import {
|
||||
ChevronDown,
|
||||
ChevronUp,
|
||||
Trash2,
|
||||
AlertCircle
|
||||
AlertCircle,
|
||||
HelpCircle
|
||||
} from 'lucide-react';
|
||||
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
|
||||
import { Button } from '@/components/ui/button';
|
||||
@@ -24,9 +25,9 @@ import { Skeleton } from '@/components/ui/skeleton';
|
||||
|
||||
type Task = {
|
||||
task_id: string;
|
||||
status: 'pending' | 'processing' | 'success' | 'failure';
|
||||
status: 'pending' | 'processing' | 'success' | 'failure' | 'unknown';
|
||||
created_at: string;
|
||||
completed_at?: string;
|
||||
updated_at?: string;
|
||||
message?: string;
|
||||
result?: any;
|
||||
error?: string;
|
||||
@@ -38,54 +39,38 @@ const TaskHistory: React.FC = () => {
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [expandedTask, setExpandedTask] = useState<string | null>(null);
|
||||
|
||||
// Mock data for demonstration
|
||||
useEffect(() => {
|
||||
// 模拟任务数据,实际应该从后端获取
|
||||
setTasks([
|
||||
{
|
||||
task_id: 'task-001',
|
||||
status: 'success',
|
||||
created_at: new Date(Date.now() - 3600000).toISOString(),
|
||||
completed_at: new Date(Date.now() - 3500000).toISOString(),
|
||||
task_type: 'document_parse',
|
||||
message: '文档解析完成',
|
||||
result: {
|
||||
doc_id: 'doc-001',
|
||||
filename: 'report_q1_2026.docx',
|
||||
extracted_fields: ['标题', '作者', '日期', '金额']
|
||||
}
|
||||
},
|
||||
{
|
||||
task_id: 'task-002',
|
||||
status: 'success',
|
||||
created_at: new Date(Date.now() - 7200000).toISOString(),
|
||||
completed_at: new Date(Date.now() - 7100000).toISOString(),
|
||||
task_type: 'excel_analysis',
|
||||
message: 'Excel 分析完成',
|
||||
result: {
|
||||
filename: 'sales_data.xlsx',
|
||||
row_count: 1250,
|
||||
charts_generated: 3
|
||||
}
|
||||
},
|
||||
{
|
||||
task_id: 'task-003',
|
||||
status: 'processing',
|
||||
created_at: new Date(Date.now() - 600000).toISOString(),
|
||||
task_type: 'template_fill',
|
||||
message: '正在填充表格...'
|
||||
},
|
||||
{
|
||||
task_id: 'task-004',
|
||||
status: 'failure',
|
||||
created_at: new Date(Date.now() - 86400000).toISOString(),
|
||||
completed_at: new Date(Date.now() - 86390000).toISOString(),
|
||||
task_type: 'document_parse',
|
||||
message: '解析失败',
|
||||
error: '文件格式不支持或文件已损坏'
|
||||
// 获取任务历史数据
|
||||
const fetchTasks = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const response = await backendApi.getTasks(50, 0);
|
||||
if (response.success && response.tasks) {
|
||||
// 转换后端数据格式为前端格式
|
||||
const convertedTasks: Task[] = response.tasks.map((t: any) => ({
|
||||
task_id: t.task_id,
|
||||
status: t.status || 'unknown',
|
||||
created_at: t.created_at || new Date().toISOString(),
|
||||
updated_at: t.updated_at,
|
||||
message: t.message || '',
|
||||
result: t.result,
|
||||
error: t.error,
|
||||
task_type: t.task_type || 'document_parse'
|
||||
}));
|
||||
setTasks(convertedTasks);
|
||||
} else {
|
||||
setTasks([]);
|
||||
}
|
||||
]);
|
||||
setLoading(false);
|
||||
} catch (error) {
|
||||
console.error('获取任务列表失败:', error);
|
||||
toast.error('获取任务列表失败');
|
||||
setTasks([]);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
fetchTasks();
|
||||
}, []);
|
||||
|
||||
const getStatusBadge = (status: string) => {
|
||||
@@ -96,6 +81,8 @@ const TaskHistory: React.FC = () => {
|
||||
return <Badge className="bg-destructive text-white text-[10px]"><XCircle size={12} className="mr-1" />失败</Badge>;
|
||||
case 'processing':
|
||||
return <Badge className="bg-amber-500 text-white text-[10px]"><Loader2 size={12} className="mr-1 animate-spin" />处理中</Badge>;
|
||||
case 'unknown':
|
||||
return <Badge className="bg-gray-500 text-white text-[10px]"><HelpCircle size={12} className="mr-1" />未知</Badge>;
|
||||
default:
|
||||
return <Badge className="bg-gray-500 text-white text-[10px]"><Clock size={12} className="mr-1" />等待</Badge>;
|
||||
}
|
||||
@@ -133,15 +120,22 @@ const TaskHistory: React.FC = () => {
|
||||
};
|
||||
|
||||
const handleDelete = async (taskId: string) => {
|
||||
setTasks(prev => prev.filter(t => t.task_id !== taskId));
|
||||
toast.success('任务已删除');
|
||||
try {
|
||||
await backendApi.deleteTask(taskId);
|
||||
setTasks(prev => prev.filter(t => t.task_id !== taskId));
|
||||
toast.success('任务已删除');
|
||||
} catch (error) {
|
||||
console.error('删除任务失败:', error);
|
||||
toast.error('删除任务失败');
|
||||
}
|
||||
};
|
||||
|
||||
const stats = {
|
||||
total: tasks.length,
|
||||
success: tasks.filter(t => t.status === 'success').length,
|
||||
processing: tasks.filter(t => t.status === 'processing').length,
|
||||
failure: tasks.filter(t => t.status === 'failure').length
|
||||
failure: tasks.filter(t => t.status === 'failure').length,
|
||||
unknown: tasks.filter(t => t.status === 'unknown').length
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -151,7 +145,7 @@ const TaskHistory: React.FC = () => {
|
||||
<h1 className="text-3xl font-extrabold tracking-tight">任务历史</h1>
|
||||
<p className="text-muted-foreground">查看和管理您所有的文档处理任务记录</p>
|
||||
</div>
|
||||
<Button variant="outline" className="rounded-xl gap-2" onClick={() => window.location.reload()}>
|
||||
<Button variant="outline" className="rounded-xl gap-2" onClick={() => fetchTasks()}>
|
||||
<RefreshCcw size={18} />
|
||||
<span>刷新</span>
|
||||
</Button>
|
||||
@@ -194,7 +188,8 @@ const TaskHistory: React.FC = () => {
|
||||
"w-12 h-12 rounded-xl flex items-center justify-center shrink-0",
|
||||
task.status === 'success' ? "bg-emerald-500/10 text-emerald-500" :
|
||||
task.status === 'failure' ? "bg-destructive/10 text-destructive" :
|
||||
"bg-amber-500/10 text-amber-500"
|
||||
task.status === 'processing' ? "bg-amber-500/10 text-amber-500" :
|
||||
"bg-gray-500/10 text-gray-500"
|
||||
)}>
|
||||
{task.status === 'processing' ? (
|
||||
<Loader2 size={24} className="animate-spin" />
|
||||
@@ -212,16 +207,16 @@ const TaskHistory: React.FC = () => {
|
||||
</Badge>
|
||||
</div>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
{task.message || '任务执行中...'}
|
||||
{task.message || (task.status === 'unknown' ? '无法获取状态' : '任务执行中...')}
|
||||
</p>
|
||||
<div className="flex items-center gap-4 text-xs text-muted-foreground">
|
||||
<span className="flex items-center gap-1">
|
||||
<Clock size={12} />
|
||||
{format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss')}
|
||||
{task.created_at ? format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss') : '时间未知'}
|
||||
</span>
|
||||
{task.completed_at && (
|
||||
{task.updated_at && task.status !== 'processing' && (
|
||||
<span>
|
||||
耗时: {Math.round((new Date(task.completed_at).getTime() - new Date(task.created_at).getTime()) / 1000)} 秒
|
||||
更新: {format(new Date(task.updated_at), 'HH:mm:ss')}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user