添加任务状态双写机制和历史记录功能

- 实现任务状态同时写入Redis和MongoDB的双写机制
- 添加MongoDB任务集合及CRUD操作接口
- 新增任务历史记录查询、列表展示和删除功能
- 重构任务状态更新逻辑,统一使用update_task_status函数
- 添加模板填充服务中AI审核字段值的功能
- 优化前端任务历史页面显示和交互体验
This commit is contained in:
2026-04-10 01:15:53 +08:00
parent ed0f51f2a4
commit 858b594171
7 changed files with 638 additions and 138 deletions

View File

@@ -23,6 +23,52 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/upload", tags=["文档上传"]) router = APIRouter(prefix="/upload", tags=["文档上传"])
# ==================== 辅助函数 ====================
async def update_task_status(
    task_id: str,
    status: str,
    progress: int = 0,
    message: str = "",
    result: dict = None,
    error: str = None
):
    """
    Record a task's status in both Redis and MongoDB (best-effort dual write).

    Each store is written inside its own ``try`` block: a failure in one store
    is logged as a warning and neither blocks the other write nor propagates
    to the caller.

    Args:
        task_id: Task identifier.
        status: New status value.
        progress: Progress value; stored in the Redis metadata only, it is
            not forwarded to ``mongodb.update_task``.
        message: Status message, written to both stores.
        result: Optional result payload, written when it is not None.
        error: Optional error text, written when it is not None.
    """
    meta = {"progress": progress, "message": message}
    # Explicit None checks (rather than truthiness) keep the Redis metadata in
    # step with what is handed to MongoDB below: an empty result dict or an
    # empty error string is still a value the caller supplied.
    if result is not None:
        meta["result"] = result
    if error is not None:
        meta["error"] = error

    # Try Redis first
    try:
        await redis_db.set_task_status(task_id, status, meta)
    except Exception as e:
        logger.warning(f"Redis 任务状态更新失败: {e}")

    # Then MongoDB, which acts as the fallback store
    try:
        await mongodb.update_task(
            task_id=task_id,
            status=status,
            message=message,
            result=result,
            error=error
        )
    except Exception as e:
        logger.warning(f"MongoDB 任务状态更新失败: {e}")
# ==================== 请求/响应模型 ==================== # ==================== 请求/响应模型 ====================
class UploadResponse(BaseModel): class UploadResponse(BaseModel):
@@ -77,6 +123,17 @@ async def upload_document(
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
try: try:
# 保存任务记录到 MongoDB如果 Redis 不可用时仍能查询)
try:
await mongodb.insert_task(
task_id=task_id,
task_type="document_parse",
status="pending",
message=f"文档 {file.filename} 已提交处理"
)
except Exception as mongo_err:
logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}")
content = await file.read() content = await file.read()
saved_path = file_service.save_uploaded_file( saved_path = file_service.save_uploaded_file(
content, content,
@@ -122,6 +179,17 @@ async def upload_documents(
saved_paths = [] saved_paths = []
try: try:
# 保存任务记录到 MongoDB
try:
await mongodb.insert_task(
task_id=task_id,
task_type="batch_parse",
status="pending",
message=f"已提交 {len(files)} 个文档处理"
)
except Exception as mongo_err:
logger.warning(f"MongoDB 保存批量任务记录失败: {mongo_err}")
for file in files: for file in files:
if not file.filename: if not file.filename:
continue continue
@@ -159,9 +227,9 @@ async def process_document(
"""处理单个文档""" """处理单个文档"""
try: try:
# 状态: 解析中 # 状态: 解析中
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": 10, "message": "正在解析文档"} progress=10, message="正在解析文档"
) )
# 解析文档 # 解析文档
@@ -172,9 +240,9 @@ async def process_document(
raise Exception(result.error or "解析失败") raise Exception(result.error or "解析失败")
# 状态: 存储中 # 状态: 存储中
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": 30, "message": "正在存储数据"} progress=30, message="正在存储数据"
) )
# 存储到 MongoDB # 存储到 MongoDB
@@ -191,9 +259,9 @@ async def process_document(
# 如果是 Excel存储到 MySQL + AI生成描述 + RAG索引 # 如果是 Excel存储到 MySQL + AI生成描述 + RAG索引
if doc_type in ["xlsx", "xls"]: if doc_type in ["xlsx", "xls"]:
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"} progress=50, message="正在存储到MySQL并生成字段描述"
) )
try: try:
@@ -215,9 +283,9 @@ async def process_document(
else: else:
# 非结构化文档 # 非结构化文档
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": 60, "message": "正在建立索引"} progress=60, message="正在建立索引"
) )
# 如果文档中有表格数据,提取并存储到 MySQL + RAG # 如果文档中有表格数据,提取并存储到 MySQL + RAG
@@ -238,17 +306,13 @@ async def process_document(
await index_document_to_rag(doc_id, original_filename, result, doc_type) await index_document_to_rag(doc_id, original_filename, result, doc_type)
# 完成 # 完成
await redis_db.set_task_status( await update_task_status(
task_id, status="success", task_id, status="success",
meta={ progress=100, message="处理完成",
"progress": 100, result={
"message": "处理完成",
"doc_id": doc_id, "doc_id": doc_id,
"result": { "doc_type": doc_type,
"doc_id": doc_id, "filename": original_filename
"doc_type": doc_type,
"filename": original_filename
}
} }
) )
@@ -256,18 +320,19 @@ async def process_document(
except Exception as e: except Exception as e:
logger.error(f"文档处理失败: {str(e)}") logger.error(f"文档处理失败: {str(e)}")
await redis_db.set_task_status( await update_task_status(
task_id, status="failure", task_id, status="failure",
meta={"error": str(e)} progress=0, message="处理失败",
error=str(e)
) )
async def process_documents_batch(task_id: str, files: List[dict]): async def process_documents_batch(task_id: str, files: List[dict]):
"""批量处理文档""" """批量处理文档"""
try: try:
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": 0, "message": "开始批量处理"} progress=0, message="开始批量处理"
) )
results = [] results = []
@@ -318,21 +383,23 @@ async def process_documents_batch(task_id: str, files: List[dict]):
results.append({"filename": file_info["filename"], "success": False, "error": str(e)}) results.append({"filename": file_info["filename"], "success": False, "error": str(e)})
progress = int((i + 1) / len(files) * 100) progress = int((i + 1) / len(files) * 100)
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"} progress=progress, message=f"已处理 {i+1}/{len(files)}"
) )
await redis_db.set_task_status( await update_task_status(
task_id, status="success", task_id, status="success",
meta={"progress": 100, "message": "批量处理完成", "results": results} progress=100, message="批量处理完成",
result={"results": results}
) )
except Exception as e: except Exception as e:
logger.error(f"批量处理失败: {str(e)}") logger.error(f"批量处理失败: {str(e)}")
await redis_db.set_task_status( await update_task_status(
task_id, status="failure", task_id, status="failure",
meta={"error": str(e)} progress=0, message="批量处理失败",
error=str(e)
) )

View File

@@ -1,13 +1,13 @@
""" """
任务管理 API 接口 任务管理 API 接口
提供异步任务状态查询 提供异步任务状态查询和历史记录
""" """
from typing import Optional from typing import Optional
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from app.core.database import redis_db from app.core.database import redis_db, mongodb
router = APIRouter(prefix="/tasks", tags=["任务管理"]) router = APIRouter(prefix="/tasks", tags=["任务管理"])
@@ -23,25 +23,94 @@ async def get_task_status(task_id: str):
Returns: Returns:
任务状态信息 任务状态信息
""" """
# 优先从 Redis 获取
status = await redis_db.get_task_status(task_id) status = await redis_db.get_task_status(task_id)
if not status: if status:
# Redis不可用时假设任务已完成文档已成功处理
# 前端轮询时会得到这个响应
return { return {
"task_id": task_id, "task_id": task_id,
"status": "success", "status": status.get("status", "unknown"),
"progress": 100, "progress": status.get("meta", {}).get("progress", 0),
"message": "任务处理完成", "message": status.get("meta", {}).get("message"),
"result": None, "result": status.get("meta", {}).get("result"),
"error": None "error": status.get("meta", {}).get("error")
} }
# Redis 不可用时,尝试从 MongoDB 获取
mongo_task = await mongodb.get_task(task_id)
if mongo_task:
return {
"task_id": mongo_task.get("task_id"),
"status": mongo_task.get("status", "unknown"),
"progress": 100 if mongo_task.get("status") == "success" else 0,
"message": mongo_task.get("message"),
"result": mongo_task.get("result"),
"error": mongo_task.get("error")
}
# 任务不存在或状态未知
return { return {
"task_id": task_id, "task_id": task_id,
"status": status.get("status", "unknown"), "status": "unknown",
"progress": status.get("meta", {}).get("progress", 0), "progress": 0,
"message": status.get("meta", {}).get("message"), "message": "无法获取任务状态Redis和MongoDB均不可用",
"result": status.get("meta", {}).get("result"), "result": None,
"error": status.get("meta", {}).get("error") "error": None
} }
@router.get("/")
async def list_tasks(limit: int = 50, skip: int = 0):
    """
    Return a page of task history records read from MongoDB.

    Args:
        limit: Maximum number of records to return.
        skip: Number of records to skip.

    Returns:
        A dict with ``success``, ``tasks`` and ``count``. When the lookup
        raises, ``success`` is False, the list is empty and ``error`` holds
        the exception text.
    """
    try:
        history = await mongodb.list_tasks(limit=limit, skip=skip)
        payload = {
            "success": True,
            "tasks": history,
            "count": len(history)
        }
    except Exception as exc:
        # Any failure degrades to an empty list instead of an HTTP error
        payload = {
            "success": False,
            "tasks": [],
            "count": 0,
            "error": str(exc)
        }
    return payload
@router.delete("/{task_id}")
async def delete_task(task_id: str):
    """
    Delete a task from Redis (best-effort) and from MongoDB.

    Args:
        task_id: Task identifier.

    Returns:
        A dict with ``success`` set to True and ``deleted`` set to the value
        returned by ``mongodb.delete_task``.

    Raises:
        HTTPException: 500 when the MongoDB deletion raises.
    """
    import logging  # local import: a module-level logger is not visible here

    # Redis deletion is best-effort so that an unreachable Redis cannot block
    # removal of the MongoDB history record.
    try:
        if redis_db._connected and redis_db.client:
            key = f"task:{task_id}"
            await redis_db.client.delete(key)
    except Exception as e:
        logging.getLogger(__name__).warning(f"Redis 任务删除失败: {e}")

    try:
        deleted = await mongodb.delete_task(task_id)
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs
        raise HTTPException(status_code=500, detail=f"删除任务失败: {str(e)}") from e

    return {
        "success": True,
        "deleted": deleted
    }

View File

@@ -23,6 +23,44 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/templates", tags=["表格模板"]) router = APIRouter(prefix="/templates", tags=["表格模板"])
# ==================== 辅助函数 ====================
async def update_task_status(
    task_id: str,
    status: str,
    progress: int = 0,
    message: str = "",
    result: dict = None,
    error: str = None
):
    """
    Record a task's status in both Redis and MongoDB (best-effort dual write).

    Each store is written inside its own ``try`` block: a failure in one store
    is logged as a warning and neither blocks the other write nor propagates
    to the caller.

    Args:
        task_id: Task identifier.
        status: New status value.
        progress: Progress value; stored in the Redis metadata only, it is
            not forwarded to ``mongodb.update_task``.
        message: Status message, written to both stores.
        result: Optional result payload, written when it is not None.
        error: Optional error text, written when it is not None.
    """
    from app.core.database import redis_db

    meta = {"progress": progress, "message": message}
    # Explicit None checks (rather than truthiness) keep the Redis metadata in
    # step with what is handed to MongoDB below: an empty result dict or an
    # empty error string is still a value the caller supplied.
    if result is not None:
        meta["result"] = result
    if error is not None:
        meta["error"] = error

    try:
        await redis_db.set_task_status(task_id, status, meta)
    except Exception as e:
        logger.warning(f"Redis 任务状态更新失败: {e}")

    try:
        await mongodb.update_task(
            task_id=task_id,
            status=status,
            message=message,
            result=result,
            error=error
        )
    except Exception as e:
        logger.warning(f"MongoDB 任务状态更新失败: {e}")
# ==================== 请求/响应模型 ==================== # ==================== 请求/响应模型 ====================
class TemplateFieldRequest(BaseModel): class TemplateFieldRequest(BaseModel):
@@ -244,6 +282,17 @@ async def upload_joint_template(
# 3. 异步处理源文档到MongoDB # 3. 异步处理源文档到MongoDB
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
if source_file_info: if source_file_info:
# 保存任务记录到 MongoDB
try:
await mongodb.insert_task(
task_id=task_id,
task_type="source_process",
status="pending",
message=f"开始处理 {len(source_file_info)} 个源文档"
)
except Exception as mongo_err:
logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}")
background_tasks.add_task( background_tasks.add_task(
process_source_documents, process_source_documents,
task_id=task_id, task_id=task_id,
@@ -282,12 +331,10 @@ async def upload_joint_template(
async def process_source_documents(task_id: str, files: List[dict]): async def process_source_documents(task_id: str, files: List[dict]):
"""异步处理源文档存入MongoDB""" """异步处理源文档存入MongoDB"""
from app.core.database import redis_db
try: try:
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": 0, "message": "开始处理源文档"} progress=0, message="开始处理源文档"
) )
doc_ids = [] doc_ids = []
@@ -316,22 +363,24 @@ async def process_source_documents(task_id: str, files: List[dict]):
logger.error(f"源文档处理异常: {file_info['filename']}, error: {str(e)}") logger.error(f"源文档处理异常: {file_info['filename']}, error: {str(e)}")
progress = int((i + 1) / len(files) * 100) progress = int((i + 1) / len(files) * 100)
await redis_db.set_task_status( await update_task_status(
task_id, status="processing", task_id, status="processing",
meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"} progress=progress, message=f"已处理 {i+1}/{len(files)}"
) )
await redis_db.set_task_status( await update_task_status(
task_id, status="success", task_id, status="success",
meta={"progress": 100, "message": "源文档处理完成", "doc_ids": doc_ids} progress=100, message="源文档处理完成",
result={"doc_ids": doc_ids}
) )
logger.info(f"所有源文档处理完成: {len(doc_ids)}") logger.info(f"所有源文档处理完成: {len(doc_ids)}")
except Exception as e: except Exception as e:
logger.error(f"源文档批量处理失败: {str(e)}") logger.error(f"源文档批量处理失败: {str(e)}")
await redis_db.set_task_status( await update_task_status(
task_id, status="failure", task_id, status="failure",
meta={"error": str(e)} progress=0, message="源文档处理失败",
error=str(e)
) )

View File

@@ -59,6 +59,11 @@ class MongoDB:
"""RAG索引集合 - 存储字段语义索引""" """RAG索引集合 - 存储字段语义索引"""
return self.db["rag_index"] return self.db["rag_index"]
@property
def tasks(self):
    """Task collection - stores task history records."""
    collection = self.db["tasks"]
    return collection
# ==================== 文档操作 ==================== # ==================== 文档操作 ====================
async def insert_document( async def insert_document(
@@ -242,8 +247,128 @@ class MongoDB:
await self.rag_index.create_index("table_name") await self.rag_index.create_index("table_name")
await self.rag_index.create_index("field_name") await self.rag_index.create_index("field_name")
# 任务集合索引
await self.tasks.create_index("task_id", unique=True)
await self.tasks.create_index("created_at")
logger.info("MongoDB 索引创建完成") logger.info("MongoDB 索引创建完成")
# ==================== 任务历史操作 ====================
async def insert_task(
    self,
    task_id: str,
    task_type: str,
    status: str = "pending",
    message: str = "",
    result: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> str:
    """
    Insert a new task record into the tasks collection.

    Args:
        task_id: Task identifier.
        task_type: Task type label.
        status: Initial task status.
        message: Task message.
        result: Task result payload.
        error: Error text.

    Returns:
        The inserted document's ``_id`` as a string.
    """
    # Read the clock once so a fresh record has created_at == updated_at
    now = datetime.utcnow()
    task = {
        "task_id": task_id,
        "task_type": task_type,
        "status": status,
        "message": message,
        "result": result,
        "error": error,
        "created_at": now,
        "updated_at": now,
    }
    result_obj = await self.tasks.insert_one(task)
    return str(result_obj.inserted_id)
async def update_task(
    self,
    task_id: str,
    status: Optional[str] = None,
    message: Optional[str] = None,
    result: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> bool:
    """
    Update the stored fields of an existing task record.

    Only arguments that are not None are written; ``updated_at`` is always
    refreshed. The record is matched on its ``task_id`` field.

    Args:
        task_id: Task identifier.
        status: New task status.
        message: New task message.
        result: New task result payload.
        error: New error text.

    Returns:
        True when the update modified a document, otherwise False.
    """
    # Only fields the caller actually supplied are included in the $set
    supplied = {
        "status": status,
        "message": message,
        "result": result,
        "error": error,
    }
    update_data = {"updated_at": datetime.utcnow()}
    update_data.update(
        {key: value for key, value in supplied.items() if value is not None}
    )

    update_result = await self.tasks.update_one(
        {"task_id": task_id},
        {"$set": update_data}
    )
    return update_result.modified_count > 0
async def get_task(self, task_id: str) -> Optional[Dict[str, Any]]:
    """Look up one task by its task_id; the ``_id`` of a found record is stringified."""
    record = await self.tasks.find_one({"task_id": task_id})
    if not record:
        # Nothing found: hand back the lookup result unchanged
        return record
    record["_id"] = str(record["_id"])
    return record
async def list_tasks(
    self,
    limit: int = 50,
    skip: int = 0,
) -> List[Dict[str, Any]]:
    """
    List task records sorted by ``created_at`` descending.

    Args:
        limit: Number of records to return.
        skip: Number of records to skip.

    Returns:
        Task records with ``_id`` stringified and any non-empty
        ``created_at`` / ``updated_at`` converted via ``isoformat()``.
    """
    query = self.tasks.find().sort("created_at", -1).skip(skip).limit(limit)
    records = []
    async for record in query:
        record["_id"] = str(record["_id"])
        # Convert datetime fields to strings
        for stamp_key in ("created_at", "updated_at"):
            stamp = record.get(stamp_key)
            if stamp:
                record[stamp_key] = stamp.isoformat()
        records.append(record)
    return records
async def delete_task(self, task_id: str) -> bool:
    """Delete the task record matching task_id; True when a document was removed."""
    outcome = await self.tasks.delete_one({"task_id": task_id})
    removed = outcome.deleted_count
    return removed > 0
# ==================== 全局单例 ==================== # ==================== 全局单例 ====================

View File

@@ -181,6 +181,22 @@ class TemplateFillService:
user_hint=user_hint user_hint=user_hint
) )
# AI审核验证提取的值是否合理
if result.values and result.values[0]:
logger.info(f"字段 {field.name} 进入AI审核阶段...")
verified_result = await self._verify_field_value(
field=field,
extracted_values=result.values,
source_docs=source_docs,
user_hint=user_hint
)
if verified_result:
# 审核给出了修正结果
result = verified_result
logger.info(f"字段 {field.name} 审核后修正值: {result.values[:3]}")
else:
logger.info(f"字段 {field.name} 审核通过,使用原提取结果")
# 存储结果 - 使用 values 数组 # 存储结果 - 使用 values 数组
filled_data[field.name] = result.values if result.values else [""] filled_data[field.name] = result.values if result.values else [""]
fill_details.append({ fill_details.append({
@@ -533,6 +549,137 @@ class TemplateFillService:
confidence=0.0 confidence=0.0
) )
async def _verify_field_value(
    self,
    field: TemplateField,
    extracted_values: List[str],
    source_docs: List[SourceDocument],
    user_hint: Optional[str] = None
) -> Optional[FillResult]:
    """
    Ask the LLM to audit the values extracted for a field and, if needed, correct them.

    Args:
        field: Field definition.
        extracted_values: Values already extracted for the field.
        source_docs: Source documents used to build the audit context.
        user_hint: Optional user hint, prepended to the field hint.

    Returns:
        A new FillResult when the audit marks the values invalid and either
        supplies corrected values or describes the original meaning.
        None when there is nothing to audit, the audit passes, or the audit
        fails for any reason; the caller then keeps the original result.
    """
    if not extracted_values or not extracted_values[0]:
        return None
    if not source_docs:
        return None

    try:
        # Build the audit context
        context_text = self._build_context_text(source_docs, field_name=field.name, max_length=15000)
        hint_text = field.hint if field.hint else f"请理解{field.name}字段的含义"
        if user_hint:
            hint_text = f"{user_hint}{hint_text}"

        # NOTE: the "# ..." text after the values list is inside the f-string,
        # so it is sent to the model as part of the prompt.
        prompt = f"""你是一个数据质量审核专家。请审核以下提取的数据是否合理。
【待审核字段】
字段名:{field.name}
字段说明:{hint_text}
【已提取的值】
{extracted_values[:10]} # 最多审核前10个值
【源文档上下文】
{context_text[:8000]}
【审核要求】
1. 这些值是否符合字段的含义?
2. 值在原文中的原始含义是什么?检查是否有误解或误提取
3. 是否存在明显错误、空值或不合理的数据?
4. 如果表格有多个列,请确认提取的是正确的列
请严格按照以下 JSON 格式输出(只需输出 JSON不要其他内容
{{
"is_valid": true或false,
"corrected_values": ["修正后的值列表"] 或 null如果无需修正,
"reason": "审核说明,解释判断理由",
"original_meaning": "值在原文中的原始含义描述"
}}
"""
        messages = [
            {"role": "system", "content": "你是一个严格的数据质量审核专家。请仔细核对原文和提取的值是否匹配。"},
            {"role": "user", "content": prompt}
        ]
        response = await self.llm.chat(
            messages=messages,
            temperature=0.2,
            max_tokens=3000
        )
        content = self.llm.extract_message_content(response)
        logger.info(f"字段 {field.name} 审核返回: {content[:300]}")

        # Parse the JSON payload
        import json
        import re

        # Strip Markdown code fences that models often wrap around JSON
        cleaned = content.strip()
        cleaned = re.sub(r'^```json\s*', '', cleaned, flags=re.MULTILINE)
        cleaned = re.sub(r'^```\s*', '', cleaned, flags=re.MULTILINE)
        cleaned = cleaned.strip()

        json_start = cleaned.find('{')
        if json_start == -1:
            logger.warning(f"字段 {field.name} 审核:无法找到 JSON")
            return None

        # raw_decode parses the first JSON value and ignores anything after
        # it, so trailing commentary from the model does not fail the audit.
        result, _ = json.JSONDecoder().raw_decode(cleaned[json_start:])
        if not isinstance(result, dict):
            # Handled by the except below, like any other audit failure
            raise ValueError("审核结果不是 JSON 对象")

        is_valid = result.get("is_valid", True)
        corrected_values = result.get("corrected_values")
        # A JSON null for these keys would otherwise break the slicing below
        reason = str(result.get("reason") or "")
        original_meaning = str(result.get("original_meaning") or "")

        # Normalise corrected_values to a list: a bare string becomes a
        # one-item list, any other non-list shape means "no correction".
        if isinstance(corrected_values, str):
            corrected_values = [corrected_values] if corrected_values else None
        elif not isinstance(corrected_values, list):
            corrected_values = None

        logger.info(f"字段 {field.name} 审核结果: is_valid={is_valid}, reason={reason[:100]}")

        if not is_valid and corrected_values:
            # Values are wrong and a correction was supplied: use it
            logger.info(f"字段 {field.name} 使用修正后的值: {corrected_values[:5]}")
            return FillResult(
                field=field.name,
                values=corrected_values,
                value=corrected_values[0],
                source=f"AI审核修正: {reason[:100]}",
                confidence=0.7
            )
        elif not is_valid and original_meaning:
            # Values are questionable but no correction was given: keep them
            # and record the original meaning for the user to review
            logger.info(f"字段 {field.name} 审核发现问题: {original_meaning}")
            return FillResult(
                field=field.name,
                values=extracted_values,
                value=extracted_values[0] if extracted_values else "",
                source=f"AI审核疑问: {original_meaning[:100]}",
                confidence=0.5
            )

        # Audit passed: None tells the caller to keep the original result
        return None

    except Exception as e:
        logger.error(f"字段 {field.name} 审核失败: {str(e)}")
        return None
def _build_context_text(self, source_docs: List[SourceDocument], field_name: str = None, max_length: int = 8000) -> str: def _build_context_text(self, source_docs: List[SourceDocument], field_name: str = None, max_length: int = 8000) -> str:
""" """
构建上下文文本 构建上下文文本
@@ -1580,30 +1727,35 @@ class TemplateFillService:
import pandas as pd import pandas as pd
# 读取 Excel 内容检查是否为空 # 读取 Excel 内容检查是否为空
content_sample = ""
if file_type in ["xlsx", "xls"]: if file_type in ["xlsx", "xls"]:
df = pd.read_excel(file_path, header=None) df = pd.read_excel(file_path, header=None)
if df.shape[0] == 0 or df.shape[1] == 0: if df.shape[0] == 0 or df.shape[1] == 0:
logger.info("Excel 表格为空") logger.info("Excel 表格为空")
# 生成默认字段 # 即使 Excel 为空,如果有源文档,仍然尝试使用 AI 生成表头
return [TemplateField( if not source_contents:
cell=self._column_to_cell(i), logger.info("Excel 为空且没有源文档,使用默认字段名")
name=f"字段{i+1}", return [TemplateField(
field_type="text", cell=self._column_to_cell(i),
required=False, name=f"字段{i+1}",
hint="请填写此字段" field_type="text",
) for i in range(5)] required=False,
hint="请填写此字段"
# 表格有数据但没有表头 ) for i in range(5)]
if df.shape[1] > 0: # 有源文档,继续调用 AI 生成表头
# 读取第一行作为参考,看是否为空 logger.info("Excel 为空但有源文档,使用源文档内容生成表头...")
first_row = df.iloc[0].tolist() if len(df) > 0 else []
if not any(pd.notna(v) and str(v).strip() != '' for v in first_row):
# 第一行为空AI 生成表头
content_sample = df.iloc[:10].to_string() if len(df) >= 10 else df.to_string()
else:
content_sample = df.to_string()
else: else:
content_sample = "" # 表格有数据但没有表头
if df.shape[1] > 0:
# 读取第一行作为参考,看是否为空
first_row = df.iloc[0].tolist() if len(df) > 0 else []
if not any(pd.notna(v) and str(v).strip() != '' for v in first_row):
# 第一行为空AI 生成表头
content_sample = df.iloc[:10].to_string() if len(df) >= 10 else df.to_string()
else:
content_sample = df.to_string()
else:
content_sample = ""
# 调用 AI 生成表头 # 调用 AI 生成表头
# 根据源文档内容生成表头 # 根据源文档内容生成表头
@@ -1641,21 +1793,21 @@ class TemplateFillService:
prompt = f"""你是一个专业的表格设计助手。请根据源文档内容生成合适的表格表头字段。 prompt = f"""你是一个专业的表格设计助手。请根据源文档内容生成合适的表格表头字段。
任务:用户有一些源文档(可能包含表格数据、统计信息等),需要填写到表格中。请分析源文档内容,生成适合的表头字段。 任务:用户有一些源文档(包含表格数据),需要填写到空白表格模板中。源文档中的表格如下:
{source_info} {source_info}
请生成5-10个简洁的表头字段名这些字段应该 【重要要求】
1. 简洁明了,易于理解 1. 请仔细阅读上面的源文档表格,找出所有不同的列名(如"产品名称""1995年产量""按资产总额计算(%)"等)
2. 适合作为表格列标题 2. 直接使用这些实际的列名作为表头字段名,不要生成新的或同义词
3. 直接对应源文档中的关键数据项 3. 如果一个源文档有多个表格,请为每个表格选择合适的列名
4. 字段之间有明显的区分度 4. 生成3-8个表头字段优先选择数据量大的表格的列
请严格按照以下 JSON 格式输出(只需输出 JSON不要其他内容 请严格按照以下 JSON 格式输出(只需输出 JSON不要其他内容
{{ {{
"fields": [ "fields": [
{{"name": "字段名1", "hint": "字段说明提示1"}}, {{"name": "实际列名1", "hint": "对该列的说明"}},
{{"name": "字段名2", "hint": "字段说明提示2"}} {{"name": "实际列名2", "hint": "对该列的说明"}}
] ]
}} }}
""" """

View File

@@ -400,6 +400,49 @@ export const backendApi = {
} }
}, },
/**
 * Fetch the task history list from the backend.
 *
 * @param limit maximum number of tasks to return
 * @param skip number of tasks to skip
 * @returns the backend payload; `error` is set by the backend when it
 *          reports `success: false`
 * @throws Error when the HTTP response is not OK or the request fails
 */
async getTasks(
  limit: number = 50,
  skip: number = 0
): Promise<{ success: boolean; tasks: any[]; count: number; error?: string }> {
  const url = `${BACKEND_BASE_URL}/tasks?limit=${limit}&skip=${skip}`;

  try {
    const response = await fetch(url);
    if (!response.ok) {
      // The error body may not be JSON; fall back to the default message
      // instead of surfacing a JSON parse error.
      const error = await response.json().catch(() => ({}));
      throw new Error(error.detail || '获取任务列表失败');
    }
    return await response.json();
  } catch (error) {
    console.error('获取任务列表失败:', error);
    throw error;
  }
},
/**
 * Delete a task by its ID.
 *
 * @param taskId ID of the task to delete
 * @returns the backend payload (`success`, `deleted`)
 * @throws Error when the HTTP response is not OK or the request fails
 */
async deleteTask(taskId: string): Promise<{ success: boolean; deleted: boolean }> {
  // Encode the ID so characters such as "/" or "?" cannot alter the URL path
  const url = `${BACKEND_BASE_URL}/tasks/${encodeURIComponent(taskId)}`;

  try {
    const response = await fetch(url, {
      method: 'DELETE'
    });
    if (!response.ok) {
      // The error body may not be JSON; fall back to the default message
      // instead of surfacing a JSON parse error.
      const error = await response.json().catch(() => ({}));
      throw new Error(error.detail || '删除任务失败');
    }
    return await response.json();
  } catch (error) {
    console.error('删除任务失败:', error);
    throw error;
  }
},
/** /**
* 轮询任务状态直到完成 * 轮询任务状态直到完成
*/ */

View File

@@ -11,7 +11,8 @@ import {
ChevronDown, ChevronDown,
ChevronUp, ChevronUp,
Trash2, Trash2,
AlertCircle AlertCircle,
HelpCircle
} from 'lucide-react'; } from 'lucide-react';
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card'; import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
@@ -24,9 +25,9 @@ import { Skeleton } from '@/components/ui/skeleton';
type Task = { type Task = {
task_id: string; task_id: string;
status: 'pending' | 'processing' | 'success' | 'failure'; status: 'pending' | 'processing' | 'success' | 'failure' | 'unknown';
created_at: string; created_at: string;
completed_at?: string; updated_at?: string;
message?: string; message?: string;
result?: any; result?: any;
error?: string; error?: string;
@@ -38,54 +39,38 @@ const TaskHistory: React.FC = () => {
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [expandedTask, setExpandedTask] = useState<string | null>(null); const [expandedTask, setExpandedTask] = useState<string | null>(null);
// Mock data for demonstration // 获取任务历史数据
useEffect(() => { const fetchTasks = async () => {
// 模拟任务数据,实际应该从后端获取 try {
setTasks([ setLoading(true);
{ const response = await backendApi.getTasks(50, 0);
task_id: 'task-001', if (response.success && response.tasks) {
status: 'success', // 转换后端数据格式为前端格式
created_at: new Date(Date.now() - 3600000).toISOString(), const convertedTasks: Task[] = response.tasks.map((t: any) => ({
completed_at: new Date(Date.now() - 3500000).toISOString(), task_id: t.task_id,
task_type: 'document_parse', status: t.status || 'unknown',
message: '文档解析完成', created_at: t.created_at || new Date().toISOString(),
result: { updated_at: t.updated_at,
doc_id: 'doc-001', message: t.message || '',
filename: 'report_q1_2026.docx', result: t.result,
extracted_fields: ['标题', '作者', '日期', '金额'] error: t.error,
} task_type: t.task_type || 'document_parse'
}, }));
{ setTasks(convertedTasks);
task_id: 'task-002', } else {
status: 'success', setTasks([]);
created_at: new Date(Date.now() - 7200000).toISOString(),
completed_at: new Date(Date.now() - 7100000).toISOString(),
task_type: 'excel_analysis',
message: 'Excel 分析完成',
result: {
filename: 'sales_data.xlsx',
row_count: 1250,
charts_generated: 3
}
},
{
task_id: 'task-003',
status: 'processing',
created_at: new Date(Date.now() - 600000).toISOString(),
task_type: 'template_fill',
message: '正在填充表格...'
},
{
task_id: 'task-004',
status: 'failure',
created_at: new Date(Date.now() - 86400000).toISOString(),
completed_at: new Date(Date.now() - 86390000).toISOString(),
task_type: 'document_parse',
message: '解析失败',
error: '文件格式不支持或文件已损坏'
} }
]); } catch (error) {
setLoading(false); console.error('获取任务列表失败:', error);
toast.error('获取任务列表失败');
setTasks([]);
} finally {
setLoading(false);
}
};
useEffect(() => {
fetchTasks();
}, []); }, []);
const getStatusBadge = (status: string) => { const getStatusBadge = (status: string) => {
@@ -96,6 +81,8 @@ const TaskHistory: React.FC = () => {
return <Badge className="bg-destructive text-white text-[10px]"><XCircle size={12} className="mr-1" /></Badge>; return <Badge className="bg-destructive text-white text-[10px]"><XCircle size={12} className="mr-1" /></Badge>;
case 'processing': case 'processing':
return <Badge className="bg-amber-500 text-white text-[10px]"><Loader2 size={12} className="mr-1 animate-spin" /></Badge>; return <Badge className="bg-amber-500 text-white text-[10px]"><Loader2 size={12} className="mr-1 animate-spin" /></Badge>;
case 'unknown':
return <Badge className="bg-gray-500 text-white text-[10px]"><HelpCircle size={12} className="mr-1" /></Badge>;
default: default:
return <Badge className="bg-gray-500 text-white text-[10px]"><Clock size={12} className="mr-1" /></Badge>; return <Badge className="bg-gray-500 text-white text-[10px]"><Clock size={12} className="mr-1" /></Badge>;
} }
@@ -133,15 +120,22 @@ const TaskHistory: React.FC = () => {
}; };
const handleDelete = async (taskId: string) => { const handleDelete = async (taskId: string) => {
setTasks(prev => prev.filter(t => t.task_id !== taskId)); try {
toast.success('任务已删除'); await backendApi.deleteTask(taskId);
setTasks(prev => prev.filter(t => t.task_id !== taskId));
toast.success('任务已删除');
} catch (error) {
console.error('删除任务失败:', error);
toast.error('删除任务失败');
}
}; };
const stats = { const stats = {
total: tasks.length, total: tasks.length,
success: tasks.filter(t => t.status === 'success').length, success: tasks.filter(t => t.status === 'success').length,
processing: tasks.filter(t => t.status === 'processing').length, processing: tasks.filter(t => t.status === 'processing').length,
failure: tasks.filter(t => t.status === 'failure').length failure: tasks.filter(t => t.status === 'failure').length,
unknown: tasks.filter(t => t.status === 'unknown').length
}; };
return ( return (
@@ -151,7 +145,7 @@ const TaskHistory: React.FC = () => {
<h1 className="text-3xl font-extrabold tracking-tight"></h1> <h1 className="text-3xl font-extrabold tracking-tight"></h1>
<p className="text-muted-foreground"></p> <p className="text-muted-foreground"></p>
</div> </div>
<Button variant="outline" className="rounded-xl gap-2" onClick={() => window.location.reload()}> <Button variant="outline" className="rounded-xl gap-2" onClick={() => fetchTasks()}>
<RefreshCcw size={18} /> <RefreshCcw size={18} />
<span></span> <span></span>
</Button> </Button>
@@ -194,7 +188,8 @@ const TaskHistory: React.FC = () => {
"w-12 h-12 rounded-xl flex items-center justify-center shrink-0", "w-12 h-12 rounded-xl flex items-center justify-center shrink-0",
task.status === 'success' ? "bg-emerald-500/10 text-emerald-500" : task.status === 'success' ? "bg-emerald-500/10 text-emerald-500" :
task.status === 'failure' ? "bg-destructive/10 text-destructive" : task.status === 'failure' ? "bg-destructive/10 text-destructive" :
"bg-amber-500/10 text-amber-500" task.status === 'processing' ? "bg-amber-500/10 text-amber-500" :
"bg-gray-500/10 text-gray-500"
)}> )}>
{task.status === 'processing' ? ( {task.status === 'processing' ? (
<Loader2 size={24} className="animate-spin" /> <Loader2 size={24} className="animate-spin" />
@@ -212,16 +207,16 @@ const TaskHistory: React.FC = () => {
</Badge> </Badge>
</div> </div>
<p className="text-sm text-muted-foreground"> <p className="text-sm text-muted-foreground">
{task.message || '任务执行中...'} {task.message || (task.status === 'unknown' ? '无法获取状态' : '任务执行中...')}
</p> </p>
<div className="flex items-center gap-4 text-xs text-muted-foreground"> <div className="flex items-center gap-4 text-xs text-muted-foreground">
<span className="flex items-center gap-1"> <span className="flex items-center gap-1">
<Clock size={12} /> <Clock size={12} />
{format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss')} {task.created_at ? format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss') : '时间未知'}
</span> </span>
{task.completed_at && ( {task.updated_at && task.status !== 'processing' && (
<span> <span>
: {Math.round((new Date(task.completed_at).getTime() - new Date(task.created_at).getTime()) / 1000)} : {format(new Date(task.updated_at), 'HH:mm:ss')}
</span> </span>
)} )}
</div> </div>