From 858b594171310d3d31802f6920ba585883939721 Mon Sep 17 00:00:00 2001 From: KiriAky 107 Date: Fri, 10 Apr 2026 01:15:53 +0800 Subject: [PATCH] =?UTF-8?q?=20=E6=B7=BB=E5=8A=A0=E4=BB=BB=E5=8A=A1?= =?UTF-8?q?=E7=8A=B6=E6=80=81=E5=8F=8C=E5=86=99=E6=9C=BA=E5=88=B6=E5=92=8C?= =?UTF-8?q?=E5=8E=86=E5=8F=B2=E8=AE=B0=E5=BD=95=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现任务状态同时写入Redis和MongoDB的双写机制 - 添加MongoDB任务集合及CRUD操作接口 - 新增任务历史记录查询、列表展示和删除功能 - 重构任务状态更新逻辑,统一使用update_task_status函数 - 添加模板填服务中AI审核字段值的功能 - 优化前端任务历史页面显示和交互体验 --- backend/app/api/endpoints/documents.py | 121 +++++++--- backend/app/api/endpoints/tasks.py | 99 +++++++-- backend/app/api/endpoints/templates.py | 69 +++++- backend/app/core/database/mongodb.py | 125 +++++++++++ backend/app/services/template_fill_service.py | 206 +++++++++++++++--- frontend/src/db/backend-api.ts | 43 ++++ frontend/src/pages/TaskHistory.tsx | 113 +++++----- 7 files changed, 638 insertions(+), 138 deletions(-) diff --git a/backend/app/api/endpoints/documents.py b/backend/app/api/endpoints/documents.py index 848a582..4260ec6 100644 --- a/backend/app/api/endpoints/documents.py +++ b/backend/app/api/endpoints/documents.py @@ -23,6 +23,52 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/upload", tags=["文档上传"]) +# ==================== 辅助函数 ==================== + +async def update_task_status( + task_id: str, + status: str, + progress: int = 0, + message: str = "", + result: dict = None, + error: str = None +): + """ + 更新任务状态,同时写入 Redis 和 MongoDB + + Args: + task_id: 任务ID + status: 状态 + progress: 进度 + message: 消息 + result: 结果 + error: 错误信息 + """ + meta = {"progress": progress, "message": message} + if result: + meta["result"] = result + if error: + meta["error"] = error + + # 尝试写入 Redis + try: + await redis_db.set_task_status(task_id, status, meta) + except Exception as e: + logger.warning(f"Redis 任务状态更新失败: {e}") + + # 尝试写入 MongoDB(作为备用) + try: + await mongodb.update_task( + task_id=task_id, + status=status, + message=message, + result=result, + error=error + ) + except Exception as e: + logger.warning(f"MongoDB 任务状态更新失败: {e}") + + # ==================== 请求/响应模型 ==================== class UploadResponse(BaseModel): @@ -77,6 +123,17 @@ async def upload_document( task_id = str(uuid.uuid4()) try: + # 保存任务记录到 MongoDB(如果 Redis 不可用时仍能查询) + try: + await mongodb.insert_task( + task_id=task_id, + task_type="document_parse", + status="pending", + message=f"文档 {file.filename} 已提交处理" + ) + except Exception as mongo_err: + logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}") + content = await file.read() saved_path = file_service.save_uploaded_file( content, @@ -122,6 +179,17 @@ async def upload_documents( saved_paths = [] try: + # 保存任务记录到 MongoDB + try: + await mongodb.insert_task( + task_id=task_id, + task_type="batch_parse", + status="pending", + message=f"已提交 {len(files)} 个文档处理" + ) + except Exception as mongo_err: + logger.warning(f"MongoDB 保存批量任务记录失败: {mongo_err}") + for file in files: if not file.filename: continue @@ -159,9 +227,9 @@ async def process_document( """处理单个文档""" try: # 状态: 解析中 - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": 10, "message": "正在解析文档"} + progress=10, message="正在解析文档" ) # 解析文档 @@ -172,9 +240,9 @@ async def process_document( raise Exception(result.error or "解析失败") # 状态: 存储中 - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": 30, "message": "正在存储数据"} + progress=30, message="正在存储数据" ) # 存储到 MongoDB @@ -191,9 +259,9 @@ async def process_document( # 如果是 Excel,存储到 MySQL + AI生成描述 + RAG索引 if doc_type in ["xlsx", "xls"]: - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"} + progress=50, message="正在存储到MySQL并生成字段描述" ) try: @@ -215,9 +283,9 @@ async def process_document( else: # 非结构化文档 - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": 60, "message": "正在建立索引"} + progress=60, message="正在建立索引" ) # 如果文档中有表格数据,提取并存储到 MySQL + RAG @@ -238,17 +306,13 @@ async def process_document( await index_document_to_rag(doc_id, original_filename, result, doc_type) # 完成 - await redis_db.set_task_status( + await update_task_status( task_id, status="success", - meta={ - "progress": 100, - "message": "处理完成", + progress=100, message="处理完成", + result={ "doc_id": doc_id, - "result": { - "doc_id": doc_id, - "doc_type": doc_type, - "filename": original_filename - } + "doc_type": doc_type, + "filename": original_filename } ) @@ -256,18 +320,19 @@ async def process_document( except Exception as e: logger.error(f"文档处理失败: {str(e)}") - await redis_db.set_task_status( + await update_task_status( task_id, status="failure", - meta={"error": str(e)} + progress=0, message="处理失败", + error=str(e) ) async def process_documents_batch(task_id: str, files: List[dict]): """批量处理文档""" try: - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": 0, "message": "开始批量处理"} + progress=0, message="开始批量处理" ) results = [] @@ -318,21 +383,23 @@ async def process_documents_batch(task_id: str, files: List[dict]): results.append({"filename": file_info["filename"], "success": False, "error": str(e)}) progress = int((i + 1) / len(files) * 100) - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"} + progress=progress, message=f"已处理 {i+1}/{len(files)}" ) - await redis_db.set_task_status( + await update_task_status( task_id, status="success", - meta={"progress": 100, "message": "批量处理完成", "results": results} + progress=100, message="批量处理完成", + result={"results": results} ) except Exception as e: logger.error(f"批量处理失败: {str(e)}") - await redis_db.set_task_status( + await update_task_status( task_id, status="failure", - meta={"error": str(e)} + progress=0, message="批量处理失败", + error=str(e) ) diff --git a/backend/app/api/endpoints/tasks.py b/backend/app/api/endpoints/tasks.py index aeea884..1df7a44 100644 --- a/backend/app/api/endpoints/tasks.py +++ b/backend/app/api/endpoints/tasks.py @@ -1,13 +1,13 @@ """ 任务管理 API 接口 -提供异步任务状态查询 +提供异步任务状态查询和历史记录 """ from typing import Optional from fastapi import APIRouter, HTTPException -from app.core.database import redis_db +from app.core.database import redis_db, mongodb router = APIRouter(prefix="/tasks", tags=["任务管理"]) @@ -23,25 +23,94 @@ async def get_task_status(task_id: str): Returns: 任务状态信息 """ + # 优先从 Redis 获取 status = await redis_db.get_task_status(task_id) - if not status: - # Redis不可用时,假设任务已完成(文档已成功处理) - # 前端轮询时会得到这个响应 + if status: return { "task_id": task_id, - "status": "success", - "progress": 100, - "message": "任务处理完成", - "result": None, - "error": None + "status": status.get("status", "unknown"), + "progress": status.get("meta", {}).get("progress", 0), + "message": status.get("meta", {}).get("message"), + "result": status.get("meta", {}).get("result"), + "error": status.get("meta", {}).get("error") } + # Redis 不可用时,尝试从 MongoDB 获取 + mongo_task = await mongodb.get_task(task_id) + if mongo_task: + return { + "task_id": mongo_task.get("task_id"), + "status": mongo_task.get("status", "unknown"), + "progress": 100 if mongo_task.get("status") == "success" else 0, + "message": mongo_task.get("message"), + "result": mongo_task.get("result"), + "error": mongo_task.get("error") + } + + # 任务不存在或状态未知 return { "task_id": task_id, - "status": status.get("status", "unknown"), - "progress": status.get("meta", {}).get("progress", 0), - "message": status.get("meta", {}).get("message"), - "result": status.get("meta", {}).get("result"), - "error": status.get("meta", {}).get("error") + "status": "unknown", + "progress": 0, + "message": "无法获取任务状态(Redis和MongoDB均不可用)", + "result": None, + "error": None } + + +@router.get("/") +async def list_tasks(limit: int = 50, skip: int = 0): + """ + 获取任务历史列表 + + Args: + limit: 返回数量限制 + skip: 跳过数量 + + Returns: + 任务列表 + """ + try: + tasks = await mongodb.list_tasks(limit=limit, skip=skip) + return { + "success": True, + "tasks": tasks, + "count": len(tasks) + } + except Exception as e: + # MongoDB 不可用时返回空列表 + return { + "success": False, + "tasks": [], + "count": 0, + "error": str(e) + } + + +@router.delete("/{task_id}") +async def delete_task(task_id: str): + """ + 删除任务 + + Args: + task_id: 任务ID + + Returns: + 是否删除成功 + """ + try: + # 从 Redis 删除 + if redis_db._connected and redis_db.client: + key = f"task:{task_id}" + await redis_db.client.delete(key) + + # 从 MongoDB 删除 + deleted = await mongodb.delete_task(task_id) + + return { + "success": True, + "deleted": deleted + } + except Exception as e: + raise HTTPException(status_code=500, detail=f"删除任务失败: {str(e)}") diff --git a/backend/app/api/endpoints/templates.py b/backend/app/api/endpoints/templates.py index a248dde..0ef92d3 100644 --- a/backend/app/api/endpoints/templates.py +++ b/backend/app/api/endpoints/templates.py @@ -23,6 +23,44 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/templates", tags=["表格模板"]) +# ==================== 辅助函数 ==================== + +async def update_task_status( + task_id: str, + status: str, + progress: int = 0, + message: str = "", + result: dict = None, + error: str = None +): + """ + 更新任务状态,同时写入 Redis 和 MongoDB + """ + from app.core.database import redis_db + + meta = {"progress": progress, "message": message} + if result: + meta["result"] = result + if error: + meta["error"] = error + + try: + await redis_db.set_task_status(task_id, status, meta) + except Exception as e: + logger.warning(f"Redis 任务状态更新失败: {e}") + + try: + await mongodb.update_task( + task_id=task_id, + status=status, + message=message, + result=result, + error=error + ) + except Exception as e: + logger.warning(f"MongoDB 任务状态更新失败: {e}") + + # ==================== 请求/响应模型 ==================== class TemplateFieldRequest(BaseModel): @@ -244,6 +282,17 @@ async def upload_joint_template( # 3. 异步处理源文档到MongoDB task_id = str(uuid.uuid4()) if source_file_info: + # 保存任务记录到 MongoDB + try: + await mongodb.insert_task( + task_id=task_id, + task_type="source_process", + status="pending", + message=f"开始处理 {len(source_file_info)} 个源文档" + ) + except Exception as mongo_err: + logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}") + background_tasks.add_task( process_source_documents, task_id=task_id, @@ -282,12 +331,10 @@ async def upload_joint_template( async def process_source_documents(task_id: str, files: List[dict]): """异步处理源文档,存入MongoDB""" - from app.core.database import redis_db - try: - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": 0, "message": "开始处理源文档"} + progress=0, message="开始处理源文档" ) doc_ids = [] @@ -316,22 +363,24 @@ async def process_source_documents(task_id: str, files: List[dict]): logger.error(f"源文档处理异常: {file_info['filename']}, error: {str(e)}") progress = int((i + 1) / len(files) * 100) - await redis_db.set_task_status( + await update_task_status( task_id, status="processing", - meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"} + progress=progress, message=f"已处理 {i+1}/{len(files)}" ) - await redis_db.set_task_status( + await update_task_status( task_id, status="success", - meta={"progress": 100, "message": "源文档处理完成", "doc_ids": doc_ids} + progress=100, message="源文档处理完成", + result={"doc_ids": doc_ids} ) logger.info(f"所有源文档处理完成: {len(doc_ids)}个") except Exception as e: logger.error(f"源文档批量处理失败: {str(e)}") - await redis_db.set_task_status( + await update_task_status( task_id, status="failure", - meta={"error": str(e)} + progress=0, message="源文档处理失败", + error=str(e) ) diff --git a/backend/app/core/database/mongodb.py b/backend/app/core/database/mongodb.py index e8481ec..90ddb44 100644 --- a/backend/app/core/database/mongodb.py +++ b/backend/app/core/database/mongodb.py @@ -59,6 +59,11 @@ class MongoDB: """RAG索引集合 - 存储字段语义索引""" return self.db["rag_index"] + @property + def tasks(self): + """任务集合 - 存储任务历史记录""" + return self.db["tasks"] + # ==================== 文档操作 ==================== async def insert_document( @@ -242,8 +247,128 @@ class MongoDB: await self.rag_index.create_index("table_name") await self.rag_index.create_index("field_name") + # 任务集合索引 + await self.tasks.create_index("task_id", unique=True) + await self.tasks.create_index("created_at") + logger.info("MongoDB 索引创建完成") + # ==================== 任务历史操作 ==================== + + async def insert_task( + self, + task_id: str, + task_type: str, + status: str = "pending", + message: str = "", + result: Optional[Dict[str, Any]] = None, + error: Optional[str] = None, + ) -> str: + """ + 插入任务记录 + + Args: + task_id: 任务ID + task_type: 任务类型 + status: 任务状态 + message: 任务消息 + result: 任务结果 + error: 错误信息 + + Returns: + 插入文档的ID + """ + task = { + "task_id": task_id, + "task_type": task_type, + "status": status, + "message": message, + "result": result, + "error": error, + "created_at": datetime.utcnow(), + "updated_at": datetime.utcnow(), + } + result_obj = await self.tasks.insert_one(task) + return str(result_obj.inserted_id) + + async def update_task( + self, + task_id: str, + status: Optional[str] = None, + message: Optional[str] = None, + result: Optional[Dict[str, Any]] = None, + error: Optional[str] = None, + ) -> bool: + """ + 更新任务状态 + + Args: + task_id: 任务ID + status: 任务状态 + message: 任务消息 + result: 任务结果 + error: 错误信息 + + Returns: + 是否更新成功 + """ + from bson import ObjectId + + update_data = {"updated_at": datetime.utcnow()} + if status is not None: + update_data["status"] = status + if message is not None: + update_data["message"] = message + if result is not None: + update_data["result"] = result + if error is not None: + update_data["error"] = error + + update_result = await self.tasks.update_one( + {"task_id": task_id}, + {"$set": update_data} + ) + return update_result.modified_count > 0 + + async def get_task(self, task_id: str) -> Optional[Dict[str, Any]]: + """根据task_id获取任务""" + task = await self.tasks.find_one({"task_id": task_id}) + if task: + task["_id"] = str(task["_id"]) + return task + + async def list_tasks( + self, + limit: int = 50, + skip: int = 0, + ) -> List[Dict[str, Any]]: + """ + 获取任务列表 + + Args: + limit: 返回数量 + skip: 跳过数量 + + Returns: + 任务列表 + """ + cursor = self.tasks.find().sort("created_at", -1).skip(skip).limit(limit) + tasks = [] + async for task in cursor: + task["_id"] = str(task["_id"]) + # 转换 datetime 为字符串 + if task.get("created_at"): + task["created_at"] = task["created_at"].isoformat() + if task.get("updated_at"): + task["updated_at"] = task["updated_at"].isoformat() + tasks.append(task) + return tasks + + async def delete_task(self, task_id: str) -> bool: + """删除任务""" + result = await self.tasks.delete_one({"task_id": task_id}) + return result.deleted_count > 0 + # ==================== 全局单例 ==================== diff --git a/backend/app/services/template_fill_service.py b/backend/app/services/template_fill_service.py index 13db9a2..9d18529 100644 --- a/backend/app/services/template_fill_service.py +++ b/backend/app/services/template_fill_service.py @@ -181,6 +181,22 @@ class TemplateFillService: user_hint=user_hint ) + # AI审核:验证提取的值是否合理 + if result.values and result.values[0]: + logger.info(f"字段 {field.name} 进入AI审核阶段...") + verified_result = await self._verify_field_value( + field=field, + extracted_values=result.values, + source_docs=source_docs, + user_hint=user_hint + ) + if verified_result: + # 审核给出了修正结果 + result = verified_result + logger.info(f"字段 {field.name} 审核后修正值: {result.values[:3]}") + else: + logger.info(f"字段 {field.name} 审核通过,使用原提取结果") + # 存储结果 - 使用 values 数组 filled_data[field.name] = result.values if result.values else [""] fill_details.append({ @@ -533,6 +549,137 @@ class TemplateFillService: confidence=0.0 ) + async def _verify_field_value( + self, + field: TemplateField, + extracted_values: List[str], + source_docs: List[SourceDocument], + user_hint: Optional[str] = None + ) -> Optional[FillResult]: + """ + 验证并修正提取的字段值 + + Args: + field: 字段定义 + extracted_values: 已提取的值 + source_docs: 源文档列表 + user_hint: 用户提示 + + Returns: + 验证后的结果,如果验证通过返回None(使用原结果) + """ + if not extracted_values or not extracted_values[0]: + return None + + if not source_docs: + return None + + try: + # 构建验证上下文 + context_text = self._build_context_text(source_docs, field_name=field.name, max_length=15000) + + hint_text = field.hint if field.hint else f"请理解{field.name}字段的含义" + if user_hint: + hint_text = f"{user_hint}。{hint_text}" + + prompt = f"""你是一个数据质量审核专家。请审核以下提取的数据是否合理。 + +【待审核字段】 +字段名:{field.name} +字段说明:{hint_text} + +【已提取的值】 +{extracted_values[:10]} # 最多审核前10个值 + +【源文档上下文】 +{context_text[:8000]} + +【审核要求】 +1. 这些值是否符合字段的含义? +2. 值在原文中的原始含义是什么?检查是否有误解或误提取 +3. 是否存在明显错误、空值或不合理的数据? +4. 如果表格有多个列,请确认提取的是正确的列 + +请严格按照以下 JSON 格式输出(只需输出 JSON,不要其他内容): +{{ + "is_valid": true或false, + "corrected_values": ["修正后的值列表"] 或 null(如果无需修正), + "reason": "审核说明,解释判断理由", + "original_meaning": "值在原文中的原始含义描述" +}} +""" + + messages = [ + {"role": "system", "content": "你是一个严格的数据质量审核专家。请仔细核对原文和提取的值是否匹配。"}, + {"role": "user", "content": prompt} + ] + + response = await self.llm.chat( + messages=messages, + temperature=0.2, + max_tokens=3000 + ) + + content = self.llm.extract_message_content(response) + logger.info(f"字段 {field.name} 审核返回: {content[:300]}") + + # 解析 JSON + import json + import re + + cleaned = content.strip() + cleaned = re.sub(r'^```json\s*', '', cleaned, flags=re.MULTILINE) + cleaned = re.sub(r'^```\s*', '', cleaned, flags=re.MULTILINE) + cleaned = cleaned.strip() + + json_start = -1 + for i, c in enumerate(cleaned): + if c == '{': + json_start = i + break + + if json_start == -1: + logger.warning(f"字段 {field.name} 审核:无法找到 JSON") + return None + + json_text = cleaned[json_start:] + result = json.loads(json_text) + + is_valid = result.get("is_valid", True) + corrected_values = result.get("corrected_values") + reason = result.get("reason", "") + original_meaning = result.get("original_meaning", "") + + logger.info(f"字段 {field.name} 审核结果: is_valid={is_valid}, reason={reason[:100]}") + + if not is_valid and corrected_values: + # 值有问题且有修正建议,使用修正后的值 + logger.info(f"字段 {field.name} 使用修正后的值: {corrected_values[:5]}") + return FillResult( + field=field.name, + values=corrected_values, + value=corrected_values[0] if corrected_values else "", + source=f"AI审核修正: {reason[:100]}", + confidence=0.7 + ) + elif not is_valid and original_meaning: + # 值有问题但无修正,记录原始含义供用户参考 + logger.info(f"字段 {field.name} 审核发现问题: {original_meaning}") + return FillResult( + field=field.name, + values=extracted_values, + value=extracted_values[0] if extracted_values else "", + source=f"AI审核疑问: {original_meaning[:100]}", + confidence=0.5 + ) + + # 验证通过,返回 None 表示使用原结果 + return None + + except Exception as e: + logger.error(f"字段 {field.name} 审核失败: {str(e)}") + return None + def _build_context_text(self, source_docs: List[SourceDocument], field_name: str = None, max_length: int = 8000) -> str: """ 构建上下文文本 @@ -1580,30 +1727,35 @@ class TemplateFillService: import pandas as pd # 读取 Excel 内容检查是否为空 + content_sample = "" if file_type in ["xlsx", "xls"]: df = pd.read_excel(file_path, header=None) if df.shape[0] == 0 or df.shape[1] == 0: logger.info("Excel 表格为空") - # 生成默认字段 - return [TemplateField( - cell=self._column_to_cell(i), - name=f"字段{i+1}", - field_type="text", - required=False, - hint="请填写此字段" - ) for i in range(5)] - - # 表格有数据但没有表头 - if df.shape[1] > 0: - # 读取第一行作为参考,看是否为空 - first_row = df.iloc[0].tolist() if len(df) > 0 else [] - if not any(pd.notna(v) and str(v).strip() != '' for v in first_row): - # 第一行为空,AI 生成表头 - content_sample = df.iloc[:10].to_string() if len(df) >= 10 else df.to_string() - else: - content_sample = df.to_string() + # 即使 Excel 为空,如果有源文档,仍然尝试使用 AI 生成表头 + if not source_contents: + logger.info("Excel 为空且没有源文档,使用默认字段名") + return [TemplateField( + cell=self._column_to_cell(i), + name=f"字段{i+1}", + field_type="text", + required=False, + hint="请填写此字段" + ) for i in range(5)] + # 有源文档,继续调用 AI 生成表头 + logger.info("Excel 为空但有源文档,使用源文档内容生成表头...") else: - content_sample = "" + # 表格有数据但没有表头 + if df.shape[1] > 0: + # 读取第一行作为参考,看是否为空 + first_row = df.iloc[0].tolist() if len(df) > 0 else [] + if not any(pd.notna(v) and str(v).strip() != '' for v in first_row): + # 第一行为空,AI 生成表头 + content_sample = df.iloc[:10].to_string() if len(df) >= 10 else df.to_string() + else: + content_sample = df.to_string() + else: + content_sample = "" # 调用 AI 生成表头 # 根据源文档内容生成表头 @@ -1641,21 +1793,21 @@ class TemplateFillService: prompt = f"""你是一个专业的表格设计助手。请根据源文档内容生成合适的表格表头字段。 -任务:用户有一些源文档(可能包含表格数据、统计信息等),需要填写到表格中。请分析源文档内容,生成适合的表头字段。 +任务:用户有一些源文档(包含表格数据),需要填写到空白表格模板中。源文档中的表格如下: {source_info} -请生成5-10个简洁的表头字段名,这些字段应该: -1. 简洁明了,易于理解 -2. 适合作为表格列标题 -3. 直接对应源文档中的关键数据项 -4. 字段之间有明显的区分度 +【重要要求】 +1. 请仔细阅读上面的源文档表格,找出所有不同的列名(如"产品名称"、"1995年产量"、"按资产总额计算(%)"等) +2. 直接使用这些实际的列名作为表头字段名,不要生成新的或同义词 +3. 如果一个源文档有多个表格,请为每个表格选择合适的列名 +4. 生成3-8个表头字段,优先选择数据量大的表格的列 请严格按照以下 JSON 格式输出(只需输出 JSON,不要其他内容): {{ "fields": [ - {{"name": "字段名1", "hint": "字段说明提示1"}}, - {{"name": "字段名2", "hint": "字段说明提示2"}} + {{"name": "实际列名1", "hint": "对该列的说明"}}, + {{"name": "实际列名2", "hint": "对该列的说明"}} ] }} """ diff --git a/frontend/src/db/backend-api.ts b/frontend/src/db/backend-api.ts index d26e1a8..59cc0ea 100644 --- a/frontend/src/db/backend-api.ts +++ b/frontend/src/db/backend-api.ts @@ -400,6 +400,49 @@ export const backendApi = { } }, + /** + * 获取任务历史列表 + */ + async getTasks( + limit: number = 50, + skip: number = 0 + ): Promise<{ success: boolean; tasks: any[]; count: number }> { + const url = `${BACKEND_BASE_URL}/tasks?limit=${limit}&skip=${skip}`; + + try { + const response = await fetch(url); + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail || '获取任务列表失败'); + } + return await response.json(); + } catch (error) { + console.error('获取任务列表失败:', error); + throw error; + } + }, + + /** + * 删除任务 + */ + async deleteTask(taskId: string): Promise<{ success: boolean; deleted: boolean }> { + const url = `${BACKEND_BASE_URL}/tasks/${taskId}`; + + try { + const response = await fetch(url, { + method: 'DELETE' + }); + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail || '删除任务失败'); + } + return await response.json(); + } catch (error) { + console.error('删除任务失败:', error); + throw error; + } + }, + /** * 轮询任务状态直到完成 */ diff --git a/frontend/src/pages/TaskHistory.tsx b/frontend/src/pages/TaskHistory.tsx index 91b162b..2235c51 100644 --- a/frontend/src/pages/TaskHistory.tsx +++ b/frontend/src/pages/TaskHistory.tsx @@ -11,7 +11,8 @@ import { ChevronDown, ChevronUp, Trash2, - AlertCircle + AlertCircle, + HelpCircle } from 'lucide-react'; import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card'; import { Button } from '@/components/ui/button'; @@ -24,9 +25,9 @@ import { Skeleton } from '@/components/ui/skeleton'; type Task = { task_id: string; - status: 'pending' | 'processing' | 'success' | 'failure'; + status: 'pending' | 'processing' | 'success' | 'failure' | 'unknown'; created_at: string; - completed_at?: string; + updated_at?: string; message?: string; result?: any; error?: string; @@ -38,54 +39,38 @@ const TaskHistory: React.FC = () => { const [loading, setLoading] = useState(true); const [expandedTask, setExpandedTask] = useState(null); - // Mock data for demonstration - useEffect(() => { - // 模拟任务数据,实际应该从后端获取 - setTasks([ - { - task_id: 'task-001', - status: 'success', - created_at: new Date(Date.now() - 3600000).toISOString(), - completed_at: new Date(Date.now() - 3500000).toISOString(), - task_type: 'document_parse', - message: '文档解析完成', - result: { - doc_id: 'doc-001', - filename: 'report_q1_2026.docx', - extracted_fields: ['标题', '作者', '日期', '金额'] - } - }, - { - task_id: 'task-002', - status: 'success', - created_at: new Date(Date.now() - 7200000).toISOString(), - completed_at: new Date(Date.now() - 7100000).toISOString(), - task_type: 'excel_analysis', - message: 'Excel 分析完成', - result: { - filename: 'sales_data.xlsx', - row_count: 1250, - charts_generated: 3 - } - }, - { - task_id: 'task-003', - status: 'processing', - created_at: new Date(Date.now() - 600000).toISOString(), - task_type: 'template_fill', - message: '正在填充表格...' - }, - { - task_id: 'task-004', - status: 'failure', - created_at: new Date(Date.now() - 86400000).toISOString(), - completed_at: new Date(Date.now() - 86390000).toISOString(), - task_type: 'document_parse', - message: '解析失败', - error: '文件格式不支持或文件已损坏' + // 获取任务历史数据 + const fetchTasks = async () => { + try { + setLoading(true); + const response = await backendApi.getTasks(50, 0); + if (response.success && response.tasks) { + // 转换后端数据格式为前端格式 + const convertedTasks: Task[] = response.tasks.map((t: any) => ({ + task_id: t.task_id, + status: t.status || 'unknown', + created_at: t.created_at || new Date().toISOString(), + updated_at: t.updated_at, + message: t.message || '', + result: t.result, + error: t.error, + task_type: t.task_type || 'document_parse' + })); + setTasks(convertedTasks); + } else { + setTasks([]); } - ]); - setLoading(false); + } catch (error) { + console.error('获取任务列表失败:', error); + toast.error('获取任务列表失败'); + setTasks([]); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + fetchTasks(); }, []); const getStatusBadge = (status: string) => { @@ -96,6 +81,8 @@ const TaskHistory: React.FC = () => { return 失败; case 'processing': return 处理中; + case 'unknown': + return 未知; default: return 等待; } @@ -133,15 +120,22 @@ const TaskHistory: React.FC = () => { }; const handleDelete = async (taskId: string) => { - setTasks(prev => prev.filter(t => t.task_id !== taskId)); - toast.success('任务已删除'); + try { + await backendApi.deleteTask(taskId); + setTasks(prev => prev.filter(t => t.task_id !== taskId)); + toast.success('任务已删除'); + } catch (error) { + console.error('删除任务失败:', error); + toast.error('删除任务失败'); + } }; const stats = { total: tasks.length, success: tasks.filter(t => t.status === 'success').length, processing: tasks.filter(t => t.status === 'processing').length, - failure: tasks.filter(t => t.status === 'failure').length + failure: tasks.filter(t => t.status === 'failure').length, + unknown: tasks.filter(t => t.status === 'unknown').length }; return ( @@ -151,7 +145,7 @@ const TaskHistory: React.FC = () => {

任务历史

查看和管理您所有的文档处理任务记录

- @@ -194,7 +188,8 @@ const TaskHistory: React.FC = () => { "w-12 h-12 rounded-xl flex items-center justify-center shrink-0", task.status === 'success' ? "bg-emerald-500/10 text-emerald-500" : task.status === 'failure' ? "bg-destructive/10 text-destructive" : - "bg-amber-500/10 text-amber-500" + task.status === 'processing' ? "bg-amber-500/10 text-amber-500" : + "bg-gray-500/10 text-gray-500" )}> {task.status === 'processing' ? ( @@ -212,16 +207,16 @@ const TaskHistory: React.FC = () => {

- {task.message || '任务执行中...'} + {task.message || (task.status === 'unknown' ? '无法获取状态' : '任务执行中...')}

- {format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss')} + {task.created_at ? format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss') : '时间未知'} - {task.completed_at && ( + {task.updated_at && task.status !== 'processing' && ( - 耗时: {Math.round((new Date(task.completed_at).getTime() - new Date(task.created_at).getTime()) / 1000)} 秒 + 更新: {format(new Date(task.updated_at), 'HH:mm:ss')} )}