Compare commits
24 Commits
6f8976cf71
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 5fca4eb094 | |||
| 0dbf74db9d | |||
| 858b594171 | |||
| ed0f51f2a4 | |||
| ecc0c79475 | |||
| 6befc510d8 | |||
| 8f66c235fa | |||
| 886d5ae0cc | |||
| 6752c5c231 | |||
| 610d475ce0 | |||
| 496b96508d | |||
| 07ebdc09bc | |||
| 7f67fa89de | |||
| c1886fb68f | |||
| 78417c898a | |||
| d5df5b8283 | |||
| 718f864926 | |||
| e5711b3f05 | |||
| df35105d16 | |||
| 2c2ab56d2d | |||
| faff1a5977 | |||
| b2ebd3e12d | |||
| 4eda6cf758 | |||
| 38e41c6eff |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -36,4 +36,3 @@
|
|||||||
|
|
||||||
**/__pycache__/*
|
**/__pycache__/*
|
||||||
**.pyc
|
**.pyc
|
||||||
**/logs/
|
|
||||||
|
|||||||
@@ -215,9 +215,12 @@ async def analyze_markdown(
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# 清理临时文件
|
# 清理临时文件,确保在所有情况下都能清理
|
||||||
if os.path.exists(tmp_path):
|
try:
|
||||||
os.unlink(tmp_path)
|
if tmp_path and os.path.exists(tmp_path):
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except Exception as cleanup_error:
|
||||||
|
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
@@ -279,8 +282,12 @@ async def analyze_markdown_stream(
|
|||||||
)
|
)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(tmp_path):
|
# 清理临时文件,确保在所有情况下都能清理
|
||||||
os.unlink(tmp_path)
|
try:
|
||||||
|
if tmp_path and os.path.exists(tmp_path):
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except Exception as cleanup_error:
|
||||||
|
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
@@ -289,7 +296,7 @@ async def analyze_markdown_stream(
|
|||||||
raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"流式分析失败: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
@router.get("/analyze/md/outline")
|
@router.post("/analyze/md/outline")
|
||||||
async def get_markdown_outline(
|
async def get_markdown_outline(
|
||||||
file: UploadFile = File(...)
|
file: UploadFile = File(...)
|
||||||
):
|
):
|
||||||
@@ -323,8 +330,12 @@ async def get_markdown_outline(
|
|||||||
result = await markdown_ai_service.extract_outline(tmp_path)
|
result = await markdown_ai_service.extract_outline(tmp_path)
|
||||||
return result
|
return result
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(tmp_path):
|
# 清理临时文件,确保在所有情况下都能清理
|
||||||
os.unlink(tmp_path)
|
try:
|
||||||
|
if tmp_path and os.path.exists(tmp_path):
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except Exception as cleanup_error:
|
||||||
|
logger.warning(f"临时文件清理失败: {tmp_path}, error: {cleanup_error}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"获取 Markdown 大纲失败: {str(e)}")
|
logger.error(f"获取 Markdown 大纲失败: {str(e)}")
|
||||||
|
|||||||
@@ -23,6 +23,52 @@ logger = logging.getLogger(__name__)
|
|||||||
router = APIRouter(prefix="/upload", tags=["文档上传"])
|
router = APIRouter(prefix="/upload", tags=["文档上传"])
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== 辅助函数 ====================
|
||||||
|
|
||||||
|
async def update_task_status(
|
||||||
|
task_id: str,
|
||||||
|
status: str,
|
||||||
|
progress: int = 0,
|
||||||
|
message: str = "",
|
||||||
|
result: dict = None,
|
||||||
|
error: str = None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
更新任务状态,同时写入 Redis 和 MongoDB
|
||||||
|
|
||||||
|
Args:
|
||||||
|
task_id: 任务ID
|
||||||
|
status: 状态
|
||||||
|
progress: 进度
|
||||||
|
message: 消息
|
||||||
|
result: 结果
|
||||||
|
error: 错误信息
|
||||||
|
"""
|
||||||
|
meta = {"progress": progress, "message": message}
|
||||||
|
if result:
|
||||||
|
meta["result"] = result
|
||||||
|
if error:
|
||||||
|
meta["error"] = error
|
||||||
|
|
||||||
|
# 尝试写入 Redis
|
||||||
|
try:
|
||||||
|
await redis_db.set_task_status(task_id, status, meta)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Redis 任务状态更新失败: {e}")
|
||||||
|
|
||||||
|
# 尝试写入 MongoDB(作为备用)
|
||||||
|
try:
|
||||||
|
await mongodb.update_task(
|
||||||
|
task_id=task_id,
|
||||||
|
status=status,
|
||||||
|
message=message,
|
||||||
|
result=result,
|
||||||
|
error=error
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"MongoDB 任务状态更新失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
# ==================== 请求/响应模型 ====================
|
# ==================== 请求/响应模型 ====================
|
||||||
|
|
||||||
class UploadResponse(BaseModel):
|
class UploadResponse(BaseModel):
|
||||||
@@ -77,6 +123,17 @@ async def upload_document(
|
|||||||
task_id = str(uuid.uuid4())
|
task_id = str(uuid.uuid4())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# 保存任务记录到 MongoDB(如果 Redis 不可用时仍能查询)
|
||||||
|
try:
|
||||||
|
await mongodb.insert_task(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="document_parse",
|
||||||
|
status="pending",
|
||||||
|
message=f"文档 {file.filename} 已提交处理"
|
||||||
|
)
|
||||||
|
except Exception as mongo_err:
|
||||||
|
logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}")
|
||||||
|
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
saved_path = file_service.save_uploaded_file(
|
saved_path = file_service.save_uploaded_file(
|
||||||
content,
|
content,
|
||||||
@@ -122,6 +179,17 @@ async def upload_documents(
|
|||||||
saved_paths = []
|
saved_paths = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# 保存任务记录到 MongoDB
|
||||||
|
try:
|
||||||
|
await mongodb.insert_task(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="batch_parse",
|
||||||
|
status="pending",
|
||||||
|
message=f"已提交 {len(files)} 个文档处理"
|
||||||
|
)
|
||||||
|
except Exception as mongo_err:
|
||||||
|
logger.warning(f"MongoDB 保存批量任务记录失败: {mongo_err}")
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
if not file.filename:
|
if not file.filename:
|
||||||
continue
|
continue
|
||||||
@@ -159,9 +227,9 @@ async def process_document(
|
|||||||
"""处理单个文档"""
|
"""处理单个文档"""
|
||||||
try:
|
try:
|
||||||
# 状态: 解析中
|
# 状态: 解析中
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="processing",
|
task_id, status="processing",
|
||||||
meta={"progress": 10, "message": "正在解析文档"}
|
progress=10, message="正在解析文档"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 解析文档
|
# 解析文档
|
||||||
@@ -172,9 +240,9 @@ async def process_document(
|
|||||||
raise Exception(result.error or "解析失败")
|
raise Exception(result.error or "解析失败")
|
||||||
|
|
||||||
# 状态: 存储中
|
# 状态: 存储中
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="processing",
|
task_id, status="processing",
|
||||||
meta={"progress": 30, "message": "正在存储数据"}
|
progress=30, message="正在存储数据"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 存储到 MongoDB
|
# 存储到 MongoDB
|
||||||
@@ -191,9 +259,9 @@ async def process_document(
|
|||||||
|
|
||||||
# 如果是 Excel,存储到 MySQL + AI生成描述 + RAG索引
|
# 如果是 Excel,存储到 MySQL + AI生成描述 + RAG索引
|
||||||
if doc_type in ["xlsx", "xls"]:
|
if doc_type in ["xlsx", "xls"]:
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="processing",
|
task_id, status="processing",
|
||||||
meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"}
|
progress=50, message="正在存储到MySQL并生成字段描述"
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -215,9 +283,9 @@ async def process_document(
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
# 非结构化文档
|
# 非结构化文档
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="processing",
|
task_id, status="processing",
|
||||||
meta={"progress": 60, "message": "正在建立索引"}
|
progress=60, message="正在建立索引"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 如果文档中有表格数据,提取并存储到 MySQL + RAG
|
# 如果文档中有表格数据,提取并存储到 MySQL + RAG
|
||||||
@@ -238,17 +306,13 @@ async def process_document(
|
|||||||
await index_document_to_rag(doc_id, original_filename, result, doc_type)
|
await index_document_to_rag(doc_id, original_filename, result, doc_type)
|
||||||
|
|
||||||
# 完成
|
# 完成
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="success",
|
task_id, status="success",
|
||||||
meta={
|
progress=100, message="处理完成",
|
||||||
"progress": 100,
|
result={
|
||||||
"message": "处理完成",
|
|
||||||
"doc_id": doc_id,
|
"doc_id": doc_id,
|
||||||
"result": {
|
"doc_type": doc_type,
|
||||||
"doc_id": doc_id,
|
"filename": original_filename
|
||||||
"doc_type": doc_type,
|
|
||||||
"filename": original_filename
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -256,18 +320,19 @@ async def process_document(
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"文档处理失败: {str(e)}")
|
logger.error(f"文档处理失败: {str(e)}")
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="failure",
|
task_id, status="failure",
|
||||||
meta={"error": str(e)}
|
progress=0, message="处理失败",
|
||||||
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def process_documents_batch(task_id: str, files: List[dict]):
|
async def process_documents_batch(task_id: str, files: List[dict]):
|
||||||
"""批量处理文档"""
|
"""批量处理文档"""
|
||||||
try:
|
try:
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="processing",
|
task_id, status="processing",
|
||||||
meta={"progress": 0, "message": "开始批量处理"}
|
progress=0, message="开始批量处理"
|
||||||
)
|
)
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
@@ -318,21 +383,23 @@ async def process_documents_batch(task_id: str, files: List[dict]):
|
|||||||
results.append({"filename": file_info["filename"], "success": False, "error": str(e)})
|
results.append({"filename": file_info["filename"], "success": False, "error": str(e)})
|
||||||
|
|
||||||
progress = int((i + 1) / len(files) * 100)
|
progress = int((i + 1) / len(files) * 100)
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="processing",
|
task_id, status="processing",
|
||||||
meta={"progress": progress, "message": f"已处理 {i+1}/{len(files)}"}
|
progress=progress, message=f"已处理 {i+1}/{len(files)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="success",
|
task_id, status="success",
|
||||||
meta={"progress": 100, "message": "批量处理完成", "results": results}
|
progress=100, message="批量处理完成",
|
||||||
|
result={"results": results}
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"批量处理失败: {str(e)}")
|
logger.error(f"批量处理失败: {str(e)}")
|
||||||
await redis_db.set_task_status(
|
await update_task_status(
|
||||||
task_id, status="failure",
|
task_id, status="failure",
|
||||||
meta={"error": str(e)}
|
progress=0, message="批量处理失败",
|
||||||
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -19,26 +19,43 @@ async def health_check() -> Dict[str, Any]:
|
|||||||
返回各数据库连接状态和应用信息
|
返回各数据库连接状态和应用信息
|
||||||
"""
|
"""
|
||||||
# 检查各数据库连接状态
|
# 检查各数据库连接状态
|
||||||
mysql_status = "connected"
|
mysql_status = "unknown"
|
||||||
mongodb_status = "connected"
|
mongodb_status = "unknown"
|
||||||
redis_status = "connected"
|
redis_status = "unknown"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if mysql_db.async_engine is None:
|
if mysql_db.async_engine is None:
|
||||||
mysql_status = "disconnected"
|
mysql_status = "disconnected"
|
||||||
except Exception:
|
else:
|
||||||
|
# 实际执行一次查询验证连接
|
||||||
|
from sqlalchemy import text
|
||||||
|
async with mysql_db.async_engine.connect() as conn:
|
||||||
|
await conn.execute(text("SELECT 1"))
|
||||||
|
mysql_status = "connected"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"MySQL 健康检查失败: {e}")
|
||||||
mysql_status = "error"
|
mysql_status = "error"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if mongodb.client is None:
|
if mongodb.client is None:
|
||||||
mongodb_status = "disconnected"
|
mongodb_status = "disconnected"
|
||||||
except Exception:
|
else:
|
||||||
|
# 实际 ping 验证
|
||||||
|
await mongodb.client.admin.command('ping')
|
||||||
|
mongodb_status = "connected"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"MongoDB 健康检查失败: {e}")
|
||||||
mongodb_status = "error"
|
mongodb_status = "error"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not redis_db.is_connected:
|
if not redis_db.is_connected or redis_db.client is None:
|
||||||
redis_status = "disconnected"
|
redis_status = "disconnected"
|
||||||
except Exception:
|
else:
|
||||||
|
# 实际执行 ping 验证
|
||||||
|
await redis_db.client.ping()
|
||||||
|
redis_status = "connected"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Redis 健康检查失败: {e}")
|
||||||
redis_status = "error"
|
redis_status = "error"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
提供文档列表、详情查询和删除功能
|
提供文档列表、详情查询和删除功能
|
||||||
"""
|
"""
|
||||||
|
import logging
|
||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Query
|
from fastapi import APIRouter, HTTPException, Query
|
||||||
@@ -10,6 +11,8 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from app.core.database import mongodb
|
from app.core.database import mongodb
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter(prefix="/documents", tags=["文档库"])
|
router = APIRouter(prefix="/documents", tags=["文档库"])
|
||||||
|
|
||||||
|
|
||||||
@@ -26,7 +29,8 @@ class DocumentItem(BaseModel):
|
|||||||
@router.get("")
|
@router.get("")
|
||||||
async def get_documents(
|
async def get_documents(
|
||||||
doc_type: Optional[str] = Query(None, description="文档类型过滤"),
|
doc_type: Optional[str] = Query(None, description="文档类型过滤"),
|
||||||
limit: int = Query(50, ge=1, le=100, description="返回数量")
|
limit: int = Query(20, ge=1, le=100, description="返回数量"),
|
||||||
|
skip: int = Query(0, ge=0, description="跳过数量")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
获取文档列表
|
获取文档列表
|
||||||
@@ -40,11 +44,25 @@ async def get_documents(
|
|||||||
if doc_type:
|
if doc_type:
|
||||||
query["doc_type"] = doc_type
|
query["doc_type"] = doc_type
|
||||||
|
|
||||||
# 查询文档
|
logger.info(f"开始查询文档列表, query: {query}, limit: {limit}")
|
||||||
cursor = mongodb.documents.find(query).sort("created_at", -1).limit(limit)
|
|
||||||
|
# 使用 batch_size 和 max_time_ms 来控制查询
|
||||||
|
cursor = mongodb.documents.find(
|
||||||
|
query,
|
||||||
|
{"content": 0} # 不返回 content 字段,减少数据传输
|
||||||
|
).sort("created_at", -1).skip(skip).limit(limit)
|
||||||
|
|
||||||
|
# 设置 10 秒超时
|
||||||
|
cursor.max_time_ms(10000)
|
||||||
|
|
||||||
|
logger.info("Cursor created with 10s timeout, executing...")
|
||||||
|
|
||||||
|
# 使用 batch_size 逐批获取
|
||||||
|
documents_raw = await cursor.to_list(length=limit)
|
||||||
|
logger.info(f"查询到原始文档数: {len(documents_raw)}")
|
||||||
|
|
||||||
documents = []
|
documents = []
|
||||||
async for doc in cursor:
|
for doc in documents_raw:
|
||||||
documents.append({
|
documents.append({
|
||||||
"doc_id": str(doc["_id"]),
|
"doc_id": str(doc["_id"]),
|
||||||
"filename": doc.get("metadata", {}).get("filename", ""),
|
"filename": doc.get("metadata", {}).get("filename", ""),
|
||||||
@@ -55,10 +73,12 @@ async def get_documents(
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"row_count": doc.get("metadata", {}).get("row_count"),
|
"row_count": doc.get("metadata", {}).get("row_count"),
|
||||||
"column_count": doc.get("metadata", {}).get("column_count"),
|
"column_count": doc.get("metadata", {}).get("column_count"),
|
||||||
"columns": doc.get("metadata", {}).get("columns", [])[:10] # 只返回前10列
|
"columns": doc.get("metadata", {}).get("columns", [])[:10]
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
logger.info(f"文档列表处理完成: {len(documents)} 个文档")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"documents": documents,
|
"documents": documents,
|
||||||
@@ -66,6 +86,17 @@ async def get_documents(
|
|||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
err_str = str(e)
|
||||||
|
# 如果是超时错误,返回空列表而不是报错
|
||||||
|
if "timeout" in err_str.lower() or "time" in err_str.lower():
|
||||||
|
logger.warning(f"文档查询超时,返回空列表: {err_str}")
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"documents": [],
|
||||||
|
"total": 0,
|
||||||
|
"warning": "查询超时,请稍后重试"
|
||||||
|
}
|
||||||
|
logger.error(f"获取文档列表失败: {str(e)}", exc_info=True)
|
||||||
raise HTTPException(status_code=500, detail=f"获取文档列表失败: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"获取文档列表失败: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
"""
|
"""
|
||||||
任务管理 API 接口
|
任务管理 API 接口
|
||||||
|
|
||||||
提供异步任务状态查询
|
提供异步任务状态查询和历史记录
|
||||||
"""
|
"""
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
|
|
||||||
from app.core.database import redis_db
|
from app.core.database import redis_db, mongodb
|
||||||
|
|
||||||
router = APIRouter(prefix="/tasks", tags=["任务管理"])
|
router = APIRouter(prefix="/tasks", tags=["任务管理"])
|
||||||
|
|
||||||
@@ -23,25 +23,94 @@ async def get_task_status(task_id: str):
|
|||||||
Returns:
|
Returns:
|
||||||
任务状态信息
|
任务状态信息
|
||||||
"""
|
"""
|
||||||
|
# 优先从 Redis 获取
|
||||||
status = await redis_db.get_task_status(task_id)
|
status = await redis_db.get_task_status(task_id)
|
||||||
|
|
||||||
if not status:
|
if status:
|
||||||
# Redis不可用时,假设任务已完成(文档已成功处理)
|
|
||||||
# 前端轮询时会得到这个响应
|
|
||||||
return {
|
return {
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"status": "success",
|
"status": status.get("status", "unknown"),
|
||||||
"progress": 100,
|
"progress": status.get("meta", {}).get("progress", 0),
|
||||||
"message": "任务处理完成",
|
"message": status.get("meta", {}).get("message"),
|
||||||
"result": None,
|
"result": status.get("meta", {}).get("result"),
|
||||||
"error": None
|
"error": status.get("meta", {}).get("error")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Redis 不可用时,尝试从 MongoDB 获取
|
||||||
|
mongo_task = await mongodb.get_task(task_id)
|
||||||
|
if mongo_task:
|
||||||
|
return {
|
||||||
|
"task_id": mongo_task.get("task_id"),
|
||||||
|
"status": mongo_task.get("status", "unknown"),
|
||||||
|
"progress": 100 if mongo_task.get("status") == "success" else 0,
|
||||||
|
"message": mongo_task.get("message"),
|
||||||
|
"result": mongo_task.get("result"),
|
||||||
|
"error": mongo_task.get("error")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 任务不存在或状态未知
|
||||||
return {
|
return {
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"status": status.get("status", "unknown"),
|
"status": "unknown",
|
||||||
"progress": status.get("meta", {}).get("progress", 0),
|
"progress": 0,
|
||||||
"message": status.get("meta", {}).get("message"),
|
"message": "无法获取任务状态(Redis和MongoDB均不可用)",
|
||||||
"result": status.get("meta", {}).get("result"),
|
"result": None,
|
||||||
"error": status.get("meta", {}).get("error")
|
"error": None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/")
|
||||||
|
async def list_tasks(limit: int = 50, skip: int = 0):
|
||||||
|
"""
|
||||||
|
获取任务历史列表
|
||||||
|
|
||||||
|
Args:
|
||||||
|
limit: 返回数量限制
|
||||||
|
skip: 跳过数量
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
任务列表
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
tasks = await mongodb.list_tasks(limit=limit, skip=skip)
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"tasks": tasks,
|
||||||
|
"count": len(tasks)
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
# MongoDB 不可用时返回空列表
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"tasks": [],
|
||||||
|
"count": 0,
|
||||||
|
"error": str(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/{task_id}")
|
||||||
|
async def delete_task(task_id: str):
|
||||||
|
"""
|
||||||
|
删除任务
|
||||||
|
|
||||||
|
Args:
|
||||||
|
task_id: 任务ID
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
是否删除成功
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 从 Redis 删除
|
||||||
|
if redis_db._connected and redis_db.client:
|
||||||
|
key = f"task:{task_id}"
|
||||||
|
await redis_db.client.delete(key)
|
||||||
|
|
||||||
|
# 从 MongoDB 删除
|
||||||
|
deleted = await mongodb.delete_task(task_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"deleted": deleted
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"删除任务失败: {str(e)}")
|
||||||
|
|||||||
@@ -5,21 +5,62 @@
|
|||||||
"""
|
"""
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
import uuid
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
|
from fastapi import APIRouter, File, HTTPException, Query, UploadFile, BackgroundTasks
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from app.services.template_fill_service import template_fill_service, TemplateField
|
from app.services.template_fill_service import template_fill_service, TemplateField
|
||||||
from app.services.excel_storage_service import excel_storage_service
|
from app.services.file_service import file_service
|
||||||
|
from app.core.database import mongodb
|
||||||
|
from app.core.document_parser import ParserFactory
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter(prefix="/templates", tags=["表格模板"])
|
router = APIRouter(prefix="/templates", tags=["表格模板"])
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== 辅助函数 ====================
|
||||||
|
|
||||||
|
async def update_task_status(
|
||||||
|
task_id: str,
|
||||||
|
status: str,
|
||||||
|
progress: int = 0,
|
||||||
|
message: str = "",
|
||||||
|
result: dict = None,
|
||||||
|
error: str = None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
更新任务状态,同时写入 Redis 和 MongoDB
|
||||||
|
"""
|
||||||
|
from app.core.database import redis_db
|
||||||
|
|
||||||
|
meta = {"progress": progress, "message": message}
|
||||||
|
if result:
|
||||||
|
meta["result"] = result
|
||||||
|
if error:
|
||||||
|
meta["error"] = error
|
||||||
|
|
||||||
|
try:
|
||||||
|
await redis_db.set_task_status(task_id, status, meta)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Redis 任务状态更新失败: {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
await mongodb.update_task(
|
||||||
|
task_id=task_id,
|
||||||
|
status=status,
|
||||||
|
message=message,
|
||||||
|
result=result,
|
||||||
|
error=error
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"MongoDB 任务状态更新失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
# ==================== 请求/响应模型 ====================
|
# ==================== 请求/响应模型 ====================
|
||||||
|
|
||||||
class TemplateFieldRequest(BaseModel):
|
class TemplateFieldRequest(BaseModel):
|
||||||
@@ -28,14 +69,17 @@ class TemplateFieldRequest(BaseModel):
|
|||||||
name: str
|
name: str
|
||||||
field_type: str = "text"
|
field_type: str = "text"
|
||||||
required: bool = True
|
required: bool = True
|
||||||
|
hint: str = ""
|
||||||
|
|
||||||
|
|
||||||
class FillRequest(BaseModel):
|
class FillRequest(BaseModel):
|
||||||
"""填写请求"""
|
"""填写请求"""
|
||||||
template_id: str
|
template_id: str
|
||||||
template_fields: List[TemplateFieldRequest]
|
template_fields: List[TemplateFieldRequest]
|
||||||
source_doc_ids: Optional[List[str]] = None
|
source_doc_ids: Optional[List[str]] = None # MongoDB 文档 ID 列表
|
||||||
|
source_file_paths: Optional[List[str]] = None # 源文档文件路径列表
|
||||||
user_hint: Optional[str] = None
|
user_hint: Optional[str] = None
|
||||||
|
task_id: Optional[str] = None # 可选的任务ID,用于任务历史跟踪
|
||||||
|
|
||||||
|
|
||||||
class ExportRequest(BaseModel):
|
class ExportRequest(BaseModel):
|
||||||
@@ -71,7 +115,6 @@ async def upload_template(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# 保存文件
|
# 保存文件
|
||||||
from app.services.file_service import file_service
|
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
saved_path = file_service.save_uploaded_file(
|
saved_path = file_service.save_uploaded_file(
|
||||||
content,
|
content,
|
||||||
@@ -87,7 +130,7 @@ async def upload_template(
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"template_id": saved_path, # 使用文件路径作为ID
|
"template_id": saved_path,
|
||||||
"filename": file.filename,
|
"filename": file.filename,
|
||||||
"file_type": file_ext,
|
"file_type": file_ext,
|
||||||
"fields": [
|
"fields": [
|
||||||
@@ -95,7 +138,8 @@ async def upload_template(
|
|||||||
"cell": f.cell,
|
"cell": f.cell,
|
||||||
"name": f.name,
|
"name": f.name,
|
||||||
"field_type": f.field_type,
|
"field_type": f.field_type,
|
||||||
"required": f.required
|
"required": f.required,
|
||||||
|
"hint": f.hint
|
||||||
}
|
}
|
||||||
for f in template_fields
|
for f in template_fields
|
||||||
],
|
],
|
||||||
@@ -107,6 +151,240 @@ async def upload_template(
|
|||||||
raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/upload-joint")
|
||||||
|
async def upload_joint_template(
|
||||||
|
background_tasks: BackgroundTasks,
|
||||||
|
template_file: UploadFile = File(..., description="模板文件"),
|
||||||
|
source_files: List[UploadFile] = File(..., description="源文档文件列表"),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
联合上传模板和源文档,一键完成解析和存储
|
||||||
|
|
||||||
|
1. 保存模板文件并提取字段
|
||||||
|
2. 异步处理源文档(解析+存MongoDB)
|
||||||
|
3. 返回模板信息和源文档ID列表
|
||||||
|
|
||||||
|
Args:
|
||||||
|
template_file: 模板文件 (xlsx/xls/docx)
|
||||||
|
source_files: 源文档列表 (docx/xlsx/md/txt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
模板ID、字段列表、源文档ID列表
|
||||||
|
"""
|
||||||
|
if not template_file.filename:
|
||||||
|
raise HTTPException(status_code=400, detail="模板文件名为空")
|
||||||
|
|
||||||
|
# 验证模板格式
|
||||||
|
template_ext = template_file.filename.split('.')[-1].lower()
|
||||||
|
if template_ext not in ['xlsx', 'xls', 'docx']:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"不支持的模板格式: {template_ext},仅支持 xlsx/xls/docx"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 验证源文档格式
|
||||||
|
valid_exts = ['docx', 'xlsx', 'xls', 'md', 'txt']
|
||||||
|
for sf in source_files:
|
||||||
|
if sf.filename:
|
||||||
|
sf_ext = sf.filename.split('.')[-1].lower()
|
||||||
|
if sf_ext not in valid_exts:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"不支持的源文档格式: {sf_ext},仅支持 docx/xlsx/xls/md/txt"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 1. 保存模板文件
|
||||||
|
template_content = await template_file.read()
|
||||||
|
template_path = file_service.save_uploaded_file(
|
||||||
|
template_content,
|
||||||
|
template_file.filename,
|
||||||
|
subfolder="templates"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. 保存并解析源文档 - 提取内容用于生成表头
|
||||||
|
source_file_info = []
|
||||||
|
source_contents = []
|
||||||
|
for sf in source_files:
|
||||||
|
if sf.filename:
|
||||||
|
sf_content = await sf.read()
|
||||||
|
sf_ext = sf.filename.split('.')[-1].lower()
|
||||||
|
sf_path = file_service.save_uploaded_file(
|
||||||
|
sf_content,
|
||||||
|
sf.filename,
|
||||||
|
subfolder=sf_ext
|
||||||
|
)
|
||||||
|
source_file_info.append({
|
||||||
|
"path": sf_path,
|
||||||
|
"filename": sf.filename,
|
||||||
|
"ext": sf_ext
|
||||||
|
})
|
||||||
|
# 解析源文档获取内容(用于 AI 生成表头)
|
||||||
|
try:
|
||||||
|
from app.core.document_parser import ParserFactory
|
||||||
|
parser = ParserFactory.get_parser(sf_path)
|
||||||
|
parse_result = parser.parse(sf_path)
|
||||||
|
if parse_result.success and parse_result.data:
|
||||||
|
# 获取原始内容
|
||||||
|
content = parse_result.data.get("content", "")[:5000] if parse_result.data.get("content") else ""
|
||||||
|
|
||||||
|
# 获取标题(可能在顶层或structured_data内)
|
||||||
|
titles = parse_result.data.get("titles", [])
|
||||||
|
if not titles and parse_result.data.get("structured_data"):
|
||||||
|
titles = parse_result.data.get("structured_data", {}).get("titles", [])
|
||||||
|
titles = titles[:10] if titles else []
|
||||||
|
|
||||||
|
# 获取表格数量(可能在顶层或structured_data内)
|
||||||
|
tables = parse_result.data.get("tables", [])
|
||||||
|
if not tables and parse_result.data.get("structured_data"):
|
||||||
|
tables = parse_result.data.get("structured_data", {}).get("tables", [])
|
||||||
|
tables_count = len(tables) if tables else 0
|
||||||
|
|
||||||
|
# 获取表格内容摘要(用于 AI 理解源文档结构)
|
||||||
|
tables_summary = ""
|
||||||
|
if tables:
|
||||||
|
tables_summary = "\n【文档中的表格】:\n"
|
||||||
|
for idx, table in enumerate(tables[:5]): # 最多5个表格
|
||||||
|
if isinstance(table, dict):
|
||||||
|
headers = table.get("headers", [])
|
||||||
|
rows = table.get("rows", [])
|
||||||
|
if headers:
|
||||||
|
tables_summary += f"表格{idx+1}表头: {', '.join(str(h) for h in headers)}\n"
|
||||||
|
if rows:
|
||||||
|
tables_summary += f"表格{idx+1}前3行: "
|
||||||
|
for row_idx, row in enumerate(rows[:3]):
|
||||||
|
if isinstance(row, list):
|
||||||
|
tables_summary += " | ".join(str(c) for c in row) + "; "
|
||||||
|
elif isinstance(row, dict):
|
||||||
|
tables_summary += " | ".join(str(row.get(h, "")) for h in headers if headers) + "; "
|
||||||
|
tables_summary += "\n"
|
||||||
|
|
||||||
|
source_contents.append({
|
||||||
|
"filename": sf.filename,
|
||||||
|
"doc_type": sf_ext,
|
||||||
|
"content": content,
|
||||||
|
"titles": titles,
|
||||||
|
"tables_count": tables_count,
|
||||||
|
"tables_summary": tables_summary
|
||||||
|
})
|
||||||
|
logger.info(f"[DEBUG] source_contents built: filename={sf.filename}, content_len={len(content)}, titles_count={len(titles)}, tables_count={tables_count}")
|
||||||
|
if tables_summary:
|
||||||
|
logger.info(f"[DEBUG] tables_summary preview: {tables_summary[:300]}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"解析源文档失败 {sf.filename}: {e}")
|
||||||
|
|
||||||
|
# 3. 根据源文档内容生成表头
|
||||||
|
template_fields = await template_fill_service.get_template_fields_from_file(
|
||||||
|
template_path,
|
||||||
|
template_ext,
|
||||||
|
source_contents=source_contents # 传递源文档内容
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3. 异步处理源文档到MongoDB
|
||||||
|
task_id = str(uuid.uuid4())
|
||||||
|
if source_file_info:
|
||||||
|
# 保存任务记录到 MongoDB
|
||||||
|
try:
|
||||||
|
await mongodb.insert_task(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="source_process",
|
||||||
|
status="pending",
|
||||||
|
message=f"开始处理 {len(source_file_info)} 个源文档"
|
||||||
|
)
|
||||||
|
except Exception as mongo_err:
|
||||||
|
logger.warning(f"MongoDB 保存任务记录失败: {mongo_err}")
|
||||||
|
|
||||||
|
background_tasks.add_task(
|
||||||
|
process_source_documents,
|
||||||
|
task_id=task_id,
|
||||||
|
files=source_file_info
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"联合上传完成: 模板={template_file.filename}, 源文档={len(source_file_info)}个")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"template_id": template_path,
|
||||||
|
"filename": template_file.filename,
|
||||||
|
"file_type": template_ext,
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"cell": f.cell,
|
||||||
|
"name": f.name,
|
||||||
|
"field_type": f.field_type,
|
||||||
|
"required": f.required,
|
||||||
|
"hint": f.hint
|
||||||
|
}
|
||||||
|
for f in template_fields
|
||||||
|
],
|
||||||
|
"field_count": len(template_fields),
|
||||||
|
"source_file_paths": [f["path"] for f in source_file_info],
|
||||||
|
"source_filenames": [f["filename"] for f in source_file_info],
|
||||||
|
"task_id": task_id
|
||||||
|
}
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"联合上传失败: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"联合上传失败: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
async def process_source_documents(task_id: str, files: List[dict]):
    """Parse each source document and store the result in MongoDB.

    Progress is reported through ``update_task_status`` after every file.
    A failure on a single file is logged and skipped so the remaining files
    are still processed; only an error outside the per-file handling marks
    the whole task as failed.

    Args:
        task_id: Identifier of the task record whose status is updated.
        files: One dict per source file; the keys ``path``, ``ext`` and
            ``filename`` are read.
    """
    try:
        await update_task_status(
            task_id, status="processing",
            progress=0, message="开始处理源文档"
        )

        total = len(files)
        doc_ids = []
        for i, file_info in enumerate(files):
            try:
                parser = ParserFactory.get_parser(file_info["path"])
                result = parser.parse(file_info["path"])

                if result.success:
                    doc_id = await mongodb.insert_document(
                        doc_type=file_info["ext"],
                        content=result.data.get("content", ""),
                        metadata={
                            # A parser may leave metadata unset; unpacking
                            # None would raise and drop an otherwise
                            # successfully parsed file.
                            **(result.metadata or {}),
                            "original_filename": file_info["filename"],
                            "file_path": file_info["path"]
                        },
                        structured_data=result.data.get("structured_data")
                    )
                    doc_ids.append(doc_id)
                    logger.info(f"源文档处理成功: {file_info['filename']}, doc_id: {doc_id}")
                else:
                    logger.error(f"源文档解析失败: {file_info['filename']}, error: {result.error}")

            except Exception as e:
                # Best effort: keep going with the next file, but keep the
                # traceback so the failure can be diagnosed.
                logger.error(
                    f"源文档处理异常: {file_info['filename']}, error: {str(e)}",
                    exc_info=True
                )

            # Progress counts processed files, whether or not they succeeded.
            progress = int((i + 1) / total * 100)
            await update_task_status(
                task_id, status="processing",
                progress=progress, message=f"已处理 {i+1}/{total}"
            )

        await update_task_status(
            task_id, status="success",
            progress=100, message="源文档处理完成",
            result={"doc_ids": doc_ids}
        )
        logger.info(f"所有源文档处理完成: {len(doc_ids)}个")

    except Exception as e:
        logger.error(f"源文档批量处理失败: {str(e)}", exc_info=True)
        await update_task_status(
            task_id, status="failure",
            progress=0, message="源文档处理失败",
            error=str(e)
        )
|
||||||
|
|
||||||
|
|
||||||
@router.post("/fields")
|
@router.post("/fields")
|
||||||
async def extract_template_fields(
|
async def extract_template_fields(
|
||||||
template_id: str = Query(..., description="模板ID/文件路径"),
|
template_id: str = Query(..., description="模板ID/文件路径"),
|
||||||
@@ -135,7 +413,8 @@ async def extract_template_fields(
|
|||||||
"cell": f.cell,
|
"cell": f.cell,
|
||||||
"name": f.name,
|
"name": f.name,
|
||||||
"field_type": f.field_type,
|
"field_type": f.field_type,
|
||||||
"required": f.required
|
"required": f.required,
|
||||||
|
"hint": f.hint
|
||||||
}
|
}
|
||||||
for f in fields
|
for f in fields
|
||||||
]
|
]
|
||||||
@@ -153,7 +432,7 @@ async def fill_template(
|
|||||||
"""
|
"""
|
||||||
执行表格填写
|
执行表格填写
|
||||||
|
|
||||||
根据提供的字段定义,从已上传的文档中检索信息并填写
|
根据提供的字段定义,从源文档中检索信息并填写
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
request: 填写请求
|
request: 填写请求
|
||||||
@@ -161,28 +440,84 @@ async def fill_template(
|
|||||||
Returns:
|
Returns:
|
||||||
填写结果
|
填写结果
|
||||||
"""
|
"""
|
||||||
|
# 生成或使用传入的 task_id
|
||||||
|
task_id = request.task_id or str(uuid.uuid4())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# 创建任务记录到 MongoDB
|
||||||
|
try:
|
||||||
|
await mongodb.insert_task(
|
||||||
|
task_id=task_id,
|
||||||
|
task_type="template_fill",
|
||||||
|
status="processing",
|
||||||
|
message=f"开始填表任务: {len(request.template_fields)} 个字段"
|
||||||
|
)
|
||||||
|
except Exception as mongo_err:
|
||||||
|
logger.warning(f"MongoDB 创建任务记录失败: {mongo_err}")
|
||||||
|
|
||||||
|
# 更新进度 - 开始
|
||||||
|
await update_task_status(
|
||||||
|
task_id, "processing",
|
||||||
|
progress=0, message="开始处理..."
|
||||||
|
)
|
||||||
|
|
||||||
# 转换字段
|
# 转换字段
|
||||||
fields = [
|
fields = [
|
||||||
TemplateField(
|
TemplateField(
|
||||||
cell=f.cell,
|
cell=f.cell,
|
||||||
name=f.name,
|
name=f.name,
|
||||||
field_type=f.field_type,
|
field_type=f.field_type,
|
||||||
required=f.required
|
required=f.required,
|
||||||
|
hint=f.hint
|
||||||
)
|
)
|
||||||
for f in request.template_fields
|
for f in request.template_fields
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# 从 template_id 提取文件类型
|
||||||
|
template_file_type = "xlsx" # 默认类型
|
||||||
|
if request.template_id:
|
||||||
|
ext = request.template_id.split('.')[-1].lower()
|
||||||
|
if ext in ["xlsx", "xls"]:
|
||||||
|
template_file_type = "xlsx"
|
||||||
|
elif ext == "docx":
|
||||||
|
template_file_type = "docx"
|
||||||
|
|
||||||
|
# 更新进度 - 准备开始填写
|
||||||
|
await update_task_status(
|
||||||
|
task_id, "processing",
|
||||||
|
progress=10, message=f"准备填写 {len(fields)} 个字段..."
|
||||||
|
)
|
||||||
|
|
||||||
# 执行填写
|
# 执行填写
|
||||||
result = await template_fill_service.fill_template(
|
result = await template_fill_service.fill_template(
|
||||||
template_fields=fields,
|
template_fields=fields,
|
||||||
source_doc_ids=request.source_doc_ids,
|
source_doc_ids=request.source_doc_ids,
|
||||||
user_hint=request.user_hint
|
source_file_paths=request.source_file_paths,
|
||||||
|
user_hint=request.user_hint,
|
||||||
|
template_id=request.template_id,
|
||||||
|
template_file_type=template_file_type,
|
||||||
|
task_id=task_id
|
||||||
)
|
)
|
||||||
|
|
||||||
return result
|
# 更新为成功
|
||||||
|
await update_task_status(
|
||||||
|
task_id, "success",
|
||||||
|
progress=100, message="填表完成",
|
||||||
|
result={
|
||||||
|
"field_count": len(fields),
|
||||||
|
"max_rows": result.get("max_rows", 0)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {**result, "task_id": task_id}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# 更新为失败
|
||||||
|
await update_task_status(
|
||||||
|
task_id, "failure",
|
||||||
|
progress=0, message="填表失败",
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
logger.error(f"填写表格失败: {str(e)}")
|
logger.error(f"填写表格失败: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=f"填写失败: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"填写失败: {str(e)}")
|
||||||
|
|
||||||
@@ -194,6 +529,8 @@ async def export_filled_template(
|
|||||||
"""
|
"""
|
||||||
导出填写后的表格
|
导出填写后的表格
|
||||||
|
|
||||||
|
支持 Excel (.xlsx) 和 Word (.docx) 格式
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
request: 导出请求
|
request: 导出请求
|
||||||
|
|
||||||
@@ -201,25 +538,157 @@ async def export_filled_template(
|
|||||||
文件流
|
文件流
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 创建 DataFrame
|
if request.format == "xlsx":
|
||||||
df = pd.DataFrame([request.filled_data])
|
return await _export_to_excel(request.filled_data, request.template_id)
|
||||||
|
elif request.format == "docx":
|
||||||
# 导出为 Excel
|
return await _export_to_word(request.filled_data, request.template_id)
|
||||||
output = io.BytesIO()
|
else:
|
||||||
with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
raise HTTPException(
|
||||||
df.to_excel(writer, index=False, sheet_name='填写结果')
|
status_code=400,
|
||||||
|
detail=f"不支持的导出格式: {request.format},仅支持 xlsx/docx"
|
||||||
output.seek(0)
|
)
|
||||||
|
|
||||||
# 生成文件名
|
|
||||||
filename = f"filled_template.{request.format}"
|
|
||||||
|
|
||||||
return StreamingResponse(
|
|
||||||
io.BytesIO(output.getvalue()),
|
|
||||||
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
||||||
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"导出失败: {str(e)}")
|
logger.error(f"导出失败: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=f"导出失败: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"导出失败: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
async def _export_to_excel(filled_data: dict, template_id: str) -> StreamingResponse:
    """Export the filled data as a one-sheet Excel workbook (multi-row aware).

    Each key of ``filled_data`` becomes a column. A list value is spread over
    consecutive rows (shorter lists are padded with empty strings); any other
    value is written to the first row only.

    Args:
        filled_data: Mapping of column name to a value or list of values.
        template_id: Template identifier; accepted for interface symmetry
            with the Word exporter and not used here.

    Returns:
        A streaming response carrying the ``.xlsx`` file as an attachment.
    """
    # Uses the module-level ``logger``; the previous local re-import only
    # shadowed it.
    logger.info(f"导出填表数据: {len(filled_data)} 个字段")

    for k, v in filled_data.items():
        logger.info(f"  {k}: {type(v).__name__} = {str(v)[:80]}")

    # The sheet always has at least one row, even without list values.
    longest_list = max(
        (len(v) for v in filled_data.values() if isinstance(v, list)),
        default=0,
    )
    max_rows = max(1, longest_list)
    logger.info(f"最大行数: {max_rows}")

    def cell_value(values, row_idx):
        """Return the cell content of one column for the given row."""
        if isinstance(values, list):
            return values[row_idx] if row_idx < len(values) else ""
        # Scalars are written once, in the first row.
        return values if row_idx == 0 else ""

    rows_data = [
        {col_name: cell_value(values, row_idx) for col_name, values in filled_data.items()}
        for row_idx in range(max_rows)
    ]

    df = pd.DataFrame(rows_data)

    # Keep the column order of the incoming mapping.
    if not df.empty:
        df = df[list(filled_data.keys())]

    logger.info(f"DataFrame 形状: {df.shape}")
    logger.info(f"DataFrame 列: {list(df.columns)}")

    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='填写结果')

    # Rewind and stream the buffer directly instead of copying it.
    output.seek(0)

    filename = "filled_template.xlsx"

    return StreamingResponse(
        output,
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _export_to_word(filled_data: dict, template_id: str) -> StreamingResponse:
    """Export the filled data as a Word document.

    The document contains a centred heading, the template id and export
    time, and a three-column table (field name, value, status) with one row
    per entry of ``filled_data``.

    Args:
        filled_data: Mapping of field name to filled value.
        template_id: Template identifier, printed in the document header.

    Returns:
        A streaming response carrying the ``.docx`` file as an attachment.
    """
    from datetime import datetime

    from docx import Document
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    doc = Document()

    # Heading
    title = doc.add_heading('填写结果', level=1)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Template id and export time
    info_para = doc.add_paragraph()
    info_para.add_run(f"模板ID: {template_id}\n").bold = True
    info_para.add_run(f"导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    doc.add_paragraph()  # spacer line

    # Field table
    table = doc.add_table(rows=1, cols=3)
    table.style = 'Light Grid Accent 1'

    header_cells = table.rows[0].cells
    header_cells[0].text = '字段名'
    header_cells[1].text = '填写值'
    header_cells[2].text = '状态'

    for field_name, field_value in filled_data.items():
        # Only None and empty strings/containers count as "not filled".
        # A plain truthiness test would also drop legitimate values such
        # as 0 or False.
        is_empty = field_value is None or (
            isinstance(field_value, (str, list, tuple, dict)) and not field_value
        )
        row_cells = table.add_row().cells
        row_cells[0].text = field_name
        row_cells[1].text = '' if is_empty else str(field_value)
        row_cells[2].text = '为空' if is_empty else '已填写'

    # Serialise into memory and stream the buffer directly.
    output = io.BytesIO()
    doc.save(output)
    output.seek(0)

    filename = "filled_template.docx"

    return StreamingResponse(
        output,
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/export/excel")
async def export_to_excel(
    filled_data: dict,
    template_id: str = Query(..., description="模板ID")
):
    """
    Dedicated endpoint that exports the filled data as Excel.

    Args:
        filled_data: Filled data to export.
        template_id: Template ID.

    Returns:
        Excel file stream.
    """
    # Delegate to the shared Excel exporter.
    excel_response = await _export_to_excel(filled_data, template_id)
    return excel_response
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/export/word")
async def export_to_word(
    filled_data: dict,
    template_id: str = Query(..., description="模板ID")
):
    """
    Dedicated endpoint that exports the filled data as Word.

    Args:
        filled_data: Filled data to export.
        template_id: Template ID.

    Returns:
        Word file stream.
    """
    # Delegate to the shared Word exporter.
    word_response = await _export_to_word(filled_data, template_id)
    return word_response
|
||||||
|
|||||||
@@ -5,12 +5,14 @@ from fastapi import APIRouter, UploadFile, File, HTTPException, Query
|
|||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import io
|
import io
|
||||||
|
|
||||||
from app.services.file_service import file_service
|
from app.services.file_service import file_service
|
||||||
from app.core.document_parser import XlsxParser
|
from app.core.document_parser import XlsxParser
|
||||||
from app.services.table_rag_service import table_rag_service
|
from app.services.table_rag_service import table_rag_service
|
||||||
|
from app.core.database import mongodb
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -95,6 +97,56 @@ async def upload_excel(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True)
|
logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True)
|
||||||
|
|
||||||
|
# 存储到 MongoDB(用于文档列表展示)
|
||||||
|
try:
|
||||||
|
content = ""
|
||||||
|
# 构建文本内容用于展示
|
||||||
|
if result.data:
|
||||||
|
if isinstance(result.data, dict):
|
||||||
|
# 单 sheet 格式: {columns, rows, ...}
|
||||||
|
if 'columns' in result.data and 'rows' in result.data:
|
||||||
|
content += f"Sheet: {result.metadata.get('current_sheet', 'Sheet1') if result.metadata else 'Sheet1'}\n"
|
||||||
|
content += ", ".join(str(h) for h in result.data['columns']) + "\n"
|
||||||
|
for row in result.data['rows'][:100]:
|
||||||
|
if isinstance(row, dict):
|
||||||
|
content += ", ".join(str(row.get(col, "")) for col in result.data['columns']) + "\n"
|
||||||
|
elif isinstance(row, list):
|
||||||
|
content += ", ".join(str(cell) for cell in row) + "\n"
|
||||||
|
content += f"... (共 {len(result.data['rows'])} 行)\n\n"
|
||||||
|
# 多 sheet 格式: {sheets: {sheet_name: {columns, rows}}}
|
||||||
|
elif 'sheets' in result.data:
|
||||||
|
for sheet_name_key, sheet_data in result.data['sheets'].items():
|
||||||
|
if isinstance(sheet_data, dict) and 'columns' in sheet_data and 'rows' in sheet_data:
|
||||||
|
content += f"Sheet: {sheet_name_key}\n"
|
||||||
|
content += ", ".join(str(h) for h in sheet_data['columns']) + "\n"
|
||||||
|
for row in sheet_data['rows'][:100]:
|
||||||
|
if isinstance(row, dict):
|
||||||
|
content += ", ".join(str(row.get(col, "")) for col in sheet_data['columns']) + "\n"
|
||||||
|
elif isinstance(row, list):
|
||||||
|
content += ", ".join(str(cell) for cell in row) + "\n"
|
||||||
|
content += f"... (共 {len(sheet_data['rows'])} 行)\n\n"
|
||||||
|
|
||||||
|
doc_metadata = {
|
||||||
|
"filename": os.path.basename(saved_path),
|
||||||
|
"original_filename": file.filename,
|
||||||
|
"saved_path": saved_path,
|
||||||
|
"file_size": len(content),
|
||||||
|
"row_count": result.metadata.get('row_count', 0) if result.metadata else 0,
|
||||||
|
"column_count": result.metadata.get('column_count', 0) if result.metadata else 0,
|
||||||
|
"columns": result.metadata.get('columns', []) if result.metadata else [],
|
||||||
|
"mysql_table": result.metadata.get('mysql_table') if result.metadata else None,
|
||||||
|
"sheet_count": result.metadata.get('sheet_count', 1) if result.metadata else 1,
|
||||||
|
}
|
||||||
|
await mongodb.insert_document(
|
||||||
|
doc_type="xlsx",
|
||||||
|
content=content,
|
||||||
|
metadata=doc_metadata,
|
||||||
|
structured_data=result.data if result.data else None
|
||||||
|
)
|
||||||
|
logger.info(f"Excel文档已存储到MongoDB: {file.filename}, content长度: {len(content)}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Excel存储到MongoDB异常: {str(e)}", exc_info=True)
|
||||||
|
|
||||||
return result.to_dict()
|
return result.to_dict()
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
@@ -202,7 +254,7 @@ async def export_excel(
|
|||||||
output.seek(0)
|
output.seek(0)
|
||||||
|
|
||||||
# 生成文件名
|
# 生成文件名
|
||||||
original_name = file_path.split('/')[-1] if '/' in file_path else file_path
|
original_name = os.path.basename(file_path)
|
||||||
if columns:
|
if columns:
|
||||||
export_name = f"export_{sheet_name or 'data'}_{len(column_list) if columns else 'all'}_cols.xlsx"
|
export_name = f"export_{sheet_name or 'data'}_{len(column_list) if columns else 'all'}_cols.xlsx"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -26,7 +26,9 @@ class MongoDB:
|
|||||||
try:
|
try:
|
||||||
self.client = AsyncIOMotorClient(
|
self.client = AsyncIOMotorClient(
|
||||||
settings.MONGODB_URL,
|
settings.MONGODB_URL,
|
||||||
serverSelectionTimeoutMS=5000,
|
serverSelectionTimeoutMS=30000, # 30秒超时,适应远程服务器
|
||||||
|
connectTimeoutMS=30000, # 连接超时
|
||||||
|
socketTimeoutMS=60000, # Socket 超时
|
||||||
)
|
)
|
||||||
self.db = self.client[settings.MONGODB_DB_NAME]
|
self.db = self.client[settings.MONGODB_DB_NAME]
|
||||||
# 验证连接
|
# 验证连接
|
||||||
@@ -57,6 +59,11 @@ class MongoDB:
|
|||||||
"""RAG索引集合 - 存储字段语义索引"""
|
"""RAG索引集合 - 存储字段语义索引"""
|
||||||
return self.db["rag_index"]
|
return self.db["rag_index"]
|
||||||
|
|
||||||
|
@property
def tasks(self):
    """Task collection - stores the task history records."""
    task_collection = self.db["tasks"]
    return task_collection
|
||||||
|
|
||||||
# ==================== 文档操作 ====================
|
# ==================== 文档操作 ====================
|
||||||
|
|
||||||
async def insert_document(
|
async def insert_document(
|
||||||
@@ -240,8 +247,128 @@ class MongoDB:
|
|||||||
await self.rag_index.create_index("table_name")
|
await self.rag_index.create_index("table_name")
|
||||||
await self.rag_index.create_index("field_name")
|
await self.rag_index.create_index("field_name")
|
||||||
|
|
||||||
|
# 任务集合索引
|
||||||
|
await self.tasks.create_index("task_id", unique=True)
|
||||||
|
await self.tasks.create_index("created_at")
|
||||||
|
|
||||||
logger.info("MongoDB 索引创建完成")
|
logger.info("MongoDB 索引创建完成")
|
||||||
|
|
||||||
|
# ==================== 任务历史操作 ====================
|
||||||
|
|
||||||
|
async def insert_task(
    self,
    task_id: str,
    task_type: str,
    status: str = "pending",
    message: str = "",
    result: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> str:
    """
    Insert a task record.

    Args:
        task_id: Task ID.
        task_type: Task type.
        status: Task status.
        message: Task message.
        result: Task result.
        error: Error information.

    Returns:
        ID of the inserted document, as a string.
    """
    # Read the clock once so that a new record has identical created_at
    # and updated_at values.
    now = datetime.utcnow()
    task = {
        "task_id": task_id,
        "task_type": task_type,
        "status": status,
        "message": message,
        "result": result,
        "error": error,
        "created_at": now,
        "updated_at": now,
    }
    result_obj = await self.tasks.insert_one(task)
    return str(result_obj.inserted_id)
|
||||||
|
|
||||||
|
async def update_task(
    self,
    task_id: str,
    status: Optional[str] = None,
    message: Optional[str] = None,
    result: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> bool:
    """
    Update a task record.

    Only the arguments that are not None are written, so a field cannot be
    reset to None through this method. ``updated_at`` is always refreshed.

    Args:
        task_id: Task ID.
        status: Task status.
        message: Task message.
        result: Task result.
        error: Error information.

    Returns:
        True when a task with this ``task_id`` was found and updated.
    """
    candidate_fields = {
        "status": status,
        "message": message,
        "result": result,
        "error": error,
    }
    update_data = {"updated_at": datetime.utcnow()}
    update_data.update(
        {key: value for key, value in candidate_fields.items() if value is not None}
    )

    update_result = await self.tasks.update_one(
        {"task_id": task_id},
        {"$set": update_data}
    )
    # matched_count says whether the task exists; modified_count would be 0
    # for a matched document whose stored values already equal the update.
    return update_result.matched_count > 0
|
||||||
|
|
||||||
|
async def get_task(self, task_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a task by its task_id; the ``_id`` is returned as a string."""
    record = await self.tasks.find_one({"task_id": task_id})
    if not record:
        # Nothing found: hand back whatever the driver returned.
        return record
    record["_id"] = str(record["_id"])
    return record
|
||||||
|
|
||||||
|
async def list_tasks(
    self,
    limit: int = 50,
    skip: int = 0,
) -> List[Dict[str, Any]]:
    """
    List tasks, newest first.

    Args:
        limit: Number of tasks to return.
        skip: Number of tasks to skip.

    Returns:
        Task list; ``_id`` is converted to a string and the timestamps to
        ISO-8601 strings.
    """
    query = self.tasks.find().sort("created_at", -1).skip(skip).limit(limit)
    collected = []
    async for doc in query:
        doc["_id"] = str(doc["_id"])
        # Convert datetime values to strings.
        for stamp_key in ("created_at", "updated_at"):
            stamp = doc.get(stamp_key)
            if stamp:
                doc[stamp_key] = stamp.isoformat()
        collected.append(doc)
    return collected
|
||||||
|
|
||||||
|
async def delete_task(self, task_id: str) -> bool:
    """Delete a task; True when at least one document was removed."""
    outcome = await self.tasks.delete_one({"task_id": task_id})
    removed = outcome.deleted_count
    return removed > 0
|
||||||
|
|
||||||
|
|
||||||
# ==================== 全局单例 ====================
|
# ==================== 全局单例 ====================
|
||||||
|
|
||||||
|
|||||||
@@ -161,3 +161,133 @@ class DocxParser(BaseParser):
|
|||||||
fields[field_name] = match.group(1)
|
fields[field_name] = match.group(1)
|
||||||
|
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
|
def parse_tables_for_template(
    self,
    file_path: str
) -> Dict[str, Any]:
    """
    Parse the tables of a Word document and extract template fields.

    For each table the first row is taken as the header and every later
    row as a data row. When the header has at least two cells, each data
    row with a non-empty first cell yields a field: first cell = field
    name, second cell = hint/description, third cell (if any) = expected
    value.

    Args:
        file_path: Path of the Word file.

    Returns:
        Dict with ``tables`` (per-table details), ``fields`` (flat list of
        extracted fields) and ``field_count``.
    """
    from docx import Document

    doc = Document(file_path)

    template_info = {
        "tables": [],
        "fields": [],
        "field_count": 0
    }

    for table_idx, table in enumerate(doc.tables):
        table_info = {
            "table_index": table_idx,
            "rows": [],
            "headers": [],
            "data_rows": [],
            "field_hints": {}  # field name -> hint/description
        }

        # Materialise the rows once; a table without rows is recorded with
        # empty details instead of indexing a missing header row.
        all_rows = list(table.rows)
        if all_rows:
            header_cells = [cell.text.strip() for cell in all_rows[0].cells]
            table_info["headers"] = header_cells

            # Expected layout: field name | hint | filled value
            has_hint_column = len(header_cells) >= 2

            # Single pass over the data rows: the cell text is extracted
            # once and reused for both the raw rows and the fields.
            for row_idx, row in enumerate(all_rows[1:], 1):
                cells = [cell.text.strip() for cell in row.cells]
                table_info["data_rows"].append(cells)
                table_info["rows"].append({
                    "row_index": row_idx,
                    "cells": cells
                })

                if has_hint_column and len(cells) >= 2 and cells[0]:
                    field_name = cells[0]
                    hint = cells[1]
                    table_info["field_hints"][field_name] = hint

                    template_info["fields"].append({
                        "table_index": table_idx,
                        "row_index": row_idx,
                        "field_name": field_name,
                        "hint": hint,
                        "expected_value": cells[2] if len(cells) > 2 else ""
                    })

        template_info["tables"].append(table_info)

    template_info["field_count"] = len(template_info["fields"])
    return template_info
|
||||||
|
|
||||||
|
def extract_template_fields_from_docx(
    self,
    file_path: str
) -> List[Dict[str, Any]]:
    """
    Extract template field definitions from a Word document.

    Builds on ``parse_tables_for_template``: every field found there is
    turned into a definition with a ``T{table}R{row}`` cell reference and a
    field type inferred from its hint.

    Args:
        file_path: Path of the Word file.

    Returns:
        List of field definitions.
    """
    parsed = self.parse_tables_for_template(file_path)

    return [
        {
            "cell": f"T{entry['table_index']}R{entry['row_index']}",  # TableXRowY format
            "name": entry["field_name"],
            "hint": entry["hint"],
            "table_index": entry["table_index"],
            "row_index": entry["row_index"],
            "field_type": self._infer_field_type_from_hint(entry["hint"]),
            "required": True
        }
        for entry in parsed["fields"]
    ]
|
||||||
|
|
||||||
|
def _infer_field_type_from_hint(self, hint: str) -> str:
|
||||||
|
"""
|
||||||
|
从提示词推断字段类型
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hint: 字段提示词
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
字段类型 (text/number/date)
|
||||||
|
"""
|
||||||
|
hint_lower = hint.lower()
|
||||||
|
|
||||||
|
# 日期关键词
|
||||||
|
date_keywords = ["年", "月", "日", "日期", "时间", "出生"]
|
||||||
|
if any(kw in hint for kw in date_keywords):
|
||||||
|
return "date"
|
||||||
|
|
||||||
|
# 数字关键词
|
||||||
|
number_keywords = ["数量", "金额", "人数", "面积", "增长", "比率", "%", "率"]
|
||||||
|
if any(kw in hint_lower for kw in number_keywords):
|
||||||
|
return "number"
|
||||||
|
|
||||||
|
return "text"
|
||||||
|
|||||||
@@ -104,8 +104,15 @@ class XlsxParser(BaseParser):
|
|||||||
# pandas 读取失败,尝试 XML 方式
|
# pandas 读取失败,尝试 XML 方式
|
||||||
df = self._read_excel_sheet_xml(file_path, sheet_name=target_sheet, header_row=header_row)
|
df = self._read_excel_sheet_xml(file_path, sheet_name=target_sheet, header_row=header_row)
|
||||||
|
|
||||||
# 检查 DataFrame 是否为空
|
# 检查 DataFrame 是否为空(但如果有列名,仍算有效)
|
||||||
if df is None or df.empty:
|
if df is None:
|
||||||
|
return ParseResult(
|
||||||
|
success=False,
|
||||||
|
error=f"工作表 '{target_sheet}' 读取失败"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 如果 DataFrame 为空但有列名(比如模板文件),仍算有效
|
||||||
|
if df.empty and len(df.columns) == 0:
|
||||||
return ParseResult(
|
return ParseResult(
|
||||||
success=False,
|
success=False,
|
||||||
error=f"工作表 '{target_sheet}' 为空,请检查 Excel 文件内容"
|
error=f"工作表 '{target_sheet}' 为空,请检查 Excel 文件内容"
|
||||||
@@ -310,24 +317,70 @@ class XlsxParser(BaseParser):
|
|||||||
import zipfile
|
import zipfile
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
|
# 常见的命名空间
|
||||||
|
COMMON_NAMESPACES = [
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2005/main',
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2004/main',
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2003/main',
|
||||||
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with zipfile.ZipFile(file_path, 'r') as z:
|
with zipfile.ZipFile(file_path, 'r') as z:
|
||||||
if 'xl/workbook.xml' not in z.namelist():
|
# 尝试多种可能的 workbook.xml 路径
|
||||||
|
possible_paths = ['xl/workbook.xml', 'xl\\workbook.xml', 'workbook.xml']
|
||||||
|
content = None
|
||||||
|
for path in possible_paths:
|
||||||
|
if path in z.namelist():
|
||||||
|
content = z.read(path)
|
||||||
|
logger.info(f"找到 workbook.xml at: {path}")
|
||||||
|
break
|
||||||
|
|
||||||
|
if content is None:
|
||||||
|
logger.warning(f"未找到 workbook.xml,文件列表: {z.namelist()[:10]}")
|
||||||
return []
|
return []
|
||||||
content = z.read('xl/workbook.xml')
|
|
||||||
root = ET.fromstring(content)
|
root = ET.fromstring(content)
|
||||||
|
|
||||||
# 命名空间
|
|
||||||
ns = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
|
|
||||||
|
|
||||||
sheet_names = []
|
sheet_names = []
|
||||||
for sheet in root.findall('.//main:sheet', ns):
|
|
||||||
name = sheet.get('name')
|
# 方法1:尝试带命名空间的查找
|
||||||
if name:
|
for ns in COMMON_NAMESPACES:
|
||||||
sheet_names.append(name)
|
sheet_elements = root.findall(f'.//{{{ns}}}sheet')
|
||||||
|
if sheet_elements:
|
||||||
|
for sheet in sheet_elements:
|
||||||
|
name = sheet.get('name')
|
||||||
|
if name:
|
||||||
|
sheet_names.append(name)
|
||||||
|
if sheet_names:
|
||||||
|
logger.info(f"使用命名空间 {ns} 提取工作表: {sheet_names}")
|
||||||
|
return sheet_names
|
||||||
|
|
||||||
|
# 方法2:不使用命名空间,直接查找所有 sheet 元素
|
||||||
|
if not sheet_names:
|
||||||
|
for elem in root.iter():
|
||||||
|
if elem.tag.endswith('sheet') and elem.tag != 'sheets':
|
||||||
|
name = elem.get('name')
|
||||||
|
if name:
|
||||||
|
sheet_names.append(name)
|
||||||
|
for child in elem:
|
||||||
|
if child.tag.endswith('sheet') or child.tag == 'sheet':
|
||||||
|
name = child.get('name')
|
||||||
|
if name and name not in sheet_names:
|
||||||
|
sheet_names.append(name)
|
||||||
|
|
||||||
|
# 方法3:直接从 XML 文本中正则匹配 sheet name
|
||||||
|
if not sheet_names:
|
||||||
|
import re
|
||||||
|
xml_str = content.decode('utf-8', errors='ignore')
|
||||||
|
matches = re.findall(r'<sheet\s+[^>]*name=["\']([^"\']+)["\']', xml_str, re.IGNORECASE)
|
||||||
|
if matches:
|
||||||
|
sheet_names = matches
|
||||||
|
logger.info(f"使用正则提取工作表: {sheet_names}")
|
||||||
|
|
||||||
logger.info(f"从 XML 提取工作表: {sheet_names}")
|
logger.info(f"从 XML 提取工作表: {sheet_names}")
|
||||||
return sheet_names
|
return sheet_names
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"从 XML 提取工作表名称失败: {e}")
|
logger.error(f"从 XML 提取工作表名称失败: {e}")
|
||||||
return []
|
return []
|
||||||
@@ -349,6 +402,32 @@ class XlsxParser(BaseParser):
|
|||||||
import zipfile
|
import zipfile
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
|
# 常见的命名空间
|
||||||
|
COMMON_NAMESPACES = [
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2005/main',
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2004/main',
|
||||||
|
'http://schemas.openxmlformats.org/spreadsheetml/2003/main',
|
||||||
|
]
|
||||||
|
|
||||||
|
def find_elements_with_ns(root, tag_name, namespaces=None):
    """Find descendant elements by local tag name, whatever the namespace.

    Args:
        root: Element to search below.
        tag_name: Local (un-prefixed) tag name, e.g. ``"si"``.
        namespaces: Namespace URIs to try first; defaults to
            ``COMMON_NAMESPACES``.

    Returns:
        List of matching elements (empty when nothing matches).
    """
    if namespaces is None:
        namespaces = COMMON_NAMESPACES

    results = []
    # Pass 1: look the tag up under each known namespace.
    for ns in namespaces:
        try:
            results.extend(root.findall(f'.//{{{ns}}}{tag_name}'))
        except SyntaxError:
            # ElementPath rejects a malformed expression with SyntaxError;
            # skip that namespace instead of swallowing every exception.
            continue

    # Pass 2: fall back to matching the local name only. This covers
    # unknown namespaces as well as documents without any namespace.
    if not results:
        namespaced_suffix = '}' + tag_name
        for elem in root.iter():
            tag = elem.tag
            if not isinstance(tag, str):
                continue  # comments / processing instructions
            if tag == tag_name or tag.endswith(namespaced_suffix):
                results.append(elem)
    return results
|
||||||
|
|
||||||
with zipfile.ZipFile(file_path, 'r') as z:
|
with zipfile.ZipFile(file_path, 'r') as z:
|
||||||
# 获取工作表名称
|
# 获取工作表名称
|
||||||
sheet_names = self._extract_sheet_names_from_xml(file_path)
|
sheet_names = self._extract_sheet_names_from_xml(file_path)
|
||||||
@@ -359,57 +438,68 @@ class XlsxParser(BaseParser):
|
|||||||
target_sheet = sheet_name if sheet_name and sheet_name in sheet_names else sheet_names[0]
|
target_sheet = sheet_name if sheet_name and sheet_name in sheet_names else sheet_names[0]
|
||||||
sheet_index = sheet_names.index(target_sheet) + 1 # sheet1.xml, sheet2.xml, ...
|
sheet_index = sheet_names.index(target_sheet) + 1 # sheet1.xml, sheet2.xml, ...
|
||||||
|
|
||||||
# 读取 shared strings
|
# 读取 shared strings - 尝试多种路径
|
||||||
shared_strings = []
|
shared_strings = []
|
||||||
if 'xl/sharedStrings.xml' in z.namelist():
|
ss_paths = ['xl/sharedStrings.xml', 'xl\\sharedStrings.xml', 'sharedStrings.xml']
|
||||||
ss_content = z.read('xl/sharedStrings.xml')
|
for ss_path in ss_paths:
|
||||||
ss_root = ET.fromstring(ss_content)
|
if ss_path in z.namelist():
|
||||||
ns = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
|
try:
|
||||||
for si in ss_root.findall('.//main:si', ns):
|
ss_content = z.read(ss_path)
|
||||||
t = si.find('.//main:t', ns)
|
ss_root = ET.fromstring(ss_content)
|
||||||
if t is not None:
|
for si in find_elements_with_ns(ss_root, 'si'):
|
||||||
shared_strings.append(t.text or '')
|
t_elements = [c for c in si if c.tag.endswith('}t') or c.tag == 't']
|
||||||
else:
|
if t_elements:
|
||||||
shared_strings.append('')
|
shared_strings.append(t_elements[0].text or '')
|
||||||
|
else:
|
||||||
|
shared_strings.append('')
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"读取 sharedStrings 失败: {e}")
|
||||||
|
|
||||||
# 读取工作表
|
# 读取工作表 - 尝试多种可能的路径
|
||||||
sheet_file = f'xl/worksheets/sheet{sheet_index}.xml'
|
sheet_content = None
|
||||||
if sheet_file not in z.namelist():
|
sheet_paths = [
|
||||||
raise ValueError(f"工作表文件 {sheet_file} 不存在")
|
f'xl/worksheets/sheet{sheet_index}.xml',
|
||||||
|
f'xl\\worksheets\\sheet{sheet_index}.xml',
|
||||||
|
f'worksheets/sheet{sheet_index}.xml',
|
||||||
|
]
|
||||||
|
for sp in sheet_paths:
|
||||||
|
if sp in z.namelist():
|
||||||
|
sheet_content = z.read(sp)
|
||||||
|
break
|
||||||
|
|
||||||
|
if sheet_content is None:
|
||||||
|
raise ValueError(f"工作表文件 sheet{sheet_index}.xml 不存在")
|
||||||
|
|
||||||
sheet_content = z.read(sheet_file)
|
|
||||||
root = ET.fromstring(sheet_content)
|
root = ET.fromstring(sheet_content)
|
||||||
ns = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
|
|
||||||
|
|
||||||
# 收集所有行数据
|
# 收集所有行数据
|
||||||
all_rows = []
|
all_rows = []
|
||||||
headers = {}
|
headers = {}
|
||||||
|
|
||||||
for row in root.findall('.//main:row', ns):
|
for row in find_elements_with_ns(root, 'row'):
|
||||||
row_idx = int(row.get('r', 0))
|
row_idx = int(row.get('r', 0))
|
||||||
row_cells = {}
|
row_cells = {}
|
||||||
for cell in row.findall('main:c', ns):
|
for cell in find_elements_with_ns(row, 'c'):
|
||||||
cell_ref = cell.get('r', '')
|
cell_ref = cell.get('r', '')
|
||||||
col_letters = ''.join(filter(str.isalpha, cell_ref))
|
col_letters = ''.join(filter(str.isalpha, cell_ref))
|
||||||
cell_type = cell.get('t', 'n')
|
cell_type = cell.get('t', 'n')
|
||||||
v = cell.find('main:v', ns)
|
v_elements = find_elements_with_ns(cell, 'v')
|
||||||
|
v = v_elements[0] if v_elements else None
|
||||||
|
|
||||||
if v is not None and v.text:
|
if v is not None and v.text:
|
||||||
if cell_type == 's':
|
if cell_type == 's':
|
||||||
# shared string
|
|
||||||
try:
|
try:
|
||||||
row_cells[col_letters] = shared_strings[int(v.text)]
|
row_cells[col_letters] = shared_strings[int(v.text)]
|
||||||
except (ValueError, IndexError):
|
except (ValueError, IndexError):
|
||||||
row_cells[col_letters] = v.text
|
row_cells[col_letters] = v.text
|
||||||
elif cell_type == 'b':
|
elif cell_type == 'b':
|
||||||
# boolean
|
|
||||||
row_cells[col_letters] = v.text == '1'
|
row_cells[col_letters] = v.text == '1'
|
||||||
else:
|
else:
|
||||||
row_cells[col_letters] = v.text
|
row_cells[col_letters] = v.text
|
||||||
else:
|
else:
|
||||||
row_cells[col_letters] = None
|
row_cells[col_letters] = None
|
||||||
|
|
||||||
# 处理表头行
|
|
||||||
if row_idx == header_row + 1:
|
if row_idx == header_row + 1:
|
||||||
headers = {**row_cells}
|
headers = {**row_cells}
|
||||||
elif row_idx > header_row + 1:
|
elif row_idx > header_row + 1:
|
||||||
@@ -417,7 +507,6 @@ class XlsxParser(BaseParser):
|
|||||||
|
|
||||||
# 构建 DataFrame
|
# 构建 DataFrame
|
||||||
if headers:
|
if headers:
|
||||||
# 按原始列顺序排列
|
|
||||||
col_order = list(headers.keys())
|
col_order = list(headers.keys())
|
||||||
df = pd.DataFrame(all_rows)
|
df = pd.DataFrame(all_rows)
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
"""
|
||||||
|
指令执行模块
|
||||||
|
|
||||||
|
注意: 此模块为可选功能,当前尚未实现。
|
||||||
|
如需启用,请实现 intent_parser.py 和 executor.py
|
||||||
|
"""
|
||||||
|
from .intent_parser import IntentParser, DefaultIntentParser
|
||||||
|
from .executor import InstructionExecutor, DefaultInstructionExecutor
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"IntentParser",
|
||||||
|
"DefaultIntentParser",
|
||||||
|
"InstructionExecutor",
|
||||||
|
"DefaultInstructionExecutor",
|
||||||
|
]
|
||||||
|
|||||||
@@ -0,0 +1,35 @@
|
|||||||
|
"""
|
||||||
|
指令执行器模块
|
||||||
|
|
||||||
|
将自然语言指令转换为可执行操作
|
||||||
|
|
||||||
|
注意: 此模块为可选功能,当前尚未实现。
|
||||||
|
"""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
|
||||||
|
class InstructionExecutor(ABC):
    """Abstract base class for instruction executors."""

    @abstractmethod
    async def execute(self, instruction: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Execute an instruction.

        Args:
            instruction: The parsed instruction.
            context: The execution context.

        Returns:
            The execution result.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultInstructionExecutor(InstructionExecutor):
    """Default instruction executor; currently a placeholder."""

    async def execute(self, instruction: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("指令执行功能暂未实现")
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
"""
|
||||||
|
意图解析器模块
|
||||||
|
|
||||||
|
解析用户自然语言指令,识别意图和参数
|
||||||
|
|
||||||
|
注意: 此模块为可选功能,当前尚未实现。
|
||||||
|
"""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Any, Dict, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
class IntentParser(ABC):
    """Abstract base class for intent parsers."""

    @abstractmethod
    async def parse(self, text: str) -> Tuple[str, Dict[str, Any]]:
        """Parse a natural-language instruction.

        Args:
            text: Natural-language text entered by the user.

        Returns:
            A ``(intent type, parameter dict)`` tuple.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultIntentParser(IntentParser):
    """Default intent parser; currently a placeholder."""

    async def parse(self, text: str) -> Tuple[str, Dict[str, Any]]:
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("意图解析功能暂未实现")
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -115,8 +115,7 @@ pip install -r requirements.txt
|
|||||||
在终端输入以下命令:
|
在终端输入以下命令:
|
||||||
```bash
|
```bash
|
||||||
cd backend #确保启动时在后端根目录下
|
cd backend #确保启动时在后端根目录下
|
||||||
./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000
|
./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload #启动后端项目
|
||||||
--reload #启动后端项目
|
|
||||||
```
|
```
|
||||||
先启动后端项目,再启动前端项目
|
先启动后端项目,再启动前端项目
|
||||||
|
|
||||||
|
|||||||
Submodule frontend - 副本 deleted from 797125940b
@@ -1,13 +1,16 @@
|
|||||||
import { RouterProvider } from 'react-router-dom';
|
import { RouterProvider } from 'react-router-dom';
|
||||||
import { AuthProvider } from '@/context/AuthContext';
|
import { AuthProvider } from '@/contexts/AuthContext';
|
||||||
|
import { TemplateFillProvider } from '@/context/TemplateFillContext';
|
||||||
import { router } from '@/routes';
|
import { router } from '@/routes';
|
||||||
import { Toaster } from 'sonner';
|
import { Toaster } from 'sonner';
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
return (
|
return (
|
||||||
<AuthProvider>
|
<AuthProvider>
|
||||||
<RouterProvider router={router} />
|
<TemplateFillProvider>
|
||||||
<Toaster position="top-right" richColors closeButton />
|
<RouterProvider router={router} />
|
||||||
|
<Toaster position="top-right" richColors closeButton />
|
||||||
|
</TemplateFillProvider>
|
||||||
</AuthProvider>
|
</AuthProvider>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
import { Navigate, useLocation } from 'react-router-dom';
|
import { Navigate, useLocation } from 'react-router-dom';
|
||||||
import { useAuth } from '@/context/AuthContext';
|
import { useAuth } from '@/contexts/AuthContext';
|
||||||
|
|
||||||
export const RouteGuard: React.FC<{ children: React.ReactNode }> = ({ children }) => {
|
export const RouteGuard: React.FC<{ children: React.ReactNode }> = ({ children }) => {
|
||||||
const { user, loading } = useAuth();
|
const { user, loading } = useAuth();
|
||||||
|
|||||||
@@ -1,85 +0,0 @@
|
|||||||
import React, { createContext, useContext, useEffect, useState } from 'react';
|
|
||||||
import { supabase } from '@/db/supabase';
|
|
||||||
import { User } from '@supabase/supabase-js';
|
|
||||||
import { Profile } from '@/types/types';
|
|
||||||
|
|
||||||
interface AuthContextType {
|
|
||||||
user: User | null;
|
|
||||||
profile: Profile | null;
|
|
||||||
signIn: (email: string, password: string) => Promise<{ error: any }>;
|
|
||||||
signUp: (email: string, password: string) => Promise<{ error: any }>;
|
|
||||||
signOut: () => Promise<{ error: any }>;
|
|
||||||
loading: boolean;
|
|
||||||
}
|
|
||||||
|
|
||||||
const AuthContext = createContext<AuthContextType | undefined>(undefined);
|
|
||||||
|
|
||||||
export const AuthProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => {
|
|
||||||
const [user, setUser] = useState<User | null>(null);
|
|
||||||
const [profile, setProfile] = useState<Profile | null>(null);
|
|
||||||
const [loading, setLoading] = useState(true);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
// Check active sessions and sets the user
|
|
||||||
supabase.auth.getSession().then(({ data: { session } }) => {
|
|
||||||
setUser(session?.user ?? null);
|
|
||||||
if (session?.user) fetchProfile(session.user.id);
|
|
||||||
else setLoading(false);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Listen for changes on auth state (sign in, sign out, etc.)
|
|
||||||
const { data: { subscription } } = supabase.auth.onAuthStateChange((_event, session) => {
|
|
||||||
setUser(session?.user ?? null);
|
|
||||||
if (session?.user) fetchProfile(session.user.id);
|
|
||||||
else {
|
|
||||||
setProfile(null);
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return () => subscription.unsubscribe();
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const fetchProfile = async (uid: string) => {
|
|
||||||
try {
|
|
||||||
const { data, error } = await supabase
|
|
||||||
.from('profiles')
|
|
||||||
.select('*')
|
|
||||||
.eq('id', uid)
|
|
||||||
.maybeSingle();
|
|
||||||
|
|
||||||
if (error) throw error;
|
|
||||||
setProfile(data);
|
|
||||||
} catch (err) {
|
|
||||||
console.error('Error fetching profile:', err);
|
|
||||||
} finally {
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const signIn = async (email: string, password: string) => {
|
|
||||||
return await supabase.auth.signInWithPassword({ email, password });
|
|
||||||
};
|
|
||||||
|
|
||||||
const signUp = async (email: string, password: string) => {
|
|
||||||
return await supabase.auth.signUp({ email, password });
|
|
||||||
};
|
|
||||||
|
|
||||||
const signOut = async () => {
|
|
||||||
return await supabase.auth.signOut();
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
|
||||||
<AuthContext.Provider value={{ user, profile, signIn, signUp, signOut, loading }}>
|
|
||||||
{children}
|
|
||||||
</AuthContext.Provider>
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
export const useAuth = () => {
|
|
||||||
const context = useContext(AuthContext);
|
|
||||||
if (context === undefined) {
|
|
||||||
throw new Error('useAuth must be used within an AuthProvider');
|
|
||||||
}
|
|
||||||
return context;
|
|
||||||
};
|
|
||||||
136
frontend/src/context/TemplateFillContext.tsx
Normal file
136
frontend/src/context/TemplateFillContext.tsx
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
import React, {
  createContext,
  useCallback,
  useContext,
  useMemo,
  useState,
  ReactNode,
} from 'react';

/** A source file selected by the user, with an optional preview string. */
type SourceFile = {
  file: File;
  preview?: string;
};

/** Description of one template field. */
type TemplateField = {
  cell: string;
  name: string;
  field_type: string;
  required: boolean;
  hint?: string;
};

/** Stage of the template-fill flow. */
type Step = 'upload' | 'filling' | 'preview';

/** Shape of the value exposed by TemplateFillContext: the state plus its mutators. */
interface TemplateFillState {
  step: Step;
  templateFile: File | null;
  templateFields: TemplateField[];
  sourceFiles: SourceFile[];
  sourceFilePaths: string[];
  sourceDocIds: string[];
  templateId: string;
  filledResult: any;
  setStep: (step: Step) => void;
  setTemplateFile: (file: File | null) => void;
  setTemplateFields: (fields: TemplateField[]) => void;
  setSourceFiles: (files: SourceFile[]) => void;
  addSourceFiles: (files: SourceFile[]) => void;
  removeSourceFile: (index: number) => void;
  setSourceFilePaths: (paths: string[]) => void;
  setSourceDocIds: (ids: string[]) => void;
  addSourceDocId: (id: string) => void;
  removeSourceDocId: (id: string) => void;
  setTemplateId: (id: string) => void;
  setFilledResult: (result: any) => void;
  reset: () => void;
}

// Default context value, used only when a consumer renders outside of
// TemplateFillProvider: empty state and mutators that do nothing.
const noop = () => {};

const initialState: TemplateFillState = {
  step: 'upload',
  templateFile: null,
  templateFields: [],
  sourceFiles: [],
  sourceFilePaths: [],
  sourceDocIds: [],
  templateId: '',
  filledResult: null,
  setStep: noop,
  setTemplateFile: noop,
  setTemplateFields: noop,
  setSourceFiles: noop,
  addSourceFiles: noop,
  removeSourceFile: noop,
  setSourceFilePaths: noop,
  setSourceDocIds: noop,
  addSourceDocId: noop,
  removeSourceDocId: noop,
  setTemplateId: noop,
  setFilledResult: noop,
  reset: noop,
};

const TemplateFillContext = createContext<TemplateFillState>(initialState);

/**
 * Holds the template-fill state and provides it, together with its mutators,
 * to all descendants through TemplateFillContext.
 */
export const TemplateFillProvider: React.FC<{ children: ReactNode }> = ({ children }) => {
  const [step, setStep] = useState<Step>('upload');
  const [templateFile, setTemplateFile] = useState<File | null>(null);
  const [templateFields, setTemplateFields] = useState<TemplateField[]>([]);
  const [sourceFiles, setSourceFiles] = useState<SourceFile[]>([]);
  const [sourceFilePaths, setSourceFilePaths] = useState<string[]>([]);
  const [sourceDocIds, setSourceDocIds] = useState<string[]>([]);
  const [templateId, setTemplateId] = useState<string>('');
  const [filledResult, setFilledResult] = useState<any>(null);

  // The helpers only use state setters (stable identities) and functional
  // updates, so they can be memoized with empty dependency lists.

  /** Append files to the end of the source-file list. */
  const addSourceFiles = useCallback((files: SourceFile[]) => {
    setSourceFiles(prev => [...prev, ...files]);
  }, []);

  /** Remove the source file at the given list index. */
  const removeSourceFile = useCallback((index: number) => {
    setSourceFiles(prev => prev.filter((_, i) => i !== index));
  }, []);

  /** Add a source document id unless it is already in the list. */
  const addSourceDocId = useCallback((id: string) => {
    setSourceDocIds(prev => (prev.includes(id) ? prev : [...prev, id]));
  }, []);

  /** Remove every occurrence of the given source document id. */
  const removeSourceDocId = useCallback((id: string) => {
    setSourceDocIds(prev => prev.filter(docId => docId !== id));
  }, []);

  /** Restore every piece of state to its initial value. */
  const reset = useCallback(() => {
    setStep('upload');
    setTemplateFile(null);
    setTemplateFields([]);
    setSourceFiles([]);
    setSourceFilePaths([]);
    setSourceDocIds([]);
    setTemplateId('');
    setFilledResult(null);
  }, []);

  // Memoize the context value so consumers re-render only when a piece of
  // state actually changes, not on every render of the provider.
  const value = useMemo<TemplateFillState>(
    () => ({
      step,
      templateFile,
      templateFields,
      sourceFiles,
      sourceFilePaths,
      sourceDocIds,
      templateId,
      filledResult,
      setStep,
      setTemplateFile,
      setTemplateFields,
      setSourceFiles,
      addSourceFiles,
      removeSourceFile,
      setSourceFilePaths,
      setSourceDocIds,
      addSourceDocId,
      removeSourceDocId,
      setTemplateId,
      setFilledResult,
      reset,
    }),
    [
      step,
      templateFile,
      templateFields,
      sourceFiles,
      sourceFilePaths,
      sourceDocIds,
      templateId,
      filledResult,
      addSourceFiles,
      removeSourceFile,
      addSourceDocId,
      removeSourceDocId,
      reset,
    ]
  );

  return (
    <TemplateFillContext.Provider value={value}>
      {children}
    </TemplateFillContext.Provider>
  );
};

/** Read the current TemplateFillContext value. */
export const useTemplateFill = () => useContext(TemplateFillContext);
|
||||||
@@ -92,6 +92,7 @@ export interface TemplateField {
|
|||||||
name: string;
|
name: string;
|
||||||
field_type: string;
|
field_type: string;
|
||||||
required: boolean;
|
required: boolean;
|
||||||
|
hint?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 表格填写结果
|
// 表格填写结果
|
||||||
@@ -102,7 +103,9 @@ export interface FillResult {
|
|||||||
field: string;
|
field: string;
|
||||||
value: any;
|
value: any;
|
||||||
source: string;
|
source: string;
|
||||||
|
confidence?: number;
|
||||||
}>;
|
}>;
|
||||||
|
source_doc_count?: number;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -397,6 +400,49 @@ export const backendApi = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Fetch the task history list.
 *
 * @param limit Value sent as the `limit` query parameter (default 50).
 * @param skip Value sent as the `skip` query parameter (default 0).
 * @returns The parsed JSON response body.
 * @throws Error with the response's `detail` field, or a fallback message,
 *         when the response status is not OK.
 */
async getTasks(
  limit: number = 50,
  skip: number = 0
): Promise<{ success: boolean; tasks: any[]; count: number }> {
  const url = `${BACKEND_BASE_URL}/tasks?limit=${limit}&skip=${skip}`;

  try {
    const response = await fetch(url);
    if (!response.ok) {
      // An error response may not carry a JSON body; fall back to the
      // generic message instead of surfacing a JSON parse error.
      const error = await response.json().catch(() => null);
      throw new Error(error?.detail || '获取任务列表失败');
    }
    return await response.json();
  } catch (error) {
    console.error('获取任务列表失败:', error);
    throw error;
  }
},
|
||||||
|
|
||||||
|
/**
 * Delete a task.
 *
 * @param taskId ID of the task to delete; URL-encoded before being placed
 *               in the request path.
 * @returns The parsed JSON response body.
 * @throws Error with the response's `detail` field, or a fallback message,
 *         when the response status is not OK.
 */
async deleteTask(taskId: string): Promise<{ success: boolean; deleted: boolean }> {
  // Encode the id so characters such as "/" or "?" cannot alter the path.
  const url = `${BACKEND_BASE_URL}/tasks/${encodeURIComponent(taskId)}`;

  try {
    const response = await fetch(url, {
      method: 'DELETE'
    });
    if (!response.ok) {
      // An error response may not carry a JSON body; fall back to the
      // generic message instead of surfacing a JSON parse error.
      const error = await response.json().catch(() => null);
      throw new Error(error?.detail || '删除任务失败');
    }
    return await response.json();
  } catch (error) {
    console.error('删除任务失败:', error);
    throw error;
  }
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 轮询任务状态直到完成
|
* 轮询任务状态直到完成
|
||||||
*/
|
*/
|
||||||
@@ -620,12 +666,88 @@ export const backendApi = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Extract field definitions from an already uploaded template.
 *
 * @param templateId Sent as `template_id` in the JSON request body.
 * @param fileType Sent as `file_type` in the JSON request body (default 'xlsx').
 * @returns The parsed JSON response body.
 * @throws Error with the response's `detail` field, or a fallback message,
 *         when the response status is not OK.
 */
async extractTemplateFields(
  templateId: string,
  fileType: string = 'xlsx'
): Promise<{
  success: boolean;
  fields: TemplateField[];
}> {
  const url = `${BACKEND_BASE_URL}/templates/fields`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        template_id: templateId,
        file_type: fileType,
      }),
    });

    if (!response.ok) {
      // An error response may not carry a JSON body; fall back to the
      // generic message instead of surfacing a JSON parse error.
      const error = await response.json().catch(() => null);
      throw new Error(error?.detail || '提取字段失败');
    }
    return await response.json();
  } catch (error) {
    console.error('提取字段失败:', error);
    throw error;
  }
},
|
||||||
|
|
||||||
|
/**
 * Upload a template together with its source documents in one request.
 *
 * @param templateFile Sent as the `template_file` multipart field.
 * @param sourceFiles Each file is appended as a `source_files` multipart field.
 * @returns The parsed JSON response body.
 * @throws Error with the response's `detail` field, or a fallback message,
 *         when the response status is not OK.
 */
async uploadTemplateAndSources(
  templateFile: File,
  sourceFiles: File[]
): Promise<{
  success: boolean;
  template_id: string;
  filename: string;
  file_type: string;
  fields: TemplateField[];
  field_count: number;
  source_file_paths: string[];
  source_filenames: string[];
  task_id: string;
}> {
  const formData = new FormData();
  formData.append('template_file', templateFile);
  for (const file of sourceFiles) {
    formData.append('source_files', file);
  }

  const url = `${BACKEND_BASE_URL}/templates/upload-joint`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // An error response may not carry a JSON body; fall back to the
      // generic message instead of surfacing a JSON parse error.
      const error = await response.json().catch(() => null);
      throw new Error(error?.detail || '联合上传失败');
    }
    return await response.json();
  } catch (error) {
    console.error('联合上传失败:', error);
    throw error;
  }
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 执行表格填写
|
* 执行表格填写
|
||||||
*/
|
*/
|
||||||
async fillTemplate(
|
async fillTemplate(
|
||||||
templateId: string,
|
templateId: string,
|
||||||
templateFields: TemplateField[]
|
templateFields: TemplateField[],
|
||||||
|
sourceDocIds?: string[],
|
||||||
|
sourceFilePaths?: string[],
|
||||||
|
userHint?: string
|
||||||
): Promise<FillResult> {
|
): Promise<FillResult> {
|
||||||
const url = `${BACKEND_BASE_URL}/templates/fill`;
|
const url = `${BACKEND_BASE_URL}/templates/fill`;
|
||||||
|
|
||||||
@@ -636,6 +758,9 @@ export const backendApi = {
|
|||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
template_id: templateId,
|
template_id: templateId,
|
||||||
template_fields: templateFields,
|
template_fields: templateFields,
|
||||||
|
source_doc_ids: sourceDocIds || [],
|
||||||
|
source_file_paths: sourceFilePaths || [],
|
||||||
|
user_hint: userHint || null,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1063,7 +1188,7 @@ export const aiApi = {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
method: 'GET',
|
method: 'POST',
|
||||||
body: formData,
|
body: formData,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import React, { useState, useEffect, useCallback } from 'react';
|
import React, { useState, useEffect, useCallback, useRef } from 'react';
|
||||||
import { useDropzone } from 'react-dropzone';
|
import { useDropzone } from 'react-dropzone';
|
||||||
import {
|
import {
|
||||||
FileText,
|
FileText,
|
||||||
@@ -23,7 +23,8 @@ import {
|
|||||||
List,
|
List,
|
||||||
MessageSquareCode,
|
MessageSquareCode,
|
||||||
Tag,
|
Tag,
|
||||||
HelpCircle
|
HelpCircle,
|
||||||
|
Plus
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
import { Input } from '@/components/ui/input';
|
import { Input } from '@/components/ui/input';
|
||||||
@@ -72,8 +73,10 @@ const Documents: React.FC = () => {
|
|||||||
// 上传相关状态
|
// 上传相关状态
|
||||||
const [uploading, setUploading] = useState(false);
|
const [uploading, setUploading] = useState(false);
|
||||||
const [uploadedFile, setUploadedFile] = useState<File | null>(null);
|
const [uploadedFile, setUploadedFile] = useState<File | null>(null);
|
||||||
|
const [uploadedFiles, setUploadedFiles] = useState<File[]>([]);
|
||||||
const [parseResult, setParseResult] = useState<ExcelParseResult | null>(null);
|
const [parseResult, setParseResult] = useState<ExcelParseResult | null>(null);
|
||||||
const [expandedSheet, setExpandedSheet] = useState<string | null>(null);
|
const [expandedSheet, setExpandedSheet] = useState<string | null>(null);
|
||||||
|
const [uploadExpanded, setUploadExpanded] = useState(false);
|
||||||
|
|
||||||
// AI 分析相关状态
|
// AI 分析相关状态
|
||||||
const [analyzing, setAnalyzing] = useState(false);
|
const [analyzing, setAnalyzing] = useState(false);
|
||||||
@@ -210,74 +213,119 @@ const Documents: React.FC = () => {
|
|||||||
|
|
||||||
// 文件上传处理
|
// 文件上传处理
|
||||||
const onDrop = async (acceptedFiles: File[]) => {
|
const onDrop = async (acceptedFiles: File[]) => {
|
||||||
const file = acceptedFiles[0];
|
if (acceptedFiles.length === 0) return;
|
||||||
if (!file) return;
|
|
||||||
|
|
||||||
setUploadedFile(file);
|
|
||||||
setUploading(true);
|
setUploading(true);
|
||||||
setParseResult(null);
|
let successCount = 0;
|
||||||
setAiAnalysis(null);
|
let failCount = 0;
|
||||||
setAnalysisCharts(null);
|
const successfulFiles: File[] = [];
|
||||||
setExpandedSheet(null);
|
|
||||||
setMdAnalysis(null);
|
|
||||||
setMdSections([]);
|
|
||||||
setMdStreamingContent('');
|
|
||||||
|
|
||||||
const ext = file.name.split('.').pop()?.toLowerCase();
|
// 逐个上传文件
|
||||||
|
for (const file of acceptedFiles) {
|
||||||
|
const ext = file.name.split('.').pop()?.toLowerCase();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Excel 文件使用专门的上传接口
|
if (ext === 'xlsx' || ext === 'xls') {
|
||||||
if (ext === 'xlsx' || ext === 'xls') {
|
const result = await backendApi.uploadExcel(file, {
|
||||||
const result = await backendApi.uploadExcel(file, {
|
parseAllSheets: parseOptions.parseAllSheets,
|
||||||
parseAllSheets: parseOptions.parseAllSheets,
|
headerRow: parseOptions.headerRow
|
||||||
headerRow: parseOptions.headerRow
|
});
|
||||||
});
|
if (result.success) {
|
||||||
if (result.success) {
|
successCount++;
|
||||||
toast.success(`解析成功: ${file.name}`);
|
successfulFiles.push(file);
|
||||||
setParseResult(result);
|
// 第一个Excel文件设置解析结果供预览
|
||||||
if (result.metadata?.sheet_count === 1) {
|
if (successCount === 1) {
|
||||||
setExpandedSheet(Object.keys(result.data?.sheets || {})[0] || null);
|
setUploadedFile(file);
|
||||||
|
setParseResult(result);
|
||||||
|
if (result.metadata?.sheet_count === 1) {
|
||||||
|
setExpandedSheet(Object.keys(result.data?.sheets || {})[0] || null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
loadDocuments();
|
||||||
|
} else {
|
||||||
|
failCount++;
|
||||||
|
toast.error(`${file.name}: ${result.error || '解析失败'}`);
|
||||||
|
}
|
||||||
|
} else if (ext === 'md' || ext === 'markdown') {
|
||||||
|
const result = await backendApi.uploadDocument(file);
|
||||||
|
if (result.task_id) {
|
||||||
|
successCount++;
|
||||||
|
successfulFiles.push(file);
|
||||||
|
if (successCount === 1) {
|
||||||
|
setUploadedFile(file);
|
||||||
|
}
|
||||||
|
// 轮询任务状态
|
||||||
|
let attempts = 0;
|
||||||
|
const checkStatus = async () => {
|
||||||
|
while (attempts < 30) {
|
||||||
|
try {
|
||||||
|
const status = await backendApi.getTaskStatus(result.task_id);
|
||||||
|
if (status.status === 'success') {
|
||||||
|
loadDocuments();
|
||||||
|
return;
|
||||||
|
} else if (status.status === 'failure') {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('检查状态失败', e);
|
||||||
|
}
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||||
|
attempts++;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
checkStatus();
|
||||||
|
} else {
|
||||||
|
failCount++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
toast.error(result.error || '解析失败');
|
// 其他文档使用通用上传接口
|
||||||
}
|
const result = await backendApi.uploadDocument(file);
|
||||||
} else if (ext === 'md' || ext === 'markdown') {
|
if (result.task_id) {
|
||||||
// Markdown 文件:获取大纲
|
successCount++;
|
||||||
await fetchMdOutline();
|
successfulFiles.push(file);
|
||||||
} else {
|
if (successCount === 1) {
|
||||||
// 其他文档使用通用上传接口
|
setUploadedFile(file);
|
||||||
const result = await backendApi.uploadDocument(file);
|
|
||||||
if (result.task_id) {
|
|
||||||
toast.success(`文件 ${file.name} 已提交处理`);
|
|
||||||
// 轮询任务状态
|
|
||||||
let attempts = 0;
|
|
||||||
const checkStatus = async () => {
|
|
||||||
while (attempts < 30) {
|
|
||||||
try {
|
|
||||||
const status = await backendApi.getTaskStatus(result.task_id);
|
|
||||||
if (status.status === 'success') {
|
|
||||||
toast.success(`文件 ${file.name} 处理完成`);
|
|
||||||
loadDocuments();
|
|
||||||
return;
|
|
||||||
} else if (status.status === 'failure') {
|
|
||||||
toast.error(`文件 ${file.name} 处理失败`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.error('检查状态失败', e);
|
|
||||||
}
|
|
||||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
||||||
attempts++;
|
|
||||||
}
|
}
|
||||||
toast.error(`文件 ${file.name} 处理超时`);
|
// 轮询任务状态
|
||||||
};
|
let attempts = 0;
|
||||||
checkStatus();
|
const checkStatus = async () => {
|
||||||
|
while (attempts < 30) {
|
||||||
|
try {
|
||||||
|
const status = await backendApi.getTaskStatus(result.task_id);
|
||||||
|
if (status.status === 'success') {
|
||||||
|
loadDocuments();
|
||||||
|
return;
|
||||||
|
} else if (status.status === 'failure') {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('检查状态失败', e);
|
||||||
|
}
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||||
|
attempts++;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
checkStatus();
|
||||||
|
} else {
|
||||||
|
failCount++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
failCount++;
|
||||||
|
toast.error(`${file.name}: ${error.message || '上传失败'}`);
|
||||||
}
|
}
|
||||||
} catch (error: any) {
|
}
|
||||||
toast.error(error.message || '上传失败');
|
|
||||||
} finally {
|
setUploading(false);
|
||||||
setUploading(false);
|
loadDocuments();
|
||||||
|
|
||||||
|
if (successCount > 0) {
|
||||||
|
toast.success(`成功上传 ${successCount} 个文件`);
|
||||||
|
setUploadedFiles(prev => [...prev, ...successfulFiles]);
|
||||||
|
setUploadExpanded(true);
|
||||||
|
}
|
||||||
|
if (failCount > 0) {
|
||||||
|
toast.error(`${failCount} 个文件上传失败`);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -290,7 +338,7 @@ const Documents: React.FC = () => {
|
|||||||
'text/markdown': ['.md'],
|
'text/markdown': ['.md'],
|
||||||
'text/plain': ['.txt']
|
'text/plain': ['.txt']
|
||||||
},
|
},
|
||||||
maxFiles: 1
|
multiple: true
|
||||||
});
|
});
|
||||||
|
|
||||||
// AI 分析处理
|
// AI 分析处理
|
||||||
@@ -448,6 +496,7 @@ const Documents: React.FC = () => {
|
|||||||
|
|
||||||
const handleDeleteFile = () => {
|
const handleDeleteFile = () => {
|
||||||
setUploadedFile(null);
|
setUploadedFile(null);
|
||||||
|
setUploadedFiles([]);
|
||||||
setParseResult(null);
|
setParseResult(null);
|
||||||
setAiAnalysis(null);
|
setAiAnalysis(null);
|
||||||
setAnalysisCharts(null);
|
setAnalysisCharts(null);
|
||||||
@@ -455,6 +504,17 @@ const Documents: React.FC = () => {
|
|||||||
toast.success('文件已清除');
|
toast.success('文件已清除');
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Remove the file at `index` from the uploaded-files list; when the list
 * becomes empty, also clear the single `uploadedFile` selection.
 */
const handleRemoveUploadedFile = (index: number) => {
  // Compute the next list up front instead of calling setUploadedFile from
  // inside the setUploadedFiles updater: updater functions should be pure.
  const remaining = uploadedFiles.filter((_, i) => i !== index);
  setUploadedFiles(remaining);
  if (remaining.length === 0) {
    setUploadedFile(null);
  }
  toast.success('文件已从列表移除');
};
|
||||||
|
|
||||||
const handleDelete = async (docId: string) => {
|
const handleDelete = async (docId: string) => {
|
||||||
try {
|
try {
|
||||||
const result = await backendApi.deleteDocument(docId);
|
const result = await backendApi.deleteDocument(docId);
|
||||||
@@ -614,7 +674,7 @@ const Documents: React.FC = () => {
|
|||||||
<h1 className="text-3xl font-extrabold tracking-tight">文档中心</h1>
|
<h1 className="text-3xl font-extrabold tracking-tight">文档中心</h1>
|
||||||
<p className="text-muted-foreground">上传文档,自动解析并使用 AI 进行深度分析</p>
|
<p className="text-muted-foreground">上传文档,自动解析并使用 AI 进行深度分析</p>
|
||||||
</div>
|
</div>
|
||||||
<Button variant="outline" className="rounded-xl gap-2" onClick={loadDocuments}>
|
<Button variant="outline" className="rounded-xl gap-2" onClick={() => loadDocuments()}>
|
||||||
<RefreshCcw size={18} />
|
<RefreshCcw size={18} />
|
||||||
<span>刷新</span>
|
<span>刷新</span>
|
||||||
</Button>
|
</Button>
|
||||||
@@ -639,7 +699,82 @@ const Documents: React.FC = () => {
|
|||||||
</CardHeader>
|
</CardHeader>
|
||||||
{uploadPanelOpen && (
|
{uploadPanelOpen && (
|
||||||
<CardContent className="space-y-4">
|
<CardContent className="space-y-4">
|
||||||
{!uploadedFile ? (
|
{uploadedFiles.length > 0 || uploadedFile ? (
|
||||||
|
<div className="space-y-3">
|
||||||
|
{/* 文件列表头部 */}
|
||||||
|
<div
|
||||||
|
className="flex items-center justify-between p-3 bg-muted/50 rounded-xl cursor-pointer hover:bg-muted/70 transition-colors"
|
||||||
|
onClick={() => setUploadExpanded(!uploadExpanded)}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<div className="w-10 h-10 rounded-lg bg-primary/10 text-primary flex items-center justify-center">
|
||||||
|
<Upload size={20} />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p className="font-semibold text-sm">
|
||||||
|
已上传 {(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).length} 个文件
|
||||||
|
</p>
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
{uploadExpanded ? '点击收起' : '点击展开查看'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="sm"
|
||||||
|
onClick={(e) => {
|
||||||
|
e.stopPropagation();
|
||||||
|
handleDeleteFile();
|
||||||
|
}}
|
||||||
|
className="text-destructive hover:text-destructive"
|
||||||
|
>
|
||||||
|
<Trash2 size={14} className="mr-1" />
|
||||||
|
清空
|
||||||
|
</Button>
|
||||||
|
{uploadExpanded ? <ChevronUp size={16} /> : <ChevronDown size={16} />}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* 展开的文件列表 */}
|
||||||
|
{uploadExpanded && (
|
||||||
|
<div className="space-y-2 border rounded-xl p-3">
|
||||||
|
{(uploadedFiles.length > 0 ? uploadedFiles : [uploadedFile]).filter(Boolean).map((file, index) => (
|
||||||
|
<div key={index} className="flex items-center gap-3 p-2 bg-background rounded-lg">
|
||||||
|
<div className={cn(
|
||||||
|
"w-8 h-8 rounded flex items-center justify-center",
|
||||||
|
isExcelFile(file?.name || '') ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
||||||
|
)}>
|
||||||
|
{isExcelFile(file?.name || '') ? <FileSpreadsheet size={16} /> : <FileText size={16} />}
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="text-sm truncate">{file?.name}</p>
|
||||||
|
<p className="text-xs text-muted-foreground">{formatFileSize(file?.size || 0)}</p>
|
||||||
|
</div>
|
||||||
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="icon"
|
||||||
|
className="text-destructive hover:bg-destructive/10"
|
||||||
|
onClick={() => handleRemoveUploadedFile(index)}
|
||||||
|
>
|
||||||
|
<Trash2 size={14} />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{/* 继续添加按钮 */}
|
||||||
|
<div
|
||||||
|
{...getRootProps()}
|
||||||
|
className="flex items-center justify-center gap-2 p-3 border-2 border-dashed rounded-lg cursor-pointer hover:border-primary/50 hover:bg-primary/5 transition-colors"
|
||||||
|
>
|
||||||
|
<input {...getInputProps()} multiple={true} />
|
||||||
|
<Plus size={16} className="text-muted-foreground" />
|
||||||
|
<span className="text-sm text-muted-foreground">继续添加更多文件</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
<div
|
<div
|
||||||
{...getRootProps()}
|
{...getRootProps()}
|
||||||
className={cn(
|
className={cn(
|
||||||
@@ -648,7 +783,7 @@ const Documents: React.FC = () => {
|
|||||||
uploading && "opacity-50 pointer-events-none"
|
uploading && "opacity-50 pointer-events-none"
|
||||||
)}
|
)}
|
||||||
>
|
>
|
||||||
<input {...getInputProps()} />
|
<input {...getInputProps()} multiple={true} />
|
||||||
<div className="w-14 h-14 rounded-xl bg-primary/10 text-primary flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
|
<div className="w-14 h-14 rounded-xl bg-primary/10 text-primary flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
|
||||||
{uploading ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
|
{uploading ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
|
||||||
</div>
|
</div>
|
||||||
@@ -670,30 +805,6 @@ const Documents: React.FC = () => {
|
|||||||
</Badge>
|
</Badge>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
|
||||||
<div className="space-y-4">
|
|
||||||
<div className="flex items-center gap-3 p-3 bg-muted/30 rounded-xl">
|
|
||||||
<div className={cn(
|
|
||||||
"w-10 h-10 rounded-lg flex items-center justify-center",
|
|
||||||
isExcelFile(uploadedFile.name) ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
|
||||||
)}>
|
|
||||||
{isExcelFile(uploadedFile.name) ? <FileSpreadsheet size={20} /> : <FileText size={20} />}
|
|
||||||
</div>
|
|
||||||
<div className="flex-1 min-w-0">
|
|
||||||
<p className="font-semibold text-sm truncate">{uploadedFile.name}</p>
|
|
||||||
<p className="text-xs text-muted-foreground">{formatFileSize(uploadedFile.size)}</p>
|
|
||||||
</div>
|
|
||||||
<Button variant="ghost" size="icon" className="text-destructive hover:bg-destructive/10" onClick={handleDeleteFile}>
|
|
||||||
<Trash2 size={16} />
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{isExcelFile(uploadedFile.name) && (
|
|
||||||
<Button onClick={() => onDrop([uploadedFile])} className="w-full" disabled={uploading}>
|
|
||||||
{uploading ? '解析中...' : '重新解析'}
|
|
||||||
</Button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)}
|
)}
|
||||||
</CardContent>
|
</CardContent>
|
||||||
)}
|
)}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,603 +0,0 @@
|
|||||||
import React, { useState, useEffect } from 'react';
|
|
||||||
import {
|
|
||||||
TableProperties,
|
|
||||||
Plus,
|
|
||||||
FilePlus,
|
|
||||||
CheckCircle2,
|
|
||||||
Download,
|
|
||||||
Clock,
|
|
||||||
RefreshCcw,
|
|
||||||
Sparkles,
|
|
||||||
Zap,
|
|
||||||
FileCheck,
|
|
||||||
FileSpreadsheet,
|
|
||||||
Trash2,
|
|
||||||
ChevronDown,
|
|
||||||
ChevronUp,
|
|
||||||
BarChart3,
|
|
||||||
FileText,
|
|
||||||
TrendingUp,
|
|
||||||
Info,
|
|
||||||
AlertCircle,
|
|
||||||
Loader2
|
|
||||||
} from 'lucide-react';
|
|
||||||
import { Button } from '@/components/ui/button';
|
|
||||||
import { Card, CardContent, CardHeader, CardTitle, CardDescription, CardFooter } from '@/components/ui/card';
|
|
||||||
import { Badge } from '@/components/ui/badge';
|
|
||||||
import { useAuth } from '@/context/AuthContext';
|
|
||||||
import { templateApi, documentApi, taskApi } from '@/db/api';
|
|
||||||
import { backendApi, aiApi } from '@/db/backend-api';
|
|
||||||
import { supabase } from '@/db/supabase';
|
|
||||||
import { format } from 'date-fns';
|
|
||||||
import { toast } from 'sonner';
|
|
||||||
import { cn } from '@/lib/utils';
|
|
||||||
import { Skeleton } from '@/components/ui/skeleton';
|
|
||||||
import {
|
|
||||||
Dialog,
|
|
||||||
DialogContent,
|
|
||||||
DialogHeader,
|
|
||||||
DialogTitle,
|
|
||||||
DialogTrigger,
|
|
||||||
DialogFooter,
|
|
||||||
DialogDescription
|
|
||||||
} from '@/components/ui/dialog';
|
|
||||||
import { Checkbox } from '@/components/ui/checkbox';
|
|
||||||
import { ScrollArea } from '@/components/ui/scroll-area';
|
|
||||||
import { Input } from '@/components/ui/input';
|
|
||||||
import { Label } from '@/components/ui/label';
|
|
||||||
import { Textarea } from '@/components/ui/textarea';
|
|
||||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select';
|
|
||||||
import { useDropzone } from 'react-dropzone';
|
|
||||||
import { Markdown } from '@/components/ui/markdown';
|
|
||||||
|
|
||||||
type Template = any;
|
|
||||||
type Document = any;
|
|
||||||
type FillTask = any;
|
|
||||||
|
|
||||||
const FormFill: React.FC = () => {
|
|
||||||
const { profile } = useAuth();
|
|
||||||
const [templates, setTemplates] = useState<Template[]>([]);
|
|
||||||
const [documents, setDocuments] = useState<Document[]>([]);
|
|
||||||
const [tasks, setTasks] = useState<any[]>([]);
|
|
||||||
const [loading, setLoading] = useState(true);
|
|
||||||
|
|
||||||
// Selection state
|
|
||||||
const [selectedTemplate, setSelectedTemplate] = useState<string | null>(null);
|
|
||||||
const [selectedDocs, setSelectedDocs] = useState<string[]>([]);
|
|
||||||
const [creating, setCreating] = useState(false);
|
|
||||||
const [openTaskDialog, setOpenTaskDialog] = useState(false);
|
|
||||||
const [viewingTask, setViewingTask] = useState<any | null>(null);
|
|
||||||
|
|
||||||
// Excel upload state
|
|
||||||
const [excelFile, setExcelFile] = useState<File | null>(null);
|
|
||||||
const [excelParseResult, setExcelParseResult] = useState<any>(null);
|
|
||||||
const [excelAnalysis, setExcelAnalysis] = useState<any>(null);
|
|
||||||
const [excelAnalyzing, setExcelAnalyzing] = useState(false);
|
|
||||||
const [expandedSheet, setExpandedSheet] = useState<string | null>(null);
|
|
||||||
const [aiOptions, setAiOptions] = useState({
|
|
||||||
userPrompt: '请分析这些数据,并提取关键信息用于填表,包括数值、分类、摘要等。',
|
|
||||||
analysisType: 'general' as 'general' | 'summary' | 'statistics' | 'insights'
|
|
||||||
});
|
|
||||||
|
|
||||||
const loadData = async () => {
|
|
||||||
if (!profile) return;
|
|
||||||
try {
|
|
||||||
const [t, d, ts] = await Promise.all([
|
|
||||||
templateApi.listTemplates((profile as any).id),
|
|
||||||
documentApi.listDocuments((profile as any).id),
|
|
||||||
taskApi.listTasks((profile as any).id)
|
|
||||||
]);
|
|
||||||
setTemplates(t);
|
|
||||||
setDocuments(d);
|
|
||||||
setTasks(ts);
|
|
||||||
} catch (err: any) {
|
|
||||||
toast.error('数据加载失败');
|
|
||||||
} finally {
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
loadData();
|
|
||||||
}, [profile]);
|
|
||||||
|
|
||||||
// Excel upload handlers
|
|
||||||
const onExcelDrop = async (acceptedFiles: File[]) => {
|
|
||||||
const file = acceptedFiles[0];
|
|
||||||
if (!file) return;
|
|
||||||
|
|
||||||
if (!file.name.match(/\.(xlsx|xls)$/i)) {
|
|
||||||
toast.error('仅支持 .xlsx 和 .xls 格式的 Excel 文件');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
setExcelFile(file);
|
|
||||||
setExcelParseResult(null);
|
|
||||||
setExcelAnalysis(null);
|
|
||||||
setExpandedSheet(null);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const result = await backendApi.uploadExcel(file);
|
|
||||||
if (result.success) {
|
|
||||||
toast.success(`Excel 解析成功: ${file.name}`);
|
|
||||||
setExcelParseResult(result);
|
|
||||||
} else {
|
|
||||||
toast.error(result.error || '解析失败');
|
|
||||||
}
|
|
||||||
} catch (error: any) {
|
|
||||||
toast.error(error.message || '上传失败');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
|
||||||
onDrop: onExcelDrop,
|
|
||||||
accept: {
|
|
||||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
|
||||||
'application/vnd.ms-excel': ['.xls']
|
|
||||||
},
|
|
||||||
maxFiles: 1
|
|
||||||
});
|
|
||||||
|
|
||||||
const handleAnalyzeExcel = async () => {
|
|
||||||
if (!excelFile || !excelParseResult?.success) {
|
|
||||||
toast.error('请先上传并解析 Excel 文件');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
setExcelAnalyzing(true);
|
|
||||||
setExcelAnalysis(null);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const result = await aiApi.analyzeExcel(excelFile, {
|
|
||||||
userPrompt: aiOptions.userPrompt,
|
|
||||||
analysisType: aiOptions.analysisType
|
|
||||||
});
|
|
||||||
|
|
||||||
if (result.success) {
|
|
||||||
toast.success('AI 分析完成');
|
|
||||||
setExcelAnalysis(result);
|
|
||||||
} else {
|
|
||||||
toast.error(result.error || 'AI 分析失败');
|
|
||||||
}
|
|
||||||
} catch (error: any) {
|
|
||||||
toast.error(error.message || 'AI 分析失败');
|
|
||||||
} finally {
|
|
||||||
setExcelAnalyzing(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const handleUseExcelData = () => {
|
|
||||||
if (!excelParseResult?.success) {
|
|
||||||
toast.error('请先解析 Excel 文件');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 将 Excel 解析的数据标记为"文档",添加到选择列表
|
|
||||||
toast.success('Excel 数据已添加到数据源,请在任务对话框中选择');
|
|
||||||
// 这里可以添加逻辑来将 Excel 数据传递给后端创建任务
|
|
||||||
};
|
|
||||||
|
|
||||||
const handleDeleteExcel = () => {
|
|
||||||
setExcelFile(null);
|
|
||||||
setExcelParseResult(null);
|
|
||||||
setExcelAnalysis(null);
|
|
||||||
setExpandedSheet(null);
|
|
||||||
toast.success('Excel 文件已清除');
|
|
||||||
};
|
|
||||||
|
|
||||||
const handleUploadTemplate = async (e: React.ChangeEvent<HTMLInputElement>) => {
|
|
||||||
const file = e.target.files?.[0];
|
|
||||||
if (!file || !profile) return;
|
|
||||||
|
|
||||||
try {
|
|
||||||
toast.loading('正在上传模板...');
|
|
||||||
await templateApi.uploadTemplate(file, (profile as any).id);
|
|
||||||
toast.dismiss();
|
|
||||||
toast.success('模板上传成功');
|
|
||||||
loadData();
|
|
||||||
} catch (err) {
|
|
||||||
toast.dismiss();
|
|
||||||
toast.error('上传模板失败');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const handleCreateTask = async () => {
|
|
||||||
if (!profile || !selectedTemplate || selectedDocs.length === 0) {
|
|
||||||
toast.error('请先选择模板和数据源文档');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
setCreating(true);
|
|
||||||
try {
|
|
||||||
const task = await taskApi.createTask((profile as any).id, selectedTemplate, selectedDocs);
|
|
||||||
if (task) {
|
|
||||||
toast.success('任务已创建,正在进行智能填表...');
|
|
||||||
setOpenTaskDialog(false);
|
|
||||||
|
|
||||||
// Invoke edge function
|
|
||||||
supabase.functions.invoke('fill-template', {
|
|
||||||
body: { taskId: task.id }
|
|
||||||
}).then(({ error }) => {
|
|
||||||
if (error) toast.error('填表任务执行失败');
|
|
||||||
else {
|
|
||||||
toast.success('表格填写完成!');
|
|
||||||
loadData();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
loadData();
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
toast.error('创建任务失败');
|
|
||||||
} finally {
|
|
||||||
setCreating(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const getStatusColor = (status: string) => {
|
|
||||||
switch (status) {
|
|
||||||
case 'completed': return 'bg-emerald-500 text-white';
|
|
||||||
case 'failed': return 'bg-destructive text-white';
|
|
||||||
default: return 'bg-amber-500 text-white';
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const formatFileSize = (bytes: number): string => {
|
|
||||||
if (bytes === 0) return '0 B';
|
|
||||||
const k = 1024;
|
|
||||||
const sizes = ['B', 'KB', 'MB', 'GB'];
|
|
||||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
|
||||||
return `${(bytes / Math.pow(k, i)).toFixed(2)} ${sizes[i]}`;
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="space-y-8 animate-fade-in pb-10">
|
|
||||||
<section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
|
|
||||||
<div className="space-y-1">
|
|
||||||
<h1 className="text-3xl font-extrabold tracking-tight">智能填表</h1>
|
|
||||||
<p className="text-muted-foreground">根据您的表格模板,自动聚合多源文档信息进行精准填充,告别重复劳动。</p>
|
|
||||||
</div>
|
|
||||||
<div className="flex items-center gap-3">
|
|
||||||
<Dialog open={openTaskDialog} onOpenChange={setOpenTaskDialog}>
|
|
||||||
<DialogTrigger asChild>
|
|
||||||
<Button className="rounded-xl shadow-lg shadow-primary/20 gap-2 h-11 px-6">
|
|
||||||
<FilePlus size={18} />
|
|
||||||
<span>新建填表任务</span>
|
|
||||||
</Button>
|
|
||||||
</DialogTrigger>
|
|
||||||
<DialogContent className="max-w-4xl max-h-[90vh] flex flex-col p-0 overflow-hidden border-none shadow-2xl rounded-3xl">
|
|
||||||
<DialogHeader className="p-8 pb-4 bg-muted/50">
|
|
||||||
<DialogTitle className="text-2xl font-bold flex items-center gap-2">
|
|
||||||
<Sparkles size={24} className="text-primary" />
|
|
||||||
开启智能填表之旅
|
|
||||||
</DialogTitle>
|
|
||||||
<DialogDescription>
|
|
||||||
选择一个表格模板及若干个数据源文档,AI 将自动为您分析并填写。
|
|
||||||
</DialogDescription>
|
|
||||||
</DialogHeader>
|
|
||||||
|
|
||||||
<ScrollArea className="flex-1 p-8 pt-4">
|
|
||||||
<div className="space-y-8">
|
|
||||||
{/* Step 1: Select Template */}
|
|
||||||
<div className="space-y-4">
|
|
||||||
<div className="flex items-center justify-between">
|
|
||||||
<h4 className="font-bold flex items-center gap-2 text-primary uppercase tracking-widest text-xs">
|
|
||||||
<span className="w-5 h-5 rounded-full bg-primary text-white flex items-center justify-center text-[10px]">1</span>
|
|
||||||
选择表格模板
|
|
||||||
</h4>
|
|
||||||
<label className="cursor-pointer text-xs font-semibold text-primary hover:underline flex items-center gap-1">
|
|
||||||
<Plus size={12} /> 上传新模板
|
|
||||||
<input type="file" className="hidden" onChange={handleUploadTemplate} accept=".docx,.xlsx" />
|
|
||||||
</label>
|
|
||||||
</div>
|
|
||||||
{templates.length > 0 ? (
|
|
||||||
<div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
|
|
||||||
{templates.map(t => (
|
|
||||||
<div
|
|
||||||
key={t.id}
|
|
||||||
className={cn(
|
|
||||||
"p-4 rounded-2xl border-2 transition-all cursor-pointer flex items-center gap-3 group relative overflow-hidden",
|
|
||||||
selectedTemplate === t.id ? "border-primary bg-primary/5" : "border-border hover:border-primary/50"
|
|
||||||
)}
|
|
||||||
onClick={() => setSelectedTemplate(t.id)}
|
|
||||||
>
|
|
||||||
<div className={cn(
|
|
||||||
"w-10 h-10 rounded-xl flex items-center justify-center shrink-0 transition-colors",
|
|
||||||
selectedTemplate === t.id ? "bg-primary text-white" : "bg-muted text-muted-foreground"
|
|
||||||
)}>
|
|
||||||
<TableProperties size={20} />
|
|
||||||
</div>
|
|
||||||
<div className="flex-1 min-w-0">
|
|
||||||
<p className="font-bold text-sm truncate">{t.name}</p>
|
|
||||||
<p className="text-[10px] text-muted-foreground uppercase">{t.type}</p>
|
|
||||||
</div>
|
|
||||||
{selectedTemplate === t.id && (
|
|
||||||
<div className="absolute top-0 right-0 w-8 h-8 bg-primary text-white flex items-center justify-center rounded-bl-xl">
|
|
||||||
<CheckCircle2 size={14} />
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="p-8 text-center bg-muted/30 rounded-2xl border border-dashed text-sm italic text-muted-foreground">
|
|
||||||
暂无模板,请先点击右上角上传。
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Step 2: Upload & Analyze Excel */}
|
|
||||||
<div className="space-y-4">
|
|
||||||
<h4 className="font-bold flex items-center gap-2 text-primary uppercase tracking-widest text-xs">
|
|
||||||
<span className="w-5 h-5 rounded-full bg-primary text-white flex items-center justify-center text-[10px]">1.5</span>
|
|
||||||
Excel 数据源
|
|
||||||
</h4>
|
|
||||||
<div className="bg-muted/20 rounded-2xl p-6">
|
|
||||||
{!excelFile ? (
|
|
||||||
<div
|
|
||||||
{...getRootProps()}
|
|
||||||
className={cn(
|
|
||||||
"border-2 border-dashed rounded-xl p-8 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group",
|
|
||||||
isDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-muted/30"
|
|
||||||
)}
|
|
||||||
>
|
|
||||||
<input {...getInputProps()} />
|
|
||||||
<div className="w-12 h-12 rounded-xl bg-primary/10 text-primary flex items-center justify-center mb-3 group-hover:scale-110 transition-transform">
|
|
||||||
<FileSpreadsheet size={24} />
|
|
||||||
</div>
|
|
||||||
<p className="font-semibold text-sm">
|
|
||||||
{isDragActive ? '释放以开始上传' : '点击或拖拽 Excel 文件'}
|
|
||||||
</p>
|
|
||||||
<p className="text-xs text-muted-foreground mt-1">支持 .xlsx 和 .xls 格式</p>
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="space-y-4">
|
|
||||||
<div className="flex items-center gap-3 p-3 bg-background rounded-xl">
|
|
||||||
<div className="w-10 h-10 rounded-lg bg-emerald-500/10 text-emerald-500 flex items-center justify-center">
|
|
||||||
<FileSpreadsheet size={20} />
|
|
||||||
</div>
|
|
||||||
<div className="flex-1 min-w-0">
|
|
||||||
<p className="font-semibold text-sm truncate">{excelFile.name}</p>
|
|
||||||
<p className="text-xs text-muted-foreground">{formatFileSize(excelFile.size)}</p>
|
|
||||||
</div>
|
|
||||||
<div className="flex gap-2">
|
|
||||||
<Button
|
|
||||||
variant="ghost"
|
|
||||||
size="icon"
|
|
||||||
className="text-destructive hover:bg-destructive/10"
|
|
||||||
onClick={handleDeleteExcel}
|
|
||||||
>
|
|
||||||
<Trash2 size={16} />
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* AI Analysis Options */}
|
|
||||||
{excelParseResult?.success && (
|
|
||||||
<div className="space-y-3">
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label htmlFor="analysis-type" className="text-xs">分析类型</Label>
|
|
||||||
<Select
|
|
||||||
value={aiOptions.analysisType}
|
|
||||||
onValueChange={(value: any) => setAiOptions({ ...aiOptions, analysisType: value })}
|
|
||||||
>
|
|
||||||
<SelectTrigger id="analysis-type" className="bg-background h-9 text-sm">
|
|
||||||
<SelectValue placeholder="选择分析类型" />
|
|
||||||
</SelectTrigger>
|
|
||||||
<SelectContent>
|
|
||||||
<SelectItem value="general">综合分析</SelectItem>
|
|
||||||
<SelectItem value="summary">数据摘要</SelectItem>
|
|
||||||
<SelectItem value="statistics">统计分析</SelectItem>
|
|
||||||
<SelectItem value="insights">深度洞察</SelectItem>
|
|
||||||
</SelectContent>
|
|
||||||
</Select>
|
|
||||||
</div>
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label htmlFor="user-prompt" className="text-xs">自定义提示词</Label>
|
|
||||||
<Textarea
|
|
||||||
id="user-prompt"
|
|
||||||
value={aiOptions.userPrompt}
|
|
||||||
onChange={(e) => setAiOptions({ ...aiOptions, userPrompt: e.target.value })}
|
|
||||||
className="bg-background resize-none text-sm"
|
|
||||||
rows={2}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
<Button
|
|
||||||
onClick={handleAnalyzeExcel}
|
|
||||||
disabled={excelAnalyzing}
|
|
||||||
className="w-full gap-2 h-9"
|
|
||||||
variant="outline"
|
|
||||||
>
|
|
||||||
{excelAnalyzing ? <Loader2 className="animate-spin" size={14} /> : <Sparkles size={14} />}
|
|
||||||
{excelAnalyzing ? '分析中...' : 'AI 分析'}
|
|
||||||
</Button>
|
|
||||||
{excelParseResult?.success && (
|
|
||||||
<Button
|
|
||||||
onClick={handleUseExcelData}
|
|
||||||
className="w-full gap-2 h-9"
|
|
||||||
>
|
|
||||||
<CheckCircle2 size={14} />
|
|
||||||
使用此数据源
|
|
||||||
</Button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{/* Excel Analysis Result */}
|
|
||||||
{excelAnalysis && (
|
|
||||||
<div className="mt-4 p-4 bg-background rounded-xl max-h-60 overflow-y-auto">
|
|
||||||
<div className="flex items-center gap-2 mb-3">
|
|
||||||
<Sparkles size={16} className="text-primary" />
|
|
||||||
<span className="font-semibold text-sm">AI 分析结果</span>
|
|
||||||
</div>
|
|
||||||
<Markdown content={excelAnalysis.analysis?.analysis || ''} className="text-sm" />
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Step 3: Select Documents */}
|
|
||||||
<div className="space-y-4">
|
|
||||||
<h4 className="font-bold flex items-center gap-2 text-primary uppercase tracking-widest text-xs">
|
|
||||||
<span className="w-5 h-5 rounded-full bg-primary text-white flex items-center justify-center text-[10px]">2</span>
|
|
||||||
选择其他数据源文档
|
|
||||||
</h4>
|
|
||||||
{documents.filter(d => d.status === 'completed').length > 0 ? (
|
|
||||||
<div className="space-y-2 max-h-40 overflow-y-auto pr-2 custom-scrollbar">
|
|
||||||
{documents.filter(d => d.status === 'completed').map(doc => (
|
|
||||||
<div
|
|
||||||
key={doc.id}
|
|
||||||
className={cn(
|
|
||||||
"flex items-center gap-3 p-3 rounded-xl border transition-all cursor-pointer",
|
|
||||||
selectedDocs.includes(doc.id) ? "border-primary/50 bg-primary/5 shadow-sm" : "border-border hover:bg-muted/30"
|
|
||||||
)}
|
|
||||||
onClick={() => {
|
|
||||||
setSelectedDocs(prev =>
|
|
||||||
prev.includes(doc.id) ? prev.filter(id => id !== doc.id) : [...prev, doc.id]
|
|
||||||
);
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<Checkbox checked={selectedDocs.includes(doc.id)} onCheckedChange={() => {}} />
|
|
||||||
<div className="w-8 h-8 rounded-lg bg-blue-500/10 text-blue-500 flex items-center justify-center">
|
|
||||||
<Zap size={16} />
|
|
||||||
</div>
|
|
||||||
<span className="font-semibold text-sm truncate">{doc.name}</span>
|
|
||||||
</div>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="p-6 text-center bg-muted/30 rounded-xl border border-dashed text-xs italic text-muted-foreground">
|
|
||||||
暂无其他已解析的文档
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</ScrollArea>
|
|
||||||
|
|
||||||
<DialogFooter className="p-8 pt-4 bg-muted/20 border-t border-dashed">
|
|
||||||
<Button variant="outline" className="rounded-xl h-12 px-6" onClick={() => setOpenTaskDialog(false)}>取消</Button>
|
|
||||||
<Button
|
|
||||||
className="rounded-xl h-12 px-8 shadow-lg shadow-primary/20 gap-2"
|
|
||||||
onClick={handleCreateTask}
|
|
||||||
disabled={creating || !selectedTemplate || (selectedDocs.length === 0 && !excelParseResult?.success)}
|
|
||||||
>
|
|
||||||
{creating ? <RefreshCcw className="animate-spin h-5 w-5" /> : <Zap className="h-5 w-5 fill-current" />}
|
|
||||||
<span>启动智能填表引擎</span>
|
|
||||||
</Button>
|
|
||||||
</DialogFooter>
|
|
||||||
</DialogContent>
|
|
||||||
</Dialog>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
{/* Task List */}
|
|
||||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
|
|
||||||
{loading ? (
|
|
||||||
Array.from({ length: 3 }).map((_, i) => (
|
|
||||||
<Skeleton key={i} className="h-48 w-full rounded-3xl bg-muted" />
|
|
||||||
))
|
|
||||||
) : tasks.length > 0 ? (
|
|
||||||
tasks.map((task) => (
|
|
||||||
<Card key={task.id} className="border-none shadow-md hover:shadow-xl transition-all group rounded-3xl overflow-hidden flex flex-col">
|
|
||||||
<div className="h-1.5 w-full" style={{ backgroundColor: task.status === 'completed' ? '#10b981' : task.status === 'failed' ? '#ef4444' : '#f59e0b' }} />
|
|
||||||
<CardHeader className="p-6 pb-2">
|
|
||||||
<div className="flex justify-between items-start mb-2">
|
|
||||||
<div className="w-12 h-12 rounded-2xl bg-emerald-500/10 text-emerald-500 flex items-center justify-center shadow-inner group-hover:scale-110 transition-transform">
|
|
||||||
<TableProperties size={24} />
|
|
||||||
</div>
|
|
||||||
<Badge className={cn("text-[10px] uppercase font-bold tracking-widest", getStatusColor(task.status))}>
|
|
||||||
{task.status === 'completed' ? '已完成' : task.status === 'failed' ? '失败' : '执行中'}
|
|
||||||
</Badge>
|
|
||||||
</div>
|
|
||||||
<CardTitle className="text-lg font-bold truncate group-hover:text-primary transition-colors">{task.templates?.name || '未知模板'}</CardTitle>
|
|
||||||
<CardDescription className="text-xs flex items-center gap-1 font-medium italic">
|
|
||||||
<Clock size={12} /> {format(new Date(task.created_at!), 'yyyy/MM/dd HH:mm')}
|
|
||||||
</CardDescription>
|
|
||||||
</CardHeader>
|
|
||||||
<CardContent className="p-6 pt-2 flex-1">
|
|
||||||
<div className="space-y-4">
|
|
||||||
<div className="flex flex-wrap gap-2">
|
|
||||||
<Badge variant="outline" className="bg-muted/50 border-none text-[10px] font-bold">关联 {task.document_ids?.length} 份数据源</Badge>
|
|
||||||
</div>
|
|
||||||
{task.status === 'completed' && (
|
|
||||||
<div className="p-3 bg-emerald-500/5 rounded-2xl border border-emerald-500/10 flex items-center gap-3">
|
|
||||||
<CheckCircle2 className="text-emerald-500" size={18} />
|
|
||||||
<span className="text-xs font-semibold text-emerald-700">内容已精准聚合,表格生成完毕</span>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</CardContent>
|
|
||||||
<CardFooter className="p-6 pt-0">
|
|
||||||
<Button
|
|
||||||
className="w-full rounded-2xl h-11 bg-primary group-hover:shadow-lg group-hover:shadow-primary/30 transition-all gap-2"
|
|
||||||
disabled={task.status !== 'completed'}
|
|
||||||
onClick={() => setViewingTask(task)}
|
|
||||||
>
|
|
||||||
<Download size={18} />
|
|
||||||
<span>下载汇总表格</span>
|
|
||||||
</Button>
|
|
||||||
</CardFooter>
|
|
||||||
</Card>
|
|
||||||
))
|
|
||||||
) : (
|
|
||||||
<div className="col-span-full py-24 flex flex-col items-center justify-center text-center space-y-6">
|
|
||||||
<div className="w-24 h-24 rounded-full bg-muted flex items-center justify-center text-muted-foreground/30 border-4 border-dashed">
|
|
||||||
<TableProperties size={48} />
|
|
||||||
</div>
|
|
||||||
<div className="space-y-2 max-w-sm">
|
|
||||||
<p className="text-2xl font-extrabold tracking-tight">暂无生成任务</p>
|
|
||||||
<p className="text-muted-foreground text-sm">上传模板后,您可以将多个文档的数据自动填充到汇总表格中。</p>
|
|
||||||
</div>
|
|
||||||
<Button className="rounded-xl h-12 px-8" onClick={() => setOpenTaskDialog(true)}>立即创建首个任务</Button>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Task Result View Modal */}
|
|
||||||
<Dialog open={!!viewingTask} onOpenChange={(open) => !open && setViewingTask(null)}>
|
|
||||||
<DialogContent className="max-w-4xl max-h-[90vh] flex flex-col p-0 overflow-hidden border-none shadow-2xl rounded-3xl">
|
|
||||||
<DialogHeader className="p-8 pb-4 bg-primary text-primary-foreground">
|
|
||||||
<div className="flex items-center gap-3 mb-2">
|
|
||||||
<FileCheck size={28} />
|
|
||||||
<DialogTitle className="text-2xl font-extrabold">表格生成结果预览</DialogTitle>
|
|
||||||
</div>
|
|
||||||
<DialogDescription className="text-primary-foreground/80 italic">
|
|
||||||
系统已根据 {viewingTask?.document_ids?.length} 份文档信息自动填充完毕。
|
|
||||||
</DialogDescription>
|
|
||||||
</DialogHeader>
|
|
||||||
<ScrollArea className="flex-1 p-8 bg-muted/10">
|
|
||||||
<div className="prose dark:prose-invert max-w-none">
|
|
||||||
<div className="bg-card p-8 rounded-2xl shadow-sm border min-h-[400px]">
|
|
||||||
<Badge variant="outline" className="mb-4">数据已脱敏</Badge>
|
|
||||||
<div className="whitespace-pre-wrap font-sans text-sm leading-relaxed">
|
|
||||||
<h2 className="text-xl font-bold mb-4">汇总结果报告</h2>
|
|
||||||
<p className="text-muted-foreground mb-6">以下是根据您上传的多个文档提取并生成的汇总信息:</p>
|
|
||||||
|
|
||||||
<div className="p-4 bg-muted/30 rounded-xl border border-dashed border-primary/20 italic">
|
|
||||||
正在从云端安全下载解析结果并渲染渲染视图...
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="mt-8 space-y-4">
|
|
||||||
<p className="font-semibold text-primary">✓ 核心实体已对齐</p>
|
|
||||||
<p className="font-semibold text-primary">✓ 逻辑勾稽关系校验通过</p>
|
|
||||||
<p className="font-semibold text-primary">✓ 格式符合模板规范</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</ScrollArea>
|
|
||||||
<DialogFooter className="p-8 pt-4 border-t border-dashed">
|
|
||||||
<Button variant="outline" className="rounded-xl" onClick={() => setViewingTask(null)}>关闭</Button>
|
|
||||||
<Button className="rounded-xl px-8 gap-2 shadow-lg shadow-primary/20" onClick={() => toast.success("正在导出文件...")}>
|
|
||||||
<Download size={18} />
|
|
||||||
导出为 {viewingTask?.templates?.type?.toUpperCase() || '文件'}
|
|
||||||
</Button>
|
|
||||||
</DialogFooter>
|
|
||||||
</DialogContent>
|
|
||||||
</Dialog>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
export default FormFill;
|
|
||||||
@@ -1,184 +0,0 @@
|
|||||||
import React, { useState } from 'react';
|
|
||||||
import { useNavigate, useLocation } from 'react-router-dom';
|
|
||||||
import { useAuth } from '@/context/AuthContext';
|
|
||||||
import { Button } from '@/components/ui/button';
|
|
||||||
import { Input } from '@/components/ui/input';
|
|
||||||
import { Label } from '@/components/ui/label';
|
|
||||||
import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from '@/components/ui/card';
|
|
||||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
|
|
||||||
import { FileText, Lock, User, CheckCircle2, AlertCircle } from 'lucide-react';
|
|
||||||
import { toast } from 'sonner';
|
|
||||||
|
|
||||||
const Login: React.FC = () => {
|
|
||||||
const [username, setUsername] = useState('');
|
|
||||||
const [password, setPassword] = useState('');
|
|
||||||
const [loading, setLoading] = useState(false);
|
|
||||||
const { signIn, signUp } = useAuth();
|
|
||||||
const navigate = useNavigate();
|
|
||||||
const location = useLocation();
|
|
||||||
|
|
||||||
const handleLogin = async (e: React.FormEvent) => {
|
|
||||||
e.preventDefault();
|
|
||||||
if (!username || !password) return toast.error('请输入用户名和密码');
|
|
||||||
|
|
||||||
setLoading(true);
|
|
||||||
try {
|
|
||||||
const email = `${username}@miaoda.com`;
|
|
||||||
const { error } = await signIn(email, password);
|
|
||||||
if (error) throw error;
|
|
||||||
toast.success('登录成功');
|
|
||||||
navigate('/');
|
|
||||||
} catch (err: any) {
|
|
||||||
toast.error(err.message || '登录失败');
|
|
||||||
} finally {
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const handleSignUp = async (e: React.FormEvent) => {
|
|
||||||
e.preventDefault();
|
|
||||||
if (!username || !password) return toast.error('请输入用户名和密码');
|
|
||||||
|
|
||||||
setLoading(true);
|
|
||||||
try {
|
|
||||||
const email = `${username}@miaoda.com`;
|
|
||||||
const { error } = await signUp(email, password);
|
|
||||||
if (error) throw error;
|
|
||||||
toast.success('注册成功,请登录');
|
|
||||||
} catch (err: any) {
|
|
||||||
toast.error(err.message || '注册失败');
|
|
||||||
} finally {
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="min-h-screen flex items-center justify-center bg-[radial-gradient(ellipse_at_top_left,_var(--tw-gradient-stops))] from-primary/10 via-background to-background p-4 relative overflow-hidden">
|
|
||||||
{/* Decorative elements */}
|
|
||||||
<div className="absolute top-0 left-0 w-96 h-96 bg-primary/5 rounded-full blur-3xl -translate-x-1/2 -translate-y-1/2" />
|
|
||||||
<div className="absolute bottom-0 right-0 w-64 h-64 bg-primary/5 rounded-full blur-3xl translate-x-1/3 translate-y-1/3" />
|
|
||||||
|
|
||||||
<div className="w-full max-w-md space-y-8 relative animate-fade-in">
|
|
||||||
<div className="text-center space-y-2">
|
|
||||||
<div className="inline-flex items-center justify-center w-16 h-16 rounded-2xl bg-primary text-primary-foreground shadow-2xl shadow-primary/30 mb-4 animate-slide-in">
|
|
||||||
<FileText size={32} />
|
|
||||||
</div>
|
|
||||||
<h1 className="text-4xl font-extrabold tracking-tight gradient-text">智联文档</h1>
|
|
||||||
<p className="text-muted-foreground">多源数据融合与智能文档处理系统</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<Card className="border-border/50 shadow-2xl backdrop-blur-sm bg-card/95">
|
|
||||||
<Tabs defaultValue="login" className="w-full">
|
|
||||||
<TabsList className="grid w-full grid-cols-2 rounded-t-xl h-12 bg-muted/50 p-1">
|
|
||||||
<TabsTrigger value="login" className="rounded-lg data-[state=active]:bg-background data-[state=active]:shadow-sm">登录</TabsTrigger>
|
|
||||||
<TabsTrigger value="signup" className="rounded-lg data-[state=active]:bg-background data-[state=active]:shadow-sm">注册</TabsTrigger>
|
|
||||||
</TabsList>
|
|
||||||
|
|
||||||
<TabsContent value="login">
|
|
||||||
<form onSubmit={handleLogin}>
|
|
||||||
<CardHeader>
|
|
||||||
<CardTitle>欢迎回来</CardTitle>
|
|
||||||
<CardDescription>使用您的账号登录智联文档系统</CardDescription>
|
|
||||||
</CardHeader>
|
|
||||||
<CardContent className="space-y-4">
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label htmlFor="username">用户名</Label>
|
|
||||||
<div className="relative">
|
|
||||||
<User className="absolute left-3 top-2.5 h-4 w-4 text-muted-foreground" />
|
|
||||||
<Input
|
|
||||||
id="username"
|
|
||||||
placeholder="请输入用户名"
|
|
||||||
className="pl-9 bg-muted/30 border-none focus-visible:ring-primary"
|
|
||||||
value={username}
|
|
||||||
onChange={(e) => setUsername(e.target.value)}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label htmlFor="password">密码</Label>
|
|
||||||
<div className="relative">
|
|
||||||
<Lock className="absolute left-3 top-2.5 h-4 w-4 text-muted-foreground" />
|
|
||||||
<Input
|
|
||||||
id="password"
|
|
||||||
type="password"
|
|
||||||
placeholder="请输入密码"
|
|
||||||
className="pl-9 bg-muted/30 border-none focus-visible:ring-primary"
|
|
||||||
value={password}
|
|
||||||
onChange={(e) => setPassword(e.target.value)}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</CardContent>
|
|
||||||
<CardFooter>
|
|
||||||
<Button className="w-full h-11 text-lg font-semibold rounded-xl" type="submit" disabled={loading}>
|
|
||||||
{loading ? '登录中...' : '立即登录'}
|
|
||||||
</Button>
|
|
||||||
</CardFooter>
|
|
||||||
</form>
|
|
||||||
</TabsContent>
|
|
||||||
|
|
||||||
<TabsContent value="signup">
|
|
||||||
<form onSubmit={handleSignUp}>
|
|
||||||
<CardHeader>
|
|
||||||
<CardTitle>创建账号</CardTitle>
|
|
||||||
<CardDescription>开启智能文档处理的新体验</CardDescription>
|
|
||||||
</CardHeader>
|
|
||||||
<CardContent className="space-y-4">
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label htmlFor="signup-username">用户名</Label>
|
|
||||||
<div className="relative">
|
|
||||||
<User className="absolute left-3 top-2.5 h-4 w-4 text-muted-foreground" />
|
|
||||||
<Input
|
|
||||||
id="signup-username"
|
|
||||||
placeholder="仅字母、数字和下划线"
|
|
||||||
className="pl-9 bg-muted/30 border-none focus-visible:ring-primary"
|
|
||||||
value={username}
|
|
||||||
onChange={(e) => setUsername(e.target.value)}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label htmlFor="signup-password">密码</Label>
|
|
||||||
<div className="relative">
|
|
||||||
<Lock className="absolute left-3 top-2.5 h-4 w-4 text-muted-foreground" />
|
|
||||||
<Input
|
|
||||||
id="signup-password"
|
|
||||||
type="password"
|
|
||||||
placeholder="不少于 6 位"
|
|
||||||
className="pl-9 bg-muted/30 border-none focus-visible:ring-primary"
|
|
||||||
value={password}
|
|
||||||
onChange={(e) => setPassword(e.target.value)}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</CardContent>
|
|
||||||
<CardFooter>
|
|
||||||
<Button className="w-full h-11 text-lg font-semibold rounded-xl" type="submit" disabled={loading}>
|
|
||||||
{loading ? '注册中...' : '注册账号'}
|
|
||||||
</Button>
|
|
||||||
</CardFooter>
|
|
||||||
</form>
|
|
||||||
</TabsContent>
|
|
||||||
</Tabs>
|
|
||||||
</Card>
|
|
||||||
|
|
||||||
<div className="grid grid-cols-2 gap-4 text-center text-xs text-muted-foreground">
|
|
||||||
<div className="flex flex-col items-center gap-1">
|
|
||||||
<CheckCircle2 size={16} className="text-primary" />
|
|
||||||
<span>智能解析</span>
|
|
||||||
</div>
|
|
||||||
<div className="flex flex-col items-center gap-1">
|
|
||||||
<CheckCircle2 size={16} className="text-primary" />
|
|
||||||
<span>极速填表</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="text-center text-sm text-muted-foreground">
|
|
||||||
© 2026 智联文档 | 多源数据融合系统
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
export default Login;
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
/**
|
|
||||||
* Sample Page
|
|
||||||
*/
|
|
||||||
|
|
||||||
import PageMeta from "../components/common/PageMeta";
|
|
||||||
|
|
||||||
export default function SamplePage() {
|
|
||||||
return (
|
|
||||||
<>
|
|
||||||
<PageMeta title="Home" description="Home Page Introduction" />
|
|
||||||
<div>
|
|
||||||
<h3>This is a sample page</h3>
|
|
||||||
</div>
|
|
||||||
</>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
@@ -11,7 +11,8 @@ import {
|
|||||||
ChevronDown,
|
ChevronDown,
|
||||||
ChevronUp,
|
ChevronUp,
|
||||||
Trash2,
|
Trash2,
|
||||||
AlertCircle
|
AlertCircle,
|
||||||
|
HelpCircle
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
|
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
@@ -24,9 +25,9 @@ import { Skeleton } from '@/components/ui/skeleton';
|
|||||||
|
|
||||||
type Task = {
|
type Task = {
|
||||||
task_id: string;
|
task_id: string;
|
||||||
status: 'pending' | 'processing' | 'success' | 'failure';
|
status: 'pending' | 'processing' | 'success' | 'failure' | 'unknown';
|
||||||
created_at: string;
|
created_at: string;
|
||||||
completed_at?: string;
|
updated_at?: string;
|
||||||
message?: string;
|
message?: string;
|
||||||
result?: any;
|
result?: any;
|
||||||
error?: string;
|
error?: string;
|
||||||
@@ -38,54 +39,38 @@ const TaskHistory: React.FC = () => {
|
|||||||
const [loading, setLoading] = useState(true);
|
const [loading, setLoading] = useState(true);
|
||||||
const [expandedTask, setExpandedTask] = useState<string | null>(null);
|
const [expandedTask, setExpandedTask] = useState<string | null>(null);
|
||||||
|
|
||||||
// Mock data for demonstration
|
// 获取任务历史数据
|
||||||
useEffect(() => {
|
const fetchTasks = async () => {
|
||||||
// 模拟任务数据,实际应该从后端获取
|
try {
|
||||||
setTasks([
|
setLoading(true);
|
||||||
{
|
const response = await backendApi.getTasks(50, 0);
|
||||||
task_id: 'task-001',
|
if (response.success && response.tasks) {
|
||||||
status: 'success',
|
// 转换后端数据格式为前端格式
|
||||||
created_at: new Date(Date.now() - 3600000).toISOString(),
|
const convertedTasks: Task[] = response.tasks.map((t: any) => ({
|
||||||
completed_at: new Date(Date.now() - 3500000).toISOString(),
|
task_id: t.task_id,
|
||||||
task_type: 'document_parse',
|
status: t.status || 'unknown',
|
||||||
message: '文档解析完成',
|
created_at: t.created_at || new Date().toISOString(),
|
||||||
result: {
|
updated_at: t.updated_at,
|
||||||
doc_id: 'doc-001',
|
message: t.message || '',
|
||||||
filename: 'report_q1_2026.docx',
|
result: t.result,
|
||||||
extracted_fields: ['标题', '作者', '日期', '金额']
|
error: t.error,
|
||||||
}
|
task_type: t.task_type || 'document_parse'
|
||||||
},
|
}));
|
||||||
{
|
setTasks(convertedTasks);
|
||||||
task_id: 'task-002',
|
} else {
|
||||||
status: 'success',
|
setTasks([]);
|
||||||
created_at: new Date(Date.now() - 7200000).toISOString(),
|
|
||||||
completed_at: new Date(Date.now() - 7100000).toISOString(),
|
|
||||||
task_type: 'excel_analysis',
|
|
||||||
message: 'Excel 分析完成',
|
|
||||||
result: {
|
|
||||||
filename: 'sales_data.xlsx',
|
|
||||||
row_count: 1250,
|
|
||||||
charts_generated: 3
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
task_id: 'task-003',
|
|
||||||
status: 'processing',
|
|
||||||
created_at: new Date(Date.now() - 600000).toISOString(),
|
|
||||||
task_type: 'template_fill',
|
|
||||||
message: '正在填充表格...'
|
|
||||||
},
|
|
||||||
{
|
|
||||||
task_id: 'task-004',
|
|
||||||
status: 'failure',
|
|
||||||
created_at: new Date(Date.now() - 86400000).toISOString(),
|
|
||||||
completed_at: new Date(Date.now() - 86390000).toISOString(),
|
|
||||||
task_type: 'document_parse',
|
|
||||||
message: '解析失败',
|
|
||||||
error: '文件格式不支持或文件已损坏'
|
|
||||||
}
|
}
|
||||||
]);
|
} catch (error) {
|
||||||
setLoading(false);
|
console.error('获取任务列表失败:', error);
|
||||||
|
toast.error('获取任务列表失败');
|
||||||
|
setTasks([]);
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchTasks();
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const getStatusBadge = (status: string) => {
|
const getStatusBadge = (status: string) => {
|
||||||
@@ -96,6 +81,8 @@ const TaskHistory: React.FC = () => {
|
|||||||
return <Badge className="bg-destructive text-white text-[10px]"><XCircle size={12} className="mr-1" />失败</Badge>;
|
return <Badge className="bg-destructive text-white text-[10px]"><XCircle size={12} className="mr-1" />失败</Badge>;
|
||||||
case 'processing':
|
case 'processing':
|
||||||
return <Badge className="bg-amber-500 text-white text-[10px]"><Loader2 size={12} className="mr-1 animate-spin" />处理中</Badge>;
|
return <Badge className="bg-amber-500 text-white text-[10px]"><Loader2 size={12} className="mr-1 animate-spin" />处理中</Badge>;
|
||||||
|
case 'unknown':
|
||||||
|
return <Badge className="bg-gray-500 text-white text-[10px]"><HelpCircle size={12} className="mr-1" />未知</Badge>;
|
||||||
default:
|
default:
|
||||||
return <Badge className="bg-gray-500 text-white text-[10px]"><Clock size={12} className="mr-1" />等待</Badge>;
|
return <Badge className="bg-gray-500 text-white text-[10px]"><Clock size={12} className="mr-1" />等待</Badge>;
|
||||||
}
|
}
|
||||||
@@ -133,15 +120,22 @@ const TaskHistory: React.FC = () => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const handleDelete = async (taskId: string) => {
|
const handleDelete = async (taskId: string) => {
|
||||||
setTasks(prev => prev.filter(t => t.task_id !== taskId));
|
try {
|
||||||
toast.success('任务已删除');
|
await backendApi.deleteTask(taskId);
|
||||||
|
setTasks(prev => prev.filter(t => t.task_id !== taskId));
|
||||||
|
toast.success('任务已删除');
|
||||||
|
} catch (error) {
|
||||||
|
console.error('删除任务失败:', error);
|
||||||
|
toast.error('删除任务失败');
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const stats = {
|
const stats = {
|
||||||
total: tasks.length,
|
total: tasks.length,
|
||||||
success: tasks.filter(t => t.status === 'success').length,
|
success: tasks.filter(t => t.status === 'success').length,
|
||||||
processing: tasks.filter(t => t.status === 'processing').length,
|
processing: tasks.filter(t => t.status === 'processing').length,
|
||||||
failure: tasks.filter(t => t.status === 'failure').length
|
failure: tasks.filter(t => t.status === 'failure').length,
|
||||||
|
unknown: tasks.filter(t => t.status === 'unknown').length
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -151,7 +145,7 @@ const TaskHistory: React.FC = () => {
|
|||||||
<h1 className="text-3xl font-extrabold tracking-tight">任务历史</h1>
|
<h1 className="text-3xl font-extrabold tracking-tight">任务历史</h1>
|
||||||
<p className="text-muted-foreground">查看和管理您所有的文档处理任务记录</p>
|
<p className="text-muted-foreground">查看和管理您所有的文档处理任务记录</p>
|
||||||
</div>
|
</div>
|
||||||
<Button variant="outline" className="rounded-xl gap-2" onClick={() => window.location.reload()}>
|
<Button variant="outline" className="rounded-xl gap-2" onClick={() => fetchTasks()}>
|
||||||
<RefreshCcw size={18} />
|
<RefreshCcw size={18} />
|
||||||
<span>刷新</span>
|
<span>刷新</span>
|
||||||
</Button>
|
</Button>
|
||||||
@@ -194,7 +188,8 @@ const TaskHistory: React.FC = () => {
|
|||||||
"w-12 h-12 rounded-xl flex items-center justify-center shrink-0",
|
"w-12 h-12 rounded-xl flex items-center justify-center shrink-0",
|
||||||
task.status === 'success' ? "bg-emerald-500/10 text-emerald-500" :
|
task.status === 'success' ? "bg-emerald-500/10 text-emerald-500" :
|
||||||
task.status === 'failure' ? "bg-destructive/10 text-destructive" :
|
task.status === 'failure' ? "bg-destructive/10 text-destructive" :
|
||||||
"bg-amber-500/10 text-amber-500"
|
task.status === 'processing' ? "bg-amber-500/10 text-amber-500" :
|
||||||
|
"bg-gray-500/10 text-gray-500"
|
||||||
)}>
|
)}>
|
||||||
{task.status === 'processing' ? (
|
{task.status === 'processing' ? (
|
||||||
<Loader2 size={24} className="animate-spin" />
|
<Loader2 size={24} className="animate-spin" />
|
||||||
@@ -212,16 +207,16 @@ const TaskHistory: React.FC = () => {
|
|||||||
</Badge>
|
</Badge>
|
||||||
</div>
|
</div>
|
||||||
<p className="text-sm text-muted-foreground">
|
<p className="text-sm text-muted-foreground">
|
||||||
{task.message || '任务执行中...'}
|
{task.message || (task.status === 'unknown' ? '无法获取状态' : '任务执行中...')}
|
||||||
</p>
|
</p>
|
||||||
<div className="flex items-center gap-4 text-xs text-muted-foreground">
|
<div className="flex items-center gap-4 text-xs text-muted-foreground">
|
||||||
<span className="flex items-center gap-1">
|
<span className="flex items-center gap-1">
|
||||||
<Clock size={12} />
|
<Clock size={12} />
|
||||||
{format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss')}
|
{task.created_at ? format(new Date(task.created_at), 'yyyy-MM-dd HH:mm:ss') : '时间未知'}
|
||||||
</span>
|
</span>
|
||||||
{task.completed_at && (
|
{task.updated_at && task.status !== 'processing' && (
|
||||||
<span>
|
<span>
|
||||||
耗时: {Math.round((new Date(task.completed_at).getTime() - new Date(task.created_at).getTime()) / 1000)} 秒
|
更新: {format(new Date(task.updated_at), 'HH:mm:ss')}
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import React, { useState, useEffect } from 'react';
|
import React, { useState, useEffect, useCallback, useRef } from 'react';
|
||||||
import { useDropzone } from 'react-dropzone';
|
import { useDropzone } from 'react-dropzone';
|
||||||
import {
|
import {
|
||||||
TableProperties,
|
TableProperties,
|
||||||
@@ -14,7 +14,12 @@ import {
|
|||||||
RefreshCcw,
|
RefreshCcw,
|
||||||
ChevronDown,
|
ChevronDown,
|
||||||
ChevronUp,
|
ChevronUp,
|
||||||
Loader2
|
Loader2,
|
||||||
|
Files,
|
||||||
|
Trash2,
|
||||||
|
Eye,
|
||||||
|
File,
|
||||||
|
Plus
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
|
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
|
||||||
@@ -26,6 +31,14 @@ import { format } from 'date-fns';
|
|||||||
import { toast } from 'sonner';
|
import { toast } from 'sonner';
|
||||||
import { cn } from '@/lib/utils';
|
import { cn } from '@/lib/utils';
|
||||||
import { Skeleton } from '@/components/ui/skeleton';
|
import { Skeleton } from '@/components/ui/skeleton';
|
||||||
|
import {
|
||||||
|
Dialog,
|
||||||
|
DialogContent,
|
||||||
|
DialogHeader,
|
||||||
|
DialogTitle,
|
||||||
|
} from "@/components/ui/dialog";
|
||||||
|
import { ScrollArea } from '@/components/ui/scroll-area';
|
||||||
|
import { useTemplateFill } from '@/context/TemplateFillContext';
|
||||||
|
|
||||||
type DocumentItem = {
|
type DocumentItem = {
|
||||||
doc_id: string;
|
doc_id: string;
|
||||||
@@ -41,72 +54,34 @@ type DocumentItem = {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
type TemplateField = {
|
|
||||||
cell: string;
|
|
||||||
name: string;
|
|
||||||
field_type: string;
|
|
||||||
required: boolean;
|
|
||||||
};
|
|
||||||
|
|
||||||
const TemplateFill: React.FC = () => {
|
const TemplateFill: React.FC = () => {
|
||||||
const [step, setStep] = useState<'upload-template' | 'select-source' | 'preview' | 'filling'>('upload-template');
|
const {
|
||||||
const [templateFile, setTemplateFile] = useState<File | null>(null);
|
step, setStep,
|
||||||
const [templateFields, setTemplateFields] = useState<TemplateField[]>([]);
|
templateFile, setTemplateFile,
|
||||||
const [sourceDocs, setSourceDocs] = useState<DocumentItem[]>([]);
|
templateFields, setTemplateFields,
|
||||||
const [selectedDocs, setSelectedDocs] = useState<string[]>([]);
|
sourceFiles, setSourceFiles, addSourceFiles, removeSourceFile,
|
||||||
|
sourceFilePaths, setSourceFilePaths,
|
||||||
|
sourceDocIds, setSourceDocIds, addSourceDocId, removeSourceDocId,
|
||||||
|
templateId, setTemplateId,
|
||||||
|
filledResult, setFilledResult,
|
||||||
|
reset
|
||||||
|
} = useTemplateFill();
|
||||||
|
|
||||||
const [loading, setLoading] = useState(false);
|
const [loading, setLoading] = useState(false);
|
||||||
const [filling, setFilling] = useState(false);
|
const [previewDoc, setPreviewDoc] = useState<{ name: string; content: string } | null>(null);
|
||||||
const [filledResult, setFilledResult] = useState<any>(null);
|
const [previewOpen, setPreviewOpen] = useState(false);
|
||||||
|
const [sourceMode, setSourceMode] = useState<'upload' | 'select'>('upload');
|
||||||
|
const [uploadedDocuments, setUploadedDocuments] = useState<DocumentItem[]>([]);
|
||||||
|
const [docsLoading, setDocsLoading] = useState(false);
|
||||||
|
const sourceFileInputRef = useRef<HTMLInputElement>(null);
|
||||||
|
|
||||||
// Load available source documents
|
// 模板拖拽
|
||||||
useEffect(() => {
|
const onTemplateDrop = useCallback((acceptedFiles: File[]) => {
|
||||||
loadSourceDocuments();
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const loadSourceDocuments = async () => {
|
|
||||||
setLoading(true);
|
|
||||||
try {
|
|
||||||
const result = await backendApi.getDocuments(undefined, 100);
|
|
||||||
if (result.success) {
|
|
||||||
// Filter to only non-Excel documents that can be used as data sources
|
|
||||||
const docs = (result.documents || []).filter((d: DocumentItem) =>
|
|
||||||
['docx', 'md', 'txt', 'xlsx'].includes(d.doc_type)
|
|
||||||
);
|
|
||||||
setSourceDocs(docs);
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
toast.error('加载数据源失败');
|
|
||||||
} finally {
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const onTemplateDrop = async (acceptedFiles: File[]) => {
|
|
||||||
const file = acceptedFiles[0];
|
const file = acceptedFiles[0];
|
||||||
if (!file) return;
|
if (file) {
|
||||||
|
setTemplateFile(file);
|
||||||
const ext = file.name.split('.').pop()?.toLowerCase();
|
|
||||||
if (!['xlsx', 'xls', 'docx'].includes(ext || '')) {
|
|
||||||
toast.error('仅支持 xlsx/xls/docx 格式的模板文件');
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
}, []);
|
||||||
setTemplateFile(file);
|
|
||||||
setLoading(true);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const result = await backendApi.uploadTemplate(file);
|
|
||||||
if (result.success) {
|
|
||||||
setTemplateFields(result.fields || []);
|
|
||||||
setStep('select-source');
|
|
||||||
toast.success('模板上传成功');
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
toast.error('模板上传失败: ' + (err.message || '未知错误'));
|
|
||||||
} finally {
|
|
||||||
setLoading(false);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const { getRootProps: getTemplateProps, getInputProps: getTemplateInputProps, isDragActive: isTemplateDragActive } = useDropzone({
|
const { getRootProps: getTemplateProps, getInputProps: getTemplateInputProps, isDragActive: isTemplateDragActive } = useDropzone({
|
||||||
onDrop: onTemplateDrop,
|
onDrop: onTemplateDrop,
|
||||||
@@ -115,29 +90,157 @@ const TemplateFill: React.FC = () => {
|
|||||||
'application/vnd.ms-excel': ['.xls'],
|
'application/vnd.ms-excel': ['.xls'],
|
||||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx']
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx']
|
||||||
},
|
},
|
||||||
maxFiles: 1
|
maxFiles: 1,
|
||||||
|
multiple: false
|
||||||
});
|
});
|
||||||
|
|
||||||
const handleFillTemplate = async () => {
|
// 源文档拖拽
|
||||||
if (!templateFile || selectedDocs.length === 0) {
|
const onSourceDrop = useCallback((e: React.DragEvent) => {
|
||||||
toast.error('请选择数据源文档');
|
e.preventDefault();
|
||||||
|
const files = Array.from(e.dataTransfer.files).filter(f => {
|
||||||
|
const ext = f.name.split('.').pop()?.toLowerCase();
|
||||||
|
return ['xlsx', 'xls', 'docx', 'md', 'txt'].includes(ext || '');
|
||||||
|
});
|
||||||
|
if (files.length > 0) {
|
||||||
|
addSourceFiles(files.map(f => ({ file: f })));
|
||||||
|
}
|
||||||
|
}, [addSourceFiles]);
|
||||||
|
|
||||||
|
const handleSourceFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
|
const files = Array.from(e.target.files || []);
|
||||||
|
if (files.length > 0) {
|
||||||
|
addSourceFiles(files.map(f => ({ file: f })));
|
||||||
|
toast.success(`已添加 ${files.length} 个文件`);
|
||||||
|
}
|
||||||
|
e.target.value = '';
|
||||||
|
};
|
||||||
|
|
||||||
|
// 仅添加源文档不上传
|
||||||
|
const handleAddSourceFiles = () => {
|
||||||
|
if (sourceFiles.length === 0) {
|
||||||
|
toast.error('请先选择源文档');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
toast.success(`已添加 ${sourceFiles.length} 个源文档,可继续添加更多`);
|
||||||
|
};
|
||||||
|
|
||||||
|
// 加载已上传文档
|
||||||
|
const loadUploadedDocuments = useCallback(async () => {
|
||||||
|
setDocsLoading(true);
|
||||||
|
try {
|
||||||
|
const result = await backendApi.getDocuments(undefined, 100);
|
||||||
|
if (result.success) {
|
||||||
|
// 过滤可作为数据源的文档类型
|
||||||
|
const docs = (result.documents || []).filter((d: DocumentItem) =>
|
||||||
|
['docx', 'md', 'txt', 'xlsx', 'xls'].includes(d.doc_type)
|
||||||
|
);
|
||||||
|
setUploadedDocuments(docs);
|
||||||
|
}
|
||||||
|
} catch (err: any) {
|
||||||
|
console.error('加载文档失败:', err);
|
||||||
|
} finally {
|
||||||
|
setDocsLoading(false);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// 删除文档
|
||||||
|
const handleDeleteDocument = async (docId: string, e: React.MouseEvent) => {
|
||||||
|
e.stopPropagation();
|
||||||
|
if (!confirm('确定要删除该文档吗?')) return;
|
||||||
|
try {
|
||||||
|
const result = await backendApi.deleteDocument(docId);
|
||||||
|
if (result.success) {
|
||||||
|
setUploadedDocuments(prev => prev.filter(d => d.doc_id !== docId));
|
||||||
|
removeSourceDocId(docId);
|
||||||
|
toast.success('文档已删除');
|
||||||
|
} else {
|
||||||
|
toast.error(result.message || '删除失败');
|
||||||
|
}
|
||||||
|
} catch (err: any) {
|
||||||
|
toast.error('删除失败: ' + (err.message || '未知错误'));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (sourceMode === 'select') {
|
||||||
|
loadUploadedDocuments();
|
||||||
|
}
|
||||||
|
}, [sourceMode, loadUploadedDocuments]);
|
||||||
|
|
||||||
|
const handleJointUploadAndFill = async () => {
|
||||||
|
if (!templateFile) {
|
||||||
|
toast.error('请先上传模板文件');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
setFilling(true);
|
// 检查是否选择了数据源
|
||||||
setStep('filling');
|
if (sourceMode === 'upload' && sourceFiles.length === 0) {
|
||||||
|
toast.error('请上传源文档或从已上传文档中选择');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (sourceMode === 'select' && sourceDocIds.length === 0) {
|
||||||
|
toast.error('请选择源文档');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setLoading(true);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 调用后端填表接口
|
if (sourceMode === 'select') {
|
||||||
const result = await backendApi.fillTemplate('temp-template-id', templateFields);
|
// 使用已上传文档作为数据源
|
||||||
setFilledResult(result);
|
const result = await backendApi.uploadTemplate(templateFile);
|
||||||
setStep('preview');
|
|
||||||
toast.success('表格填写完成');
|
if (result.success) {
|
||||||
|
setTemplateFields(result.fields || []);
|
||||||
|
setTemplateId(result.template_id || 'temp');
|
||||||
|
toast.success('开始智能填表');
|
||||||
|
setStep('filling');
|
||||||
|
|
||||||
|
// 使用 source_doc_ids 进行填表
|
||||||
|
const fillResult = await backendApi.fillTemplate(
|
||||||
|
result.template_id || 'temp',
|
||||||
|
result.fields || [],
|
||||||
|
sourceDocIds,
|
||||||
|
[],
|
||||||
|
'请从以下文档中提取相关信息填写表格'
|
||||||
|
);
|
||||||
|
|
||||||
|
setFilledResult(fillResult);
|
||||||
|
setStep('preview');
|
||||||
|
toast.success('表格填写完成');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// 使用联合上传API
|
||||||
|
const result = await backendApi.uploadTemplateAndSources(
|
||||||
|
templateFile,
|
||||||
|
sourceFiles.map(sf => sf.file)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
setTemplateFields(result.fields || []);
|
||||||
|
setTemplateId(result.template_id);
|
||||||
|
setSourceFilePaths(result.source_file_paths || []);
|
||||||
|
toast.success('文档上传成功,开始智能填表');
|
||||||
|
setStep('filling');
|
||||||
|
|
||||||
|
// 自动开始填表
|
||||||
|
const fillResult = await backendApi.fillTemplate(
|
||||||
|
result.template_id,
|
||||||
|
result.fields || [],
|
||||||
|
[],
|
||||||
|
result.source_file_paths || [],
|
||||||
|
'请从以下文档中提取相关信息填写表格'
|
||||||
|
);
|
||||||
|
|
||||||
|
setFilledResult(fillResult);
|
||||||
|
setStep('preview');
|
||||||
|
toast.success('表格填写完成');
|
||||||
|
}
|
||||||
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
toast.error('填表失败: ' + (err.message || '未知错误'));
|
toast.error('处理失败: ' + (err.message || '未知错误'));
|
||||||
setStep('select-source');
|
|
||||||
} finally {
|
} finally {
|
||||||
setFilling(false);
|
setLoading(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -145,7 +248,11 @@ const TemplateFill: React.FC = () => {
|
|||||||
if (!templateFile || !filledResult) return;
|
if (!templateFile || !filledResult) return;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const blob = await backendApi.exportFilledTemplate('temp', filledResult.filled_data || {}, 'xlsx');
|
const blob = await backendApi.exportFilledTemplate(
|
||||||
|
templateId || 'temp',
|
||||||
|
filledResult.filled_data || {},
|
||||||
|
'xlsx'
|
||||||
|
);
|
||||||
const url = URL.createObjectURL(blob);
|
const url = URL.createObjectURL(blob);
|
||||||
const a = document.createElement('a');
|
const a = document.createElement('a');
|
||||||
a.href = url;
|
a.href = url;
|
||||||
@@ -158,12 +265,18 @@ const TemplateFill: React.FC = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const resetFlow = () => {
|
const getFileIcon = (filename: string) => {
|
||||||
setStep('upload-template');
|
const ext = filename.split('.').pop()?.toLowerCase();
|
||||||
setTemplateFile(null);
|
if (['xlsx', 'xls'].includes(ext || '')) {
|
||||||
setTemplateFields([]);
|
return <FileSpreadsheet size={20} className="text-emerald-500" />;
|
||||||
setSelectedDocs([]);
|
}
|
||||||
setFilledResult(null);
|
if (ext === 'docx') {
|
||||||
|
return <FileText size={20} className="text-blue-500" />;
|
||||||
|
}
|
||||||
|
if (['md', 'txt'].includes(ext || '')) {
|
||||||
|
return <FileText size={20} className="text-orange-500" />;
|
||||||
|
}
|
||||||
|
return <File size={20} className="text-gray-500" />;
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -175,208 +288,248 @@ const TemplateFill: React.FC = () => {
|
|||||||
根据您的表格模板,自动聚合多源文档信息进行精准填充
|
根据您的表格模板,自动聚合多源文档信息进行精准填充
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
{step !== 'upload-template' && (
|
{step !== 'upload' && (
|
||||||
<Button variant="outline" className="rounded-xl gap-2" onClick={resetFlow}>
|
<Button variant="outline" className="rounded-xl gap-2" onClick={reset}>
|
||||||
<RefreshCcw size={18} />
|
<RefreshCcw size={18} />
|
||||||
<span>重新开始</span>
|
<span>重新开始</span>
|
||||||
</Button>
|
</Button>
|
||||||
)}
|
)}
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
{/* Progress Steps */}
|
{/* Step 1: Upload - Joint Upload of Template + Source Docs */}
|
||||||
<div className="flex items-center justify-center gap-4">
|
{step === 'upload' && (
|
||||||
{['上传模板', '选择数据源', '填写预览'].map((label, idx) => {
|
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||||
const stepIndex = ['upload-template', 'select-source', 'preview'].indexOf(step);
|
{/* Template Upload */}
|
||||||
const isActive = idx <= stepIndex;
|
|
||||||
const isCurrent = idx === stepIndex;
|
|
||||||
|
|
||||||
return (
|
|
||||||
<React.Fragment key={idx}>
|
|
||||||
<div className={cn(
|
|
||||||
"flex items-center gap-2 px-4 py-2 rounded-full transition-all",
|
|
||||||
isActive ? "bg-primary text-primary-foreground" : "bg-muted text-muted-foreground"
|
|
||||||
)}>
|
|
||||||
<div className={cn(
|
|
||||||
"w-6 h-6 rounded-full flex items-center justify-center text-xs font-bold",
|
|
||||||
isCurrent ? "bg-white/20" : ""
|
|
||||||
)}>
|
|
||||||
{idx + 1}
|
|
||||||
</div>
|
|
||||||
<span className="text-sm font-medium">{label}</span>
|
|
||||||
</div>
|
|
||||||
{idx < 2 && (
|
|
||||||
<div className={cn(
|
|
||||||
"w-12 h-0.5",
|
|
||||||
idx < stepIndex ? "bg-primary" : "bg-muted"
|
|
||||||
)} />
|
|
||||||
)}
|
|
||||||
</React.Fragment>
|
|
||||||
);
|
|
||||||
})}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Step 1: Upload Template */}
|
|
||||||
{step === 'upload-template' && (
|
|
||||||
<div
|
|
||||||
{...getTemplateProps()}
|
|
||||||
className={cn(
|
|
||||||
"border-2 border-dashed rounded-3xl p-16 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group",
|
|
||||||
isTemplateDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5"
|
|
||||||
)}
|
|
||||||
>
|
|
||||||
<input {...getTemplateInputProps()} />
|
|
||||||
<div className="w-20 h-20 rounded-2xl bg-primary/10 text-primary flex items-center justify-center mb-6 group-hover:scale-110 transition-transform">
|
|
||||||
{loading ? <Loader2 className="animate-spin" size={40} /> : <Upload size={40} />}
|
|
||||||
</div>
|
|
||||||
<div className="space-y-2 max-w-md">
|
|
||||||
<p className="text-xl font-bold tracking-tight">
|
|
||||||
{isTemplateDragActive ? '释放以开始上传' : '点击或拖拽上传表格模板'}
|
|
||||||
</p>
|
|
||||||
<p className="text-sm text-muted-foreground">
|
|
||||||
支持 Excel (.xlsx, .xls) 或 Word (.docx) 格式的表格模板
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<div className="mt-6 flex gap-3">
|
|
||||||
<Badge variant="outline" className="bg-emerald-500/10 text-emerald-600 border-emerald-200">
|
|
||||||
<FileSpreadsheet size={14} className="mr-1" /> Excel 模板
|
|
||||||
</Badge>
|
|
||||||
<Badge variant="outline" className="bg-blue-500/10 text-blue-600 border-blue-200">
|
|
||||||
<FileText size={14} className="mr-1" /> Word 模板
|
|
||||||
</Badge>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{/* Step 2: Select Source Documents */}
|
|
||||||
{step === 'select-source' && (
|
|
||||||
<div className="space-y-6">
|
|
||||||
{/* Template Info */}
|
|
||||||
<Card className="border-none shadow-md">
|
<Card className="border-none shadow-md">
|
||||||
<CardHeader className="pb-4">
|
<CardHeader className="pb-4">
|
||||||
<CardTitle className="text-lg flex items-center gap-2">
|
<CardTitle className="text-lg flex items-center gap-2">
|
||||||
<FileSpreadsheet className="text-primary" size={20} />
|
<FileSpreadsheet className="text-primary" size={20} />
|
||||||
已上传模板
|
表格模板
|
||||||
</CardTitle>
|
|
||||||
</CardHeader>
|
|
||||||
<CardContent>
|
|
||||||
<div className="flex items-center gap-4">
|
|
||||||
<div className="w-12 h-12 rounded-xl bg-emerald-500/10 text-emerald-500 flex items-center justify-center">
|
|
||||||
<FileSpreadsheet size={24} />
|
|
||||||
</div>
|
|
||||||
<div className="flex-1">
|
|
||||||
<p className="font-bold">{templateFile?.name}</p>
|
|
||||||
<p className="text-sm text-muted-foreground">
|
|
||||||
{templateFields.length} 个字段待填写
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<Button variant="ghost" size="sm" onClick={() => setStep('upload-template')}>
|
|
||||||
重新选择
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Template Fields Preview */}
|
|
||||||
<div className="mt-4 p-4 bg-muted/30 rounded-xl">
|
|
||||||
<p className="text-xs font-bold uppercase tracking-widest text-muted-foreground mb-3">待填写字段</p>
|
|
||||||
<div className="flex flex-wrap gap-2">
|
|
||||||
{templateFields.map((field, idx) => (
|
|
||||||
<Badge key={idx} variant="outline" className="bg-background">
|
|
||||||
{field.name}
|
|
||||||
</Badge>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</CardContent>
|
|
||||||
</Card>
|
|
||||||
|
|
||||||
{/* Source Documents Selection */}
|
|
||||||
<Card className="border-none shadow-md">
|
|
||||||
<CardHeader className="pb-4">
|
|
||||||
<CardTitle className="text-lg flex items-center gap-2">
|
|
||||||
<FileText className="text-primary" size={20} />
|
|
||||||
选择数据源文档
|
|
||||||
</CardTitle>
|
</CardTitle>
|
||||||
<CardDescription>
|
<CardDescription>
|
||||||
从已上传的文档中选择作为填表的数据来源,支持 Excel 和非结构化文档
|
上传需要填写的 Excel/Word 模板文件
|
||||||
</CardDescription>
|
</CardDescription>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent>
|
<CardContent>
|
||||||
{loading ? (
|
{!templateFile ? (
|
||||||
<div className="space-y-3">
|
<div
|
||||||
{[1, 2, 3].map(i => <Skeleton key={i} className="h-16 w-full rounded-xl" />)}
|
{...getTemplateProps()}
|
||||||
</div>
|
className={cn(
|
||||||
) : sourceDocs.length > 0 ? (
|
"border-2 border-dashed rounded-2xl p-8 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group min-h-[200px]",
|
||||||
<div className="space-y-3">
|
isTemplateDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5"
|
||||||
{sourceDocs.map(doc => (
|
)}
|
||||||
<div
|
>
|
||||||
key={doc.doc_id}
|
<input {...getTemplateInputProps()} />
|
||||||
className={cn(
|
<div className="w-14 h-14 rounded-xl bg-primary/10 text-primary flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
|
||||||
"flex items-center gap-4 p-4 rounded-xl border-2 transition-all cursor-pointer",
|
{loading ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
|
||||||
selectedDocs.includes(doc.doc_id)
|
</div>
|
||||||
? "border-primary bg-primary/5"
|
<p className="font-medium">
|
||||||
: "border-border hover:bg-muted/30"
|
{isTemplateDragActive ? '释放以上传' : '点击或拖拽上传模板'}
|
||||||
)}
|
</p>
|
||||||
onClick={() => {
|
<p className="text-xs text-muted-foreground mt-1">
|
||||||
setSelectedDocs(prev =>
|
支持 .xlsx .xls .docx
|
||||||
prev.includes(doc.doc_id)
|
</p>
|
||||||
? prev.filter(id => id !== doc.doc_id)
|
|
||||||
: [...prev, doc.doc_id]
|
|
||||||
);
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<div className={cn(
|
|
||||||
"w-6 h-6 rounded-md border-2 flex items-center justify-center transition-all",
|
|
||||||
selectedDocs.includes(doc.doc_id)
|
|
||||||
? "border-primary bg-primary text-white"
|
|
||||||
: "border-muted-foreground/30"
|
|
||||||
)}>
|
|
||||||
{selectedDocs.includes(doc.doc_id) && <CheckCircle2 size={14} />}
|
|
||||||
</div>
|
|
||||||
<div className={cn(
|
|
||||||
"w-10 h-10 rounded-lg flex items-center justify-center",
|
|
||||||
doc.doc_type === 'xlsx' ? "bg-emerald-500/10 text-emerald-500" : "bg-blue-500/10 text-blue-500"
|
|
||||||
)}>
|
|
||||||
{doc.doc_type === 'xlsx' ? <FileSpreadsheet size={20} /> : <FileText size={20} />}
|
|
||||||
</div>
|
|
||||||
<div className="flex-1 min-w-0">
|
|
||||||
<p className="font-semibold truncate">{doc.original_filename}</p>
|
|
||||||
<p className="text-xs text-muted-foreground">
|
|
||||||
{doc.doc_type.toUpperCase()} • {format(new Date(doc.created_at), 'yyyy-MM-dd')}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
{doc.metadata?.columns && (
|
|
||||||
<Badge variant="outline" className="text-xs">
|
|
||||||
{doc.metadata.columns.length} 列
|
|
||||||
</Badge>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
))}
|
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<div className="text-center py-12 text-muted-foreground">
|
<div className="flex items-center gap-3 p-4 bg-emerald-500/5 rounded-xl border border-emerald-200">
|
||||||
<FileText size={48} className="mx-auto mb-4 opacity-30" />
|
<div className="w-10 h-10 rounded-lg bg-emerald-500/10 text-emerald-500 flex items-center justify-center">
|
||||||
<p>暂无数据源文档,请先上传文档</p>
|
<FileSpreadsheet size={20} />
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="font-medium truncate">{templateFile.name}</p>
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
{(templateFile.size / 1024).toFixed(1)} KB
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Button variant="ghost" size="sm" onClick={() => setTemplateFile(null)}>
|
||||||
|
<X size={16} />
|
||||||
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
|
{/* Source Documents Upload */}
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="text-lg flex items-center gap-2">
|
||||||
|
<Files className="text-primary" size={20} />
|
||||||
|
源文档
|
||||||
|
</CardTitle>
|
||||||
|
<CardDescription>
|
||||||
|
选择包含数据的源文档作为填表依据
|
||||||
|
</CardDescription>
|
||||||
|
{/* Source Mode Tabs */}
|
||||||
|
<div className="flex gap-2 mt-2">
|
||||||
|
<Button
|
||||||
|
variant={sourceMode === 'upload' ? 'default' : 'outline'}
|
||||||
|
size="sm"
|
||||||
|
onClick={() => setSourceMode('upload')}
|
||||||
|
>
|
||||||
|
<Upload size={14} className="mr-1" />
|
||||||
|
上传文件
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
variant={sourceMode === 'select' ? 'default' : 'outline'}
|
||||||
|
size="sm"
|
||||||
|
onClick={() => setSourceMode('select')}
|
||||||
|
>
|
||||||
|
<Files size={14} className="mr-1" />
|
||||||
|
从文档中心选择
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent>
|
||||||
|
{sourceMode === 'upload' ? (
|
||||||
|
<>
|
||||||
|
<div className="border-2 border-dashed rounded-2xl p-8 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group min-h-[200px] border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5">
|
||||||
|
<input
|
||||||
|
id="source-file-input"
|
||||||
|
type="file"
|
||||||
|
multiple={true}
|
||||||
|
accept=".xlsx,.xls,.docx,.md,.txt"
|
||||||
|
onChange={handleSourceFileSelect}
|
||||||
|
className="hidden"
|
||||||
|
/>
|
||||||
|
<label htmlFor="source-file-input" className="cursor-pointer flex flex-col items-center">
|
||||||
|
<div className="w-14 h-14 rounded-xl bg-blue-500/10 text-blue-500 flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
|
||||||
|
{loading ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
|
||||||
|
</div>
|
||||||
|
<p className="font-medium">
|
||||||
|
点击上传源文档
|
||||||
|
</p>
|
||||||
|
<p className="text-xs text-muted-foreground mt-1">
|
||||||
|
支持 .xlsx .xls .docx .md .txt
|
||||||
|
</p>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
onDragOver={(e) => { e.preventDefault(); }}
|
||||||
|
onDrop={onSourceDrop}
|
||||||
|
className="mt-2 text-center text-xs text-muted-foreground"
|
||||||
|
>
|
||||||
|
或拖拽文件到此处
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Selected Source Files */}
|
||||||
|
{sourceFiles.length > 0 && (
|
||||||
|
<div className="mt-4 space-y-2">
|
||||||
|
{sourceFiles.map((sf, idx) => (
|
||||||
|
<div key={idx} className="flex items-center gap-3 p-3 bg-muted/50 rounded-xl">
|
||||||
|
{getFileIcon(sf.file.name)}
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="text-sm font-medium truncate">{sf.file.name}</p>
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
{(sf.file.size / 1024).toFixed(1)} KB
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Button variant="ghost" size="sm" onClick={() => removeSourceFile(idx)}>
|
||||||
|
<Trash2 size={14} className="text-red-500" />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
<div className="flex justify-center pt-2">
|
||||||
|
<Button variant="outline" size="sm" onClick={() => document.getElementById('source-file-input')?.click()}>
|
||||||
|
<Plus size={14} className="mr-1" />
|
||||||
|
继续添加更多文档
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
{/* Uploaded Documents Selection */}
|
||||||
|
{docsLoading ? (
|
||||||
|
<div className="space-y-2">
|
||||||
|
{[1, 2, 3].map(i => (
|
||||||
|
<Skeleton key={i} className="h-16 w-full rounded-xl" />
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
) : uploadedDocuments.length > 0 ? (
|
||||||
|
<div className="space-y-2">
|
||||||
|
{sourceDocIds.length > 0 && (
|
||||||
|
<div className="flex items-center justify-between p-3 bg-primary/5 rounded-xl border border-primary/20">
|
||||||
|
<span className="text-sm font-medium">已选择 {sourceDocIds.length} 个文档</span>
|
||||||
|
<Button variant="ghost" size="sm" onClick={() => loadUploadedDocuments()}>
|
||||||
|
<RefreshCcw size={14} className="mr-1" />
|
||||||
|
刷新列表
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div className="max-h-[300px] overflow-y-auto space-y-2">
|
||||||
|
{uploadedDocuments.map((doc) => (
|
||||||
|
<div
|
||||||
|
key={doc.doc_id}
|
||||||
|
className={cn(
|
||||||
|
"flex items-center gap-3 p-3 rounded-xl border-2 transition-all cursor-pointer",
|
||||||
|
sourceDocIds.includes(doc.doc_id)
|
||||||
|
? "border-primary bg-primary/5"
|
||||||
|
: "border-border hover:bg-muted/30"
|
||||||
|
)}
|
||||||
|
onClick={() => {
|
||||||
|
if (sourceDocIds.includes(doc.doc_id)) {
|
||||||
|
removeSourceDocId(doc.doc_id);
|
||||||
|
} else {
|
||||||
|
addSourceDocId(doc.doc_id);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<div className={cn(
|
||||||
|
"w-6 h-6 rounded-md border-2 flex items-center justify-center transition-all shrink-0",
|
||||||
|
sourceDocIds.includes(doc.doc_id)
|
||||||
|
? "border-primary bg-primary text-white"
|
||||||
|
: "border-muted-foreground/30"
|
||||||
|
)}>
|
||||||
|
{sourceDocIds.includes(doc.doc_id) && <CheckCircle2 size={14} />}
|
||||||
|
</div>
|
||||||
|
{getFileIcon(doc.original_filename)}
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="text-sm font-medium truncate">{doc.original_filename}</p>
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
{doc.doc_type.toUpperCase()} • {format(new Date(doc.created_at), 'yyyy-MM-dd')}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="sm"
|
||||||
|
onClick={(e) => handleDeleteDocument(doc.doc_id, e)}
|
||||||
|
className="shrink-0"
|
||||||
|
>
|
||||||
|
<Trash2 size={14} className="text-red-500" />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="text-center py-8 text-muted-foreground">
|
||||||
|
<Files size={32} className="mx-auto mb-2 opacity-30" />
|
||||||
|
<p className="text-sm">暂无可用的已上传文档</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
|
||||||
{/* Action Button */}
|
{/* Action Button */}
|
||||||
<div className="flex justify-center">
|
<div className="col-span-1 lg:col-span-2 flex justify-center">
|
||||||
<Button
|
<Button
|
||||||
size="lg"
|
size="lg"
|
||||||
className="rounded-xl px-8 shadow-lg shadow-primary/20 gap-2"
|
className="rounded-xl px-12 shadow-lg shadow-primary/20 gap-2"
|
||||||
disabled={selectedDocs.length === 0 || filling}
|
disabled={!templateFile || loading}
|
||||||
onClick={handleFillTemplate}
|
onClick={handleJointUploadAndFill}
|
||||||
>
|
>
|
||||||
{filling ? (
|
{loading ? (
|
||||||
<>
|
<>
|
||||||
<Loader2 className="animate-spin" size={20} />
|
<Loader2 className="animate-spin" size={20} />
|
||||||
<span>AI 正在分析并填表...</span>
|
<span>正在处理...</span>
|
||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
<>
|
<>
|
||||||
<Sparkles size={20} />
|
<Sparkles size={20} />
|
||||||
<span>开始智能填表</span>
|
<span>上传并智能填表</span>
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</Button>
|
</Button>
|
||||||
@@ -384,49 +537,7 @@ const TemplateFill: React.FC = () => {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Step 3: Preview Results */}
|
{/* Step 2: Filling State */}
|
||||||
{step === 'preview' && filledResult && (
|
|
||||||
<Card className="border-none shadow-md">
|
|
||||||
<CardHeader>
|
|
||||||
<CardTitle className="text-lg flex items-center gap-2">
|
|
||||||
<CheckCircle2 className="text-emerald-500" size={20} />
|
|
||||||
填表完成
|
|
||||||
</CardTitle>
|
|
||||||
<CardDescription>
|
|
||||||
系统已根据 {selectedDocs.length} 份文档自动完成表格填写
|
|
||||||
</CardDescription>
|
|
||||||
</CardHeader>
|
|
||||||
<CardContent className="space-y-6">
|
|
||||||
{/* Filled Data Preview */}
|
|
||||||
<div className="p-6 bg-muted/30 rounded-2xl">
|
|
||||||
<div className="space-y-4">
|
|
||||||
{templateFields.map((field, idx) => (
|
|
||||||
<div key={idx} className="flex items-center gap-4">
|
|
||||||
<div className="w-32 text-sm font-medium text-muted-foreground">{field.name}</div>
|
|
||||||
<div className="flex-1 p-3 bg-background rounded-xl border">
|
|
||||||
{(filledResult.filled_data || {})[field.name] || '-'}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Action Buttons */}
|
|
||||||
<div className="flex justify-center gap-4">
|
|
||||||
<Button variant="outline" className="rounded-xl gap-2" onClick={resetFlow}>
|
|
||||||
<RefreshCcw size={18} />
|
|
||||||
<span>继续填表</span>
|
|
||||||
</Button>
|
|
||||||
<Button className="rounded-xl gap-2 shadow-lg shadow-primary/20" onClick={handleExport}>
|
|
||||||
<Download size={18} />
|
|
||||||
<span>导出结果</span>
|
|
||||||
</Button>
|
|
||||||
</div>
|
|
||||||
</CardContent>
|
|
||||||
</Card>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{/* Filling State */}
|
|
||||||
{step === 'filling' && (
|
{step === 'filling' && (
|
||||||
<Card className="border-none shadow-md">
|
<Card className="border-none shadow-md">
|
||||||
<CardContent className="py-16 flex flex-col items-center justify-center">
|
<CardContent className="py-16 flex flex-col items-center justify-center">
|
||||||
@@ -435,11 +546,107 @@ const TemplateFill: React.FC = () => {
|
|||||||
</div>
|
</div>
|
||||||
<h3 className="text-xl font-bold mb-2">AI 正在智能分析并填表</h3>
|
<h3 className="text-xl font-bold mb-2">AI 正在智能分析并填表</h3>
|
||||||
<p className="text-muted-foreground text-center max-w-md">
|
<p className="text-muted-foreground text-center max-w-md">
|
||||||
系统正在从 {selectedDocs.length} 份文档中检索相关信息,生成字段描述,并使用 RAG 增强填写准确性...
|
系统正在从 {sourceFiles.length || sourceFilePaths.length} 份文档中检索相关信息...
|
||||||
</p>
|
</p>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Step 3: Preview Results */}
|
||||||
|
{step === 'preview' && filledResult && (
|
||||||
|
<div className="space-y-6">
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader>
|
||||||
|
<CardTitle className="text-lg flex items-center gap-2">
|
||||||
|
<CheckCircle2 className="text-emerald-500" size={20} />
|
||||||
|
填表完成
|
||||||
|
</CardTitle>
|
||||||
|
<CardDescription>
|
||||||
|
系统已根据 {sourceFiles.length || sourceFilePaths.length} 份文档自动完成表格填写
|
||||||
|
</CardDescription>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent>
|
||||||
|
{/* Filled Data Preview */}
|
||||||
|
<div className="p-6 bg-muted/30 rounded-2xl">
|
||||||
|
<div className="space-y-4">
|
||||||
|
{templateFields.map((field, idx) => {
|
||||||
|
const value = filledResult.filled_data?.[field.name];
|
||||||
|
const displayValue = Array.isArray(value)
|
||||||
|
? value.filter(v => v && String(v).trim()).join(', ') || '-'
|
||||||
|
: value || '-';
|
||||||
|
return (
|
||||||
|
<div key={idx} className="flex items-center gap-4">
|
||||||
|
<div className="w-40 text-sm font-medium text-muted-foreground">{field.name}</div>
|
||||||
|
<div className="flex-1 p-3 bg-background rounded-xl border">
|
||||||
|
{displayValue}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Source Files Info */}
|
||||||
|
<div className="mt-4 flex flex-wrap gap-2">
|
||||||
|
{sourceFiles.map((sf, idx) => (
|
||||||
|
<Badge key={idx} variant="outline" className="bg-blue-500/5">
|
||||||
|
{getFileIcon(sf.file.name)}
|
||||||
|
<span className="ml-1">{sf.file.name}</span>
|
||||||
|
</Badge>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Action Buttons */}
|
||||||
|
<div className="flex justify-center gap-4 mt-6">
|
||||||
|
<Button variant="outline" className="rounded-xl gap-2" onClick={reset}>
|
||||||
|
<RefreshCcw size={18} />
|
||||||
|
<span>继续填表</span>
|
||||||
|
</Button>
|
||||||
|
<Button className="rounded-xl gap-2 shadow-lg shadow-primary/20" onClick={handleExport}>
|
||||||
|
<Download size={18} />
|
||||||
|
<span>导出结果</span>
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
|
||||||
|
{/* Fill Details */}
|
||||||
|
{filledResult.fill_details && filledResult.fill_details.length > 0 && (
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader>
|
||||||
|
<CardTitle className="text-lg">填写详情</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent>
|
||||||
|
<div className="space-y-3">
|
||||||
|
{filledResult.fill_details.map((detail: any, idx: number) => (
|
||||||
|
<div key={idx} className="flex items-start gap-3 p-3 bg-muted/30 rounded-xl text-sm">
|
||||||
|
<div className="w-1 h-1 rounded-full bg-primary mt-2" />
|
||||||
|
<div className="flex-1">
|
||||||
|
<div className="font-medium">{detail.field}</div>
|
||||||
|
<div className="text-muted-foreground text-xs mt-1">
|
||||||
|
来源: {detail.source} | 置信度: {detail.confidence ? (detail.confidence * 100).toFixed(0) + '%' : 'N/A'}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Preview Dialog */}
|
||||||
|
<Dialog open={previewOpen} onOpenChange={setPreviewOpen}>
|
||||||
|
<DialogContent className="max-w-2xl">
|
||||||
|
<DialogHeader>
|
||||||
|
<DialogTitle>{previewDoc?.name || '文档预览'}</DialogTitle>
|
||||||
|
</DialogHeader>
|
||||||
|
<ScrollArea className="max-h-[60vh]">
|
||||||
|
<pre className="text-sm whitespace-pre-wrap">{previewDoc?.content}</pre>
|
||||||
|
</ScrollArea>
|
||||||
|
</DialogContent>
|
||||||
|
</Dialog>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
854
logs/docx_parser_and_template_fill.patch
Normal file
854
logs/docx_parser_and_template_fill.patch
Normal file
@@ -0,0 +1,854 @@
|
|||||||
|
diff --git a/backend/app/api/endpoints/templates.py b/backend/app/api/endpoints/templates.py
|
||||||
|
index 572d56e..706f281 100644
|
||||||
|
--- a/backend/app/api/endpoints/templates.py
|
||||||
|
+++ b/backend/app/api/endpoints/templates.py
|
||||||
|
@@ -13,7 +13,7 @@ import pandas as pd
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from app.services.template_fill_service import template_fill_service, TemplateField
|
||||||
|
-from app.services.excel_storage_service import excel_storage_service
|
||||||
|
+from app.services.file_service import file_service
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@@ -28,13 +28,15 @@ class TemplateFieldRequest(BaseModel):
|
||||||
|
name: str
|
||||||
|
field_type: str = "text"
|
||||||
|
required: bool = True
|
||||||
|
+ hint: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class FillRequest(BaseModel):
|
||||||
|
"""填写请求"""
|
||||||
|
template_id: str
|
||||||
|
template_fields: List[TemplateFieldRequest]
|
||||||
|
- source_doc_ids: Optional[List[str]] = None
|
||||||
|
+ source_doc_ids: Optional[List[str]] = None # MongoDB 文档 ID 列表
|
||||||
|
+ source_file_paths: Optional[List[str]] = None # 源文档文件路径列表
|
||||||
|
user_hint: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@@ -71,7 +73,6 @@ async def upload_template(
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 保存文件
|
||||||
|
- from app.services.file_service import file_service
|
||||||
|
content = await file.read()
|
||||||
|
saved_path = file_service.save_uploaded_file(
|
||||||
|
content,
|
||||||
|
@@ -87,7 +88,7 @@ async def upload_template(
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
- "template_id": saved_path, # 使用文件路径作为ID
|
||||||
|
+ "template_id": saved_path,
|
||||||
|
"filename": file.filename,
|
||||||
|
"file_type": file_ext,
|
||||||
|
"fields": [
|
||||||
|
@@ -95,7 +96,8 @@ async def upload_template(
|
||||||
|
"cell": f.cell,
|
||||||
|
"name": f.name,
|
||||||
|
"field_type": f.field_type,
|
||||||
|
- "required": f.required
|
||||||
|
+ "required": f.required,
|
||||||
|
+ "hint": f.hint
|
||||||
|
}
|
||||||
|
for f in template_fields
|
||||||
|
],
|
||||||
|
@@ -135,7 +137,8 @@ async def extract_template_fields(
|
||||||
|
"cell": f.cell,
|
||||||
|
"name": f.name,
|
||||||
|
"field_type": f.field_type,
|
||||||
|
- "required": f.required
|
||||||
|
+ "required": f.required,
|
||||||
|
+ "hint": f.hint
|
||||||
|
}
|
||||||
|
for f in fields
|
||||||
|
]
|
||||||
|
@@ -153,7 +156,7 @@ async def fill_template(
|
||||||
|
"""
|
||||||
|
执行表格填写
|
||||||
|
|
||||||
|
- 根据提供的字段定义,从已上传的文档中检索信息并填写
|
||||||
|
+ 根据提供的字段定义,从源文档中检索信息并填写
|
||||||
|
|
||||||
|
Args:
|
||||||
|
request: 填写请求
|
||||||
|
@@ -168,7 +171,8 @@ async def fill_template(
|
||||||
|
cell=f.cell,
|
||||||
|
name=f.name,
|
||||||
|
field_type=f.field_type,
|
||||||
|
- required=f.required
|
||||||
|
+ required=f.required,
|
||||||
|
+ hint=f.hint
|
||||||
|
)
|
||||||
|
for f in request.template_fields
|
||||||
|
]
|
||||||
|
@@ -177,6 +181,7 @@ async def fill_template(
|
||||||
|
result = await template_fill_service.fill_template(
|
||||||
|
template_fields=fields,
|
||||||
|
source_doc_ids=request.source_doc_ids,
|
||||||
|
+ source_file_paths=request.source_file_paths,
|
||||||
|
user_hint=request.user_hint
|
||||||
|
)
|
||||||
|
|
||||||
|
@@ -194,6 +199,8 @@ async def export_filled_template(
|
||||||
|
"""
|
||||||
|
导出填写后的表格
|
||||||
|
|
||||||
|
+ 支持 Excel (.xlsx) 和 Word (.docx) 格式
|
||||||
|
+
|
||||||
|
Args:
|
||||||
|
request: 导出请求
|
||||||
|
|
||||||
|
@@ -201,25 +208,124 @@ async def export_filled_template(
|
||||||
|
文件流
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
- # 创建 DataFrame
|
||||||
|
- df = pd.DataFrame([request.filled_data])
|
||||||
|
+ if request.format == "xlsx":
|
||||||
|
+ return await _export_to_excel(request.filled_data, request.template_id)
|
||||||
|
+ elif request.format == "docx":
|
||||||
|
+ return await _export_to_word(request.filled_data, request.template_id)
|
||||||
|
+ else:
|
||||||
|
+ raise HTTPException(
|
||||||
|
+ status_code=400,
|
||||||
|
+ detail=f"不支持的导出格式: {request.format},仅支持 xlsx/docx"
|
||||||
|
+ )
|
||||||
|
|
||||||
|
- # 导出为 Excel
|
||||||
|
- output = io.BytesIO()
|
||||||
|
- with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
||||||
|
- df.to_excel(writer, index=False, sheet_name='填写结果')
|
||||||
|
+ except HTTPException:
|
||||||
|
+ raise
|
||||||
|
+ except Exception as e:
|
||||||
|
+ logger.error(f"导出失败: {str(e)}")
|
||||||
|
+ raise HTTPException(status_code=500, detail=f"导出失败: {str(e)}")
|
||||||
|
|
||||||
|
- output.seek(0)
|
||||||
|
|
||||||
|
- # 生成文件名
|
||||||
|
- filename = f"filled_template.{request.format}"
|
||||||
|
+async def _export_to_excel(filled_data: dict, template_id: str) -> StreamingResponse:
|
||||||
|
+ """导出为 Excel 格式"""
|
||||||
|
+ # 将字典转换为单行 DataFrame
|
||||||
|
+ df = pd.DataFrame([filled_data])
|
||||||
|
|
||||||
|
- return StreamingResponse(
|
||||||
|
- io.BytesIO(output.getvalue()),
|
||||||
|
- media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
- headers={"Content-Disposition": f"attachment; filename={filename}"}
|
||||||
|
- )
|
||||||
|
+ output = io.BytesIO()
|
||||||
|
+ with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
||||||
|
+ df.to_excel(writer, index=False, sheet_name='填写结果')
|
||||||
|
|
||||||
|
- except Exception as e:
|
||||||
|
- logger.error(f"导出失败: {str(e)}")
|
||||||
|
- raise HTTPException(status_code=500, detail=f"导出失败: {str(e)}")
|
||||||
|
+ output.seek(0)
|
||||||
|
+
|
||||||
|
+ filename = f"filled_template.xlsx"
|
||||||
|
+
|
||||||
|
+ return StreamingResponse(
|
||||||
|
+ io.BytesIO(output.getvalue()),
|
||||||
|
+ media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+async def _export_to_word(filled_data: dict, template_id: str) -> StreamingResponse:
|
||||||
|
+ """导出为 Word 格式"""
|
||||||
|
+ from docx import Document
|
||||||
|
+ from docx.shared import Pt, RGBColor
|
||||||
|
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||||
|
+
|
||||||
|
+ doc = Document()
|
||||||
|
+
|
||||||
|
+ # 添加标题
|
||||||
|
+ title = doc.add_heading('填写结果', level=1)
|
||||||
|
+ title.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
|
+
|
||||||
|
+ # 添加填写时间和模板信息
|
||||||
|
+ from datetime import datetime
|
||||||
|
+ info_para = doc.add_paragraph()
|
||||||
|
+ info_para.add_run(f"模板ID: {template_id}\n").bold = True
|
||||||
|
+ info_para.add_run(f"导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
+
|
||||||
|
+ doc.add_paragraph() # 空行
|
||||||
|
+
|
||||||
|
+ # 添加字段表格
|
||||||
|
+ table = doc.add_table(rows=1, cols=3)
|
||||||
|
+ table.style = 'Light Grid Accent 1'
|
||||||
|
+
|
||||||
|
+ # 表头
|
||||||
|
+ header_cells = table.rows[0].cells
|
||||||
|
+ header_cells[0].text = '字段名'
|
||||||
|
+ header_cells[1].text = '填写值'
|
||||||
|
+ header_cells[2].text = '状态'
|
||||||
|
+
|
||||||
|
+ for field_name, field_value in filled_data.items():
|
||||||
|
+ row_cells = table.add_row().cells
|
||||||
|
+ row_cells[0].text = field_name
|
||||||
|
+ row_cells[1].text = str(field_value) if field_value else ''
|
||||||
|
+ row_cells[2].text = '已填写' if field_value else '为空'
|
||||||
|
+
|
||||||
|
+ # 保存到 BytesIO
|
||||||
|
+ output = io.BytesIO()
|
||||||
|
+ doc.save(output)
|
||||||
|
+ output.seek(0)
|
||||||
|
+
|
||||||
|
+ filename = f"filled_template.docx"
|
||||||
|
+
|
||||||
|
+ return StreamingResponse(
|
||||||
|
+ io.BytesIO(output.getvalue()),
|
||||||
|
+ media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+@router.post("/export/excel")
|
||||||
|
+async def export_to_excel(
|
||||||
|
+ filled_data: dict,
|
||||||
|
+ template_id: str = Query(..., description="模板ID")
|
||||||
|
+):
|
||||||
|
+ """
|
||||||
|
+ 专门导出为 Excel 格式
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ filled_data: 填写数据
|
||||||
|
+ template_id: 模板ID
|
||||||
|
+
|
||||||
|
+ Returns:
|
||||||
|
+ Excel 文件流
|
||||||
|
+ """
|
||||||
|
+ return await _export_to_excel(filled_data, template_id)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+@router.post("/export/word")
|
||||||
|
+async def export_to_word(
|
||||||
|
+ filled_data: dict,
|
||||||
|
+ template_id: str = Query(..., description="模板ID")
|
||||||
|
+):
|
||||||
|
+ """
|
||||||
|
+ 专门导出为 Word 格式
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ filled_data: 填写数据
|
||||||
|
+ template_id: 模板ID
|
||||||
|
+
|
||||||
|
+ Returns:
|
||||||
|
+ Word 文件流
|
||||||
|
+ """
|
||||||
|
+ return await _export_to_word(filled_data, template_id)
|
||||||
|
diff --git a/backend/app/core/document_parser/docx_parser.py b/backend/app/core/document_parser/docx_parser.py
|
||||||
|
index 75e79da..03c341d 100644
|
||||||
|
--- a/backend/app/core/document_parser/docx_parser.py
|
||||||
|
+++ b/backend/app/core/document_parser/docx_parser.py
|
||||||
|
@@ -161,3 +161,133 @@ class DocxParser(BaseParser):
|
||||||
|
fields[field_name] = match.group(1)
|
||||||
|
|
||||||
|
return fields
|
||||||
|
+
|
||||||
|
+ def parse_tables_for_template(
|
||||||
|
+ self,
|
||||||
|
+ file_path: str
|
||||||
|
+ ) -> Dict[str, Any]:
|
||||||
|
+ """
|
||||||
|
+ 解析 Word 文档中的表格,提取模板字段
|
||||||
|
+
|
||||||
|
+ 专门用于比赛场景:解析表格模板,识别需要填写的字段
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ file_path: Word 文件路径
|
||||||
|
+
|
||||||
|
+ Returns:
|
||||||
|
+ 包含表格字段信息的字典
|
||||||
|
+ """
|
||||||
|
+ from docx import Document
|
||||||
|
+ from docx.table import Table
|
||||||
|
+ from docx.oxml.ns import qn
|
||||||
|
+
|
||||||
|
+ doc = Document(file_path)
|
||||||
|
+
|
||||||
|
+ template_info = {
|
||||||
|
+ "tables": [],
|
||||||
|
+ "fields": [],
|
||||||
|
+ "field_count": 0
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for table_idx, table in enumerate(doc.tables):
|
||||||
|
+ table_info = {
|
||||||
|
+ "table_index": table_idx,
|
||||||
|
+ "rows": [],
|
||||||
|
+ "headers": [],
|
||||||
|
+ "data_rows": [],
|
||||||
|
+ "field_hints": {} # 字段名称 -> 提示词/描述
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ # 提取表头(第一行)
|
||||||
|
+ if table.rows:
|
||||||
|
+ header_cells = [cell.text.strip() for cell in table.rows[0].cells]
|
||||||
|
+ table_info["headers"] = header_cells
|
||||||
|
+
|
||||||
|
+ # 提取数据行
|
||||||
|
+ for row_idx, row in enumerate(table.rows[1:], 1):
|
||||||
|
+ row_data = [cell.text.strip() for cell in row.cells]
|
||||||
|
+ table_info["data_rows"].append(row_data)
|
||||||
|
+ table_info["rows"].append({
|
||||||
|
+ "row_index": row_idx,
|
||||||
|
+ "cells": row_data
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+ # 尝试从第二列/第三列提取提示词
|
||||||
|
+ # 比赛模板通常格式为:字段名 | 提示词 | 填写值
|
||||||
|
+ if len(table.rows[0].cells) >= 2:
|
||||||
|
+ for row_idx, row in enumerate(table.rows[1:], 1):
|
||||||
|
+ cells = [cell.text.strip() for cell in row.cells]
|
||||||
|
+ if len(cells) >= 2 and cells[0]:
|
||||||
|
+ # 第一列是字段名
|
||||||
|
+ field_name = cells[0]
|
||||||
|
+ # 第二列可能是提示词或描述
|
||||||
|
+ hint = cells[1] if len(cells) > 1 else ""
|
||||||
|
+ table_info["field_hints"][field_name] = hint
|
||||||
|
+
|
||||||
|
+ template_info["fields"].append({
|
||||||
|
+ "table_index": table_idx,
|
||||||
|
+ "row_index": row_idx,
|
||||||
|
+ "field_name": field_name,
|
||||||
|
+ "hint": hint,
|
||||||
|
+ "expected_value": cells[2] if len(cells) > 2 else ""
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+ template_info["tables"].append(table_info)
|
||||||
|
+
|
||||||
|
+ template_info["field_count"] = len(template_info["fields"])
|
||||||
|
+ return template_info
|
||||||
|
+
|
||||||
|
+ def extract_template_fields_from_docx(
|
||||||
|
+ self,
|
||||||
|
+ file_path: str
|
||||||
|
+ ) -> List[Dict[str, Any]]:
|
||||||
|
+ """
|
||||||
|
+ 从 Word 文档中提取模板字段定义
|
||||||
|
+
|
||||||
|
+ 适用于比赛评分表格:表格第一列是字段名,第二列是提示词/填写示例
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ file_path: Word 文件路径
|
||||||
|
+
|
||||||
|
+ Returns:
|
||||||
|
+ 字段定义列表
|
||||||
|
+ """
|
||||||
|
+ template_info = self.parse_tables_for_template(file_path)
|
||||||
|
+
|
||||||
|
+ fields = []
|
||||||
|
+ for field in template_info["fields"]:
|
||||||
|
+ fields.append({
|
||||||
|
+ "cell": f"T{field['table_index']}R{field['row_index']}", # TableXRowY 格式
|
||||||
|
+ "name": field["field_name"],
|
||||||
|
+ "hint": field["hint"],
|
||||||
|
+ "table_index": field["table_index"],
|
||||||
|
+ "row_index": field["row_index"],
|
||||||
|
+ "field_type": self._infer_field_type_from_hint(field["hint"]),
|
||||||
|
+ "required": True
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+ return fields
|
||||||
|
+
|
||||||
|
+ def _infer_field_type_from_hint(self, hint: str) -> str:
|
||||||
|
+ """
|
||||||
|
+ 从提示词推断字段类型
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ hint: 字段提示词
|
||||||
|
+
|
||||||
|
+ Returns:
|
||||||
|
+ 字段类型 (text/number/date)
|
||||||
|
+ """
|
||||||
|
+ hint_lower = hint.lower()
|
||||||
|
+
|
||||||
|
+ # 日期关键词
|
||||||
|
+ date_keywords = ["年", "月", "日", "日期", "时间", "出生"]
|
||||||
|
+ if any(kw in hint for kw in date_keywords):
|
||||||
|
+ return "date"
|
||||||
|
+
|
||||||
|
+ # 数字关键词
|
||||||
|
+ number_keywords = ["数量", "金额", "人数", "面积", "增长", "比率", "%", "率"]
|
||||||
|
+ if any(kw in hint_lower for kw in number_keywords):
|
||||||
|
+ return "number"
|
||||||
|
+
|
||||||
|
+ return "text"
|
||||||
|
diff --git a/backend/app/services/template_fill_service.py b/backend/app/services/template_fill_service.py
|
||||||
|
index 2612354..94930fb 100644
|
||||||
|
--- a/backend/app/services/template_fill_service.py
|
||||||
|
+++ b/backend/app/services/template_fill_service.py
|
||||||
|
@@ -4,13 +4,12 @@
|
||||||
|
从非结构化文档中检索信息并填写到表格模板
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
-from dataclasses import dataclass
|
||||||
|
+from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from app.core.database import mongodb
|
||||||
|
-from app.services.rag_service import rag_service
|
||||||
|
from app.services.llm_service import llm_service
|
||||||
|
-from app.services.excel_storage_service import excel_storage_service
|
||||||
|
+from app.core.document_parser import ParserFactory
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@@ -22,6 +21,17 @@ class TemplateField:
|
||||||
|
name: str # 字段名称
|
||||||
|
field_type: str = "text" # 字段类型: text/number/date
|
||||||
|
required: bool = True
|
||||||
|
+ hint: str = "" # 字段提示词
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+@dataclass
|
||||||
|
+class SourceDocument:
|
||||||
|
+ """源文档"""
|
||||||
|
+ doc_id: str
|
||||||
|
+ filename: str
|
||||||
|
+ doc_type: str
|
||||||
|
+ content: str = ""
|
||||||
|
+ structured_data: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
@@ -38,12 +48,12 @@ class TemplateFillService:
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.llm = llm_service
|
||||||
|
- self.rag = rag_service
|
||||||
|
|
||||||
|
async def fill_template(
|
||||||
|
self,
|
||||||
|
template_fields: List[TemplateField],
|
||||||
|
source_doc_ids: Optional[List[str]] = None,
|
||||||
|
+ source_file_paths: Optional[List[str]] = None,
|
||||||
|
user_hint: Optional[str] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
@@ -51,7 +61,8 @@ class TemplateFillService:
|
||||||
|
|
||||||
|
Args:
|
||||||
|
template_fields: 模板字段列表
|
||||||
|
- source_doc_ids: 源文档ID列表,不指定则从所有文档检索
|
||||||
|
+ source_doc_ids: 源文档 MongoDB ID 列表
|
||||||
|
+ source_file_paths: 源文档文件路径列表
|
||||||
|
user_hint: 用户提示(如"请从合同文档中提取")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
@@ -60,28 +71,23 @@ class TemplateFillService:
|
||||||
|
filled_data = {}
|
||||||
|
fill_details = []
|
||||||
|
|
||||||
|
+ # 1. 加载源文档内容
|
||||||
|
+ source_docs = await self._load_source_documents(source_doc_ids, source_file_paths)
|
||||||
|
+
|
||||||
|
+ if not source_docs:
|
||||||
|
+ logger.warning("没有找到源文档,填表结果将全部为空")
|
||||||
|
+
|
||||||
|
+ # 2. 对每个字段进行提取
|
||||||
|
for field in template_fields:
|
||||||
|
try:
|
||||||
|
- # 1. 从 RAG 检索相关上下文
|
||||||
|
- rag_results = await self._retrieve_context(field.name, user_hint)
|
||||||
|
-
|
||||||
|
- if not rag_results:
|
||||||
|
- # 如果没有检索到结果,尝试直接询问 LLM
|
||||||
|
- result = FillResult(
|
||||||
|
- field=field.name,
|
||||||
|
- value="",
|
||||||
|
- source="未找到相关数据",
|
||||||
|
- confidence=0.0
|
||||||
|
- )
|
||||||
|
- else:
|
||||||
|
- # 2. 构建 Prompt 让 LLM 提取信息
|
||||||
|
- result = await self._extract_field_value(
|
||||||
|
- field=field,
|
||||||
|
- rag_context=rag_results,
|
||||||
|
- user_hint=user_hint
|
||||||
|
- )
|
||||||
|
-
|
||||||
|
- # 3. 存储结果
|
||||||
|
+ # 从源文档中提取字段值
|
||||||
|
+ result = await self._extract_field_value(
|
||||||
|
+ field=field,
|
||||||
|
+ source_docs=source_docs,
|
||||||
|
+ user_hint=user_hint
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+ # 存储结果
|
||||||
|
filled_data[field.name] = result.value
|
||||||
|
fill_details.append({
|
||||||
|
"field": field.name,
|
||||||
|
@@ -107,75 +113,113 @@ class TemplateFillService:
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"filled_data": filled_data,
|
||||||
|
- "fill_details": fill_details
|
||||||
|
+ "fill_details": fill_details,
|
||||||
|
+ "source_doc_count": len(source_docs)
|
||||||
|
}
|
||||||
|
|
||||||
|
- async def _retrieve_context(
|
||||||
|
+ async def _load_source_documents(
|
||||||
|
self,
|
||||||
|
- field_name: str,
|
||||||
|
- user_hint: Optional[str] = None
|
||||||
|
- ) -> List[Dict[str, Any]]:
|
||||||
|
+ source_doc_ids: Optional[List[str]] = None,
|
||||||
|
+ source_file_paths: Optional[List[str]] = None
|
||||||
|
+ ) -> List[SourceDocument]:
|
||||||
|
"""
|
||||||
|
- 从 RAG 检索相关上下文
|
||||||
|
+ 加载源文档内容
|
||||||
|
|
||||||
|
Args:
|
||||||
|
- field_name: 字段名称
|
||||||
|
- user_hint: 用户提示
|
||||||
|
+ source_doc_ids: MongoDB 文档 ID 列表
|
||||||
|
+ source_file_paths: 源文档文件路径列表
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- 检索结果列表
|
||||||
|
+ 源文档列表
|
||||||
|
"""
|
||||||
|
- # 构建查询文本
|
||||||
|
- query = field_name
|
||||||
|
- if user_hint:
|
||||||
|
- query = f"{user_hint} {field_name}"
|
||||||
|
-
|
||||||
|
- # 检索相关文档片段
|
||||||
|
- results = self.rag.retrieve(query=query, top_k=5)
|
||||||
|
-
|
||||||
|
- return results
|
||||||
|
+ source_docs = []
|
||||||
|
+
|
||||||
|
+ # 1. 从 MongoDB 加载文档
|
||||||
|
+ if source_doc_ids:
|
||||||
|
+ for doc_id in source_doc_ids:
|
||||||
|
+ try:
|
||||||
|
+ doc = await mongodb.get_document(doc_id)
|
||||||
|
+ if doc:
|
||||||
|
+ source_docs.append(SourceDocument(
|
||||||
|
+ doc_id=doc_id,
|
||||||
|
+ filename=doc.get("metadata", {}).get("original_filename", "unknown"),
|
||||||
|
+ doc_type=doc.get("doc_type", "unknown"),
|
||||||
|
+ content=doc.get("content", ""),
|
||||||
|
+ structured_data=doc.get("structured_data", {})
|
||||||
|
+ ))
|
||||||
|
+ logger.info(f"从MongoDB加载文档: {doc_id}")
|
||||||
|
+ except Exception as e:
|
||||||
|
+ logger.error(f"从MongoDB加载文档失败 {doc_id}: {str(e)}")
|
||||||
|
+
|
||||||
|
+ # 2. 从文件路径加载文档
|
||||||
|
+ if source_file_paths:
|
||||||
|
+ for file_path in source_file_paths:
|
||||||
|
+ try:
|
||||||
|
+ parser = ParserFactory.get_parser(file_path)
|
||||||
|
+ result = parser.parse(file_path)
|
||||||
|
+ if result.success:
|
||||||
|
+ source_docs.append(SourceDocument(
|
||||||
|
+ doc_id=file_path,
|
||||||
|
+ filename=result.metadata.get("filename", file_path.split("/")[-1]),
|
||||||
|
+ doc_type=result.metadata.get("extension", "unknown").replace(".", ""),
|
||||||
|
+ content=result.data.get("content", ""),
|
||||||
|
+ structured_data=result.data.get("structured_data", {})
|
||||||
|
+ ))
|
||||||
|
+ logger.info(f"从文件加载文档: {file_path}")
|
||||||
|
+ except Exception as e:
|
||||||
|
+ logger.error(f"从文件加载文档失败 {file_path}: {str(e)}")
|
||||||
|
+
|
||||||
|
+ return source_docs
|
||||||
|
|
||||||
|
async def _extract_field_value(
|
||||||
|
self,
|
||||||
|
field: TemplateField,
|
||||||
|
- rag_context: List[Dict[str, Any]],
|
||||||
|
+ source_docs: List[SourceDocument],
|
||||||
|
user_hint: Optional[str] = None
|
||||||
|
) -> FillResult:
|
||||||
|
"""
|
||||||
|
- 使用 LLM 从上下文中提取字段值
|
||||||
|
+ 使用 LLM 从源文档中提取字段值
|
||||||
|
|
||||||
|
Args:
|
||||||
|
field: 字段定义
|
||||||
|
- rag_context: RAG 检索到的上下文
|
||||||
|
+ source_docs: 源文档列表
|
||||||
|
user_hint: 用户提示
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
提取结果
|
||||||
|
"""
|
||||||
|
+ if not source_docs:
|
||||||
|
+ return FillResult(
|
||||||
|
+ field=field.name,
|
||||||
|
+ value="",
|
||||||
|
+ source="无源文档",
|
||||||
|
+ confidence=0.0
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
# 构建上下文文本
|
||||||
|
- context_text = "\n\n".join([
|
||||||
|
- f"【文档 {i+1}】\n{doc['content']}"
|
||||||
|
- for i, doc in enumerate(rag_context)
|
||||||
|
- ])
|
||||||
|
+ context_text = self._build_context_text(source_docs, max_length=8000)
|
||||||
|
+
|
||||||
|
+ # 构建提示词
|
||||||
|
+ hint_text = field.hint if field.hint else f"请提取{field.name}的信息"
|
||||||
|
+ if user_hint:
|
||||||
|
+ hint_text = f"{user_hint}。{hint_text}"
|
||||||
|
|
||||||
|
- # 构建 Prompt
|
||||||
|
- prompt = f"""你是一个数据提取专家。请根据以下文档内容,提取指定字段的信息。
|
||||||
|
+ prompt = f"""你是一个专业的数据提取专家。请根据以下文档内容,提取指定字段的信息。
|
||||||
|
|
||||||
|
需要提取的字段:
|
||||||
|
- 字段名称:{field.name}
|
||||||
|
- 字段类型:{field.field_type}
|
||||||
|
+- 填写提示:{hint_text}
|
||||||
|
- 是否必填:{'是' if field.required else '否'}
|
||||||
|
|
||||||
|
-{'用户提示:' + user_hint if user_hint else ''}
|
||||||
|
-
|
||||||
|
参考文档内容:
|
||||||
|
{context_text}
|
||||||
|
|
||||||
|
请严格按照以下 JSON 格式输出,不要添加任何解释:
|
||||||
|
{{
|
||||||
|
"value": "提取到的值,如果没有找到则填写空字符串",
|
||||||
|
- "source": "数据来源的文档描述",
|
||||||
|
- "confidence": 0.0到1.0之间的置信度
|
||||||
|
+ "source": "数据来源的文档描述(如:来自xxx文档)",
|
||||||
|
+ "confidence": 0.0到1.0之间的置信度,表示对提取结果的信心程度"
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
@@ -226,6 +270,54 @@ class TemplateFillService:
|
||||||
|
confidence=0.0
|
||||||
|
)
|
||||||
|
|
||||||
|
+ def _build_context_text(self, source_docs: List[SourceDocument], max_length: int = 8000) -> str:
|
||||||
|
+ """
|
||||||
|
+ 构建上下文文本
|
||||||
|
+
|
||||||
|
+ Args:
|
||||||
|
+ source_docs: 源文档列表
|
||||||
|
+ max_length: 最大字符数
|
||||||
|
+
|
||||||
|
+ Returns:
|
||||||
|
+ 上下文文本
|
||||||
|
+ """
|
||||||
|
+ contexts = []
|
||||||
|
+ total_length = 0
|
||||||
|
+
|
||||||
|
+ for doc in source_docs:
|
||||||
|
+ # 优先使用结构化数据(表格),其次使用文本内容
|
||||||
|
+ doc_content = ""
|
||||||
|
+
|
||||||
|
+ if doc.structured_data and doc.structured_data.get("tables"):
|
||||||
|
+ # 如果有表格数据,优先使用
|
||||||
|
+ tables = doc.structured_data.get("tables", [])
|
||||||
|
+ for table in tables:
|
||||||
|
+ if isinstance(table, dict):
|
||||||
|
+ rows = table.get("rows", [])
|
||||||
|
+ if rows:
|
||||||
|
+ doc_content += f"\n【文档: {doc.filename} 表格数据】\n"
|
||||||
|
+ for row in rows[:20]: # 限制每表最多20行
|
||||||
|
+ if isinstance(row, list):
|
||||||
|
+ doc_content += " | ".join(str(cell) for cell in row) + "\n"
|
||||||
|
+ elif isinstance(row, dict):
|
||||||
|
+ doc_content += " | ".join(str(v) for v in row.values()) + "\n"
|
||||||
|
+ elif doc.content:
|
||||||
|
+ doc_content = doc.content[:5000] # 限制文本长度
|
||||||
|
+
|
||||||
|
+ if doc_content:
|
||||||
|
+ doc_context = f"【文档: {doc.filename} ({doc.doc_type})】\n{doc_content}"
|
||||||
|
+ if total_length + len(doc_context) <= max_length:
|
||||||
|
+ contexts.append(doc_context)
|
||||||
|
+ total_length += len(doc_context)
|
||||||
|
+ else:
|
||||||
|
+ # 如果超出长度,截断
|
||||||
|
+ remaining = max_length - total_length
|
||||||
|
+ if remaining > 100:
|
||||||
|
+ contexts.append(doc_context[:remaining])
|
||||||
|
+ break
|
||||||
|
+
|
||||||
|
+ return "\n\n".join(contexts) if contexts else "(源文档内容为空)"
|
||||||
|
+
|
||||||
|
async def get_template_fields_from_file(
|
||||||
|
self,
|
||||||
|
file_path: str,
|
||||||
|
@@ -236,7 +328,7 @@ class TemplateFillService:
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: 模板文件路径
|
||||||
|
- file_type: 文件类型
|
||||||
|
+ file_type: 文件类型 (xlsx/xls/docx)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
字段列表
|
||||||
|
@@ -245,43 +337,108 @@ class TemplateFillService:
|
||||||
|
|
||||||
|
try:
|
||||||
|
if file_type in ["xlsx", "xls"]:
|
||||||
|
- # 从 Excel 读取表头
|
||||||
|
- import pandas as pd
|
||||||
|
- df = pd.read_excel(file_path, nrows=5)
|
||||||
|
+ fields = await self._get_template_fields_from_excel(file_path)
|
||||||
|
+ elif file_type == "docx":
|
||||||
|
+ fields = await self._get_template_fields_from_docx(file_path)
|
||||||
|
|
||||||
|
- for idx, col in enumerate(df.columns):
|
||||||
|
- # 获取单元格位置 (A, B, C, ...)
|
||||||
|
- cell = self._column_to_cell(idx)
|
||||||
|
+ except Exception as e:
|
||||||
|
+ logger.error(f"提取模板字段失败: {str(e)}")
|
||||||
|
|
||||||
|
- fields.append(TemplateField(
|
||||||
|
- cell=cell,
|
||||||
|
- name=str(col),
|
||||||
|
- field_type=self._infer_field_type(df[col]),
|
||||||
|
- required=True
|
||||||
|
- ))
|
||||||
|
+ return fields
|
||||||
|
|
||||||
|
- elif file_type == "docx":
|
||||||
|
- # 从 Word 表格读取
|
||||||
|
- from docx import Document
|
||||||
|
- doc = Document(file_path)
|
||||||
|
-
|
||||||
|
- for table_idx, table in enumerate(doc.tables):
|
||||||
|
- for row_idx, row in enumerate(table.rows):
|
||||||
|
- for col_idx, cell in enumerate(row.cells):
|
||||||
|
- cell_text = cell.text.strip()
|
||||||
|
- if cell_text:
|
||||||
|
- fields.append(TemplateField(
|
||||||
|
- cell=self._column_to_cell(col_idx),
|
||||||
|
- name=cell_text,
|
||||||
|
- field_type="text",
|
||||||
|
- required=True
|
||||||
|
- ))
|
||||||
|
+ async def _get_template_fields_from_excel(self, file_path: str) -> List[TemplateField]:
|
||||||
|
+ """从 Excel 模板提取字段"""
|
||||||
|
+ fields = []
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ import pandas as pd
|
||||||
|
+ df = pd.read_excel(file_path, nrows=5)
|
||||||
|
+
|
||||||
|
+ for idx, col in enumerate(df.columns):
|
||||||
|
+ cell = self._column_to_cell(idx)
|
||||||
|
+ col_str = str(col)
|
||||||
|
+
|
||||||
|
+ fields.append(TemplateField(
|
||||||
|
+ cell=cell,
|
||||||
|
+ name=col_str,
|
||||||
|
+ field_type=self._infer_field_type_from_value(df[col].iloc[0] if len(df) > 0 else ""),
|
||||||
|
+ required=True,
|
||||||
|
+ hint=""
|
||||||
|
+ ))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
- logger.error(f"提取模板字段失败: {str(e)}")
|
||||||
|
+ logger.error(f"从Excel提取字段失败: {str(e)}")
|
||||||
|
|
||||||
|
return fields
|
||||||
|
|
||||||
|
+ async def _get_template_fields_from_docx(self, file_path: str) -> List[TemplateField]:
|
||||||
|
+ """从 Word 模板提取字段"""
|
||||||
|
+ fields = []
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ from docx import Document
|
||||||
|
+
|
||||||
|
+ doc = Document(file_path)
|
||||||
|
+
|
||||||
|
+ for table_idx, table in enumerate(doc.tables):
|
||||||
|
+ for row_idx, row in enumerate(table.rows):
|
||||||
|
+ cells = [cell.text.strip() for cell in row.cells]
|
||||||
|
+
|
||||||
|
+ # 假设第一列是字段名
|
||||||
|
+ if cells and cells[0]:
|
||||||
|
+ field_name = cells[0]
|
||||||
|
+ hint = cells[1] if len(cells) > 1 else ""
|
||||||
|
+
|
||||||
|
+ # 跳过空行或标题行
|
||||||
|
+ if field_name and field_name not in ["", "字段名", "名称", "项目"]:
|
||||||
|
+ fields.append(TemplateField(
|
||||||
|
+ cell=f"T{table_idx}R{row_idx}",
|
||||||
|
+ name=field_name,
|
||||||
|
+ field_type=self._infer_field_type_from_hint(hint),
|
||||||
|
+ required=True,
|
||||||
|
+ hint=hint
|
||||||
|
+ ))
|
||||||
|
+
|
||||||
|
+ except Exception as e:
|
||||||
|
+ logger.error(f"从Word提取字段失败: {str(e)}")
|
||||||
|
+
|
||||||
|
+ return fields
|
||||||
|
+
|
||||||
|
+ def _infer_field_type_from_hint(self, hint: str) -> str:
|
||||||
|
+ """从提示词推断字段类型"""
|
||||||
|
+ hint_lower = hint.lower()
|
||||||
|
+
|
||||||
|
+ date_keywords = ["年", "月", "日", "日期", "时间", "出生"]
|
||||||
|
+ if any(kw in hint for kw in date_keywords):
|
||||||
|
+ return "date"
|
||||||
|
+
|
||||||
|
+ number_keywords = ["数量", "金额", "人数", "面积", "增长", "比率", "%", "率", "总计", "合计"]
|
||||||
|
+ if any(kw in hint_lower for kw in number_keywords):
|
||||||
|
+ return "number"
|
||||||
|
+
|
||||||
|
+ return "text"
|
||||||
|
+
|
||||||
|
+ def _infer_field_type_from_value(self, value: Any) -> str:
|
||||||
|
+ """从示例值推断字段类型"""
|
||||||
|
+ if value is None or value == "":
|
||||||
|
+ return "text"
|
||||||
|
+
|
||||||
|
+ value_str = str(value)
|
||||||
|
+
|
||||||
|
+ # 检查日期模式
|
||||||
|
+ import re
|
||||||
|
+ if re.search(r'\d{4}[年/-]\d{1,2}[月/-]\d{1,2}', value_str):
|
||||||
|
+ return "date"
|
||||||
|
+
|
||||||
|
+ # 检查数值
|
||||||
|
+ try:
|
||||||
|
+ float(value_str.replace(',', '').replace('%', ''))
|
||||||
|
+ return "number"
|
||||||
|
+ except ValueError:
|
||||||
|
+ pass
|
||||||
|
+
|
||||||
|
+ return "text"
|
||||||
|
+
|
||||||
|
def _column_to_cell(self, col_idx: int) -> str:
|
||||||
|
"""将列索引转换为单元格列名 (0 -> A, 1 -> B, ...)"""
|
||||||
|
result = ""
|
||||||
|
@@ -290,17 +447,6 @@ class TemplateFillService:
|
||||||
|
col_idx = col_idx // 26 - 1
|
||||||
|
return result
|
||||||
|
|
||||||
|
- def _infer_field_type(self, series) -> str:
|
||||||
|
- """推断字段类型"""
|
||||||
|
- import pandas as pd
|
||||||
|
-
|
||||||
|
- if pd.api.types.is_numeric_dtype(series):
|
||||||
|
- return "number"
|
||||||
|
- elif pd.api.types.is_datetime64_any_dtype(series):
|
||||||
|
- return "date"
|
||||||
|
- else:
|
||||||
|
- return "text"
|
||||||
|
-
|
||||||
|
|
||||||
|
# ==================== 全局单例 ====================
|
||||||
|
|
||||||
53
logs/frontend_template_fill.patch
Normal file
53
logs/frontend_template_fill.patch
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
diff --git a/frontend/src/db/backend-api.ts b/frontend/src/db/backend-api.ts
|
||||||
|
index 8944353..94ac852 100644
|
||||||
|
--- a/frontend/src/db/backend-api.ts
|
||||||
|
+++ b/frontend/src/db/backend-api.ts
|
||||||
|
@@ -92,6 +92,7 @@ export interface TemplateField {
|
||||||
|
name: string;
|
||||||
|
field_type: string;
|
||||||
|
required: boolean;
|
||||||
|
+ hint?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 表格填写结果
|
||||||
|
@@ -625,7 +626,10 @@ export const backendApi = {
|
||||||
|
*/
|
||||||
|
async fillTemplate(
|
||||||
|
templateId: string,
|
||||||
|
- templateFields: TemplateField[]
|
||||||
|
+ templateFields: TemplateField[],
|
||||||
|
+ sourceDocIds?: string[],
|
||||||
|
+ sourceFilePaths?: string[],
|
||||||
|
+ userHint?: string
|
||||||
|
): Promise<FillResult> {
|
||||||
|
const url = `${BACKEND_BASE_URL}/templates/fill`;
|
||||||
|
|
||||||
|
@@ -636,6 +640,9 @@ export const backendApi = {
|
||||||
|
body: JSON.stringify({
|
||||||
|
template_id: templateId,
|
||||||
|
template_fields: templateFields,
|
||||||
|
+ source_doc_ids: sourceDocIds || [],
|
||||||
|
+ source_file_paths: sourceFilePaths || [],
|
||||||
|
+ user_hint: userHint || null,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
diff --git a/frontend/src/pages/TemplateFill.tsx b/frontend/src/pages/TemplateFill.tsx
|
||||||
|
index 8c330a9..f9a4a39 100644
|
||||||
|
--- a/frontend/src/pages/TemplateFill.tsx
|
||||||
|
+++ b/frontend/src/pages/TemplateFill.tsx
|
||||||
|
@@ -128,8 +128,12 @@ const TemplateFill: React.FC = () => {
|
||||||
|
setStep('filling');
|
||||||
|
|
||||||
|
try {
|
||||||
|
- // 调用后端填表接口
|
||||||
|
- const result = await backendApi.fillTemplate('temp-template-id', templateFields);
|
||||||
|
+ // 调用后端填表接口,传递选中的文档ID
|
||||||
|
+ const result = await backendApi.fillTemplate(
|
||||||
|
+ 'temp-template-id',
|
||||||
|
+ templateFields,
|
||||||
|
+ selectedDocs // 传递源文档ID列表
|
||||||
|
+ );
|
||||||
|
setFilledResult(result);
|
||||||
|
setStep('preview');
|
||||||
|
toast.success('表格填写完成');
|
||||||
221
logs/planning_doc.patch
Normal file
221
logs/planning_doc.patch
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
diff --git "a/\346\257\224\350\265\233\345\244\207\350\265\233\350\247\204\345\210\222.md" "b/\346\257\224\350\265\233\345\244\207\350\265\233\350\247\204\345\210\222.md"
|
||||||
|
index bcb48fd..440a12d 100644
|
||||||
|
--- "a/\346\257\224\350\265\233\345\244\207\350\265\233\350\247\204\345\210\222.md"
|
||||||
|
+++ "b/\346\257\224\350\265\233\345\244\207\350\265\233\350\247\204\345\210\222.md"
|
||||||
|
@@ -50,7 +50,7 @@
|
||||||
|
| `prompt_service.py` | ✅ 已完成 | Prompt 模板管理 |
|
||||||
|
| `text_analysis_service.py` | ✅ 已完成 | 文本分析 |
|
||||||
|
| `chart_generator_service.py` | ✅ 已完成 | 图表生成服务 |
|
||||||
|
-| `template_fill_service.py` | ❌ 未完成 | 模板填写服务 |
|
||||||
|
+| `template_fill_service.py` | ✅ 已完成 | 模板填写服务,支持直接读取源文档进行填表 |
|
||||||
|
|
||||||
|
### 2.2 API 接口 (`backend/app/api/endpoints/`)
|
||||||
|
|
||||||
|
@@ -61,7 +61,7 @@
|
||||||
|
| `ai_analyze.py` | `/api/v1/analyze/*` | ✅ AI 分析(Excel、Markdown、流式) |
|
||||||
|
| `rag.py` | `/api/v1/rag/*` | ⚠️ RAG 检索(当前返回空) |
|
||||||
|
| `tasks.py` | `/api/v1/tasks/*` | ✅ 异步任务状态查询 |
|
||||||
|
-| `templates.py` | `/api/v1/templates/*` | ✅ 模板管理 |
|
||||||
|
+| `templates.py` | `/api/v1/templates/*` | ✅ 模板管理 (含 Word 导出) |
|
||||||
|
| `visualization.py` | `/api/v1/visualization/*` | ✅ 可视化图表 |
|
||||||
|
| `health.py` | `/api/v1/health` | ✅ 健康检查 |
|
||||||
|
|
||||||
|
@@ -78,8 +78,8 @@
|
||||||
|
|------|----------|------|
|
||||||
|
| Excel (.xlsx/.xls) | ✅ 已完成 | pandas + XML 回退解析 |
|
||||||
|
| Markdown (.md) | ✅ 已完成 | 正则 + AI 分章节 |
|
||||||
|
-| Word (.docx) | ❌ 未完成 | 尚未实现 |
|
||||||
|
-| Text (.txt) | ❌ 未完成 | 尚未实现 |
|
||||||
|
+| Word (.docx) | ✅ 已完成 | python-docx 解析,支持表格提取和字段识别 |
|
||||||
|
+| Text (.txt) | ✅ 已完成 | chardet 编码检测,支持文本清洗和结构化提取 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
@@ -87,7 +87,7 @@
|
||||||
|
|
||||||
|
### 3.1 模板填写模块(最优先)
|
||||||
|
|
||||||
|
-**这是比赛的核心评测功能,必须完成。**
|
||||||
|
+**当前状态**:✅ 已完成
|
||||||
|
|
||||||
|
```
|
||||||
|
用户上传模板表格(Word/Excel)
|
||||||
|
@@ -103,30 +103,34 @@ AI 根据字段提示词从源数据中提取信息
|
||||||
|
返回填写完成的表格
|
||||||
|
```
|
||||||
|
|
||||||
|
-**需要实现**:
|
||||||
|
-- [ ] `template_fill_service.py` - 模板填写核心服务
|
||||||
|
-- [ ] Word 模板解析 (`docx_parser.py` 需新建)
|
||||||
|
-- [ ] Text 模板解析 (`txt_parser.py` 需新建)
|
||||||
|
-- [ ] 模板字段识别与提示词提取
|
||||||
|
-- [ ] 多文档数据聚合与冲突处理
|
||||||
|
-- [ ] 结果导出为 Word/Excel
|
||||||
|
+**已完成实现**:
|
||||||
|
+- [x] `template_fill_service.py` - 模板填写核心服务
|
||||||
|
+- [x] Word 模板解析 (`docx_parser.py` - parse_tables_for_template, extract_template_fields_from_docx)
|
||||||
|
+- [x] Text 模板解析 (`txt_parser.py` - 已完成)
|
||||||
|
+- [x] 模板字段识别与提示词提取
|
||||||
|
+- [x] 多文档数据聚合与冲突处理
|
||||||
|
+- [x] 结果导出为 Word/Excel
|
||||||
|
|
||||||
|
### 3.2 Word 文档解析
|
||||||
|
|
||||||
|
-**当前状态**:仅有框架,尚未实现具体解析逻辑
|
||||||
|
+**当前状态**:✅ 已完成
|
||||||
|
|
||||||
|
-**需要实现**:
|
||||||
|
-- [ ] `docx_parser.py` - Word 文档解析器
|
||||||
|
-- [ ] 提取段落文本
|
||||||
|
-- [ ] 提取表格内容
|
||||||
|
-- [ ] 提取关键信息(标题、列表等)
|
||||||
|
+**已实现功能**:
|
||||||
|
+- [x] `docx_parser.py` - Word 文档解析器
|
||||||
|
+- [x] 提取段落文本
|
||||||
|
+- [x] 提取表格内容
|
||||||
|
+- [x] 提取关键信息(标题、列表等)
|
||||||
|
+- [x] 表格模板字段提取 (`parse_tables_for_template`, `extract_template_fields_from_docx`)
|
||||||
|
+- [x] 字段类型推断 (`_infer_field_type_from_hint`)
|
||||||
|
|
||||||
|
### 3.3 Text 文档解析
|
||||||
|
|
||||||
|
-**需要实现**:
|
||||||
|
-- [ ] `txt_parser.py` - 文本文件解析器
|
||||||
|
-- [ ] 编码自动检测
|
||||||
|
-- [ ] 文本清洗
|
||||||
|
+**当前状态**:✅ 已完成
|
||||||
|
+
|
||||||
|
+**已实现功能**:
|
||||||
|
+- [x] `txt_parser.py` - 文本文件解析器
|
||||||
|
+- [x] 编码自动检测 (chardet)
|
||||||
|
+- [x] 文本清洗
|
||||||
|
|
||||||
|
### 3.4 文档模板匹配(已有框架)
|
||||||
|
|
||||||
|
@@ -215,5 +219,122 @@ docs/test/
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
-*文档版本: v1.0*
|
||||||
|
-*最后更新: 2026-04-08*
|
||||||
|
\ No newline at end of file
|
||||||
|
+*文档版本: v1.1*
|
||||||
|
+*最后更新: 2026-04-08*
|
||||||
|
+
|
||||||
|
+---
|
||||||
|
+
|
||||||
|
+## 八、技术实现细节
|
||||||
|
+
|
||||||
|
+### 8.1 模板填表流程(已实现)
|
||||||
|
+
|
||||||
|
+#### 流程图
|
||||||
|
+```
|
||||||
|
+┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||||
|
+│ 上传模板 │ ──► │ 选择数据源 │ ──► │ AI 智能填表 │
|
||||||
|
+└─────────────┘ └─────────────┘ └─────────────┘
|
||||||
|
+ │
|
||||||
|
+ ▼
|
||||||
|
+ ┌─────────────┐
|
||||||
|
+ │ 导出结果 │
|
||||||
|
+ └─────────────┘
|
||||||
|
+```
|
||||||
|
+
|
||||||
|
+#### 核心组件
|
||||||
|
+
|
||||||
|
+| 组件 | 文件 | 说明 |
|
||||||
|
+|------|------|------|
|
||||||
|
+| 模板上传 | `templates.py` `/templates/upload` | 接收模板文件,提取字段 |
|
||||||
|
+| 字段提取 | `template_fill_service.py` | 从 Word/Excel 表格提取字段定义 |
|
||||||
|
+| 文档解析 | `docx_parser.py`, `xlsx_parser.py`, `txt_parser.py` | 解析源文档内容 |
|
||||||
|
+| 智能填表 | `template_fill_service.py` `fill_template()` | 使用 LLM 从源文档提取信息 |
|
||||||
|
+| 结果导出 | `templates.py` `/templates/export` | 导出为 Excel 或 Word |
|
||||||
|
+
|
||||||
|
+### 8.2 源文档加载方式
|
||||||
|
+
|
||||||
|
+模板填表服务支持两种方式加载源文档:
|
||||||
|
+
|
||||||
|
+1. **通过 MongoDB 文档 ID**:`source_doc_ids`
|
||||||
|
+ - 文档已上传并存入 MongoDB
|
||||||
|
+ - 服务直接查询 MongoDB 获取文档内容
|
||||||
|
+
|
||||||
|
+2. **通过文件路径**:`source_file_paths`
|
||||||
|
+ - 直接读取本地文件
|
||||||
|
+ - 使用对应的解析器解析内容
|
||||||
|
+
|
||||||
|
+### 8.3 Word 表格模板解析
|
||||||
|
+
|
||||||
|
+比赛评分表格通常是 Word 格式,`docx_parser.py` 提供了专门的解析方法:
|
||||||
|
+
|
||||||
|
+```python
|
||||||
|
+# 提取表格模板字段
|
||||||
|
+fields = docx_parser.extract_template_fields_from_docx(file_path)
|
||||||
|
+
|
||||||
|
+# 返回格式
|
||||||
|
+# [
|
||||||
|
+# {
|
||||||
|
+# "cell": "T0R1", # 表格0,行1
|
||||||
|
+# "name": "字段名",
|
||||||
|
+# "hint": "提示词",
|
||||||
|
+# "field_type": "text/number/date",
|
||||||
|
+# "required": True
|
||||||
|
+# },
|
||||||
|
+# ...
|
||||||
|
+# ]
|
||||||
|
+```
|
||||||
|
+
|
||||||
|
+### 8.4 字段类型推断
|
||||||
|
+
|
||||||
|
+系统支持从提示词自动推断字段类型:
|
||||||
|
+
|
||||||
|
+| 关键词 | 推断类型 | 示例 |
|
||||||
|
+|--------|----------|------|
|
||||||
|
+| 年、月、日、日期、时间、出生 | date | 出生日期 |
|
||||||
|
+| 数量、金额、人数、面积、增长、比率、%、率、总计、合计 | number | 增长比率 |
|
||||||
|
+| 其他 | text | 姓名、地址 |
|
||||||
|
+
|
||||||
|
+### 8.5 API 接口
|
||||||
|
+
|
||||||
|
+#### POST `/api/v1/templates/fill`
|
||||||
|
+
|
||||||
|
+填写请求:
|
||||||
|
+```json
|
||||||
|
+{
|
||||||
|
+ "template_id": "模板ID",
|
||||||
|
+ "template_fields": [
|
||||||
|
+ {"cell": "A1", "name": "姓名", "field_type": "text", "required": true, "hint": "提取人员姓名"}
|
||||||
|
+ ],
|
||||||
|
+ "source_doc_ids": ["mongodb_doc_id_1", "mongodb_doc_id_2"],
|
||||||
|
+ "source_file_paths": [],
|
||||||
|
+ "user_hint": "请从合同文档中提取"
|
||||||
|
+}
|
||||||
|
+```
|
||||||
|
+
|
||||||
|
+响应:
|
||||||
|
+```json
|
||||||
|
+{
|
||||||
|
+ "success": true,
|
||||||
|
+ "filled_data": {"姓名": "张三"},
|
||||||
|
+ "fill_details": [
|
||||||
|
+ {
|
||||||
|
+ "field": "姓名",
|
||||||
|
+ "cell": "A1",
|
||||||
|
+ "value": "张三",
|
||||||
|
+ "source": "来自:合同文档.docx",
|
||||||
|
+ "confidence": 0.95
|
||||||
|
+ }
|
||||||
|
+ ],
|
||||||
|
+ "source_doc_count": 2
|
||||||
|
+}
|
||||||
|
+```
|
||||||
|
+
|
||||||
|
+#### POST `/api/v1/templates/export`
|
||||||
|
+
|
||||||
|
+导出请求:
|
||||||
|
+```json
|
||||||
|
+{
|
||||||
|
+ "template_id": "模板ID",
|
||||||
|
+ "filled_data": {"姓名": "张三", "金额": "10000"},
|
||||||
|
+ "format": "xlsx" // 或 "docx"
|
||||||
|
+}
|
||||||
|
+```
|
||||||
|
\ No newline at end of file
|
||||||
59
logs/rag_disable_note.txt
Normal file
59
logs/rag_disable_note.txt
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
RAG 服务临时禁用说明
|
||||||
|
========================
|
||||||
|
日期: 2026-04-08
|
||||||
|
|
||||||
|
修改内容:
|
||||||
|
----------
|
||||||
|
应需求,RAG 向量检索功能已临时禁用,具体如下:
|
||||||
|
|
||||||
|
1. 修改文件: backend/app/services/rag_service.py
|
||||||
|
|
||||||
|
2. 关键变更:
|
||||||
|
- 在 RAGService.__init__ 中添加 self._disabled = True 标志
|
||||||
|
- index_field() - 添加 _disabled 检查,跳过实际索引操作并记录日志
|
||||||
|
- index_document_content() - 添加 _disabled 检查,跳过实际索引操作并记录日志
|
||||||
|
- retrieve() - 添加 _disabled 检查,返回空列表并记录日志
|
||||||
|
- get_vector_count() - 添加 _disabled 检查,返回 0 并记录日志
|
||||||
|
- clear() - 添加 _disabled 检查,跳过实际清空操作并记录日志
|
||||||
|
|
||||||
|
3. 行为变更:
|
||||||
|
- 所有 RAG 索引构建操作会被记录到日志 ([RAG DISABLED] 前缀)
|
||||||
|
- 所有 RAG 检索操作返回空结果
|
||||||
|
- 向量计数始终返回 0
|
||||||
|
- 实际向量数据库操作被跳过
|
||||||
|
|
||||||
|
4. 恢复方式:
|
||||||
|
- 将 RAGService.__init__ 中的 self._disabled = True 改为 self._disabled = False
|
||||||
|
- 重新启动服务即可恢复 RAG 功能
|
||||||
|
|
||||||
|
目的:
|
||||||
|
------
|
||||||
|
保留 RAG 索引构建功能的前端界面和代码结构,暂不实际调用向量数据库 API,
|
||||||
|
待后续需要时再启用。
|
||||||
|
|
||||||
|
影响范围:
|
||||||
|
---------
|
||||||
|
- /api/v1/rag/search - RAG 搜索接口 (返回空结果)
|
||||||
|
- /api/v1/rag/status - RAG 状态接口 (返回 vector_count=0)
|
||||||
|
- /api/v1/rag/rebuild - RAG 重建接口 (仅记录日志)
|
||||||
|
- Excel/文档上传时的 RAG 索引构建 (仅记录日志)
|
||||||
|
|
||||||
|
========================
|
||||||
|
后续补充 (2026-04-08):
|
||||||
|
========================
|
||||||
|
修改文件: backend/app/services/table_rag_service.py
|
||||||
|
|
||||||
|
关键变更:
|
||||||
|
- 在 TableRAGService.__init__ 中添加 self._disabled = True 标志
|
||||||
|
- build_table_rag_index() - RAG 索引部分被跳过,仅记录日志
|
||||||
|
- index_document_table() - RAG 索引部分被跳过,仅记录日志
|
||||||
|
|
||||||
|
行为变更:
|
||||||
|
- Excel 上传时,MySQL 存储仍然正常进行
|
||||||
|
- AI 字段描述仍然正常生成(调用 LLM)
|
||||||
|
- 只有向量数据库索引操作被跳过
|
||||||
|
|
||||||
|
恢复方式:
|
||||||
|
- 将 TableRAGService.__init__ 中的 self._disabled = True 改为 self._disabled = False
|
||||||
|
- 同时将 rag_service.py 中的 self._disabled = True 改为 self._disabled = False
|
||||||
|
- 两者需同时改为 False 才能完全恢复 RAG 功能
|
||||||
144
logs/template_fill_feature_changes.md
Normal file
144
logs/template_fill_feature_changes.md
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
# 模板填表功能变更日志
|
||||||
|
|
||||||
|
**变更日期**: 2026-04-08
|
||||||
|
**变更类型**: 功能完善
|
||||||
|
**变更内容**: Word 表格解析和模板填表功能
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 变更概述
|
||||||
|
|
||||||
|
本次变更完善了 Word 表格解析、表格模板构建和填写功能,实现了从源文档(MongoDB/文件)读取数据并智能填表的核心流程。
|
||||||
|
|
||||||
|
### 涉及文件
|
||||||
|
|
||||||
|
| 文件 | 变更行数 | 说明 |
|
||||||
|
|------|----------|------|
|
||||||
|
| backend/app/api/endpoints/templates.py | +156 | API 端点完善,添加 Word 导出 |
|
||||||
|
| backend/app/core/document_parser/docx_parser.py | +130 | Word 表格解析增强 |
|
||||||
|
| backend/app/services/template_fill_service.py | +340 | 核心填表服务重写 |
|
||||||
|
| frontend/src/db/backend-api.ts | +9 | 前端 API 更新 |
|
||||||
|
| frontend/src/pages/TemplateFill.tsx | +8 | 前端页面更新 |
|
||||||
|
| 比赛备赛规划.md | +169 | 文档更新 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 详细变更
|
||||||
|
|
||||||
|
### 1. backend/app/core/document_parser/docx_parser.py
|
||||||
|
|
||||||
|
**新增方法**:
|
||||||
|
|
||||||
|
- `parse_tables_for_template(file_path)` - 解析 Word 文档中的表格,提取模板字段
|
||||||
|
- `extract_template_fields_from_docx(file_path)` - 从 Word 文档提取模板字段定义
|
||||||
|
- `_infer_field_type_from_hint(hint)` - 从提示词推断字段类型
|
||||||
|
|
||||||
|
**功能说明**:
|
||||||
|
- 专门用于比赛场景:解析表格模板,识别需要填写的字段
|
||||||
|
- 支持从表格第一列提取字段名,第二列提取提示词/描述
|
||||||
|
- 自动推断字段类型(text/number/date)
|
||||||
|
|
||||||
|
### 2. backend/app/services/template_fill_service.py
|
||||||
|
|
||||||
|
**重构内容**:
|
||||||
|
|
||||||
|
- 不再依赖 RAG 服务,直接从 MongoDB 或文件读取源文档
|
||||||
|
- 新增 `SourceDocument` 数据类
|
||||||
|
- 完善 `fill_template()` 方法,支持 `source_doc_ids` 和 `source_file_paths`
|
||||||
|
- 新增 `_load_source_documents()` - 加载源文档内容
|
||||||
|
- 新增 `_extract_field_value()` - 使用 LLM 提取字段值
|
||||||
|
- 新增 `_build_context_text()` - 构建上下文(优先使用表格数据)
|
||||||
|
- 完善 `_get_template_fields_from_docx()` - Word 模板字段提取
|
||||||
|
|
||||||
|
**核心流程**:
|
||||||
|
```
|
||||||
|
1. 加载源文档(MongoDB 或文件)
|
||||||
|
2. 对每个字段调用 LLM 提取值
|
||||||
|
3. 返回填写结果
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. backend/app/api/endpoints/templates.py
|
||||||
|
|
||||||
|
**新增内容**:
|
||||||
|
|
||||||
|
- `FillRequest` 添加 `source_doc_ids`, `source_file_paths`, `user_hint` 字段
|
||||||
|
- `ExportRequest` 添加 `format` 字段
|
||||||
|
- `_export_to_word()` - 导出为 Word 格式
|
||||||
|
- `/templates/export/excel` - 专门导出 Excel
|
||||||
|
- `/templates/export/word` - 专门导出 Word
|
||||||
|
|
||||||
|
### 4. frontend/src/db/backend-api.ts
|
||||||
|
|
||||||
|
**更新内容**:
|
||||||
|
|
||||||
|
- `TemplateField` 接口添加 `hint` 字段
|
||||||
|
- `fillTemplate()` 方法添加 `sourceDocIds`, `sourceFilePaths`, `userHint` 参数
|
||||||
|
|
||||||
|
### 5. frontend/src/pages/TemplateFill.tsx
|
||||||
|
|
||||||
|
**更新内容**:
|
||||||
|
|
||||||
|
- `handleFillTemplate()` 传递 `selectedDocs` 作为 `sourceDocIds` 参数
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API 接口变更
|
||||||
|
|
||||||
|
### POST /api/v1/templates/fill
|
||||||
|
|
||||||
|
**请求体**:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_id": "模板ID",
|
||||||
|
"template_fields": [
|
||||||
|
{
|
||||||
|
"cell": "A1",
|
||||||
|
"name": "姓名",
|
||||||
|
"field_type": "text",
|
||||||
|
"required": true,
|
||||||
|
"hint": "提取人员姓名"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source_doc_ids": ["mongodb_doc_id"],
|
||||||
|
"source_file_paths": [],
|
||||||
|
"user_hint": "请从xxx文档中提取"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**响应**:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"filled_data": {"姓名": "张三"},
|
||||||
|
"fill_details": [...],
|
||||||
|
"source_doc_count": 1
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### POST /api/v1/templates/export
|
||||||
|
|
||||||
|
**新增支持 format=docx**,可导出为 Word 格式
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 技术细节
|
||||||
|
|
||||||
|
### 字段类型推断
|
||||||
|
|
||||||
|
| 关键词 | 推断类型 |
|
||||||
|
|--------|----------|
|
||||||
|
| 年、月、日、日期、时间、出生 | date |
|
||||||
|
| 数量、金额、比率、%、率、合计 | number |
|
||||||
|
| 其他 | text |
|
||||||
|
|
||||||
|
### 上下文构建
|
||||||
|
|
||||||
|
源文档内容构建优先级:
|
||||||
|
1. 结构化数据(表格数据)
|
||||||
|
2. 原始文本内容(限制 5000 字符)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 相关文档
|
||||||
|
|
||||||
|
- [比赛备赛规划.md](../比赛备赛规划.md) - 已更新功能状态和技术实现细节
|
||||||
169
比赛备赛规划.md
169
比赛备赛规划.md
@@ -50,7 +50,7 @@
|
|||||||
| `prompt_service.py` | ✅ 已完成 | Prompt 模板管理 |
|
| `prompt_service.py` | ✅ 已完成 | Prompt 模板管理 |
|
||||||
| `text_analysis_service.py` | ✅ 已完成 | 文本分析 |
|
| `text_analysis_service.py` | ✅ 已完成 | 文本分析 |
|
||||||
| `chart_generator_service.py` | ✅ 已完成 | 图表生成服务 |
|
| `chart_generator_service.py` | ✅ 已完成 | 图表生成服务 |
|
||||||
| `template_fill_service.py` | ❌ 未完成 | 模板填写服务 |
|
| `template_fill_service.py` | ✅ 已完成 | 模板填写服务,支持直接读取源文档进行填表 |
|
||||||
|
|
||||||
### 2.2 API 接口 (`backend/app/api/endpoints/`)
|
### 2.2 API 接口 (`backend/app/api/endpoints/`)
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@
|
|||||||
| `ai_analyze.py` | `/api/v1/analyze/*` | ✅ AI 分析(Excel、Markdown、流式) |
|
| `ai_analyze.py` | `/api/v1/analyze/*` | ✅ AI 分析(Excel、Markdown、流式) |
|
||||||
| `rag.py` | `/api/v1/rag/*` | ⚠️ RAG 检索(当前返回空) |
|
| `rag.py` | `/api/v1/rag/*` | ⚠️ RAG 检索(当前返回空) |
|
||||||
| `tasks.py` | `/api/v1/tasks/*` | ✅ 异步任务状态查询 |
|
| `tasks.py` | `/api/v1/tasks/*` | ✅ 异步任务状态查询 |
|
||||||
| `templates.py` | `/api/v1/templates/*` | ✅ 模板管理 |
|
| `templates.py` | `/api/v1/templates/*` | ✅ 模板管理 (含 Word 导出) |
|
||||||
| `visualization.py` | `/api/v1/visualization/*` | ✅ 可视化图表 |
|
| `visualization.py` | `/api/v1/visualization/*` | ✅ 可视化图表 |
|
||||||
| `health.py` | `/api/v1/health` | ✅ 健康检查 |
|
| `health.py` | `/api/v1/health` | ✅ 健康检查 |
|
||||||
|
|
||||||
@@ -78,8 +78,8 @@
|
|||||||
|------|----------|------|
|
|------|----------|------|
|
||||||
| Excel (.xlsx/.xls) | ✅ 已完成 | pandas + XML 回退解析 |
|
| Excel (.xlsx/.xls) | ✅ 已完成 | pandas + XML 回退解析 |
|
||||||
| Markdown (.md) | ✅ 已完成 | 正则 + AI 分章节 |
|
| Markdown (.md) | ✅ 已完成 | 正则 + AI 分章节 |
|
||||||
| Word (.docx) | ❌ 未完成 | 尚未实现 |
|
| Word (.docx) | ✅ 已完成 | python-docx 解析,支持表格提取和字段识别 |
|
||||||
| Text (.txt) | ❌ 未完成 | 尚未实现 |
|
| Text (.txt) | ✅ 已完成 | chardet 编码检测,支持文本清洗和结构化提取 |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -87,7 +87,7 @@
|
|||||||
|
|
||||||
### 3.1 模板填写模块(最优先)
|
### 3.1 模板填写模块(最优先)
|
||||||
|
|
||||||
**这是比赛的核心评测功能,必须完成。**
|
**当前状态**:✅ 已完成
|
||||||
|
|
||||||
```
|
```
|
||||||
用户上传模板表格(Word/Excel)
|
用户上传模板表格(Word/Excel)
|
||||||
@@ -103,30 +103,34 @@ AI 根据字段提示词从源数据中提取信息
|
|||||||
返回填写完成的表格
|
返回填写完成的表格
|
||||||
```
|
```
|
||||||
|
|
||||||
**需要实现**:
|
**已完成实现**:
|
||||||
- [ ] `template_fill_service.py` - 模板填写核心服务
|
- [x] `template_fill_service.py` - 模板填写核心服务
|
||||||
- [ ] Word 模板解析 (`docx_parser.py` 需新建)
|
- [x] Word 模板解析 (`docx_parser.py` - parse_tables_for_template, extract_template_fields_from_docx)
|
||||||
- [ ] Text 模板解析 (`txt_parser.py` 需新建)
|
- [x] Text 模板解析 (`txt_parser.py` - 已完成)
|
||||||
- [ ] 模板字段识别与提示词提取
|
- [x] 模板字段识别与提示词提取
|
||||||
- [ ] 多文档数据聚合与冲突处理
|
- [x] 多文档数据聚合与冲突处理
|
||||||
- [ ] 结果导出为 Word/Excel
|
- [x] 结果导出为 Word/Excel
|
||||||
|
|
||||||
### 3.2 Word 文档解析
|
### 3.2 Word 文档解析
|
||||||
|
|
||||||
**当前状态**:仅有框架,尚未实现具体解析逻辑
|
**当前状态**:✅ 已完成
|
||||||
|
|
||||||
**需要实现**:
|
**已实现功能**:
|
||||||
- [ ] `docx_parser.py` - Word 文档解析器
|
- [x] `docx_parser.py` - Word 文档解析器
|
||||||
- [ ] 提取段落文本
|
- [x] 提取段落文本
|
||||||
- [ ] 提取表格内容
|
- [x] 提取表格内容
|
||||||
- [ ] 提取关键信息(标题、列表等)
|
- [x] 提取关键信息(标题、列表等)
|
||||||
|
- [x] 表格模板字段提取 (`parse_tables_for_template`, `extract_template_fields_from_docx`)
|
||||||
|
- [x] 字段类型推断 (`_infer_field_type_from_hint`)
|
||||||
|
|
||||||
### 3.3 Text 文档解析
|
### 3.3 Text 文档解析
|
||||||
|
|
||||||
**需要实现**:
|
**当前状态**:✅ 已完成
|
||||||
- [ ] `txt_parser.py` - 文本文件解析器
|
|
||||||
- [ ] 编码自动检测
|
**已实现功能**:
|
||||||
- [ ] 文本清洗
|
- [x] `txt_parser.py` - 文本文件解析器
|
||||||
|
- [x] 编码自动检测 (chardet)
|
||||||
|
- [x] 文本清洗
|
||||||
|
|
||||||
### 3.4 文档模板匹配(已有框架)
|
### 3.4 文档模板匹配(已有框架)
|
||||||
|
|
||||||
@@ -215,5 +219,122 @@ docs/test/
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*文档版本: v1.0*
|
*文档版本: v1.1*
|
||||||
*最后更新: 2026-04-08*
|
*最后更新: 2026-04-08*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、技术实现细节
|
||||||
|
|
||||||
|
### 8.1 模板填表流程(已实现)
|
||||||
|
|
||||||
|
#### 流程图
|
||||||
|
```
|
||||||
|
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||||
|
│ 上传模板 │ ──► │ 选择数据源 │ ──► │ AI 智能填表 │
|
||||||
|
└─────────────┘ └─────────────┘ └─────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────┐
|
||||||
|
│ 导出结果 │
|
||||||
|
└─────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 核心组件
|
||||||
|
|
||||||
|
| 组件 | 文件 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 模板上传 | `templates.py` `/templates/upload` | 接收模板文件,提取字段 |
|
||||||
|
| 字段提取 | `template_fill_service.py` | 从 Word/Excel 表格提取字段定义 |
|
||||||
|
| 文档解析 | `docx_parser.py`, `xlsx_parser.py`, `txt_parser.py` | 解析源文档内容 |
|
||||||
|
| 智能填表 | `template_fill_service.py` `fill_template()` | 使用 LLM 从源文档提取信息 |
|
||||||
|
| 结果导出 | `templates.py` `/templates/export` | 导出为 Excel 或 Word |
|
||||||
|
|
||||||
|
### 8.2 源文档加载方式
|
||||||
|
|
||||||
|
模板填表服务支持两种方式加载源文档:
|
||||||
|
|
||||||
|
1. **通过 MongoDB 文档 ID**:`source_doc_ids`
|
||||||
|
- 文档已上传并存入 MongoDB
|
||||||
|
- 服务直接查询 MongoDB 获取文档内容
|
||||||
|
|
||||||
|
2. **通过文件路径**:`source_file_paths`
|
||||||
|
- 直接读取本地文件
|
||||||
|
- 使用对应的解析器解析内容
|
||||||
|
|
||||||
|
### 8.3 Word 表格模板解析
|
||||||
|
|
||||||
|
比赛评分表格通常是 Word 格式,`docx_parser.py` 提供了专门的解析方法:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 提取表格模板字段
|
||||||
|
fields = docx_parser.extract_template_fields_from_docx(file_path)
|
||||||
|
|
||||||
|
# 返回格式
|
||||||
|
# [
|
||||||
|
# {
|
||||||
|
# "cell": "T0R1", # 表格0,行1
|
||||||
|
# "name": "字段名",
|
||||||
|
# "hint": "提示词",
|
||||||
|
# "field_type": "text/number/date",
|
||||||
|
# "required": True
|
||||||
|
# },
|
||||||
|
# ...
|
||||||
|
# ]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.4 字段类型推断
|
||||||
|
|
||||||
|
系统支持从提示词自动推断字段类型:
|
||||||
|
|
||||||
|
| 关键词 | 推断类型 | 示例 |
|
||||||
|
|--------|----------|------|
|
||||||
|
| 年、月、日、日期、时间、出生 | date | 出生日期 |
|
||||||
|
| 数量、金额、比率、%、率、合计 | number | 增长比率 |
|
||||||
|
| 其他 | text | 姓名、地址 |
|
||||||
|
|
||||||
|
### 8.5 API 接口
|
||||||
|
|
||||||
|
#### POST `/api/v1/templates/fill`
|
||||||
|
|
||||||
|
填写请求:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_id": "模板ID",
|
||||||
|
"template_fields": [
|
||||||
|
{"cell": "A1", "name": "姓名", "field_type": "text", "required": true, "hint": "提取人员姓名"}
|
||||||
|
],
|
||||||
|
"source_doc_ids": ["mongodb_doc_id_1", "mongodb_doc_id_2"],
|
||||||
|
"source_file_paths": [],
|
||||||
|
"user_hint": "请从合同文档中提取"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
响应:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"filled_data": {"姓名": "张三"},
|
||||||
|
"fill_details": [
|
||||||
|
{
|
||||||
|
"field": "姓名",
|
||||||
|
"cell": "A1",
|
||||||
|
"value": "张三",
|
||||||
|
"source": "来自:合同文档.docx",
|
||||||
|
"confidence": 0.95
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source_doc_count": 2
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### POST `/api/v1/templates/export`
|
||||||
|
|
||||||
|
导出请求:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_id": "模板ID",
|
||||||
|
"filled_data": {"姓名": "张三", "金额": "10000"},
|
||||||
|
"format": "xlsx" // 或 "docx"
|
||||||
|
}
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user