# File listing metadata: 173 lines, 4.1 KiB, Python
"""
|
|
文档数据模型
|
|
|
|
定义文档相关的 Pydantic 模型
|
|
"""
|
|
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
|
|
|
|
|
|
class DocumentType(str, Enum):
    """Document type enumeration.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (for example ``DocumentType.DOCX == "docx"``).
    """

    DOCX = "docx"
    XLSX = "xlsx"
    MD = "md"
    TXT = "txt"
|
|
|
|
|
|
class TaskStatus(str, Enum):
    """Task status enumeration.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (for example ``TaskStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    SUCCESS = "success"
    FAILURE = "failure"
|
|
|
|
|
|
# ==================== Parse result models ====================
|
|
|
|
class DocumentMetadata(BaseModel):
    """Document metadata.

    Only ``filename`` and ``extension`` are required. ``file_size`` defaults
    to 0 and every remaining field defaults to ``None``.
    """

    filename: str
    extension: str
    file_size: int = 0
    doc_type: Optional[str] = None

    # NOTE(review): the sheet/row/column names suggest these are only filled
    # for spreadsheet-like inputs -- confirm against the parsers that build
    # this model.
    sheet_count: Optional[int] = None
    sheet_names: Optional[List[str]] = None
    current_sheet: Optional[str] = None
    row_count: Optional[int] = None
    column_count: Optional[int] = None
    columns: Optional[List[str]] = None

    encoding: Optional[str] = None
|
|
|
|
|
|
class ParseResultData(BaseModel):
    """Parse result data.

    Every field has a default, so the model can be built with no arguments.
    ``row_count`` and ``column_count`` are plain stored values; nothing in
    this class derives them from ``rows`` or ``columns``.
    """

    # default_factory gives each instance its own fresh list.
    columns: List[str] = Field(default_factory=list)
    rows: List[Dict[str, Any]] = Field(default_factory=list)
    row_count: int = 0
    column_count: int = 0
|
|
|
|
|
|
class ParseResult(BaseModel):
    """Document parse result.

    ``success`` is required; ``data``, ``metadata`` and ``error`` all default
    to ``None``.
    """

    success: bool
    data: Optional[ParseResultData] = None
    metadata: Optional[DocumentMetadata] = None
    error: Optional[str] = None
|
|
|
|
|
|
# ==================== Storage models ====================
|
|
|
|
def _naive_utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` (UTC wall-clock
    time with ``tzinfo`` unset) without calling that function, which is
    deprecated since Python 3.12.
    """
    # Drop tzinfo so stored values stay naive, as they were with utcnow().
    return datetime.now(timezone.utc).replace(tzinfo=None)


class DocumentStore(BaseModel):
    """Document storage model.

    ``structured_data`` defaults to ``None``. ``created_at`` and
    ``updated_at`` default to the current UTC time as naive datetimes.
    """

    doc_id: str
    doc_type: DocumentType
    content: str
    metadata: DocumentMetadata
    structured_data: Optional[Dict[str, Any]] = None
    # Each timestamp runs its own factory call, so the two defaults on a new
    # instance may differ by a few microseconds.
    created_at: datetime = Field(default_factory=_naive_utc_now)
    updated_at: datetime = Field(default_factory=_naive_utc_now)
|
|
|
|
|
|
class RAGEntry(BaseModel):
    """RAG index entry.

    All fields are required except ``metadata``, which defaults to ``None``.
    """

    table_name: str
    field_name: str
    field_description: str
    embedding: List[float]
    metadata: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
# ==================== Task models ====================
|
|
|
|
class TaskCreate(BaseModel):
    """Task creation request.

    Both fields are required.
    """

    task_type: str
    input_params: Dict[str, Any]
|
|
|
|
|
|
class TaskStatusResponse(BaseModel):
    """Task status response.

    ``task_id`` and ``status`` are required. ``progress`` defaults to 0;
    ``message``, ``result`` and ``error`` default to ``None``.
    """

    task_id: str
    status: TaskStatus
    # NOTE(review): no range constraint is declared on progress; whether it
    # is a 0-100 percentage should be confirmed with the producers.
    progress: int = 0
    message: Optional[str] = None
    result: Optional[Any] = None
    error: Optional[str] = None
|
|
|
|
|
|
# ==================== Template filling models ====================
|
|
|
|
class TemplateField(BaseModel):
    """Template field.

    ``cell`` and ``name`` are required. ``field_type`` defaults to ``"text"``
    and ``required`` defaults to ``True``. The ``description`` strings are
    runtime metadata attached to each field and are kept in their original
    language.
    """

    # Cell position, e.g. "A1".
    cell: str = Field(description="单元格位置, 如 A1")
    # Field name.
    name: str = Field(description="字段名称")
    # Field type: text/number/date (documented only; not enforced here).
    field_type: str = Field(default="text", description="字段类型: text/number/date")
    # Whether the field is mandatory.
    required: bool = Field(default=True, description="是否必填")
|
|
|
|
|
|
class TemplateSheet(BaseModel):
    """Template worksheet.

    Both fields are required: the sheet ``name`` and its list of
    ``TemplateField`` entries.
    """

    name: str
    fields: List[TemplateField]
|
|
|
|
|
|
class TemplateInfo(BaseModel):
    """Template information.

    All three fields are required.
    """

    file_path: str
    file_type: str  # xlsx/docx (plain string; not validated here)
    sheets: List[TemplateSheet]
|
|
|
|
|
|
class FillRequest(BaseModel):
    """Fill request.

    ``template_path`` and ``template_fields`` are required;
    ``source_doc_ids`` defaults to ``None``.
    """

    template_path: str
    template_fields: List[TemplateField]
    source_doc_ids: Optional[List[str]] = None
|
|
|
|
|
|
class FillResult(BaseModel):
    """Fill result.

    ``success``, ``filled_data`` and ``fill_details`` are required;
    ``source_documents`` defaults to a fresh empty list per instance.
    """

    success: bool
    filled_data: Dict[str, Any]
    fill_details: List[Dict[str, Any]]
    source_documents: List[str] = Field(default_factory=list)
|
|
|
|
|
|
# ==================== API response models ====================
|
|
|
|
class UploadResponse(BaseModel):
    """Upload response.

    All four fields are required.
    """

    task_id: str
    file_count: int
    message: str
    status_url: str
|
|
|
|
|
|
class AnalyzeResponse(BaseModel):
    """Analysis response.

    ``success`` is required; every other field defaults to ``None``.
    """

    success: bool
    analysis: Optional[str] = None
    structured_data: Optional[Dict[str, Any]] = None
    model: Optional[str] = None
    error: Optional[str] = None
|
|
|
|
|
|
class QueryRequest(BaseModel):
    """Query request.

    ``user_intent`` is required and ``table_name`` defaults to ``None``.
    """

    user_intent: str
    table_name: Optional[str] = None
    # Defaults to 5; validation accepts values from 1 to 20 inclusive.
    top_k: int = Field(default=5, ge=1, le=20)
|
|
|
|
|
|
class QueryResponse(BaseModel):
    """Query response.

    ``success`` is required; every other field defaults to ``None``.
    """

    success: bool
    sql_query: Optional[str] = None
    results: Optional[List[Dict[str, Any]]] = None
    rag_context: Optional[List[str]] = None
    error: Optional[str] = None
|