# Listing header captured from the file viewer: 173 lines, 4.1 KiB, Python.

"""
Document data models.

Defines the Pydantic models related to documents.
"""
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class DocumentType(str, Enum):
    """Document type enumeration.

    The ``str`` mixin makes every member compare equal to its plain string
    value (for example ``DocumentType.DOCX == "docx"``).
    """

    # Each value is the lowercase form of its member name.
    DOCX = "docx"
    XLSX = "xlsx"
    MD = "md"
    TXT = "txt"
class TaskStatus(str, Enum):
    """Task status enumeration.

    The ``str`` mixin makes every member compare equal to its plain string
    value (for example ``TaskStatus.PENDING == "pending"``).
    """

    # Each value is the lowercase form of its member name.
    PENDING = "pending"
    PROCESSING = "processing"
    SUCCESS = "success"
    FAILURE = "failure"
# ==================== Parse result models ====================
class DocumentMetadata(BaseModel):
    """Document metadata."""

    # Required on every instance.
    filename: str
    extension: str

    # Defaults to 0 when the caller supplies no size.
    # NOTE(review): the unit is presumably bytes -- confirm with the producer.
    file_size: int = Field(default=0)

    # Optional type label; a plain string here rather than a DocumentType.
    doc_type: Optional[str] = Field(default=None)

    # Everything below is optional and defaults to None.
    # NOTE(review): the sheet/row/column fields look spreadsheet-oriented --
    # confirm which parsers actually populate them.
    sheet_count: Optional[int] = Field(default=None)
    sheet_names: Optional[List[str]] = Field(default=None)
    current_sheet: Optional[str] = Field(default=None)
    row_count: Optional[int] = Field(default=None)
    column_count: Optional[int] = Field(default=None)
    columns: Optional[List[str]] = Field(default=None)
    encoding: Optional[str] = Field(default=None)
class ParseResultData(BaseModel):
    """Parse result data."""

    # Each instance gets its own fresh empty list for these containers.
    columns: List[str] = Field(default_factory=list)
    rows: List[Dict[str, Any]] = Field(default_factory=list)

    # Counts default to 0. They are stored as given; this model does not
    # derive them from ``rows`` / ``columns``.
    row_count: int = Field(default=0)
    column_count: int = Field(default=0)
class ParseResult(BaseModel):
    """Document parse result."""

    # Required outcome flag.
    success: bool

    # Optional payloads, each defaulting to None. This model does not enforce
    # any relationship between ``success`` and which of them is set.
    data: Optional[ParseResultData] = Field(default=None)
    metadata: Optional[DocumentMetadata] = Field(default=None)
    error: Optional[str] = Field(default=None)
# ==================== Storage models ====================
def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same kind of value as ``datetime.utcnow()`` (UTC wall-clock
    time with ``tzinfo=None``) without calling that method, which is
    deprecated as of Python 3.12 and warns on every call.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    # Strip tzinfo so stored/serialized values keep their previous naive form.
    return datetime.now(timezone.utc).replace(tzinfo=None)


class DocumentStore(BaseModel):
    """Document storage model."""

    # Required fields.
    doc_id: str
    doc_type: DocumentType
    content: str
    metadata: DocumentMetadata

    # Optional string-keyed payload; defaults to None.
    structured_data: Optional[Dict[str, Any]] = None

    # Both timestamps default to the UTC clock at construction time (naive
    # datetimes). Each field invokes the factory on its own, so the two
    # defaults may differ by a few microseconds.
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)
class RAGEntry(BaseModel):
    """RAG index entry."""

    # Required string fields.
    table_name: str
    field_name: str
    field_description: str

    # Required list of floats; no particular length is enforced here.
    embedding: List[float]

    # Optional string-keyed extras; defaults to None.
    metadata: Optional[Dict[str, Any]] = Field(default=None)
# ==================== Task models ====================
class TaskCreate(BaseModel):
    """Task creation request."""

    # Both fields are required; neither has a default.
    task_type: str  # plain string, not restricted to an enum by this model
    input_params: Dict[str, Any]  # string-keyed mapping with arbitrary values
class TaskStatusResponse(BaseModel):
    """Task status response."""

    # Required fields.
    task_id: str
    status: TaskStatus

    # Defaults to 0; no bounds are enforced by this model.
    # NOTE(review): presumably a percentage -- confirm with the producer.
    progress: int = Field(default=0)

    # Optional details, each defaulting to None.
    message: Optional[str] = Field(default=None)
    result: Optional[Any] = Field(default=None)
    error: Optional[str] = Field(default=None)
# ==================== Template filling models ====================
class TemplateField(BaseModel):
    """Template field."""

    # Required. The description reads "cell position, e.g. A1".
    cell: str = Field(
        description="单元格位置, 如 A1",
    )

    # Required. The description reads "field name".
    name: str = Field(
        description="字段名称",
    )

    # Defaults to "text". The description reads "field type:
    # text/number/date"; the value is a plain string and is not validated
    # against that list by this model.
    field_type: str = Field(
        description="字段类型: text/number/date",
        default="text",
    )

    # Defaults to True. The description reads "whether it is required".
    required: bool = Field(
        description="是否必填",
        default=True,
    )
class TemplateSheet(BaseModel):
    """Template worksheet."""

    # Both fields are required; neither has a default.
    name: str
    fields: List[TemplateField]  # an empty list is accepted, omission is not
class TemplateInfo(BaseModel):
    """Template information."""

    # All three fields are required.
    file_path: str

    # Original note lists xlsx/docx; the value is a plain string and is not
    # validated against those two options here.
    file_type: str

    sheets: List[TemplateSheet]
class FillRequest(BaseModel):
    """Fill request."""

    # Required fields.
    template_path: str
    template_fields: List[TemplateField]

    # Optional list of string ids; defaults to None (distinct from an empty
    # list, which is also accepted).
    source_doc_ids: Optional[List[str]] = Field(default=None)
class FillResult(BaseModel):
    """Fill result."""

    # Required fields.
    success: bool
    filled_data: Dict[str, Any]  # string-keyed mapping with arbitrary values
    fill_details: List[Dict[str, Any]]  # list of string-keyed mappings

    # Optional; each instance gets its own fresh empty list when omitted.
    source_documents: List[str] = Field(default_factory=list)
# ==================== API response models ====================
class UploadResponse(BaseModel):
    """Upload response."""

    # All four fields are required; none has a default.
    task_id: str
    file_count: int
    message: str
    status_url: str  # plain string; not validated as a URL by this model
class AnalyzeResponse(BaseModel):
    """Analysis response."""

    # Required outcome flag.
    success: bool

    # Optional fields, each defaulting to None. This model does not enforce
    # any relationship between ``success`` and which of them is set.
    analysis: Optional[str] = Field(default=None)
    structured_data: Optional[Dict[str, Any]] = Field(default=None)
    model: Optional[str] = Field(default=None)
    error: Optional[str] = Field(default=None)
class QueryRequest(BaseModel):
    """Query request."""

    # Required.
    user_intent: str

    # Optional; defaults to None.
    table_name: Optional[str] = Field(default=None)

    # Defaults to 5 and is validated to lie in the inclusive range 1..20.
    top_k: int = Field(ge=1, le=20, default=5)
class QueryResponse(BaseModel):
    """Query response."""

    # Required outcome flag.
    success: bool

    # Optional fields, each defaulting to None. This model does not enforce
    # any relationship between ``success`` and which of them is set.
    sql_query: Optional[str] = Field(default=None)
    results: Optional[List[Dict[str, Any]]] = Field(default=None)
    rag_context: Optional[List[str]] = Field(default=None)
    error: Optional[str] = Field(default=None)