完成后端数据库连接配置

This commit is contained in:
2026-03-26 19:49:40 +08:00
parent d3bdb17e87
commit 4bdc3f9707
19 changed files with 2843 additions and 302 deletions

View File

@@ -0,0 +1,172 @@
"""
文档数据模型
定义文档相关的 Pydantic 模型
"""
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class DocumentType(str, Enum):
    """Enumeration of document types.

    Members also subclass ``str``, so each member compares equal to its raw
    string value (e.g. ``DocumentType.DOCX == "docx"``).
    """

    DOCX = "docx"
    XLSX = "xlsx"
    MD = "md"
    TXT = "txt"
class TaskStatus(str, Enum):
    """Enumeration of task statuses.

    Members also subclass ``str``, so each member compares equal to its raw
    string value (e.g. ``TaskStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    SUCCESS = "success"
    FAILURE = "failure"
# ==================== 解析结果模型 ====================
class DocumentMetadata(BaseModel):
    """Document metadata.

    Only ``filename`` and ``extension`` are required. ``file_size`` defaults
    to 0 and every other field defaults to ``None``.
    """

    # Required identification of the file.
    filename: str
    extension: str
    file_size: int = 0
    doc_type: Optional[str] = None

    # Sheet/table details.
    # NOTE(review): presumably only populated for spreadsheet-like documents;
    # confirm against the parsers that build this model.
    sheet_count: Optional[int] = None
    sheet_names: Optional[List[str]] = None
    current_sheet: Optional[str] = None
    row_count: Optional[int] = None
    column_count: Optional[int] = None
    columns: Optional[List[str]] = None

    # NOTE(review): presumably the text encoding of the source file; confirm.
    encoding: Optional[str] = None
class ParseResultData(BaseModel):
    """Parse result data.

    All fields are optional: the list fields default to fresh empty lists
    (via ``default_factory``) and the counts default to 0.
    """

    # Column names.
    columns: List[str] = Field(default_factory=list)
    # One dict per row.
    # NOTE(review): presumably keyed by column name; confirm with the parser.
    rows: List[Dict[str, Any]] = Field(default_factory=list)
    # Counts are stored as given; this model does not derive them from
    # ``rows`` / ``columns``.
    row_count: int = 0
    column_count: int = 0
class ParseResult(BaseModel):
    """Document parse result.

    ``success`` is required. ``data``, ``metadata`` and ``error`` are all
    optional and default to ``None``.
    """

    success: bool
    data: Optional[ParseResultData] = None
    metadata: Optional[DocumentMetadata] = None
    # NOTE(review): presumably set only when ``success`` is False; the model
    # itself does not enforce that.
    error: Optional[str] = None
# ==================== 存储模型 ====================
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` (UTC wall-clock
    time with ``tzinfo`` unset) without calling that deprecated function.
    """
    # Imported locally because the module only imports ``datetime`` itself.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class DocumentStore(BaseModel):
    """Document storage model.

    ``created_at`` and ``updated_at`` default to the current UTC time as
    naive datetimes. Each is produced by its own factory call, so the two
    defaults may differ by a few microseconds.
    """

    doc_id: str
    doc_type: DocumentType
    content: str
    metadata: DocumentMetadata
    structured_data: Optional[Dict[str, Any]] = None
    # Naive UTC timestamps, matching what ``datetime.utcnow`` used to supply.
    created_at: datetime = Field(default_factory=_utc_now_naive)
    updated_at: datetime = Field(default_factory=_utc_now_naive)
class RAGEntry(BaseModel):
    """RAG index entry.

    Describes one field of one table together with its embedding vector.
    """

    table_name: str
    field_name: str
    field_description: str
    # Embedding vector as a list of floats.
    # NOTE(review): presumably the embedding of ``field_description``;
    # confirm with the indexing code.
    embedding: List[float]
    # Optional free-form extra data; defaults to None.
    metadata: Optional[Dict[str, Any]] = None
# ==================== 任务模型 ====================
class TaskCreate(BaseModel):
    """Task creation request.

    Both fields are required.
    """

    task_type: str
    # Free-form parameters; the expected keys are not defined by this model.
    input_params: Dict[str, Any]
class TaskStatusResponse(BaseModel):
    """Task status response.

    ``task_id`` and ``status`` are required. ``progress`` defaults to 0 and
    the remaining fields default to ``None``.
    """

    task_id: str
    status: TaskStatus
    # NOTE(review): presumably a 0-100 percentage; no range is enforced here.
    progress: int = 0
    message: Optional[str] = None
    # The result may be any value; its shape is not defined by this model.
    result: Optional[Any] = None
    error: Optional[str] = None
# ==================== 模板填写模型 ====================
class TemplateField(BaseModel):
    """Template field.

    ``cell`` and ``name`` are required. ``field_type`` defaults to
    ``"text"`` and ``required`` defaults to ``True``.
    """

    # Cell position, e.g. A1.
    cell: str = Field(description="单元格位置, 如 A1")
    # Field name.
    name: str = Field(description="字段名称")
    # Field type: text/number/date (documented only; not validated here).
    field_type: str = Field(default="text", description="字段类型: text/number/date")
    # Whether the field is mandatory.
    required: bool = Field(default=True, description="是否必填")
class TemplateSheet(BaseModel):
    """Template worksheet.

    A named sheet and the list of template fields it contains. Both fields
    are required.
    """

    name: str
    fields: List[TemplateField]
class TemplateInfo(BaseModel):
    """Template info.

    All fields are required.
    """

    file_path: str
    file_type: str  # xlsx/docx
    sheets: List[TemplateSheet]
class FillRequest(BaseModel):
    """Fill request.

    ``template_path`` and ``template_fields`` are required;
    ``source_doc_ids`` is optional and defaults to ``None``.
    """

    template_path: str
    template_fields: List[TemplateField]
    # NOTE(review): presumably None means "no restriction on source
    # documents"; confirm with the fill service.
    source_doc_ids: Optional[List[str]] = None
class FillResult(BaseModel):
    """Fill result.

    ``success``, ``filled_data`` and ``fill_details`` are required;
    ``source_documents`` defaults to a fresh empty list.
    """

    success: bool
    # NOTE(review): key/value schema is not defined by this model; confirm
    # with the code that produces it.
    filled_data: Dict[str, Any]
    fill_details: List[Dict[str, Any]]
    source_documents: List[str] = Field(default_factory=list)
# ==================== API 响应模型 ====================
class UploadResponse(BaseModel):
    """Upload response.

    All fields are required.
    """

    task_id: str
    file_count: int
    message: str
    # NOTE(review): presumably the URL to poll for this task's status;
    # confirm with the upload endpoint.
    status_url: str
class AnalyzeResponse(BaseModel):
    """Analysis response.

    Only ``success`` is required; every other field defaults to ``None``.
    """

    success: bool
    analysis: Optional[str] = None
    structured_data: Optional[Dict[str, Any]] = None
    # NOTE(review): presumably the name of the model that produced the
    # analysis; confirm with the analysis service.
    model: Optional[str] = None
    error: Optional[str] = None
class QueryRequest(BaseModel):
    """Query request.

    ``user_intent`` is required. ``table_name`` defaults to ``None`` and
    ``top_k`` defaults to 5.
    """

    user_intent: str
    table_name: Optional[str] = None
    # Validated to lie in the inclusive range 1..20.
    top_k: int = Field(default=5, ge=1, le=20)
class QueryResponse(BaseModel):
    """Query response.

    Only ``success`` is required; every other field defaults to ``None``.
    """

    success: bool
    sql_query: Optional[str] = None
    # One dict per result row.
    results: Optional[List[Dict[str, Any]]] = None
    # NOTE(review): presumably the retrieved context snippets used to build
    # the query; confirm with the query service.
    rag_context: Optional[List[str]] = None
    error: Optional[str] = None