前后端基本架构,并完成 Excel 表的解析、统计图表的生成以及 Excel 表的导出
This commit is contained in:
87
backend/app/core/document_parser/base.py
Normal file
87
backend/app/core/document_parser/base.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""
|
||||
解析器基类 - 定义所有解析器的通用接口
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class ParseResult:
    """Container for the outcome of a single parse operation.

    Attributes are plain and public: ``success``, ``data``, ``error`` and
    ``metadata``.
    """

    def __init__(
        self,
        success: bool,
        data: Optional[Dict[str, Any]] = None,
        error: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Store the parse outcome.

        Args:
            success: Whether the parse succeeded.
            data: Parsed payload; a falsy value (``None`` or an empty dict)
                is replaced by a fresh empty dict.
            error: Error message, or ``None`` when there is none.
            metadata: Extra information about the parse; a falsy value is
                replaced by a fresh empty dict.
        """
        self.success = success
        # `or {}` gives every instance its own dict instead of sharing a
        # mutable default between instances.
        self.data = data or {}
        self.error = error
        self.metadata = metadata or {}

    def __repr__(self) -> str:
        """Return a debugging representation listing all four fields."""
        return (
            f"{type(self).__name__}(success={self.success!r}, "
            f"data={self.data!r}, error={self.error!r}, "
            f"metadata={self.metadata!r})"
        )

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a dictionary.

        Returns:
            A dict with the keys ``success``, ``data``, ``error`` and
            ``metadata``. ``data`` and ``metadata`` are the same dict
            objects held by this instance, not copies.
        """
        return {
            "success": self.success,
            "data": self.data,
            "error": self.error,
            "metadata": self.metadata,
        }
|
||||
|
||||
|
||||
class BaseParser(ABC):
    """Abstract base class defining the common interface of document parsers.

    ``__init__`` sets ``supported_extensions`` to an empty list and
    ``parser_name`` to ``"base_parser"``; subclasses presumably override
    both after calling ``super().__init__()``.
    """

    def __init__(self):
        # File extensions this parser accepts (e.g. ".xlsx").
        self.supported_extensions: List[str] = []
        # Identifier reported in get_file_info() under the "parser" key.
        self.parser_name: str = "base_parser"

    @abstractmethod
    def parse(self, file_path: str, **kwargs) -> ParseResult:
        """Parse a file.

        Args:
            file_path: Path of the file to parse.
            **kwargs: Additional parser-specific options.

        Returns:
            ParseResult: The outcome of the parse.
        """
        pass

    @staticmethod
    def _normalize_extension(extension: str) -> str:
        """Return *extension* lower-cased and with a leading dot."""
        lowered = extension.lower()
        # An empty entry stays empty so it keeps matching files that have
        # no suffix at all, as a verbatim comparison would.
        if lowered and not lowered.startswith("."):
            return "." + lowered
        return lowered

    def can_parse(self, file_path: str) -> bool:
        """Check whether this parser accepts the given file.

        The decision is based on the file name's suffix only; the file is
        not opened. The comparison is case-insensitive, and entries in
        ``supported_extensions`` may be written with or without the
        leading dot.

        Args:
            file_path: Path of the file to check.

        Returns:
            bool: True if the file's suffix is one of the supported
            extensions.
        """
        ext = Path(file_path).suffix.lower()
        return any(
            ext == self._normalize_extension(candidate)
            for candidate in self.supported_extensions
        )

    def get_file_info(self, file_path: str) -> Dict[str, Any]:
        """Collect basic information about a file.

        Args:
            file_path: Path of the file to inspect.

        Returns:
            Dict[str, Any]: ``{"error": "File not found"}`` when the path
            does not exist; otherwise a dict with ``filename``,
            ``extension`` (lower-cased suffix), ``size`` (``st_size`` from
            ``stat()``) and ``parser`` (this parser's ``parser_name``).
        """
        path = Path(file_path)
        try:
            # A single stat() call instead of exists() followed by stat():
            # the file cannot vanish between the check and the read.
            size = path.stat().st_size
        except (FileNotFoundError, NotADirectoryError, ValueError):
            # ValueError covers paths with an embedded NUL byte, which
            # exists() reports as missing as well.
            return {"error": "File not found"}

        return {
            "filename": path.name,
            "extension": path.suffix.lower(),
            "size": size,
            "parser": self.parser_name,
        }
|
||||
Reference in New Issue
Block a user