完成后端数据库连接配置

This commit is contained in:
2026-03-26 19:49:40 +08:00
parent d3bdb17e87
commit 4bdc3f9707
19 changed files with 2843 additions and 302 deletions

View File

@@ -1,7 +1,48 @@
"""
文档解析模块 - 支持多种文件格式的解析
"""
from .base import BaseParser
from pathlib import Path
from typing import Dict, Optional
from .base import BaseParser, ParseResult
from .xlsx_parser import XlsxParser
__all__ = ['BaseParser', 'XlsxParser']
# 导入其他解析器 (需要先实现)
# from .docx_parser import DocxParser
# from .md_parser import MarkdownParser
# from .txt_parser import TxtParser
class ParserFactory:
    """Parser factory: return the parser that matches a file's type.

    Parsers are kept in a class-level registry keyed by lower-case file
    extension including the leading dot (for example ``'.xlsx'``). Every
    method is a classmethod operating on that one registry, so a parser
    registered through ``register_parser`` is visible to all later
    ``get_parser`` / ``parse`` calls on this class.
    """

    # Extension (lower-case, with leading dot) -> parser instance.
    _parsers: Dict[str, BaseParser] = {
        '.xlsx': XlsxParser(),
        '.xls': XlsxParser(),
        # '.docx': DocxParser(),  # TODO: not yet implemented
        # '.md': MarkdownParser(),  # TODO: not yet implemented
        # '.txt': TxtParser(),  # TODO: not yet implemented
    }

    @classmethod
    def get_parser(cls, file_path: str) -> BaseParser:
        """Return the parser registered for the extension of *file_path*.

        The extension is the final suffix of the path, compared
        case-insensitively.

        Args:
            file_path: Path of the file to be parsed; only its suffix is
                inspected here.

        Returns:
            The parser instance registered for that extension.

        Raises:
            ValueError: If no parser is registered for the extension.
        """
        ext = Path(file_path).suffix.lower()
        parser = cls._parsers.get(ext)
        # Test for "missing" explicitly: a registered parser object that
        # happens to be falsy (defines __len__/__bool__) is still valid.
        if parser is None:
            raise ValueError(f"不支持的文件格式: {ext},支持的格式: {list(cls._parsers)}")
        return parser

    @classmethod
    def parse(cls, file_path: str, **kwargs) -> ParseResult:
        """Parse *file_path* with the parser matching its extension.

        Args:
            file_path: Path of the file to parse.
            **kwargs: Forwarded unchanged to the selected parser's
                ``parse`` method.

        Returns:
            Whatever the selected parser's ``parse`` returns.

        Raises:
            ValueError: If the extension has no registered parser.
        """
        return cls.get_parser(file_path).parse(file_path, **kwargs)

    @classmethod
    def register_parser(cls, ext: str, parser: BaseParser):
        """Register *parser* for extension *ext*, replacing any existing one.

        Args:
            ext: File extension, with or without the leading dot
                (``'.md'`` and ``'md'`` are equivalent); matched
                case-insensitively.
            parser: Parser instance to use for that extension.
        """
        key = ext.lower()
        # Lookups use Path.suffix, which always starts with '.', so a key
        # stored without the dot could never be found. The empty key is
        # kept as-is: it is what Path.suffix yields for extension-less files.
        if key and not key.startswith('.'):
            key = '.' + key
        cls._parsers[key] = parser
# Public names exported by a star-import of this package.
__all__ = ['BaseParser', 'ParseResult', 'XlsxParser', 'ParserFactory']