feat(config): 添加RAG/Embedding配置选项

- 新增EMBEDDING_MODEL配置项,默认值为"all-MiniLM-L6-v2"
- 用于支持RAG服务的嵌入模型配置

feat(database): 增强MySQL数据库初始化功能

- 实现数据库自动创建功能,若数据库不存在则自动创建
- 使用临时连接在不指定数据库的情况下执行CREATE DATABASE语句
- 创建数据库时固定使用utf8mb4字符集和utf8mb4_unicode_ci排序规则

refactor(excel): 优化Excel表创建逻辑

- 将表创建方式从ORM模型改为原生SQL语句
- 改用execute_raw_sql执行建表语句,不再在异步会话的bind上调用同步的__table__.create,提高异步操作的兼容性
- 增加自动时间戳字段(created_at, updated_at)

feat(rag): 增强RAG服务嵌入模型错误处理

- 添加嵌入模型加载异常处理机制
- 当配置的模型加载失败时自动回退到默认模型(all-MiniLM-L6-v2)
- 改进日志记录,提供更详细的初始化信息
```
This commit is contained in:
2026-04-02 02:42:03 +08:00
parent 8b12cb9322
commit 8e1ddb8aff
4 changed files with 46 additions and 9 deletions

View File

@@ -29,6 +29,9 @@ class Settings(BaseSettings):
LLM_BASE_URL: str = "https://api.minimax.chat"
LLM_MODEL_NAME: str = "MiniMax-Text-01"
# ==================== RAG/Embedding 配置 ====================
EMBEDDING_MODEL: str = "all-MiniLM-L6-v2"
# ==================== Supabase 配置 ====================
SUPABASE_URL: str = ""
SUPABASE_ANON_KEY: str = ""

View File

@@ -72,6 +72,26 @@ class MySQLDB:
async def init_db(self):
"""初始化数据库,创建所有表"""
try:
# 先创建数据库(如果不存在)
from sqlalchemy import text
db_name = settings.MYSQL_DATABASE
# 连接时不指定数据库来创建数据库
temp_url = (
f"mysql+aiomysql://{settings.MYSQL_USER}:{settings.MYSQL_PASSWORD}"
f"@{settings.MYSQL_HOST}:{settings.MYSQL_PORT}/"
f"?charset={settings.MYSQL_CHARSET}"
)
from sqlalchemy.ext.asyncio import create_async_engine
temp_engine = create_async_engine(temp_url, echo=False)
try:
async with temp_engine.connect() as conn:
await conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci"))
await conn.commit()
logger.info(f"MySQL 数据库 {db_name} 创建或已存在")
finally:
await temp_engine.dispose()
# 然后创建表
async with self.async_engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
logger.info("MySQL 数据库表初始化完成")

View File

@@ -203,13 +203,19 @@ class ExcelStorageService:
"type": col_type
})
# 创建表
model_class = self._create_table_model(table_name, df.columns, column_types)
# 创建表结构
# 创建表 - 使用原始 SQL 以兼容异步
logger.info(f"正在创建MySQL表: {table_name}")
async with self.mysql_db.get_session() as session:
model_class.__table__.create(session.bind, checkfirst=True)
from sqlalchemy import text
sql_columns = ["id INT AUTO_INCREMENT PRIMARY KEY"]
for col in df.columns:
col_name = self._sanitize_column_name(col)
col_type = column_types.get(col, "TEXT")
sql_type = "INT" if col_type == "INTEGER" else "FLOAT" if col_type == "FLOAT" else "DATETIME" if col_type == "DATETIME" else "TEXT"
sql_columns.append(f"`{col_name}` {sql_type}")
sql_columns.append("created_at DATETIME DEFAULT CURRENT_TIMESTAMP")
sql_columns.append("updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP")
create_sql = text(f"CREATE TABLE IF NOT EXISTS `{table_name}` ({', '.join(sql_columns)})")
await self.mysql_db.execute_raw_sql(str(create_sql))
logger.info(f"MySQL表创建完成: {table_name}")
# 插入数据

View File

@@ -40,9 +40,17 @@ class RAGService:
def _init_embeddings(self):
"""初始化嵌入模型"""
if self.embedding_model is None:
self.embedding_model = SentenceTransformer(settings.EMBEDDING_MODEL)
model_name = getattr(settings, 'EMBEDDING_MODEL', 'all-MiniLM-L6-v2')
try:
self.embedding_model = SentenceTransformer(model_name)
self._dimension = self.embedding_model.get_sentence_embedding_dimension()
logger.info(f"RAG 嵌入模型初始化完成: {settings.EMBEDDING_MODEL}, 维度: {self._dimension}")
logger.info(f"RAG 嵌入模型初始化完成: {model_name}, 维度: {self._dimension}")
except Exception as e:
logger.warning(f"嵌入模型 {model_name} 加载失败,使用默认模型: {e}")
# 使用轻量级默认模型
self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
self._dimension = self.embedding_model.get_sentence_embedding_dimension()
logger.info(f"RAG 嵌入模型使用默认: all-MiniLM-L6-v2, 维度: {self._dimension}")
def _init_vector_store(self):
"""初始化向量存储"""