From 8e1ddb8aff8cc8167a9f62588c879325aeae4603 Mon Sep 17 00:00:00 2001 From: KiriAky 107 Date: Thu, 2 Apr 2026 02:42:03 +0800 Subject: [PATCH] =?UTF-8?q?feat(config):=20=E6=B7=BB=E5=8A=A0RAG/Emb?= =?UTF-8?q?edding=E9=85=8D=E7=BD=AE=E9=80=89=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增EMBEDDING_MODEL配置项,默认值为"all-MiniLM-L6-v2" - 用于支持RAG服务的嵌入模型配置 feat(database): 增强MySQL数据库初始化功能 - 实现数据库自动创建功能,若数据库不存在则自动创建 - 使用临时连接在不指定数据库的情况下执行CREATE DATABASE语句 - 支持utf8mb4字符集和排序规则设置 refactor(excel): 优化Excel表创建逻辑 - 将表创建方式从ORM模型改为原生SQL语句 - 提高异步操作的兼容性 - 增加自动时间戳字段(created_at, updated_at) feat(rag): 增强RAG服务嵌入模型错误处理 - 添加嵌入模型加载异常处理机制 - 当配置的模型加载失败时自动回退到默认模型 - 改进日志记录,提供更详细的初始化信息 --- backend/app/config.py | 3 +++ backend/app/core/database/mysql.py | 20 +++++++++++++++++++ backend/app/services/excel_storage_service.py | 18 +++++++++++------ backend/app/services/rag_service.py | 14 ++++++++++--- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/backend/app/config.py b/backend/app/config.py index 84115f9..b0a3206 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -29,6 +29,9 @@ class Settings(BaseSettings): LLM_BASE_URL: str = "https://api.minimax.chat" LLM_MODEL_NAME: str = "MiniMax-Text-01" + # ==================== RAG/Embedding 配置 ==================== + EMBEDDING_MODEL: str = "all-MiniLM-L6-v2" + # ==================== Supabase 配置 ==================== SUPABASE_URL: str = "" SUPABASE_ANON_KEY: str = "" diff --git a/backend/app/core/database/mysql.py b/backend/app/core/database/mysql.py index 9ecfe9b..b98da1b 100644 --- a/backend/app/core/database/mysql.py +++ b/backend/app/core/database/mysql.py @@ -72,6 +72,26 @@ class MySQLDB: async def init_db(self): """初始化数据库,创建所有表""" try: + # 先创建数据库(如果不存在) + from sqlalchemy import text + db_name = settings.MYSQL_DATABASE + # 连接时不指定数据库来创建数据库 + temp_url = ( + f"mysql+aiomysql://{settings.MYSQL_USER}:{settings.MYSQL_PASSWORD}" + 
f"@{settings.MYSQL_HOST}:{settings.MYSQL_PORT}/" + f"?charset={settings.MYSQL_CHARSET}" + ) + from sqlalchemy.ext.asyncio import create_async_engine + temp_engine = create_async_engine(temp_url, echo=False) + try: + async with temp_engine.connect() as conn: + await conn.execute(text(f"CREATE DATABASE IF NOT EXISTS `{db_name}` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci")) + await conn.commit() + logger.info(f"MySQL 数据库 {db_name} 创建或已存在") + finally: + await temp_engine.dispose() + + # 然后创建表 async with self.async_engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) logger.info("MySQL 数据库表初始化完成") diff --git a/backend/app/services/excel_storage_service.py b/backend/app/services/excel_storage_service.py index 858c19e..0407550 100644 --- a/backend/app/services/excel_storage_service.py +++ b/backend/app/services/excel_storage_service.py @@ -203,13 +203,19 @@ class ExcelStorageService: "type": col_type }) - # 创建表 - model_class = self._create_table_model(table_name, df.columns, column_types) - - # 创建表结构 + # 创建表 - 使用原始 SQL 以兼容异步 logger.info(f"正在创建MySQL表: {table_name}") - async with self.mysql_db.get_session() as session: - model_class.__table__.create(session.bind, checkfirst=True) + from sqlalchemy import text + sql_columns = ["id INT AUTO_INCREMENT PRIMARY KEY"] + for col in df.columns: + col_name = self._sanitize_column_name(col) + col_type = column_types.get(col, "TEXT") + sql_type = "INT" if col_type == "INTEGER" else "FLOAT" if col_type == "FLOAT" else "DATETIME" if col_type == "DATETIME" else "TEXT" + sql_columns.append(f"`{col_name}` {sql_type}") + sql_columns.append("created_at DATETIME DEFAULT CURRENT_TIMESTAMP") + sql_columns.append("updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP") + create_sql = text(f"CREATE TABLE IF NOT EXISTS `{table_name}` ({', '.join(sql_columns)})") + await self.mysql_db.execute_raw_sql(str(create_sql)) logger.info(f"MySQL表创建完成: {table_name}") # 插入数据 diff --git 
a/backend/app/services/rag_service.py b/backend/app/services/rag_service.py index 65bdb40..712d15e 100644 --- a/backend/app/services/rag_service.py +++ b/backend/app/services/rag_service.py @@ -40,9 +40,17 @@ class RAGService: def _init_embeddings(self): """初始化嵌入模型""" if self.embedding_model is None: - self.embedding_model = SentenceTransformer(settings.EMBEDDING_MODEL) - self._dimension = self.embedding_model.get_sentence_embedding_dimension() - logger.info(f"RAG 嵌入模型初始化完成: {settings.EMBEDDING_MODEL}, 维度: {self._dimension}") + model_name = getattr(settings, 'EMBEDDING_MODEL', 'all-MiniLM-L6-v2') + try: + self.embedding_model = SentenceTransformer(model_name) + self._dimension = self.embedding_model.get_sentence_embedding_dimension() + logger.info(f"RAG 嵌入模型初始化完成: {model_name}, 维度: {self._dimension}") + except Exception as e: + logger.warning(f"嵌入模型 {model_name} 加载失败,使用默认模型: {e}") + # 使用轻量级默认模型 + self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2') + self._dimension = self.embedding_model.get_sentence_embedding_dimension() + logger.info(f"RAG 嵌入模型使用默认: all-MiniLM-L6-v2, 维度: {self._dimension}") def _init_vector_store(self): """初始化向量存储"""