From c122f1d63b414ac041371ca1f68bbea00d4b3acd Mon Sep 17 00:00:00 2001 From: KiriAky 107 Date: Mon, 30 Mar 2026 21:24:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E5=96=84=E5=90=8E=E7=AB=AF=E6=97=A5?= =?UTF-8?q?=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/api/endpoints/documents.py | 26 ++--- backend/app/api/endpoints/upload.py | 20 +++- backend/app/core/database/mongodb.py | 6 +- backend/app/main.py | 94 +++++++++++++++++-- backend/app/services/excel_storage_service.py | 11 ++- backend/app/services/file_service.py | 6 ++ backend/app/services/table_rag_service.py | 30 +++++- 7 files changed, 167 insertions(+), 26 deletions(-) diff --git a/backend/app/api/endpoints/documents.py b/backend/app/api/endpoints/documents.py index a0bd91c..848a582 100644 --- a/backend/app/api/endpoints/documents.py +++ b/backend/app/api/endpoints/documents.py @@ -196,18 +196,22 @@ async def process_document( meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"} ) - # 使用 TableRAG 服务完成建表和RAG索引 - rag_result = await table_rag_service.build_table_rag_index( - file_path=file_path, - filename=original_filename, - sheet_name=parse_options.get("sheet_name"), - header_row=parse_options.get("header_row", 0) - ) + try: + # 使用 TableRAG 服务完成建表和RAG索引 + logger.info(f"开始存储Excel到MySQL: {original_filename}, file_path: {file_path}") + rag_result = await table_rag_service.build_table_rag_index( + file_path=file_path, + filename=original_filename, + sheet_name=parse_options.get("sheet_name"), + header_row=parse_options.get("header_row", 0) + ) - if rag_result.get("success"): - logger.info(f"RAG索引构建成功: {original_filename}") - else: - logger.warning(f"RAG索引构建失败: {rag_result.get('error')}") + if rag_result.get("success"): + logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {rag_result.get('table_name')}") + else: + logger.error(f"RAG索引构建失败: {rag_result.get('error')}") + except Exception as e: + logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True) else: # 非结构化文档 diff --git a/backend/app/api/endpoints/upload.py b/backend/app/api/endpoints/upload.py index dbf17c1..2c3af5d 100644 --- a/backend/app/api/endpoints/upload.py +++ b/backend/app/api/endpoints/upload.py @@ -10,6 +10,7 @@ import io from app.services.file_service import file_service from app.core.document_parser import XlsxParser +from app.services.table_rag_service import table_rag_service logger = logging.getLogger(__name__) @@ -27,7 +28,7 @@ async def upload_excel( header_row: int = Query(0, description="表头所在的行索引") ): """ - 上传并解析 Excel 文件 + 上传并解析 Excel 文件,同时存储到 MySQL 数据库 Args: file: 上传的 Excel 文件 @@ -77,6 +78,23 @@ async def upload_excel( result.metadata['saved_path'] = saved_path result.metadata['original_filename'] = file.filename + # 存储到 MySQL 数据库 + try: + store_result = await table_rag_service.build_table_rag_index( + file_path=saved_path, + filename=file.filename, + sheet_name=sheet_name if sheet_name else None, + header_row=header_row + ) + if store_result.get("success"): + result.metadata['mysql_table'] = store_result.get('table_name') + result.metadata['row_count'] = store_result.get('row_count') + logger.info(f"Excel已存储到MySQL: {file.filename}, 表: {store_result.get('table_name')}") + else: + logger.warning(f"Excel存储到MySQL失败: {store_result.get('error')}") + except Exception as e: + logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True) + return result.to_dict() except HTTPException: diff --git a/backend/app/core/database/mongodb.py b/backend/app/core/database/mongodb.py index 39763b8..79ffa06 100644 --- a/backend/app/core/database/mongodb.py +++ b/backend/app/core/database/mongodb.py @@ -87,8 +87,10 @@ class MongoDB: "updated_at": datetime.utcnow(), } result = await self.documents.insert_one(document) - logger.info(f"文档已插入MongoDB: {result.inserted_id}") - return str(result.inserted_id) + doc_id = str(result.inserted_id) + filename = metadata.get("original_filename", "unknown") + logger.info(f"✓ 文档已存入MongoDB: [{doc_type}] {filename} | ID: {doc_id}") + return doc_id async def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]: """根据ID获取文档""" diff --git a/backend/app/main.py b/backend/app/main.py index d52d61b..f1fe817 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -2,23 +2,102 @@ FastAPI 应用主入口 """ import logging +import sys +import uuid from contextlib import asynccontextmanager +from typing import Callable +from functools import wraps -from fastapi import FastAPI +from fastapi import FastAPI, Request, Response from fastapi.middleware.cors import CORSMiddleware +from starlette.middleware.base import BaseHTTPMiddleware from app.config import settings from app.api import api_router from app.core.database import mysql_db, mongodb, redis_db -# 配置日志 -logging.basicConfig( - level=logging.INFO if settings.DEBUG else logging.WARNING, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" -) +# ==================== 日志配置 ==================== + +def setup_logging(): + """配置应用日志系统""" + # 根日志配置 + log_level = logging.DEBUG if settings.DEBUG else logging.INFO + + # 控制台处理器 + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(log_level) + console_formatter = logging.Formatter( + fmt="%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + console_handler.setFormatter(console_formatter) + + # 根日志器 + root_logger = logging.getLogger() + root_logger.setLevel(log_level) + root_logger.handlers = [] + root_logger.addHandler(console_handler) + + # 第三方库日志级别 + for lib in ["uvicorn", "uvicorn.access", "fastapi", "httpx", "sqlalchemy"]: + logging.getLogger(lib).setLevel(logging.WARNING) + + return root_logger + +# 初始化日志 +setup_logging() logger = logging.getLogger(__name__) +# ==================== 请求日志中间件 ==================== + +class RequestLoggingMiddleware(BaseHTTPMiddleware): + """请求日志中间件 - 记录每个请求的详细信息""" + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + # 生成请求ID + request_id = str(uuid.uuid4())[:8] + request.state.request_id = request_id + + # 记录请求 + logger.info(f"→ [{request_id}] {request.method} {request.url.path}") + + try: + response = await call_next(request) + + # 记录响应 + logger.info( + f"← [{request_id}] {request.method} {request.url.path} " + f"| 状态: {response.status_code} | 耗时: N/A" + ) + + # 添加请求ID到响应头 + response.headers["X-Request-ID"] = request_id + return response + + except Exception as e: + logger.error(f"✗ [{request_id}] {request.method} {request.url.path} | 异常: {str(e)}") + raise + + +# ==================== 请求追踪装饰器 ==================== + +def log_async_function(func: Callable) -> Callable: + """异步函数日志装饰器""" + @wraps(func) + async def wrapper(*args, **kwargs): + func_name = func.__name__ + logger.debug(f"→ {func_name} 开始执行") + try: + result = await func(*args, **kwargs) + logger.debug(f"← {func_name} 执行完成") + return result + except Exception as e: + logger.error(f"✗ {func_name} 执行失败: {str(e)}") + raise + return wrapper + + @asynccontextmanager async def lifespan(app: FastAPI): """ @@ -83,6 +162,9 @@ app.add_middleware( allow_headers=["*"], ) +# 添加请求日志中间件 +app.add_middleware(RequestLoggingMiddleware) + # 注册 API 路由 app.include_router(api_router, prefix=settings.API_V1_STR) diff --git a/backend/app/services/excel_storage_service.py b/backend/app/services/excel_storage_service.py index eb6d98a..858c19e 100644 --- a/backend/app/services/excel_storage_service.py +++ b/backend/app/services/excel_storage_service.py @@ -23,6 +23,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.database.mysql import Base, mysql_db logger = logging.getLogger(__name__) +# 设置该模块的日志级别 +logger.setLevel(logging.DEBUG) class ExcelStorageService: @@ -174,12 +176,15 @@ class ExcelStorageService: } try: + logger.info(f"开始读取Excel文件: {file_path}") # 读取 Excel if sheet_name: df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row) else: df = pd.read_excel(file_path, header=header_row) + logger.info(f"Excel读取完成,行数: {len(df)}, 列数: {len(df.columns)}") + if df.empty: return {"success": False, "error": "Excel 文件为空"} @@ -202,8 +207,10 @@ class ExcelStorageService: model_class = self._create_table_model(table_name, df.columns, column_types) # 创建表结构 + logger.info(f"正在创建MySQL表: {table_name}") async with self.mysql_db.get_session() as session: model_class.__table__.create(session.bind, checkfirst=True) + logger.info(f"MySQL表创建完成: {table_name}") # 插入数据 records = [] @@ -231,11 +238,13 @@ class ExcelStorageService: records.append(record) + logger.info(f"正在插入 {len(records)} 条数据到 MySQL...") # 批量插入 async with self.mysql_db.get_session() as session: for record in records: session.add(model_class(**record)) await session.commit() + logger.info(f"数据插入完成: {len(records)} 条") results["row_count"] = len(records) logger.info(f"Excel 数据已存储到 MySQL 表 {table_name},共 {len(records)} 行") @@ -243,7 +252,7 @@ class ExcelStorageService: return results except Exception as e: - logger.error(f"存储 Excel 到 MySQL 失败: {str(e)}") + logger.error(f"存储 Excel 到 MySQL 失败: {str(e)}", exc_info=True) return {"success": False, "error": str(e)} async def store_structured_data( diff --git a/backend/app/services/file_service.py b/backend/app/services/file_service.py index 813e26d..8b639c7 100644 --- a/backend/app/services/file_service.py +++ b/backend/app/services/file_service.py @@ -3,6 +3,7 @@ """ import os import shutil +import logging from pathlib import Path from datetime import datetime from typing import Optional @@ -10,6 +11,8 @@ import uuid from app.config import settings +logger = logging.getLogger(__name__) + class FileService: """文件服务类,负责文件的存储、读取和管理""" @@ -17,6 +20,7 @@ class FileService: def __init__(self): self.upload_dir = Path(settings.UPLOAD_DIR) self._ensure_upload_dir() + logger.info(f"FileService 初始化,上传目录: {self.upload_dir}") def _ensure_upload_dir(self): """确保上传目录存在""" @@ -56,6 +60,8 @@ class FileService: with open(file_path, 'wb') as f: f.write(file_content) + file_size = len(file_content) + logger.info(f"文件已保存: {filename} -> {file_path} ({file_size} bytes)") return str(file_path) def read_file(self, file_path: str) -> bytes: diff --git a/backend/app/services/table_rag_service.py b/backend/app/services/table_rag_service.py index 4471e1d..fb12c76 100644 --- a/backend/app/services/table_rag_service.py +++ b/backend/app/services/table_rag_service.py @@ -126,26 +126,45 @@ class TableRAGService: } try: - # 1. 读取 Excel + # 1. 先检查 Excel 文件是否有效 + logger.info(f"正在检查Excel文件: {file_path}") + try: + xls_file = pd.ExcelFile(file_path) + sheet_names = xls_file.sheet_names + logger.info(f"Excel文件工作表: {sheet_names}") + if not sheet_names: + return {"success": False, "error": "Excel 文件没有工作表"} + except Exception as e: + logger.error(f"读取Excel文件失败: {file_path}, error: {e}") + return {"success": False, "error": f"无法读取Excel文件: {str(e)}"} + + # 2. 读取 Excel if sheet_name: + # 验证指定的sheet_name是否存在 + if sheet_name not in sheet_names: + logger.warning(f"指定的工作表 '{sheet_name}' 不存在,使用第一个工作表: {sheet_names[0]}") + sheet_name = sheet_names[0] df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row) else: df = pd.read_excel(file_path, header=header_row) + logger.info(f"读取到数据: {len(df)} 行, {len(df.columns)} 列") + if df.empty: return {"success": False, "error": "Excel 文件为空"} # 清理列名 df.columns = [str(c) for c in df.columns] - table_name = excel_storage._sanitize_table_name(filename) + table_name = self.excel_storage._sanitize_table_name(filename) results["table_name"] = table_name results["field_count"] = len(df.columns) + logger.info(f"表名: {table_name}, 字段数: {len(df.columns)}") - # 2. 初始化 RAG (如果需要) + # 3. 初始化 RAG (如果需要) if not self.rag._initialized: self.rag._init_vector_store() - # 3. 为每个字段生成描述并索引 + # 4. 为每个字段生成描述并索引 all_fields_data = {} for col in df.columns: # 采样示例值 @@ -187,7 +206,8 @@ class TableRAGService: logger.error(error_msg) results["errors"].append(error_msg) - # 4. 存储到 MySQL + # 5. 存储到 MySQL + logger.info(f"开始存储到MySQL: {filename}") store_result = await self.excel_storage.store_excel( file_path=file_path, filename=filename,