完善后端日志

This commit is contained in:
2026-03-30 21:24:13 +08:00
parent 332f0f636d
commit c122f1d63b
7 changed files with 167 additions and 26 deletions

View File

@@ -196,18 +196,22 @@ async def process_document(
meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"}
)
# 使用 TableRAG 服务完成建表和RAG索引
rag_result = await table_rag_service.build_table_rag_index(
file_path=file_path,
filename=original_filename,
sheet_name=parse_options.get("sheet_name"),
header_row=parse_options.get("header_row", 0)
)
try:
# 使用 TableRAG 服务完成建表和RAG索引
logger.info(f"开始存储Excel到MySQL: {original_filename}, file_path: {file_path}")
rag_result = await table_rag_service.build_table_rag_index(
file_path=file_path,
filename=original_filename,
sheet_name=parse_options.get("sheet_name"),
header_row=parse_options.get("header_row", 0)
)
if rag_result.get("success"):
logger.info(f"RAG索引构建成功: {original_filename}")
else:
logger.warning(f"RAG索引构建失败: {rag_result.get('error')}")
if rag_result.get("success"):
logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {rag_result.get('table_name')}")
else:
logger.error(f"RAG索引构建失败: {rag_result.get('error')}")
except Exception as e:
logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True)
else:
# 非结构化文档

View File

@@ -10,6 +10,7 @@ import io
from app.services.file_service import file_service
from app.core.document_parser import XlsxParser
from app.services.table_rag_service import table_rag_service
logger = logging.getLogger(__name__)
@@ -27,7 +28,7 @@ async def upload_excel(
header_row: int = Query(0, description="表头所在的行索引")
):
"""
上传并解析 Excel 文件
上传并解析 Excel 文件,同时存储到 MySQL 数据库
Args:
file: 上传的 Excel 文件
@@ -77,6 +78,23 @@ async def upload_excel(
result.metadata['saved_path'] = saved_path
result.metadata['original_filename'] = file.filename
# 存储到 MySQL 数据库
try:
store_result = await table_rag_service.build_table_rag_index(
file_path=saved_path,
filename=file.filename,
sheet_name=sheet_name if sheet_name else None,
header_row=header_row
)
if store_result.get("success"):
result.metadata['mysql_table'] = store_result.get('table_name')
result.metadata['row_count'] = store_result.get('row_count')
logger.info(f"Excel已存储到MySQL: {file.filename}, 表: {store_result.get('table_name')}")
else:
logger.warning(f"Excel存储到MySQL失败: {store_result.get('error')}")
except Exception as e:
logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True)
return result.to_dict()
except HTTPException:

View File

@@ -87,8 +87,10 @@ class MongoDB:
"updated_at": datetime.utcnow(),
}
result = await self.documents.insert_one(document)
logger.info(f"文档已插入MongoDB: {result.inserted_id}")
return str(result.inserted_id)
doc_id = str(result.inserted_id)
filename = metadata.get("original_filename", "unknown")
logger.info(f"✓ 文档已存入MongoDB: [{doc_type}] {filename} | ID: {doc_id}")
return doc_id
async def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
"""根据ID获取文档"""

View File

@@ -2,23 +2,102 @@
FastAPI 应用主入口
"""
import logging
import sys
import uuid
from contextlib import asynccontextmanager
from typing import Callable
from functools import wraps
from fastapi import FastAPI
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from app.config import settings
from app.api import api_router
from app.core.database import mysql_db, mongodb, redis_db
# 配置日志
logging.basicConfig(
level=logging.INFO if settings.DEBUG else logging.WARNING,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# ==================== 日志配置 ====================
def setup_logging():
    """Configure the application's logging system.

    Replaces the root logger's handlers with a single stdout handler and
    lowers the verbosity of several third-party loggers.

    The level is DEBUG when ``settings.DEBUG`` is truthy and INFO otherwise;
    it is applied to both the root logger and the stdout handler.

    Returns:
        The configured root logger.
    """
    level = logging.DEBUG if settings.DEBUG else logging.INFO

    # One shared line format: timestamp | padded level | logger:line | message.
    formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    stdout_handler.setFormatter(formatter)

    # Drop whatever handlers were installed before, so the stdout handler
    # is the only one attached to the root logger.
    root = logging.getLogger()
    root.setLevel(level)
    root.handlers = []
    root.addHandler(stdout_handler)

    # Third-party loggers only report warnings and above.
    noisy_loggers = ("uvicorn", "uvicorn.access", "fastapi", "httpx", "sqlalchemy")
    for logger_name in noisy_loggers:
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    return root
# Initialize logging at import time, then create this module's logger.
setup_logging()
logger = logging.getLogger(__name__)
# ==================== 请求日志中间件 ====================
class RequestLoggingMiddleware(BaseHTTPMiddleware):
    """Request logging middleware.

    For every request it generates a short request ID, logs the incoming
    method and path, logs the response status together with the elapsed
    time, and echoes the ID back in the ``X-Request-ID`` response header.
    """

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Log one request/response cycle and tag it with a request ID.

        Args:
            request: The incoming request; ``request.state.request_id`` is
                set so downstream handlers can read the ID.
            call_next: Callable that forwards the request to the next
                middleware / route handler and returns the response.

        Returns:
            The downstream response with an ``X-Request-ID`` header added.

        Raises:
            Exception: Any exception from downstream is logged and re-raised
                unchanged.
        """
        # Function-scope import keeps this block self-contained without
        # touching the module's import section.
        import time

        # First 8 characters of a UUID4: short enough to read in log lines.
        request_id = str(uuid.uuid4())[:8]
        request.state.request_id = request_id

        logger.info(f"→ [{request_id}] {request.method} {request.url.path}")

        # Monotonic clock, so the measurement is immune to wall-clock jumps.
        started_at = time.perf_counter()
        try:
            response = await call_next(request)

            # Time until call_next returned the response object.
            elapsed_ms = (time.perf_counter() - started_at) * 1000
            logger.info(
                f"← [{request_id}] {request.method} {request.url.path} "
                f"| 状态: {response.status_code} | 耗时: {elapsed_ms:.1f}ms"
            )

            # Expose the request ID to the client for log correlation.
            response.headers["X-Request-ID"] = request_id
            return response

        except Exception as e:
            logger.error(f"✗ [{request_id}] {request.method} {request.url.path} | 异常: {str(e)}")
            raise
# ==================== 请求追踪装饰器 ====================
def log_async_function(func: Callable) -> Callable:
    """Decorator that logs the start, completion and failure of an async function.

    Args:
        func: The coroutine function to wrap.

    Returns:
        An async wrapper that awaits ``func`` with the given arguments,
        emits DEBUG messages before and after the call, and on an exception
        emits an ERROR message before re-raising it unchanged.
    """
    func_name = func.__name__

    @wraps(func)
    async def traced(*call_args, **call_kwargs):
        logger.debug(f"{func_name} 开始执行")
        try:
            outcome = await func(*call_args, **call_kwargs)
            logger.debug(f"{func_name} 执行完成")
            return outcome
        except Exception as exc:
            # Log and propagate; the caller decides how to handle it.
            logger.error(f"{func_name} 执行失败: {str(exc)}")
            raise

    return traced
@asynccontextmanager
async def lifespan(app: FastAPI):
"""
@@ -83,6 +162,9 @@ app.add_middleware(
allow_headers=["*"],
)
# 添加请求日志中间件
app.add_middleware(RequestLoggingMiddleware)
# 注册 API 路由
app.include_router(api_router, prefix=settings.API_V1_STR)

View File

@@ -23,6 +23,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database.mysql import Base, mysql_db
logger = logging.getLogger(__name__)
# 设置该模块的日志级别
logger.setLevel(logging.DEBUG)
class ExcelStorageService:
@@ -174,12 +176,15 @@ class ExcelStorageService:
}
try:
logger.info(f"开始读取Excel文件: {file_path}")
# 读取 Excel
if sheet_name:
df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
else:
df = pd.read_excel(file_path, header=header_row)
logger.info(f"Excel读取完成行数: {len(df)}, 列数: {len(df.columns)}")
if df.empty:
return {"success": False, "error": "Excel 文件为空"}
@@ -202,8 +207,10 @@ class ExcelStorageService:
model_class = self._create_table_model(table_name, df.columns, column_types)
# 创建表结构
logger.info(f"正在创建MySQL表: {table_name}")
async with self.mysql_db.get_session() as session:
model_class.__table__.create(session.bind, checkfirst=True)
logger.info(f"MySQL表创建完成: {table_name}")
# 插入数据
records = []
@@ -231,11 +238,13 @@ class ExcelStorageService:
records.append(record)
logger.info(f"正在插入 {len(records)} 条数据到 MySQL...")
# 批量插入
async with self.mysql_db.get_session() as session:
for record in records:
session.add(model_class(**record))
await session.commit()
logger.info(f"数据插入完成: {len(records)}")
results["row_count"] = len(records)
logger.info(f"Excel 数据已存储到 MySQL 表 {table_name},共 {len(records)}")
@@ -243,7 +252,7 @@ class ExcelStorageService:
return results
except Exception as e:
logger.error(f"存储 Excel 到 MySQL 失败: {str(e)}")
logger.error(f"存储 Excel 到 MySQL 失败: {str(e)}", exc_info=True)
return {"success": False, "error": str(e)}
async def store_structured_data(

View File

@@ -3,6 +3,7 @@
"""
import os
import shutil
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional
@@ -10,6 +11,8 @@ import uuid
from app.config import settings
logger = logging.getLogger(__name__)
class FileService:
"""文件服务类,负责文件的存储、读取和管理"""
@@ -17,6 +20,7 @@ class FileService:
def __init__(self):
self.upload_dir = Path(settings.UPLOAD_DIR)
self._ensure_upload_dir()
logger.info(f"FileService 初始化,上传目录: {self.upload_dir}")
def _ensure_upload_dir(self):
"""确保上传目录存在"""
@@ -56,6 +60,8 @@ class FileService:
with open(file_path, 'wb') as f:
f.write(file_content)
file_size = len(file_content)
logger.info(f"文件已保存: {filename} -> {file_path} ({file_size} bytes)")
return str(file_path)
def read_file(self, file_path: str) -> bytes:

View File

@@ -126,26 +126,45 @@ class TableRAGService:
}
try:
# 1. 读取 Excel
# 1. 先检查 Excel 文件是否有效
logger.info(f"正在检查Excel文件: {file_path}")
try:
xls_file = pd.ExcelFile(file_path)
sheet_names = xls_file.sheet_names
logger.info(f"Excel文件工作表: {sheet_names}")
if not sheet_names:
return {"success": False, "error": "Excel 文件没有工作表"}
except Exception as e:
logger.error(f"读取Excel文件失败: {file_path}, error: {e}")
return {"success": False, "error": f"无法读取Excel文件: {str(e)}"}
# 2. 读取 Excel
if sheet_name:
# 验证指定的sheet_name是否存在
if sheet_name not in sheet_names:
logger.warning(f"指定的工作表 '{sheet_name}' 不存在,使用第一个工作表: {sheet_names[0]}")
sheet_name = sheet_names[0]
df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
else:
df = pd.read_excel(file_path, header=header_row)
logger.info(f"读取到数据: {len(df)} 行, {len(df.columns)}")
if df.empty:
return {"success": False, "error": "Excel 文件为空"}
# 清理列名
df.columns = [str(c) for c in df.columns]
table_name = excel_storage._sanitize_table_name(filename)
table_name = self.excel_storage._sanitize_table_name(filename)
results["table_name"] = table_name
results["field_count"] = len(df.columns)
logger.info(f"表名: {table_name}, 字段数: {len(df.columns)}")
# 2. 初始化 RAG (如果需要)
# 3. 初始化 RAG (如果需要)
if not self.rag._initialized:
self.rag._init_vector_store()
# 3. 为每个字段生成描述并索引
# 4. 为每个字段生成描述并索引
all_fields_data = {}
for col in df.columns:
# 采样示例值
@@ -187,7 +206,8 @@ class TableRAGService:
logger.error(error_msg)
results["errors"].append(error_msg)
# 4. 存储到 MySQL
# 5. 存储到 MySQL
logger.info(f"开始存储到MySQL: {filename}")
store_result = await self.excel_storage.store_excel(
file_path=file_path,
filename=filename,