完善后端日志
This commit is contained in:
@@ -196,7 +196,9 @@ async def process_document(
|
||||
meta={"progress": 50, "message": "正在存储到MySQL并生成字段描述"}
|
||||
)
|
||||
|
||||
try:
|
||||
# 使用 TableRAG 服务完成建表和RAG索引
|
||||
logger.info(f"开始存储Excel到MySQL: {original_filename}, file_path: {file_path}")
|
||||
rag_result = await table_rag_service.build_table_rag_index(
|
||||
file_path=file_path,
|
||||
filename=original_filename,
|
||||
@@ -205,9 +207,11 @@ async def process_document(
|
||||
)
|
||||
|
||||
if rag_result.get("success"):
|
||||
logger.info(f"RAG索引构建成功: {original_filename}")
|
||||
logger.info(f"Excel存储到MySQL成功: {original_filename}, table: {rag_result.get('table_name')}")
|
||||
else:
|
||||
logger.warning(f"RAG索引构建失败: {rag_result.get('error')}")
|
||||
logger.error(f"RAG索引构建失败: {rag_result.get('error')}")
|
||||
except Exception as e:
|
||||
logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True)
|
||||
|
||||
else:
|
||||
# 非结构化文档
|
||||
|
||||
@@ -10,6 +10,7 @@ import io
|
||||
|
||||
from app.services.file_service import file_service
|
||||
from app.core.document_parser import XlsxParser
|
||||
from app.services.table_rag_service import table_rag_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -27,7 +28,7 @@ async def upload_excel(
|
||||
header_row: int = Query(0, description="表头所在的行索引")
|
||||
):
|
||||
"""
|
||||
上传并解析 Excel 文件
|
||||
上传并解析 Excel 文件,同时存储到 MySQL 数据库
|
||||
|
||||
Args:
|
||||
file: 上传的 Excel 文件
|
||||
@@ -77,6 +78,23 @@ async def upload_excel(
|
||||
result.metadata['saved_path'] = saved_path
|
||||
result.metadata['original_filename'] = file.filename
|
||||
|
||||
# 存储到 MySQL 数据库
|
||||
try:
|
||||
store_result = await table_rag_service.build_table_rag_index(
|
||||
file_path=saved_path,
|
||||
filename=file.filename,
|
||||
sheet_name=sheet_name if sheet_name else None,
|
||||
header_row=header_row
|
||||
)
|
||||
if store_result.get("success"):
|
||||
result.metadata['mysql_table'] = store_result.get('table_name')
|
||||
result.metadata['row_count'] = store_result.get('row_count')
|
||||
logger.info(f"Excel已存储到MySQL: {file.filename}, 表: {store_result.get('table_name')}")
|
||||
else:
|
||||
logger.warning(f"Excel存储到MySQL失败: {store_result.get('error')}")
|
||||
except Exception as e:
|
||||
logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True)
|
||||
|
||||
return result.to_dict()
|
||||
|
||||
except HTTPException:
|
||||
|
||||
@@ -87,8 +87,10 @@ class MongoDB:
|
||||
"updated_at": datetime.utcnow(),
|
||||
}
|
||||
result = await self.documents.insert_one(document)
|
||||
logger.info(f"文档已插入MongoDB: {result.inserted_id}")
|
||||
return str(result.inserted_id)
|
||||
doc_id = str(result.inserted_id)
|
||||
filename = metadata.get("original_filename", "unknown")
|
||||
logger.info(f"✓ 文档已存入MongoDB: [{doc_type}] {filename} | ID: {doc_id}")
|
||||
return doc_id
|
||||
|
||||
async def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""根据ID获取文档"""
|
||||
|
||||
@@ -2,23 +2,102 @@
|
||||
FastAPI 应用主入口
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
import uuid
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Callable
|
||||
from functools import wraps
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi import FastAPI, Request, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
from app.config import settings
|
||||
from app.api import api_router
|
||||
from app.core.database import mysql_db, mongodb, redis_db
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(
|
||||
level=logging.INFO if settings.DEBUG else logging.WARNING,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
# ==================== 日志配置 ====================
|
||||
|
||||
def setup_logging():
    """Configure the application's root logging.

    Installs a single stdout handler on the root logger, replacing whatever
    handlers were attached before. Verbosity is DEBUG when ``settings.DEBUG``
    is truthy and INFO otherwise. A fixed set of third-party loggers is
    raised to WARNING.

    Returns:
        The configured root logger.
    """
    # Verbosity follows the application's DEBUG setting.
    level = logging.DEBUG if settings.DEBUG else logging.INFO

    # Console output goes to stdout with a pipe-separated record layout.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    stdout_handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )

    # Reset the root logger so the stdout handler is the only one attached.
    root = logging.getLogger()
    root.setLevel(level)
    root.handlers = []
    root.addHandler(stdout_handler)

    # Third-party loggers are limited to WARNING and above.
    third_party = ("uvicorn", "uvicorn.access", "fastapi", "httpx", "sqlalchemy")
    for logger_name in third_party:
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    return root
|
||||
|
||||
# 初始化日志
|
||||
setup_logging()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ==================== 请求日志中间件 ====================
|
||||
|
||||
class RequestLoggingMiddleware(BaseHTTPMiddleware):
    """HTTP middleware that logs each request and its outcome.

    For every request it generates a short request ID, stores it on
    ``request.state.request_id``, logs the incoming method and path, and then
    logs either the response status or the raised exception together with the
    measured handling time. The request ID is returned to the client in the
    ``X-Request-ID`` response header.
    """

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Process one request, logging its start, result and duration.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request down the stack and
                returns the response.

        Returns:
            The downstream response with an ``X-Request-ID`` header added.

        Raises:
            Exception: Any exception raised downstream is logged and re-raised
                unchanged.
        """
        # Imported locally so this block does not depend on a module-level
        # `import time` being present.
        import time

        # First 8 characters of a UUID4: short enough to read in log lines.
        request_id = str(uuid.uuid4())[:8]
        request.state.request_id = request_id

        method = request.method
        path = request.url.path
        logger.info(f"→ [{request_id}] {method} {path}")

        # Monotonic clock: unaffected by wall-clock adjustments.
        started = time.perf_counter()

        try:
            response = await call_next(request)
        except Exception as e:
            elapsed_ms = (time.perf_counter() - started) * 1000
            logger.error(
                f"✗ [{request_id}] {method} {path} "
                f"| 异常: {str(e)} | 耗时: {elapsed_ms:.1f}ms"
            )
            raise

        elapsed_ms = (time.perf_counter() - started) * 1000
        logger.info(
            f"← [{request_id}] {method} {path} "
            f"| 状态: {response.status_code} | 耗时: {elapsed_ms:.1f}ms"
        )

        # Expose the request ID so clients can correlate with server logs.
        response.headers["X-Request-ID"] = request_id
        return response
|
||||
|
||||
|
||||
# ==================== 请求追踪装饰器 ====================
|
||||
|
||||
def log_async_function(func: Callable) -> Callable:
    """Decorate an async function with entry, exit and failure logging.

    Args:
        func: The coroutine function to wrap.

    Returns:
        An async wrapper that logs at DEBUG level before and after awaiting
        ``func``, logs at ERROR level if it raises, and re-raises the
        exception unchanged.
    """

    @wraps(func)
    async def traced(*args, **kwargs):
        label = func.__name__
        logger.debug(f"→ {label} 开始执行")
        try:
            outcome = await func(*args, **kwargs)
            logger.debug(f"← {label} 执行完成")
            return outcome
        except Exception as exc:
            # Record the failure, then let the caller handle the exception.
            logger.error(f"✗ {label} 执行失败: {str(exc)}")
            raise

    return traced
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""
|
||||
@@ -83,6 +162,9 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# 添加请求日志中间件
|
||||
app.add_middleware(RequestLoggingMiddleware)
|
||||
|
||||
# 注册 API 路由
|
||||
app.include_router(api_router, prefix=settings.API_V1_STR)
|
||||
|
||||
|
||||
@@ -23,6 +23,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.database.mysql import Base, mysql_db
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# 设置该模块的日志级别
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class ExcelStorageService:
|
||||
@@ -174,12 +176,15 @@ class ExcelStorageService:
|
||||
}
|
||||
|
||||
try:
|
||||
logger.info(f"开始读取Excel文件: {file_path}")
|
||||
# 读取 Excel
|
||||
if sheet_name:
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
|
||||
else:
|
||||
df = pd.read_excel(file_path, header=header_row)
|
||||
|
||||
logger.info(f"Excel读取完成,行数: {len(df)}, 列数: {len(df.columns)}")
|
||||
|
||||
if df.empty:
|
||||
return {"success": False, "error": "Excel 文件为空"}
|
||||
|
||||
@@ -202,8 +207,10 @@ class ExcelStorageService:
|
||||
model_class = self._create_table_model(table_name, df.columns, column_types)
|
||||
|
||||
# 创建表结构
|
||||
logger.info(f"正在创建MySQL表: {table_name}")
|
||||
async with self.mysql_db.get_session() as session:
|
||||
model_class.__table__.create(session.bind, checkfirst=True)
|
||||
logger.info(f"MySQL表创建完成: {table_name}")
|
||||
|
||||
# 插入数据
|
||||
records = []
|
||||
@@ -231,11 +238,13 @@ class ExcelStorageService:
|
||||
|
||||
records.append(record)
|
||||
|
||||
logger.info(f"正在插入 {len(records)} 条数据到 MySQL...")
|
||||
# 批量插入
|
||||
async with self.mysql_db.get_session() as session:
|
||||
for record in records:
|
||||
session.add(model_class(**record))
|
||||
await session.commit()
|
||||
logger.info(f"数据插入完成: {len(records)} 条")
|
||||
|
||||
results["row_count"] = len(records)
|
||||
logger.info(f"Excel 数据已存储到 MySQL 表 {table_name},共 {len(records)} 行")
|
||||
@@ -243,7 +252,7 @@ class ExcelStorageService:
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"存储 Excel 到 MySQL 失败: {str(e)}")
|
||||
logger.error(f"存储 Excel 到 MySQL 失败: {str(e)}", exc_info=True)
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def store_structured_data(
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
@@ -10,6 +11,8 @@ import uuid
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileService:
|
||||
"""文件服务类,负责文件的存储、读取和管理"""
|
||||
@@ -17,6 +20,7 @@ class FileService:
|
||||
def __init__(self):
|
||||
self.upload_dir = Path(settings.UPLOAD_DIR)
|
||||
self._ensure_upload_dir()
|
||||
logger.info(f"FileService 初始化,上传目录: {self.upload_dir}")
|
||||
|
||||
def _ensure_upload_dir(self):
|
||||
"""确保上传目录存在"""
|
||||
@@ -56,6 +60,8 @@ class FileService:
|
||||
with open(file_path, 'wb') as f:
|
||||
f.write(file_content)
|
||||
|
||||
file_size = len(file_content)
|
||||
logger.info(f"文件已保存: {filename} -> {file_path} ({file_size} bytes)")
|
||||
return str(file_path)
|
||||
|
||||
def read_file(self, file_path: str) -> bytes:
|
||||
|
||||
@@ -126,26 +126,45 @@ class TableRAGService:
|
||||
}
|
||||
|
||||
try:
|
||||
# 1. 读取 Excel
|
||||
# 1. 先检查 Excel 文件是否有效
|
||||
logger.info(f"正在检查Excel文件: {file_path}")
|
||||
try:
|
||||
xls_file = pd.ExcelFile(file_path)
|
||||
sheet_names = xls_file.sheet_names
|
||||
logger.info(f"Excel文件工作表: {sheet_names}")
|
||||
if not sheet_names:
|
||||
return {"success": False, "error": "Excel 文件没有工作表"}
|
||||
except Exception as e:
|
||||
logger.error(f"读取Excel文件失败: {file_path}, error: {e}")
|
||||
return {"success": False, "error": f"无法读取Excel文件: {str(e)}"}
|
||||
|
||||
# 2. 读取 Excel
|
||||
if sheet_name:
|
||||
# 验证指定的sheet_name是否存在
|
||||
if sheet_name not in sheet_names:
|
||||
logger.warning(f"指定的工作表 '{sheet_name}' 不存在,使用第一个工作表: {sheet_names[0]}")
|
||||
sheet_name = sheet_names[0]
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
|
||||
else:
|
||||
df = pd.read_excel(file_path, header=header_row)
|
||||
|
||||
logger.info(f"读取到数据: {len(df)} 行, {len(df.columns)} 列")
|
||||
|
||||
if df.empty:
|
||||
return {"success": False, "error": "Excel 文件为空"}
|
||||
|
||||
# 清理列名
|
||||
df.columns = [str(c) for c in df.columns]
|
||||
table_name = excel_storage._sanitize_table_name(filename)
|
||||
table_name = self.excel_storage._sanitize_table_name(filename)
|
||||
results["table_name"] = table_name
|
||||
results["field_count"] = len(df.columns)
|
||||
logger.info(f"表名: {table_name}, 字段数: {len(df.columns)}")
|
||||
|
||||
# 2. 初始化 RAG (如果需要)
|
||||
# 3. 初始化 RAG (如果需要)
|
||||
if not self.rag._initialized:
|
||||
self.rag._init_vector_store()
|
||||
|
||||
# 3. 为每个字段生成描述并索引
|
||||
# 4. 为每个字段生成描述并索引
|
||||
all_fields_data = {}
|
||||
for col in df.columns:
|
||||
# 采样示例值
|
||||
@@ -187,7 +206,8 @@ class TableRAGService:
|
||||
logger.error(error_msg)
|
||||
results["errors"].append(error_msg)
|
||||
|
||||
# 4. 存储到 MySQL
|
||||
# 5. 存储到 MySQL
|
||||
logger.info(f"开始存储到MySQL: {filename}")
|
||||
store_result = await self.excel_storage.store_excel(
|
||||
file_path=file_path,
|
||||
filename=filename,
|
||||
|
||||
Reference in New Issue
Block a user