From 8e46e635f1a90770a020a9c01a3fa47d05839f32 Mon Sep 17 00:00:00 2001 From: dj <431634905@qq.com> Date: Thu, 16 Apr 2026 19:59:56 +0800 Subject: [PATCH] =?UTF-8?q?rag=E6=97=A5=E5=BF=97=E6=94=B9=E4=B8=BAinfo?= =?UTF-8?q?=E7=BA=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/core/document_parser/docx_parser.py | 10 +++++++--- backend/app/services/llm_service.py | 14 +++++++++++++- backend/app/services/rag_service.py | 2 +- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/backend/app/core/document_parser/docx_parser.py b/backend/app/core/document_parser/docx_parser.py index e2bcb0e..24ff54a 100644 --- a/backend/app/core/document_parser/docx_parser.py +++ b/backend/app/core/document_parser/docx_parser.py @@ -91,11 +91,15 @@ class DocxParser(BaseParser): table_rows.append(row_data) if table_rows: + # 第一行作为表头,其余行作为数据 + headers = table_rows[0] if table_rows else [] + data_rows = table_rows[1:] if len(table_rows) > 1 else [] tables_data.append({ "table_index": i, - "rows": table_rows, - "row_count": len(table_rows), - "column_count": len(table_rows[0]) if table_rows else 0 + "headers": headers, # 添加 headers 字段 + "rows": data_rows, # 数据行(不含表头) + "row_count": len(data_rows), + "column_count": len(headers) if headers else 0 }) # 提取图片/嵌入式对象信息 diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index fac51e3..c0a5dd9 100644 --- a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -54,6 +54,9 @@ class LLMService: # 添加其他参数 payload.update(kwargs) + import time + _start_time = time.time() + logger.info(f"🤖 [LLM] 正在调用 DeepSeek API... | 模型: {self.model_name}") try: async with httpx.AsyncClient(timeout=60.0) as client: response = await client.post( @@ -62,7 +65,10 @@ json=payload ) response.raise_for_status() - return response.json() + result = response.json() + _elapsed = time.time() - _start_time + logger.info(f"✅ [LLM] DeepSeek API 响应成功 | 模型: {self.model_name} | 耗时: {_elapsed:.2f}s | Token: {result.get('usage', {}).get('total_tokens', 'N/A')}") + return result except httpx.HTTPStatusError as e: error_detail = e.response.text @@ -133,6 +139,9 @@ payload.update(kwargs) + import time + _start_time = time.time() + logger.info(f"🤖 [LLM] 正在调用 DeepSeek API (流式) | 模型: {self.model_name}") try: async with httpx.AsyncClient(timeout=120.0) as client: async with client.stream( @@ -141,10 +150,13 @@ headers=headers, json=payload ) as response: + _elapsed = time.time() - _start_time + logger.info(f"✅ [LLM] DeepSeek API 流式响应开始 | 模型: {self.model_name} | 耗时: {_elapsed:.2f}s") async for line in response.aiter_lines(): if line.startswith("data: "): data = line[6:] # Remove "data: " prefix if data == "[DONE]": + logger.info(f"✅ [LLM] DeepSeek API 流式响应完成") break try: import json as json_module diff --git a/backend/app/services/rag_service.py b/backend/app/services/rag_service.py index 230800c..866b7ff 100644 --- a/backend/app/services/rag_service.py +++ b/backend/app/services/rag_service.py @@ -669,7 +669,7 @@ class RAGService: # 按融合分数降序排序 fused_results.sort(key=lambda x: x["score"], reverse=True) - logger.debug(f"混合融合: {len(fused_results)} 个文档, 向量:{len(vector_results)}, BM25:{len(bm25_results)}") + logger.info(f"RRF 混合融合: {len(fused_results)} 个文档参与融合, 向量检索命中:{len(vector_results)}, BM25命中:{len(bm25_results)}") return fused_results[:top_k]