diff --git a/backend/app/api/endpoints/library.py b/backend/app/api/endpoints/library.py index e621367..bb6f2f5 100644 --- a/backend/app/api/endpoints/library.py +++ b/backend/app/api/endpoints/library.py @@ -3,6 +3,7 @@ 提供文档列表、详情查询和删除功能 """ +import logging from typing import Optional, List from fastapi import APIRouter, HTTPException, Query @@ -10,6 +11,8 @@ from pydantic import BaseModel from app.core.database import mongodb +logger = logging.getLogger(__name__) + router = APIRouter(prefix="/documents", tags=["文档库"]) @@ -26,7 +29,8 @@ class DocumentItem(BaseModel): @router.get("") async def get_documents( doc_type: Optional[str] = Query(None, description="文档类型过滤"), - limit: int = Query(50, ge=1, le=100, description="返回数量") + limit: int = Query(20, ge=1, le=100, description="返回数量"), + skip: int = Query(0, ge=0, description="跳过数量") ): """ 获取文档列表 @@ -40,11 +44,25 @@ async def get_documents( if doc_type: query["doc_type"] = doc_type - # 查询文档 - cursor = mongodb.documents.find(query).sort("created_at", -1).limit(limit) + logger.info(f"开始查询文档列表, query: {query}, limit: {limit}") + + # 使用 batch_size 和 max_time_ms 来控制查询 + cursor = mongodb.documents.find( + query, + {"content": 0} # 不返回 content 字段,减少数据传输 + ).sort("created_at", -1).skip(skip).limit(limit) + + # 设置 10 秒超时 + cursor.max_time_ms(10000) + + logger.info("Cursor created with 10s timeout, executing...") + + # 使用 batch_size 逐批获取 + documents_raw = await cursor.to_list(length=limit) + logger.info(f"查询到原始文档数: {len(documents_raw)}") documents = [] - async for doc in cursor: + for doc in documents_raw: documents.append({ "doc_id": str(doc["_id"]), "filename": doc.get("metadata", {}).get("filename", ""), @@ -55,10 +73,12 @@ async def get_documents( "metadata": { "row_count": doc.get("metadata", {}).get("row_count"), "column_count": doc.get("metadata", {}).get("column_count"), - "columns": doc.get("metadata", {}).get("columns", [])[:10] # 只返回前10列 + "columns": doc.get("metadata", {}).get("columns", [])[:10] } }) + logger.info(f"文档列表处理完成: {len(documents)} 个文档") + return { "success": True, "documents": documents, @@ -66,6 +86,17 @@ async def get_documents( } except Exception as e: + err_str = str(e) + # 如果是超时错误,返回空列表而不是报错 + if "timeout" in err_str.lower() or "time" in err_str.lower(): + logger.warning(f"文档查询超时,返回空列表: {err_str}") + return { + "success": True, + "documents": [], + "total": 0, + "warning": "查询超时,请稍后重试" + } + logger.error(f"获取文档列表失败: {str(e)}", exc_info=True) raise HTTPException(status_code=500, detail=f"获取文档列表失败: {str(e)}") diff --git a/backend/app/api/endpoints/templates.py b/backend/app/api/endpoints/templates.py index 706f281..8aaa296 100644 --- a/backend/app/api/endpoints/templates.py +++ b/backend/app/api/endpoints/templates.py @@ -226,9 +226,42 @@ async def export_filled_template( async def _export_to_excel(filled_data: dict, template_id: str) -> StreamingResponse: - """导出为 Excel 格式""" - # 将字典转换为单行 DataFrame - df = pd.DataFrame([filled_data]) + """导出为 Excel 格式(支持多行)""" + import logging + logger = logging.getLogger(__name__) + + logger.info(f"导出填表数据: {len(filled_data)} 个字段") + + # 计算最大行数 + max_rows = 1 + for k, v in filled_data.items(): + if isinstance(v, list) and len(v) > max_rows: + max_rows = len(v) + logger.info(f" {k}: {type(v).__name__} = {str(v)[:80]}") + + logger.info(f"最大行数: {max_rows}") + + # 构建多行数据 + rows_data = [] + for row_idx in range(max_rows): + row = {} + for col_name, values in filled_data.items(): + if isinstance(values, list): + # 取对应行的值,不足则填空 + row[col_name] = values[row_idx] if row_idx < len(values) else "" + else: + # 非列表,整个值填入第一行 + row[col_name] = values if row_idx == 0 else "" + rows_data.append(row) + + df = pd.DataFrame(rows_data) + + # 确保列顺序 + if not df.empty: + df = df[list(filled_data.keys())] + + logger.info(f"DataFrame 形状: {df.shape}") + logger.info(f"DataFrame 列: {list(df.columns)}") output = io.BytesIO() with pd.ExcelWriter(output, engine='openpyxl') as writer: diff --git a/backend/app/api/endpoints/upload.py b/backend/app/api/endpoints/upload.py index 2c3af5d..d9d9ada 100644 --- a/backend/app/api/endpoints/upload.py +++ b/backend/app/api/endpoints/upload.py @@ -11,6 +11,7 @@ import io from app.services.file_service import file_service from app.core.document_parser import XlsxParser from app.services.table_rag_service import table_rag_service +from app.core.database import mongodb logger = logging.getLogger(__name__) @@ -95,6 +96,56 @@ async def upload_excel( except Exception as e: logger.error(f"Excel存储到MySQL异常: {str(e)}", exc_info=True) + # 存储到 MongoDB(用于文档列表展示) + try: + content = "" + # 构建文本内容用于展示 + if result.data: + if isinstance(result.data, dict): + # 单 sheet 格式: {columns, rows, ...} + if 'columns' in result.data and 'rows' in result.data: + content += f"Sheet: {result.metadata.get('current_sheet', 'Sheet1') if result.metadata else 'Sheet1'}\n" + content += ", ".join(str(h) for h in result.data['columns']) + "\n" + for row in result.data['rows'][:100]: + if isinstance(row, dict): + content += ", ".join(str(row.get(col, "")) for col in result.data['columns']) + "\n" + elif isinstance(row, list): + content += ", ".join(str(cell) for cell in row) + "\n" + content += f"... (共 {len(result.data['rows'])} 行)\n\n" + # 多 sheet 格式: {sheets: {sheet_name: {columns, rows}}} + elif 'sheets' in result.data: + for sheet_name_key, sheet_data in result.data['sheets'].items(): + if isinstance(sheet_data, dict) and 'columns' in sheet_data and 'rows' in sheet_data: + content += f"Sheet: {sheet_name_key}\n" + content += ", ".join(str(h) for h in sheet_data['columns']) + "\n" + for row in sheet_data['rows'][:100]: + if isinstance(row, dict): + content += ", ".join(str(row.get(col, "")) for col in sheet_data['columns']) + "\n" + elif isinstance(row, list): + content += ", ".join(str(cell) for cell in row) + "\n" + content += f"... (共 {len(sheet_data['rows'])} 行)\n\n" + + doc_metadata = { + "filename": saved_path.split("/")[-1] if "/" in saved_path else saved_path.split("\\")[-1], + "original_filename": file.filename, + "saved_path": saved_path, + "file_size": len(content), + "row_count": result.metadata.get('row_count', 0) if result.metadata else 0, + "column_count": result.metadata.get('column_count', 0) if result.metadata else 0, + "columns": result.metadata.get('columns', []) if result.metadata else [], + "mysql_table": result.metadata.get('mysql_table') if result.metadata else None, + "sheet_count": result.metadata.get('sheet_count', 1) if result.metadata else 1, + } + await mongodb.insert_document( + doc_type="xlsx", + content=content, + metadata=doc_metadata, + structured_data=result.data if result.data else None + ) + logger.info(f"Excel文档已存储到MongoDB: {file.filename}, content长度: {len(content)}") + except Exception as e: + logger.error(f"Excel存储到MongoDB异常: {str(e)}", exc_info=True) + return result.to_dict() except HTTPException: diff --git a/backend/app/core/database/mongodb.py b/backend/app/core/database/mongodb.py index 79ffa06..e8481ec 100644 --- a/backend/app/core/database/mongodb.py +++ b/backend/app/core/database/mongodb.py @@ -26,7 +26,9 @@ class MongoDB: try: self.client = AsyncIOMotorClient( settings.MONGODB_URL, - serverSelectionTimeoutMS=5000, + serverSelectionTimeoutMS=30000, # 30秒超时,适应远程服务器 + connectTimeoutMS=30000, # 连接超时 + socketTimeoutMS=60000, # Socket 超时 ) self.db = self.client[settings.MONGODB_DB_NAME] # 验证连接 diff --git a/backend/app/services/template_fill_service.py b/backend/app/services/template_fill_service.py index 94930fb..33ca32f 100644 --- a/backend/app/services/template_fill_service.py +++ b/backend/app/services/template_fill_service.py @@ -38,10 +38,15 @@ class SourceDocument: class FillResult: """填写结果""" field: str - value: Any - source: str # 来源文档 + values: List[Any] = None # 支持多个值 + value: Any = "" # 保留兼容 + source: str = "" # 来源文档 confidence: float = 1.0 # 置信度 + def __post_init__(self): + if self.values is None: + self.values = [] + class TemplateFillService: """表格填写服务""" @@ -71,15 +76,20 @@ class TemplateFillService: filled_data = {} fill_details = [] + logger.info(f"开始填表: {len(template_fields)} 个字段, {len(source_doc_ids or [])} 个源文档") + # 1. 加载源文档内容 source_docs = await self._load_source_documents(source_doc_ids, source_file_paths) + logger.info(f"加载了 {len(source_docs)} 个源文档") + if not source_docs: logger.warning("没有找到源文档,填表结果将全部为空") # 2. 对每个字段进行提取 - for field in template_fields: + for idx, field in enumerate(template_fields): try: + logger.info(f"提取字段 [{idx+1}/{len(template_fields)}]: {field.name}") # 从源文档中提取字段值 result = await self._extract_field_value( field=field, @@ -87,34 +97,41 @@ class TemplateFillService: user_hint=user_hint ) - # 存储结果 - filled_data[field.name] = result.value + # 存储结果 - 使用 values 数组 + filled_data[field.name] = result.values if result.values else [""] fill_details.append({ "field": field.name, "cell": field.cell, + "values": result.values, "value": result.value, "source": result.source, "confidence": result.confidence }) - logger.info(f"字段 {field.name} 填写完成: {result.value}") + logger.info(f"字段 {field.name} 填写完成: {len(result.values)} 个值") except Exception as e: - logger.error(f"填写字段 {field.name} 失败: {str(e)}") - filled_data[field.name] = f"[提取失败: {str(e)}]" + logger.error(f"填写字段 {field.name} 失败: {str(e)}", exc_info=True) + filled_data[field.name] = [f"[提取失败: {str(e)}]"] fill_details.append({ "field": field.name, "cell": field.cell, + "values": [f"[提取失败]"], "value": f"[提取失败]", "source": "error", "confidence": 0.0 }) + # 计算最大行数 + max_rows = max(len(v) for v in filled_data.values()) if filled_data else 1 + logger.info(f"填表完成: {len(filled_data)} 个字段, 最大行数: {max_rows}") + return { "success": True, "filled_data": filled_data, "fill_details": fill_details, - "source_doc_count": len(source_docs) + "source_doc_count": len(source_docs), + "max_rows": max_rows } async def _load_source_documents( @@ -158,14 +175,22 @@ class TemplateFillService: parser = ParserFactory.get_parser(file_path) result = parser.parse(file_path) if result.success: + # result.data 的结构取决于解析器类型: + # - Excel 单 sheet: {columns: [...], rows: [...], row_count, column_count} + # - Excel 多 sheet: {sheets: {sheet_name: {columns, rows, ...}}} + # - Word/TXT: {content: "...", structured_data: {...}} + doc_data = result.data if result.data else {} + doc_content = doc_data.get("content", "") if isinstance(doc_data, dict) else "" + doc_structured = doc_data if isinstance(doc_data, dict) and "rows" in doc_data or isinstance(doc_data, dict) and "sheets" in doc_data else {} + source_docs.append(SourceDocument( doc_id=file_path, filename=result.metadata.get("filename", file_path.split("/")[-1]), doc_type=result.metadata.get("extension", "unknown").replace(".", ""), - content=result.data.get("content", ""), - structured_data=result.data.get("structured_data", {}) + content=doc_content, + structured_data=doc_structured )) - logger.info(f"从文件加载文档: {file_path}") + logger.info(f"从文件加载文档: {file_path}, content长度: {len(doc_content)}, structured数据: {bool(doc_structured)}") except Exception as e: logger.error(f"从文件加载文档失败 {file_path}: {str(e)}") @@ -196,30 +221,27 @@ class TemplateFillService: confidence=0.0 ) - # 构建上下文文本 - context_text = self._build_context_text(source_docs, max_length=8000) + # 构建上下文文本 - 传入字段名,只提取该列数据 + context_text = self._build_context_text(source_docs, field_name=field.name, max_length=8000) # 构建提示词 hint_text = field.hint if field.hint else f"请提取{field.name}的信息" if user_hint: hint_text = f"{user_hint}。{hint_text}" - prompt = f"""你是一个专业的数据提取专家。请根据以下文档内容,提取指定字段的信息。 + prompt = f"""你是一个专业的数据提取专家。请从以下文档内容中提取"{field.name}"字段的所有行数据。 -需要提取的字段: -- 字段名称:{field.name} -- 字段类型:{field.field_type} -- 填写提示:{hint_text} -- 是否必填:{'是' if field.required else '否'} - -参考文档内容: +参考文档内容(已提取" {field.name}"列的数据): {context_text} +请提取上述所有行的" {field.name}"值,存入数组。每一行对应数组中的一个元素。 +如果某行该字段为空,请用空字符串""占位。 + 请严格按照以下 JSON 格式输出,不要添加任何解释: {{ - "value": "提取到的值,如果没有找到则填写空字符串", - "source": "数据来源的文档描述(如:来自xxx文档)", - "confidence": 0.0到1.0之间的置信度,表示对提取结果的信心程度" + "values": ["第1行的值", "第2行的值", "第3行的值", ...], + "source": "数据来源的文档描述", + "confidence": 0.0到1.0之间的置信度 }} """ @@ -242,40 +264,86 @@ class TemplateFillService: import json import re - # 尝试提取 JSON - json_match = re.search(r'\{[\s\S]*\}', content) - if json_match: - result = json.loads(json_match.group()) - return FillResult( - field=field.name, - value=result.get("value", ""), - source=result.get("source", "LLM生成"), - confidence=result.get("confidence", 0.5) - ) - else: - # 如果无法解析,返回原始内容 - return FillResult( - field=field.name, - value=content.strip(), - source="直接提取", - confidence=0.5 - ) + # 尝试提取 JSON,使用更严格的匹配 + extracted_values = [] + extracted_value = "" + extracted_source = "LLM生成" + confidence = 0.5 + + try: + # 方法1: 尝试直接解析整个 content + result = json.loads(content) + if isinstance(result, dict): + # 优先使用 values 数组格式 + if "values" in result and isinstance(result["values"], list): + extracted_values = [str(v) for v in result["values"]] + logger.info(f"字段 {field.name} 使用 values 数组格式: {len(extracted_values)} 个值") + elif "value" in result: + extracted_value = str(result.get("value", "")) + extracted_values = [extracted_value] if extracted_value else [] + extracted_source = result.get("source", "LLM生成") + confidence = float(result.get("confidence", 0.5)) + logger.info(f"字段 {field.name} 直接 JSON 解析成功") + except json.JSONDecodeError: + # 方法2: 尝试提取 JSON 对象 + json_match = re.search(r'\{[\s\S]*\}', content) + if json_match: + try: + result = json.loads(json_match.group()) + if isinstance(result, dict): + # 优先使用 values 数组格式 + if "values" in result and isinstance(result["values"], list): + extracted_values = [str(v) for v in result["values"]] + logger.info(f"字段 {field.name} 使用 values 数组格式: {len(extracted_values)} 个值") + elif "value" in result: + extracted_value = str(result.get("value", "")) + extracted_values = [extracted_value] if extracted_value else [] + extracted_source = result.get("source", "LLM生成") + confidence = float(result.get("confidence", 0.5)) + logger.info(f"字段 {field.name} 正则 JSON 解析成功") + else: + logger.warning(f"字段 {field.name} JSON 不是字典格式") + except json.JSONDecodeError as e: + logger.error(f"字段 {field.name} JSON 解析失败: {str(e)}") + # 如果 JSON 解析失败,尝试从文本中提取 + extracted_values = self._extract_values_from_text(content, field.name) + extracted_source = "文本提取" + confidence = 0.3 + else: + logger.warning(f"字段 {field.name} 未找到 JSON: {content[:200]}") + extracted_values = self._extract_values_from_text(content, field.name) + extracted_source = "文本提取" + confidence = 0.3 + + # 如果没有提取到值,返回空 + if not extracted_values: + extracted_values = [""] + + return FillResult( + field=field.name, + values=extracted_values, + value=extracted_values[0] if extracted_values else "", + source=extracted_source, + confidence=confidence + ) except Exception as e: logger.error(f"LLM 提取失败: {str(e)}") return FillResult( field=field.name, + values=[""], value="", source=f"提取失败: {str(e)}", confidence=0.0 ) - def _build_context_text(self, source_docs: List[SourceDocument], max_length: int = 8000) -> str: + def _build_context_text(self, source_docs: List[SourceDocument], field_name: str = None, max_length: int = 8000) -> str: """ 构建上下文文本 Args: source_docs: 源文档列表 + field_name: 需要提取的字段名(可选,用于只提取特定列) max_length: 最大字符数 Returns: @@ -287,36 +355,113 @@ class TemplateFillService: for doc in source_docs: # 优先使用结构化数据(表格),其次使用文本内容 doc_content = "" + row_count = 0 - if doc.structured_data and doc.structured_data.get("tables"): - # 如果有表格数据,优先使用 - tables = doc.structured_data.get("tables", []) - for table in tables: - if isinstance(table, dict): - rows = table.get("rows", []) - if rows: - doc_content += f"\n【文档: {doc.filename} 表格数据】\n" - for row in rows[:20]: # 限制每表最多20行 - if isinstance(row, list): + if doc.structured_data and doc.structured_data.get("sheets"): + # parse_all_sheets 格式: {sheets: {sheet_name: {columns, rows}}} + sheets = doc.structured_data.get("sheets", {}) + for sheet_name, sheet_data in sheets.items(): + if isinstance(sheet_data, dict): + columns = sheet_data.get("columns", []) + rows = sheet_data.get("rows", []) + if rows and columns: + doc_content += f"\n【文档: {doc.filename} - {sheet_name},共 {len(rows)} 行】\n" + # 如果指定了字段名,只提取该列数据 + if field_name: + # 查找匹配的列(模糊匹配) + target_col = None + for col in columns: + if field_name.lower() in str(col).lower() or str(col).lower() in field_name.lower(): + target_col = col + break + if target_col: + doc_content += f"列名: {target_col}\n" + for row_idx, row in enumerate(rows): + if isinstance(row, dict): + val = row.get(target_col, "") + elif isinstance(row, list) and target_col in columns: + val = row[columns.index(target_col)] + else: + val = "" + doc_content += f"行{row_idx+1}: {val}\n" + row_count += 1 + else: + # 列名不匹配,输出所有列(但只输出关键列) + doc_content += " | ".join(str(col) for col in columns) + "\n" + for row in rows: + if isinstance(row, dict): + doc_content += " | ".join(str(row.get(col, "")) for col in columns) + "\n" + elif isinstance(row, list): + doc_content += " | ".join(str(cell) for cell in row) + "\n" + row_count += 1 + else: + # 输出所有列和行 + doc_content += " | ".join(str(col) for col in columns) + "\n" + for row in rows: + if isinstance(row, dict): + doc_content += " | ".join(str(row.get(col, "")) for col in columns) + "\n" + elif isinstance(row, list): + doc_content += " | ".join(str(cell) for cell in row) + "\n" + row_count += 1 + elif doc.structured_data and doc.structured_data.get("rows"): + # Excel 单 sheet 格式: {columns: [...], rows: [...], ...} + columns = doc.structured_data.get("columns", []) + rows = doc.structured_data.get("rows", []) + if rows and columns: + doc_content += f"\n【文档: {doc.filename},共 {len(rows)} 行】\n" + if field_name: + target_col = None + for col in columns: + if field_name.lower() in str(col).lower() or str(col).lower() in field_name.lower(): + target_col = col + break + if target_col: + doc_content += f"列名: {target_col}\n" + for row_idx, row in enumerate(rows): + if isinstance(row, dict): + val = row.get(target_col, "") + elif isinstance(row, list) and target_col in columns: + val = row[columns.index(target_col)] + else: + val = "" + doc_content += f"行{row_idx+1}: {val}\n" + row_count += 1 + else: + doc_content += " | ".join(str(col) for col in columns) + "\n" + for row in rows: + if isinstance(row, dict): + doc_content += " | ".join(str(row.get(col, "")) for col in columns) + "\n" + elif isinstance(row, list): doc_content += " | ".join(str(cell) for cell in row) + "\n" - elif isinstance(row, dict): - doc_content += " | ".join(str(v) for v in row.values()) + "\n" + row_count += 1 + else: + doc_content += " | ".join(str(col) for col in columns) + "\n" + for row in rows: + if isinstance(row, dict): + doc_content += " | ".join(str(row.get(col, "")) for col in columns) + "\n" + elif isinstance(row, list): + doc_content += " | ".join(str(cell) for cell in row) + "\n" + row_count += 1 elif doc.content: - doc_content = doc.content[:5000] # 限制文本长度 + doc_content = doc.content[:5000] if doc_content: doc_context = f"【文档: {doc.filename} ({doc.doc_type})】\n{doc_content}" + logger.info(f"文档 {doc.filename} 上下文长度: {len(doc_context)}, 行数: {row_count}") if total_length + len(doc_context) <= max_length: contexts.append(doc_context) total_length += len(doc_context) else: - # 如果超出长度,截断 remaining = max_length - total_length if remaining > 100: - contexts.append(doc_context[:remaining]) + doc_context = doc_context[:remaining] + f"\n...(内容被截断)" + contexts.append(doc_context) + logger.warning(f"上下文被截断: {doc.filename}, 总长度: {total_length + len(doc_context)}") break - return "\n\n".join(contexts) if contexts else "(源文档内容为空)" + result = "\n\n".join(contexts) if contexts else "(源文档内容为空)" + logger.info(f"最终上下文长度: {len(result)}") + return result async def get_template_fields_from_file( self, @@ -447,6 +592,83 @@ class TemplateFillService: col_idx = col_idx // 26 - 1 return result + def _extract_value_from_text(self, text: str, field_name: str) -> str: + """ + 从非 JSON 文本中提取字段值(单值版本) + + Args: + text: 原始文本 + field_name: 字段名称 + + Returns: + 提取的值 + """ + values = self._extract_values_from_text(text, field_name) + return values[0] if values else "" + + def _extract_values_from_text(self, text: str, field_name: str) -> List[str]: + """ + 从非 JSON 文本中提取多个字段值 + + Args: + text: 原始文本 + field_name: 字段名称 + + Returns: + 提取的值列表 + """ + import re + + # 尝试匹配 JSON 数组格式 + array_match = re.search(r'\[[\s\S]*\]', text) + if array_match: + try: + arr = json.loads(array_match.group()) + if isinstance(arr, list): + return [str(v) for v in arr if v] + except: + pass + + # 尝试用分号分割(如果文本中有分号分隔的多个值) + if ';' in text or ';' in text: + separator = ';' if ';' in text else ';' + parts = text.split(separator) + values = [] + for part in parts: + part = part.strip() + if part and len(part) < 500: + # 清理 Markdown 格式 + part = re.sub(r'^\*\*|\*\*$', '', part) + part = re.sub(r'^\*|\*$', '', part) + values.append(part.strip()) + if values: + return values + + # 尝试多种模式匹配 + patterns = [ + # "字段名: 值" 或 "字段名:值" 格式 + rf'{re.escape(field_name)}[::]\s*(.+?)(?:\n|$)', + # "值" 在引号中 + rf'"value"\s*:\s*"([^"]+)"', + # "值" 在单引号中 + rf"['\"]?value['\"]?\s*:\s*['\"]([^'\"]+)['\"]", + ] + + for pattern in patterns: + match = re.search(pattern, text, re.DOTALL) + if match: + value = match.group(1).strip() + # 清理 Markdown 格式 + value = re.sub(r'^\*\*|\*\*$', '', value) + value = re.sub(r'^\*|\*$', '', value) + value = value.strip() + if value and len(value) < 1000: + return [value] + + # 如果无法匹配,返回原始内容 + content = text.strip()[:500] if text.strip() else "" + return [content] if content else [] + # ==================== 全局单例 ==================== diff --git a/backend/readme.md b/backend/readme.md index f45c4b2..b22c399 100644 --- a/backend/readme.md +++ b/backend/readme.md @@ -115,8 +115,7 @@ pip install -r requirements.txt 在终端输入以下命令: ```bash cd backend #确保启动时在后端跟目录下 -./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000 - --reload #启动后端项目 +./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload #启动后端项目 ``` 先启动后端项目,再启动前端项目 diff --git a/frontend - 副本 b/frontend - 副本 deleted file mode 160000 index 7971259..0000000 --- a/frontend - 副本 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 797125940be7e8415e1a1d7c6051a2cb9f896d97 diff --git a/frontend/src/pages/Documents.tsx b/frontend/src/pages/Documents.tsx index 7e1abd6..d0d9c2e 100644 --- a/frontend/src/pages/Documents.tsx +++ b/frontend/src/pages/Documents.tsx @@ -235,6 +235,7 @@ const Documents: React.FC = () => { if (result.success) { toast.success(`解析成功: ${file.name}`); setParseResult(result); + loadDocuments(); // 刷新文档列表 if (result.metadata?.sheet_count === 1) { setExpandedSheet(Object.keys(result.data?.sheets || {})[0] || null); }