Compare commits
10 Commits
c2f50d3bd8
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 8f6d8a43d3 | |||
| 6ec45b73ad | |||
| 73f1c2804f | |||
| 74d40f91c5 | |||
| d2e3c2db3e | |||
| be302839ee | |||
| 581e2b0ae0 | |||
| 975ebf536b | |||
| 38b0c7e62e | |||
| 8e46e635f1 |
7
.claude/settings.local.json
Normal file
7
.claude/settings.local.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"WebSearch"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
35
.env.example
Normal file
35
.env.example
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# ============================================================
|
||||||
|
# FilesReadSystem 环境变量配置模板
|
||||||
|
# 复制此文件为 .env 并填入实际值
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
# ==================== 应用配置 ====================
|
||||||
|
DEBUG=false
|
||||||
|
|
||||||
|
# ==================== MongoDB ====================
|
||||||
|
MONGO_ROOT_USER=admin
|
||||||
|
MONGO_ROOT_PASSWORD=your_mongo_password
|
||||||
|
MONGODB_DB_NAME=document_system
|
||||||
|
|
||||||
|
# ==================== MySQL ====================
|
||||||
|
MYSQL_PASSWORD=your_mysql_password
|
||||||
|
MYSQL_DATABASE=document
|
||||||
|
|
||||||
|
# ==================== Redis ====================
|
||||||
|
REDIS_PASSWORD=your_redis_password
|
||||||
|
|
||||||
|
# ==================== LLM AI ====================
|
||||||
|
LLM_API_KEY=your_llm_api_key
|
||||||
|
LLM_BASE_URL=https://api.deepseek.com
|
||||||
|
LLM_MODEL_NAME=deepseek-chat
|
||||||
|
|
||||||
|
# ==================== Supabase ====================
|
||||||
|
SUPABASE_URL=https://your-project.supabase.co
|
||||||
|
SUPABASE_ANON_KEY=your_anon_key
|
||||||
|
SUPABASE_SERVICE_KEY=your_service_key
|
||||||
|
|
||||||
|
# ==================== Embedding / RAG ====================
|
||||||
|
EMBEDDING_MODEL=all-MiniLM-L6-v2
|
||||||
|
|
||||||
|
# ==================== 前端配置 ====================
|
||||||
|
VITE_APP_ID=your_app_id
|
||||||
175
README.md
175
README.md
@@ -1,4 +1,4 @@
|
|||||||
# FilesReadSystem
|
# 智联文档
|
||||||
|
|
||||||
## 项目介绍 / Project Introduction
|
## 项目介绍 / Project Introduction
|
||||||
|
|
||||||
@@ -26,37 +26,79 @@ A document understanding and multi-source data fusion system based on Large Lang
|
|||||||
|
|
||||||
## 项目架构 / Project Architecture
|
## 项目架构 / Project Architecture
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TB
|
||||||
|
subgraph UI["用户界面 / User Interface"]
|
||||||
|
Frontend["React + TypeScript + shadcn/ui"]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Backend["FastAPI 后端 / Backend"]
|
||||||
|
Upload["上传 API<br/>/upload"]
|
||||||
|
Documents["文档管理<br/>/documents"]
|
||||||
|
RAG["RAG 检索<br/>/rag/search"]
|
||||||
|
AI["AI 分析<br/>/ai/analyze"]
|
||||||
|
Template["模板填充<br/>/templates/fill"]
|
||||||
|
Instruction["自然语言指令<br/>/instruction/execute"]
|
||||||
|
Visual["可视化<br/>/visualization"]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Data["数据层 / Data Layer"]
|
||||||
|
MongoDB["MongoDB<br/>文档存储"]
|
||||||
|
MySQL["MySQL<br/>结构化数据"]
|
||||||
|
Redis["Redis<br/>缓存/队列"]
|
||||||
|
FAISS["FAISS<br/>向量索引"]
|
||||||
|
end
|
||||||
|
|
||||||
|
UI --> Backend
|
||||||
|
Backend --> MongoDB
|
||||||
|
Backend --> MySQL
|
||||||
|
Backend --> Redis
|
||||||
|
MongoDB --> FAISS
|
||||||
```
|
```
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
|
||||||
│ User Interface │
|
---
|
||||||
│ (React + TypeScript + shadcn/ui) │
|
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
## 程序流程 / Program Flow
|
||||||
│
|
|
||||||
▼
|
```mermaid
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
flowchart TD
|
||||||
│ FastAPI Backend │
|
Start([用户上传文档<br/>User Uploads Document]) --> Parse{解析文档格式<br/>Parse Document Format}
|
||||||
│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────────────┐ │
|
|
||||||
│ │ Upload API │ │ RAG Search │ │ Natural Language │ │
|
Parse -->|Excel| ParseXlsx["解析 Excel<br/>Parse XLSX"]
|
||||||
│ │ /documents │ │ /rag/search │ │ /instruction/execute │ │
|
Parse -->|Word| ParseDocx["解析 Word<br/>Parse DOCX"]
|
||||||
│ └─────────────┘ └──────────────┘ └─────────────────────────┘ │
|
Parse -->|Markdown| ParseMd["解析 Markdown<br/>Parse Markdown"]
|
||||||
│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────────────┐ │
|
Parse -->|Text| ParseTxt["解析文本<br/>Parse Text"]
|
||||||
│ │ AI Analyze │ │ Template Fill│ │ Visualization │ │
|
|
||||||
│ │ /ai/analyze │ │ /templates │ │ /visualization │ │
|
ParseXlsx --> Store1[(存储到<br/>MongoDB)]
|
||||||
│ └─────────────┘ └──────────────┘ └─────────────────────────┘ │
|
ParseDocx --> Store1
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
ParseMd --> Store1
|
||||||
│
|
ParseTxt --> Store1
|
||||||
┌─────────────────────┼─────────────────────┐
|
|
||||||
▼ ▼ ▼
|
Store1 --> Embed["Embedding 向量化<br/>Create Embeddings"]
|
||||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
Embed --> Index[(索引到<br/>FAISS)]
|
||||||
│ MongoDB │ │ MySQL │ │ Redis │
|
|
||||||
│ (Documents) │ │ (Structured) │ │ (Cache/Queue) │
|
Index --> TaskCreated{创建任务<br/>Create Task}
|
||||||
└─────────────────┘ └─────────────────┘ └─────────────────┘
|
|
||||||
│
|
TaskCreated -->|同步| ProcessSync["同步处理<br/>Sync Process"]
|
||||||
▼
|
TaskCreated -->|异步| QueueTask["加入任务队列<br/>Queue to Celery"]
|
||||||
┌─────────────────┐
|
|
||||||
│ FAISS │
|
ProcessSync --> ReturnResult["返回结果<br/>Return Result"]
|
||||||
│ (Vector Index) │
|
|
||||||
└─────────────────┘
|
QueueTask --> CeleryWorker["Celery Worker<br/>异步处理"]
|
||||||
|
CeleryWorker --> LLM["调用 LLM<br/>Call LLM API"]
|
||||||
|
LLM --> StoreResult["存储结果<br/>Store Result"]
|
||||||
|
StoreResult --> ReturnAsync["返回任务ID<br/>Return Task ID"]
|
||||||
|
|
||||||
|
ReturnResult --> End([完成<br/>Complete])
|
||||||
|
ReturnAsync --> Poll{轮询任务状态<br/>Poll Task Status}
|
||||||
|
Poll -->|进行中| Poll
|
||||||
|
Poll -->|完成| GetResult["获取结果<br/>Get Result"]
|
||||||
|
GetResult --> End
|
||||||
|
|
||||||
|
style Start fill:#e1f5fe
|
||||||
|
style End fill:#c8e6c9
|
||||||
|
style LLM fill:#fff3e0
|
||||||
|
style CeleryWorker fill:#fff3e0
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -233,6 +275,77 @@ pnpm dev
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Docker 部署 / Docker Deployment
|
||||||
|
|
||||||
|
### 快速启动 / Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 复制环境变量模板并编辑
|
||||||
|
cp .env.example .env
|
||||||
|
# 编辑 .env 填入实际配置
|
||||||
|
|
||||||
|
# 2. 启动所有服务
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 3. 查看日志
|
||||||
|
docker compose logs -f
|
||||||
|
|
||||||
|
# 4. 检查服务状态
|
||||||
|
docker compose ps
|
||||||
|
|
||||||
|
# 5. 更新部署
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
### 服务说明 / Services
|
||||||
|
|
||||||
|
| 服务 | 端口 | 说明 |
|
||||||
|
|:---|:---|:---|
|
||||||
|
| frontend | 80 | React 前端 (Nginx) |
|
||||||
|
| backend | 8000 | FastAPI 后端 |
|
||||||
|
| mongodb | 27017 | MongoDB 数据库 |
|
||||||
|
| mysql | 3306 | MySQL 数据库 |
|
||||||
|
| redis | 6379 | Redis 缓存/队列 |
|
||||||
|
|
||||||
|
### 环境变量 / Environment Variables
|
||||||
|
|
||||||
|
创建 `.env` 文件,参考 `.env.example`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 数据库配置
|
||||||
|
MONGO_ROOT_USER=admin
|
||||||
|
MONGO_ROOT_PASSWORD=your_password
|
||||||
|
MONGODB_DB_NAME=document_system
|
||||||
|
MYSQL_PASSWORD=your_password
|
||||||
|
MYSQL_DATABASE=document
|
||||||
|
REDIS_PASSWORD=your_password
|
||||||
|
|
||||||
|
# LLM 配置
|
||||||
|
LLM_API_KEY=your_api_key
|
||||||
|
LLM_BASE_URL=https://api.deepseek.com
|
||||||
|
LLM_MODEL_NAME=deepseek-chat
|
||||||
|
|
||||||
|
# Supabase 配置
|
||||||
|
SUPABASE_URL=https://your-project.supabase.co
|
||||||
|
SUPABASE_ANON_KEY=your_anon_key
|
||||||
|
SUPABASE_SERVICE_KEY=your_service_key
|
||||||
|
```
|
||||||
|
|
||||||
|
### 验证部署 / Verify Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 检查所有服务状态
|
||||||
|
docker compose ps
|
||||||
|
|
||||||
|
# 访问前端
|
||||||
|
curl http://localhost
|
||||||
|
|
||||||
|
# 检查后端健康
|
||||||
|
curl http://localhost:8000/health
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 许可证 / License
|
## 许可证 / License
|
||||||
|
|
||||||
ISC
|
ISC
|
||||||
|
|||||||
@@ -34,9 +34,9 @@ REDIS_URL="redis://localhost:6379/0"
|
|||||||
# - 模型: glm-4-flash (快速文本模型), glm-4 (标准), glm-4-plus (高性能)
|
# - 模型: glm-4-flash (快速文本模型), glm-4 (标准), glm-4-plus (高性能)
|
||||||
# - API: https://open.bigmodel.cn
|
# - API: https://open.bigmodel.cn
|
||||||
# - API Key: https://open.bigmodel.cn/usercenter/apikeys
|
# - API Key: https://open.bigmodel.cn/usercenter/apikeys
|
||||||
LLM_API_KEY="ca79ad9f96524cd5afc3e43ca97f347d.cpiLLx2oyitGvTeU"
|
LLM_API_KEY="your_llm_api_key_here"
|
||||||
LLM_BASE_URL="https://open.bigmodel.cn/api/paas/v4"
|
LLM_BASE_URL="https://api.deepseek.com"
|
||||||
LLM_MODEL_NAME="glm-4v-plus"
|
LLM_MODEL_NAME="deepseek-chat"
|
||||||
|
|
||||||
# ==================== Supabase 配置 ====================
|
# ==================== Supabase 配置 ====================
|
||||||
# Supabase 项目配置
|
# Supabase 项目配置
|
||||||
@@ -45,10 +45,14 @@ SUPABASE_ANON_KEY="your_supabase_anon_key_here"
|
|||||||
SUPABASE_SERVICE_KEY="your_supabase_service_key_here"
|
SUPABASE_SERVICE_KEY="your_supabase_service_key_here"
|
||||||
|
|
||||||
# ==================== 文件路径配置 ====================
|
# ==================== 文件路径配置 ====================
|
||||||
# 上传文件存储目录 (相对于项目根目录)
|
# 上传文件存储目录
|
||||||
|
# 本地开发: ./data/uploads
|
||||||
|
# Docker部署: /app/data/uploads
|
||||||
UPLOAD_DIR="./data/uploads"
|
UPLOAD_DIR="./data/uploads"
|
||||||
|
|
||||||
# Faiss 向量数据库持久化目录 (LangChain + Faiss 实现)
|
# Faiss 向量数据库持久化目录
|
||||||
|
# 本地开发: ./data/faiss
|
||||||
|
# Docker部署: /app/data/faiss
|
||||||
FAISS_INDEX_DIR="./data/faiss"
|
FAISS_INDEX_DIR="./data/faiss"
|
||||||
|
|
||||||
# ==================== RAG 配置 ====================
|
# ==================== RAG 配置 ====================
|
||||||
|
|||||||
7
backend/=4.0.0
Normal file
7
backend/=4.0.0
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
Collecting reportlab
|
||||||
|
Using cached reportlab-4.4.10-py3-none-any.whl.metadata (1.7 kB)
|
||||||
|
Requirement already satisfied: pillow>=9.0.0 in d:\code\filesreadsystem\backend\venv\lib\site-packages (from reportlab) (12.1.1)
|
||||||
|
Requirement already satisfied: charset-normalizer in d:\code\filesreadsystem\backend\venv\lib\site-packages (from reportlab) (3.4.6)
|
||||||
|
Using cached reportlab-4.4.10-py3-none-any.whl (2.0 MB)
|
||||||
|
Installing collected packages: reportlab
|
||||||
|
Successfully installed reportlab-4.4.10
|
||||||
40
backend/Dockerfile
Normal file
40
backend/Dockerfile
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# ============================================================
# FilesReadSystem Backend Docker Image
# ============================================================
FROM python:3.12-slim

# Do not write .pyc files and do not buffer stdout/stderr, so logs show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# System packages needed by the Python dependencies (FAISS, Pillow, tesseract, ...).
# NOTE: `libgl1-mesa-glx` is no longer packaged in the Debian releases that
# python:3.12-slim is built on; `libgl1` provides the same libGL.so.1.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    libgl1 \
    libglib2.0-0 \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the dependency manifest first so the install layer below is cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code.
COPY app/ ./app/

# Create the data directories used for uploads, the FAISS index and logs.
RUN mkdir -p /app/data/uploads /app/data/faiss /app/data/logs

# Port the API server listens on.
EXPOSE 8000

# Health check: raise_for_status() makes a 4xx/5xx answer count as unhealthy;
# a bare httpx.get() only fails when the connection itself fails.
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()" || exit 1

# Start command.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
@@ -15,6 +15,7 @@ from app.api.endpoints import (
|
|||||||
health,
|
health,
|
||||||
instruction, # 智能指令
|
instruction, # 智能指令
|
||||||
conversation, # 对话历史
|
conversation, # 对话历史
|
||||||
|
pdf_converter, # PDF转换
|
||||||
)
|
)
|
||||||
|
|
||||||
# 创建主路由
|
# 创建主路由
|
||||||
@@ -33,3 +34,4 @@ api_router.include_router(visualization.router) # 可视化
|
|||||||
api_router.include_router(analysis_charts.router) # 分析图表
|
api_router.include_router(analysis_charts.router) # 分析图表
|
||||||
api_router.include_router(instruction.router) # 智能指令
|
api_router.include_router(instruction.router) # 智能指令
|
||||||
api_router.include_router(conversation.router) # 对话历史
|
api_router.include_router(conversation.router) # 对话历史
|
||||||
|
api_router.include_router(pdf_converter.router) # PDF转换
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
AI 分析 API 接口
|
AI 分析 API 接口
|
||||||
"""
|
"""
|
||||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
|
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body, Form
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import logging
|
import logging
|
||||||
@@ -21,7 +21,8 @@ router = APIRouter(prefix="/ai", tags=["AI 分析"])
|
|||||||
|
|
||||||
@router.post("/analyze/excel")
|
@router.post("/analyze/excel")
|
||||||
async def analyze_excel(
|
async def analyze_excel(
|
||||||
file: UploadFile = File(...),
|
file: Optional[UploadFile] = File(None),
|
||||||
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||||
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
|
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
|
||||||
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
|
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
|
||||||
@@ -30,7 +31,8 @@ async def analyze_excel(
|
|||||||
上传并使用 AI 分析 Excel 文件
|
上传并使用 AI 分析 Excel 文件
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: 上传的 Excel 文件
|
file: 上传的 Excel 文件(与 doc_id 二选一)
|
||||||
|
doc_id: 文档ID(从数据库读取)
|
||||||
user_prompt: 用户自定义提示词
|
user_prompt: 用户自定义提示词
|
||||||
analysis_type: 分析类型
|
analysis_type: 分析类型
|
||||||
parse_all_sheets: 是否分析所有工作表
|
parse_all_sheets: 是否分析所有工作表
|
||||||
@@ -38,7 +40,57 @@ async def analyze_excel(
|
|||||||
Returns:
|
Returns:
|
||||||
dict: 分析结果,包含 Excel 数据和 AI 分析结果
|
dict: 分析结果,包含 Excel 数据和 AI 分析结果
|
||||||
"""
|
"""
|
||||||
# 检查文件类型
|
filename = None
|
||||||
|
|
||||||
|
# 从数据库读取模式
|
||||||
|
if doc_id:
|
||||||
|
try:
|
||||||
|
from app.core.database.mongodb import mongodb
|
||||||
|
doc = await mongodb.get_document(doc_id)
|
||||||
|
if not doc:
|
||||||
|
raise HTTPException(status_code=404, detail=f"文档不存在: {doc_id}")
|
||||||
|
|
||||||
|
filename = doc.get("metadata", {}).get("original_filename", "unknown.xlsx")
|
||||||
|
file_ext = filename.split('.')[-1].lower()
|
||||||
|
|
||||||
|
if file_ext not in ['xlsx', 'xls']:
|
||||||
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Excel: {file_ext}")
|
||||||
|
|
||||||
|
file_path = doc.get("metadata", {}).get("file_path")
|
||||||
|
if not file_path:
|
||||||
|
raise HTTPException(status_code=400, detail="文档没有存储文件路径,请重新上传")
|
||||||
|
|
||||||
|
# 使用文件路径进行 AI 分析
|
||||||
|
if parse_all_sheets:
|
||||||
|
result = await excel_ai_service.batch_analyze_sheets_from_path(
|
||||||
|
file_path=file_path,
|
||||||
|
filename=filename,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
analysis_type=analysis_type
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result = await excel_ai_service.analyze_excel_file_from_path(
|
||||||
|
file_path=file_path,
|
||||||
|
filename=filename,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
analysis_type=analysis_type
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.get("success"):
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
return result
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"从数据库读取 Excel 文档失败: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"读取文档失败: {str(e)}")
|
||||||
|
|
||||||
|
# 文件上传模式
|
||||||
|
if not file:
|
||||||
|
raise HTTPException(status_code=400, detail="请提供文件或文档ID")
|
||||||
|
|
||||||
if not file.filename:
|
if not file.filename:
|
||||||
raise HTTPException(status_code=400, detail="文件名为空")
|
raise HTTPException(status_code=400, detail="文件名为空")
|
||||||
|
|
||||||
@@ -61,7 +113,11 @@ async def analyze_excel(
|
|||||||
# 读取文件内容
|
# 读取文件内容
|
||||||
content = await file.read()
|
content = await file.read()
|
||||||
|
|
||||||
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}")
|
# 验证文件内容不为空
|
||||||
|
if not content:
|
||||||
|
raise HTTPException(status_code=400, detail="文件内容为空,请确保文件已正确上传")
|
||||||
|
|
||||||
|
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}, 文件大小: {len(content)} bytes")
|
||||||
|
|
||||||
# 调用 AI 分析服务
|
# 调用 AI 分析服务
|
||||||
if parse_all_sheets:
|
if parse_all_sheets:
|
||||||
@@ -155,7 +211,7 @@ async def analyze_text(
|
|||||||
@router.post("/analyze/md")
|
@router.post("/analyze/md")
|
||||||
async def analyze_markdown(
|
async def analyze_markdown(
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
|
analysis_type: str = Query("summary", description="分析类型: summary, outline, key_points, questions, tags, qa, statistics, section, charts"),
|
||||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||||
section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
|
section_number: Optional[str] = Query(None, description="指定章节编号,如 '一' 或 '(一)'")
|
||||||
@@ -198,7 +254,7 @@ async def analyze_markdown(
|
|||||||
if file_ext not in ['md', 'markdown']:
|
if file_ext not in ['md', 'markdown']:
|
||||||
raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Markdown: {file_ext}")
|
||||||
|
|
||||||
content = doc.get("content", "")
|
content = doc.get("content") or ""
|
||||||
if not content:
|
if not content:
|
||||||
raise HTTPException(status_code=400, detail="文档内容为空")
|
raise HTTPException(status_code=400, detail="文档内容为空")
|
||||||
|
|
||||||
@@ -392,7 +448,7 @@ async def get_markdown_outline(
|
|||||||
@router.post("/analyze/txt")
|
@router.post("/analyze/txt")
|
||||||
async def analyze_txt(
|
async def analyze_txt(
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -427,7 +483,7 @@ async def analyze_txt(
|
|||||||
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
|
raise HTTPException(status_code=400, detail=f"文档类型不是 TXT: {file_ext}")
|
||||||
|
|
||||||
# 使用数据库中的 content
|
# 使用数据库中的 content
|
||||||
text_content = doc.get("content", "")
|
text_content = doc.get("content") or ""
|
||||||
|
|
||||||
if not text_content:
|
if not text_content:
|
||||||
raise HTTPException(status_code=400, detail="文档内容为空")
|
raise HTTPException(status_code=400, detail="文档内容为空")
|
||||||
@@ -498,8 +554,8 @@ async def analyze_txt(
|
|||||||
@router.post("/analyze/word")
|
@router.post("/analyze/word")
|
||||||
async def analyze_word(
|
async def analyze_word(
|
||||||
file: Optional[UploadFile] = File(None),
|
file: Optional[UploadFile] = File(None),
|
||||||
doc_id: Optional[str] = Query(None, description="文档ID(从数据库读取)"),
|
doc_id: Optional[str] = Form(None, description="文档ID(从数据库读取)"),
|
||||||
user_hint: str = Query("", description="用户提示词,如'请提取表格数据'"),
|
user_hint: str = Form("", description="用户提示词,如'请提取表格数据'"),
|
||||||
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
analysis_type: str = Query("structured", description="分析类型: structured, charts")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -536,8 +592,9 @@ async def analyze_word(
|
|||||||
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
|
raise HTTPException(status_code=400, detail=f"文档类型不是 Word: {file_ext}")
|
||||||
|
|
||||||
# 使用数据库中的 content 进行分析
|
# 使用数据库中的 content 进行分析
|
||||||
content = doc.get("content", "")
|
content = doc.get("content", "") or ""
|
||||||
tables = doc.get("structured_data", {}).get("tables", [])
|
structured_data = doc.get("structured_data") or {}
|
||||||
|
tables = structured_data.get("tables", [])
|
||||||
|
|
||||||
# 调用 AI 分析服务,传入数据库内容
|
# 调用 AI 分析服务,传入数据库内容
|
||||||
if analysis_type == "charts":
|
if analysis_type == "charts":
|
||||||
|
|||||||
208
backend/app/api/endpoints/pdf_converter.py
Normal file
208
backend/app/api/endpoints/pdf_converter.py
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
"""
|
||||||
|
PDF 转换 API 接口
|
||||||
|
|
||||||
|
提供将 Word、Excel、Txt、Markdown 转换为 PDF 的功能
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
|
||||||
|
from fastapi.responses import StreamingResponse
|
||||||
|
|
||||||
|
from app.services.pdf_converter_service import pdf_converter_service
|
||||||
|
from app.services.file_service import file_service
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/pdf", tags=["PDF转换"])
|
||||||
|
|
||||||
|
# 临时存储转换后的 PDF(key: download_id, value: (pdf_content, original_filename))
|
||||||
|
_pdf_cache: dict = {}
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== 请求/响应模型 ====================
|
||||||
|
|
||||||
|
class ConvertResponse:
    """Conversion response: a plain holder for a success flag, a message and a filename."""

    def __init__(self, success: bool, message: str = "", filename: str = ""):
        # Store the constructor arguments unchanged, in signature order.
        self.success, self.message, self.filename = success, message, filename
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== 接口 ====================
|
||||||
|
|
||||||
|
@router.post("/convert")
async def convert_to_pdf(
    file: UploadFile = File(...),
):
    """
    Convert a single uploaded file to PDF and stream the result back.

    The source format is taken from the extension of the uploaded file name and
    must be one of ``pdf_converter_service.supported_formats``.

    Args:
        file: The uploaded source document.

    Returns:
        StreamingResponse: the PDF bytes (``application/pdf``), offered as an
        attachment named ``converted.pdf``.

    Raises:
        HTTPException: 400 for an unsupported extension or an empty upload;
            500 when the converter reports an error or an unexpected exception occurs.
    """
    try:
        filename = file.filename or "document"

        # Split the name once: the stem is handed to the converter, the
        # lower-cased extension selects the source format.
        if '.' in filename:
            stem, file_ext = filename.rsplit('.', 1)
            file_ext = file_ext.lower()
        else:
            stem, file_ext = filename, ''

        if file_ext not in pdf_converter_service.supported_formats:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的格式: {file_ext},支持的格式: {', '.join(pdf_converter_service.supported_formats)}"
            )

        # Read the whole upload; an empty body cannot be converted.
        content = await file.read()
        if not content:
            raise HTTPException(status_code=400, detail="文件内容为空")

        logger.info(f"开始转换文件: {filename} ({file_ext})")

        # The service returns a (pdf_bytes, error_message) pair.
        pdf_content, error = await pdf_converter_service.convert_to_pdf(
            file_content=content,
            source_format=file_ext,
            filename=stem
        )

        if error:
            raise HTTPException(status_code=500, detail=error)

        # Return the PDF directly as a download.
        return StreamingResponse(
            iter([pdf_content]),
            media_type="application/pdf",
            headers={
                "Content-Disposition": "attachment; filename*=UTF-8''converted.pdf"
            }
        )

    except HTTPException:
        # Deliberate HTTP errors raised above pass through unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"PDF转换失败: {e}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/download/{download_id}")
async def download_pdf(download_id: str):
    """
    Download a previously converted PDF by its download ID.

    The entry is removed from ``_pdf_cache`` when it is served, so each ID
    works once. The original note says this endpoint exists so that download
    managers (IDM) can intercept the download.

    Args:
        download_id: Key of the cached ``(pdf_content, original_filename)`` entry.

    Returns:
        StreamingResponse: the PDF bytes as an attachment named ``<filename>.pdf``.

    Raises:
        HTTPException: 404 when the ID is unknown or was already used.
    """
    import urllib.parse

    # Look up and remove in a single step, so there is no gap between the
    # membership test and the pop.
    cached = _pdf_cache.pop(download_id, None)
    if cached is None:
        raise HTTPException(status_code=404, detail="下载链接已过期或不存在")
    pdf_content, filename = cached

    # RFC 5987 percent-encoding so non-ASCII (e.g. Chinese) names survive the
    # header; safe="" also escapes "/", which quote() leaves alone by default.
    encoded_filename = urllib.parse.quote(f"{filename}.pdf", safe="")

    return StreamingResponse(
        iter([pdf_content]),
        media_type="application/pdf",
        headers={
            "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/formats")
async def get_supported_formats():
    """
    Report which source file formats can be converted to PDF.

    Returns:
        dict: ``{"success": True, "formats": ...}`` where ``formats`` is whatever
        ``pdf_converter_service.get_supported_formats()`` returns.
    """
    supported = pdf_converter_service.get_supported_formats()
    return {"success": True, "formats": supported}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/convert/batch")
async def batch_convert_to_pdf(
    files: list[UploadFile] = File(...),
):
    """
    Convert several uploaded files to PDF and return them in one ZIP archive.

    Files that cannot be converted (unsupported extension, empty content,
    converter error, unexpected exception) are skipped; the request only fails
    when no file at all could be converted.

    Args:
        files: The uploaded source documents.

    Returns:
        StreamingResponse: a ZIP archive (``application/zip``) containing one
        PDF per successfully converted file.

    Raises:
        HTTPException: 400 when nothing could be converted (the detail lists the
            per-file errors); 500 on an unexpected failure.
    """
    import io
    import zipfile

    try:
        results = []
        errors = []

        for file in files:
            # Resolve the display name before the try block so the error
            # message never has to format a None filename.
            filename = file.filename or "document"
            try:
                file_ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''

                if file_ext not in pdf_converter_service.supported_formats:
                    errors.append(f"{filename}: 不支持的格式")
                    continue

                content = await file.read()
                # Same rule as the single-file endpoint: an empty upload is an error.
                if not content:
                    errors.append(f"{filename}: 文件内容为空")
                    continue

                pdf_content, error = await pdf_converter_service.convert_to_pdf(
                    file_content=content,
                    source_format=file_ext,
                    filename=filename.rsplit('.', 1)[0] if '.' in filename else filename
                )

                if error:
                    errors.append(f"{filename}: {error}")
                else:
                    results.append((filename, pdf_content))

            except Exception as e:
                # One bad file must not abort the whole batch.
                errors.append(f"{filename}: {str(e)}")

        if not results:
            raise HTTPException(
                status_code=400,
                detail=f"没有可转换的文件。错误: {'; '.join(errors)}"
            )

        if errors:
            # The response body is a ZIP and cannot carry these, so log them.
            logger.warning(f"批量PDF转换部分失败: {'; '.join(errors)}")

        # Build the ZIP archive in memory.
        zip_buffer = io.BytesIO()
        used_names = set()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for original_name, pdf_content in results:
                zip_file.writestr(_zip_entry_name(original_name, used_names), pdf_content)

        return StreamingResponse(
            iter([zip_buffer.getvalue()]),
            media_type="application/zip",
            headers={
                "Content-Disposition": "attachment; filename*=UTF-8''converted_pdfs.zip"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"批量PDF转换失败: {e}")
        raise HTTPException(status_code=500, detail=f"批量转换失败: {str(e)}")


def _zip_entry_name(original_name: str, used_names: set) -> str:
    """Return a unique, directory-free ``<stem>.pdf`` entry name and record it in ``used_names``."""
    # Upload names are client-controlled; keep only the last path component so
    # an entry can never point outside the extraction directory.
    base = original_name.replace("\\", "/").rsplit("/", 1)[-1]
    stem = (base.rsplit('.', 1)[0] if '.' in base else base) or "document"

    # Uploads such as "a.docx" and "a.xlsx" share a stem; number the later ones
    # instead of writing duplicate entries into the archive.
    candidate = f"{stem}.pdf"
    counter = 1
    while candidate in used_names:
        candidate = f"{stem}_{counter}.pdf"
        counter += 1
    used_names.add(candidate)
    return candidate
|
||||||
27
backend/app/celery_app.py
Normal file
27
backend/app/celery_app.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# ============================================================
# Celery application configuration
# ============================================================
import os

from celery import Celery

# Environment variables win; otherwise fall back to the defaults below.
CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379/1")
CELERY_RESULT_BACKEND = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379/2")

celery_app = Celery("filesread", broker=CELERY_BROKER_URL, backend=CELERY_RESULT_BACKEND)

celery_app.conf.update(
    # Serialization: JSON only, for tasks and results.
    task_serializer="json",
    accept_content=["json"],
    result_serializer="json",
    # Time handling.
    timezone="Asia/Shanghai",
    enable_utc=True,
    # Task execution.
    task_track_started=True,
    task_time_limit=3600,  # 3600 s = 1 hour timeout
    worker_prefetch_multiplier=1,
)
|
||||||
@@ -91,11 +91,15 @@ class DocxParser(BaseParser):
|
|||||||
table_rows.append(row_data)
|
table_rows.append(row_data)
|
||||||
|
|
||||||
if table_rows:
|
if table_rows:
|
||||||
|
# 第一行作为表头,其余行作为数据
|
||||||
|
headers = table_rows[0] if table_rows else []
|
||||||
|
data_rows = table_rows[1:] if len(table_rows) > 1 else []
|
||||||
tables_data.append({
|
tables_data.append({
|
||||||
"table_index": i,
|
"table_index": i,
|
||||||
"rows": table_rows,
|
"headers": headers, # 添加 headers 字段
|
||||||
"row_count": len(table_rows),
|
"rows": data_rows, # 数据行(不含表头)
|
||||||
"column_count": len(table_rows[0]) if table_rows else 0
|
"row_count": len(data_rows),
|
||||||
|
"column_count": len(headers) if headers else 0
|
||||||
})
|
})
|
||||||
|
|
||||||
# 提取图片/嵌入式对象信息
|
# 提取图片/嵌入式对象信息
|
||||||
|
|||||||
@@ -34,8 +34,8 @@ def setup_logging():
|
|||||||
# 根日志配置
|
# 根日志配置
|
||||||
log_level = logging.DEBUG if settings.DEBUG else logging.INFO
|
log_level = logging.DEBUG if settings.DEBUG else logging.INFO
|
||||||
|
|
||||||
# 日志目录
|
# 日志目录 (使用 settings.BASE_DIR 确保跨平台一致)
|
||||||
log_dir = Path("data/logs")
|
log_dir = settings.BASE_DIR / "data" / "logs"
|
||||||
log_dir.mkdir(parents=True, exist_ok=True)
|
log_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# 日志文件路径
|
# 日志文件路径
|
||||||
|
|||||||
@@ -223,6 +223,177 @@ class ExcelAIService:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def analyze_excel_file_from_path(
    self,
    file_path: str,
    filename: str,
    user_prompt: str = "",
    analysis_type: str = "general",
    parse_options: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Analyze an Excel file that already exists on disk (used for documents
    loaded from the database rather than freshly uploaded).

    Args:
        file_path: Path of the Excel file to parse.
        filename: Original file name (accepted for interface parity; not
            used by this method).
        user_prompt: Custom prompt. When non-blank, the template-based
            analysis is used; otherwise the generic analysis runs.
        analysis_type: Analysis type forwarded to the generic analysis.
        parse_options: Extra keyword arguments forwarded to the parser.

    Returns:
        Dict[str, Any]: On success ``{"success": True, "excel": {...},
        "analysis": <LLM result>}``. Note that ``success`` only means the
        LLM call returned without raising; callers should also inspect
        ``analysis["success"]``. On failure ``{"success": False,
        "error": ..., "analysis": None}`` (plus ``excel`` when parsing
        had already succeeded).
    """
    # 1. Parse the Excel file.
    excel_data = None
    parse_result_metadata = None
    try:
        parse_result = self.parser.parse(file_path, **(parse_options or {}))

        if not parse_result.success:
            return {
                "success": False,
                "error": parse_result.error,
                "analysis": None
            }

        excel_data = parse_result.data
        parse_result_metadata = parse_result.metadata
        logger.info(f"Excel 解析成功: {parse_result_metadata}")

    except Exception as e:
        logger.error(f"Excel 解析失败: {str(e)}")
        return {
            "success": False,
            "error": f"Excel 解析失败: {str(e)}",
            "analysis": None
        }

    # 2. Run the LLM analysis.
    try:
        if user_prompt and user_prompt.strip():
            llm_result = await self.llm_service.analyze_with_template(
                excel_data,
                user_prompt
            )
        else:
            llm_result = await self.llm_service.analyze_excel_data(
                excel_data,
                user_prompt,
                analysis_type
            )

        # .get() so that a result without a "success" key cannot raise here
        # and be misreported as an analysis failure by the handler below.
        logger.info(f"AI 分析完成: {llm_result.get('success')}")

        return {
            "success": True,
            "excel": {
                "data": excel_data,
                "metadata": parse_result_metadata,
                "saved_path": file_path
            },
            "analysis": llm_result
        }

    except Exception as e:
        logger.error(f"AI 分析失败: {str(e)}")
        return {
            "success": False,
            "error": f"AI 分析失败: {str(e)}",
            # Same "excel" shape as the success branch so callers can read
            # saved_path regardless of the outcome.
            "excel": {
                "data": excel_data,
                "metadata": parse_result_metadata,
                "saved_path": file_path
            },
            "analysis": None
        }
|
||||||
|
|
||||||
|
async def batch_analyze_sheets_from_path(
    self,
    file_path: str,
    filename: str,
    user_prompt: str = "",
    analysis_type: str = "general"
) -> Dict[str, Any]:
    """
    Analyze every worksheet of an Excel file that already exists on disk
    (used for documents loaded from the database).

    Args:
        file_path: Path of the Excel file to parse.
        filename: Original file name (accepted for interface parity; not
            used by this method).
        user_prompt: Custom prompt. When non-blank, the template-based
            analysis is used for each sheet; otherwise the generic one.
        analysis_type: Analysis type forwarded to the generic analysis.

    Returns:
        Dict[str, Any]: ``success`` is True only when no sheet produced an
        error. ``analysis`` holds the per-sheet LLM results, the total
        sheet count, the number of successfully analyzed sheets and a
        ``{sheet_name: error}`` mapping. If parsing itself fails, only
        ``success``/``error``/``analysis`` (None) are returned.
    """
    # 1. Parse all worksheets.
    try:
        parse_result = self.parser.parse_all_sheets(file_path)

        if not parse_result.success:
            return {
                "success": False,
                "error": parse_result.error,
                "analysis": None
            }

        sheets_data = parse_result.data.get("sheets", {})
        logger.info(f"Excel 解析成功,共 {len(sheets_data)} 个工作表")

    except Exception as e:
        logger.error(f"Excel 解析失败: {str(e)}")
        return {
            "success": False,
            "error": f"Excel 解析失败: {str(e)}",
            "analysis": None
        }

    # 2. Analyze each worksheet; one failing sheet must not abort the rest.
    sheet_analyses = {}
    errors = {}

    for sheet_name, sheet_data in sheets_data.items():
        try:
            if user_prompt and user_prompt.strip():
                llm_result = await self.llm_service.analyze_with_template(
                    sheet_data,
                    user_prompt
                )
            else:
                llm_result = await self.llm_service.analyze_excel_data(
                    sheet_data,
                    user_prompt,
                    analysis_type
                )

            sheet_analyses[sheet_name] = llm_result

            # A result without a "success" key is treated as a failure
            # instead of raising KeyError.
            if not llm_result.get("success"):
                errors[sheet_name] = llm_result.get("error", "未知错误")

            logger.info(f"工作表 '{sheet_name}' 分析完成")

        except Exception as e:
            logger.error(f"工作表 '{sheet_name}' 分析失败: {str(e)}")
            errors[sheet_name] = str(e)

    # 3. Assemble the combined result.
    return {
        "success": len(errors) == 0,
        "excel": {
            "sheets": sheets_data,
            "metadata": parse_result.metadata,
            "saved_path": file_path
        },
        "analysis": {
            "sheets": sheet_analyses,
            "total_sheets": len(sheets_data),
            # Sheets that raised are recorded in `errors` but never reach
            # `sheet_analyses`, so the count must be based on the total
            # number of sheets (each sheet has at most one error entry).
            "successful": len(sheets_data) - len(errors),
            "errors": errors
        }
    }
|
||||||
|
|
||||||
def get_supported_analysis_types(self) -> List[str]:
|
def get_supported_analysis_types(self) -> List[str]:
|
||||||
"""获取支持的分析类型"""
|
"""获取支持的分析类型"""
|
||||||
return [
|
return [
|
||||||
|
|||||||
@@ -54,15 +54,21 @@ class LLMService:
|
|||||||
# 添加其他参数
|
# 添加其他参数
|
||||||
payload.update(kwargs)
|
payload.update(kwargs)
|
||||||
|
|
||||||
|
import time
|
||||||
|
_start_time = time.time()
|
||||||
|
logger.info(f"🤖 [LLM] 正在调用 DeepSeek API... 模型: {self.model_name}")
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{self.base_url}/chat/completions",
|
f"{self.base_url}/chat/completions",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=payload
|
json=payload
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.json()
|
result = response.json()
|
||||||
|
_elapsed = time.time() - _start_time
|
||||||
|
logger.info(f"✅ [LLM] DeepSeek API 响应成功 | 模型: {self.model_name} | 耗时: {_elapsed:.2f}s | Token: {result.get('usage', {}).get('total_tokens', 'N/A')}")
|
||||||
|
return result
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
error_detail = e.response.text
|
error_detail = e.response.text
|
||||||
@@ -78,7 +84,7 @@ class LLMService:
|
|||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LLM API 调用异常: {str(e)}")
|
logger.error(f"LLM API 调用异常: {repr(e)} - {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
||||||
@@ -133,6 +139,9 @@ class LLMService:
|
|||||||
|
|
||||||
payload.update(kwargs)
|
payload.update(kwargs)
|
||||||
|
|
||||||
|
import time
|
||||||
|
_start_time = time.time()
|
||||||
|
logger.info(f"🤖 [LLM] 正在调用 DeepSeek API (流式) | 模型: {self.model_name}")
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
@@ -141,10 +150,13 @@ class LLMService:
|
|||||||
headers=headers,
|
headers=headers,
|
||||||
json=payload
|
json=payload
|
||||||
) as response:
|
) as response:
|
||||||
|
_elapsed = time.time() - _start_time
|
||||||
|
logger.info(f"✅ [LLM] DeepSeek API 流式响应开始 | 模型: {self.model_name} | 耗时: {_elapsed:.2f}s")
|
||||||
async for line in response.aiter_lines():
|
async for line in response.aiter_lines():
|
||||||
if line.startswith("data: "):
|
if line.startswith("data: "):
|
||||||
data = line[6:] # Remove "data: " prefix
|
data = line[6:] # Remove "data: " prefix
|
||||||
if data == "[DONE]":
|
if data == "[DONE]":
|
||||||
|
logger.info(f"✅ [LLM] DeepSeek API 流式响应完成")
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
import json as json_module
|
import json as json_module
|
||||||
|
|||||||
403
backend/app/services/pdf_converter_service.py
Normal file
403
backend/app/services/pdf_converter_service.py
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
"""
|
||||||
|
PDF 转换服务
|
||||||
|
|
||||||
|
支持将 Word(docx)、Excel(xlsx)、Txt、Markdown(md) 格式转换为 PDF
|
||||||
|
策略:所有格式先转为 Markdown,再通过 Markdown 转 PDF
|
||||||
|
"""
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
import platform
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from reportlab.lib.pagesizes import A4
|
||||||
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||||
|
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY
|
||||||
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
||||||
|
from reportlab.pdfbase import pdfmetrics
|
||||||
|
from reportlab.pdfbase.ttfonts import TTFont
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PDFConverterService:
    """PDF conversion service.

    Converts Word (docx), Excel (xlsx), plain-text (txt) and Markdown (md)
    content to PDF. Every source format is first rendered to Markdown, and
    that Markdown is then laid out as a PDF with ReportLab.
    """

    # Page margin (in points) applied on all four sides of the PDF.
    _PAGE_MARGIN = 72

    def __init__(self):
        """Initialise state, then register the font and paragraph styles."""
        self.supported_formats = ["docx", "xlsx", "txt", "md"]
        self._font_name = None
        self._styles = None
        self._page_width = None
        self._page_height = None
        self._setup_fonts()

    def _setup_fonts(self):
        """Register a Chinese-capable font (if one is found) and build styles.

        Falls back to Helvetica when no font file is found or registration
        fails. Any other initialisation error is logged and re-raised.
        """
        try:
            self._page_width, self._page_height = A4

            # Look for a Chinese font on this machine.
            font_path = self._find_chinese_font()
            if font_path:
                try:
                    pdfmetrics.registerFont(TTFont('ChineseFont', font_path))
                    pdfmetrics.registerFontFamily('ChineseFont', normal='ChineseFont')
                    self._font_name = 'ChineseFont'
                    logger.info(f"成功注册中文字体: {font_path}")
                except Exception as e:
                    logger.warning(f"字体注册失败: {e}, 使用Helvetica")
                    self._font_name = 'Helvetica'
            else:
                self._font_name = 'Helvetica'
                logger.warning("未找到中文字体,使用 Helvetica(不支持中文)")

            # Paragraph styles used by the Markdown renderer.
            styles = getSampleStyleSheet()

            styles.add(ParagraphStyle(
                name='ChineseTitle',
                fontName=self._font_name,
                fontSize=16,
                leading=22,
                alignment=TA_CENTER,
                spaceAfter=12,
            ))

            styles.add(ParagraphStyle(
                name='ChineseHeading',
                fontName=self._font_name,
                fontSize=14,
                leading=20,
                spaceBefore=10,
                spaceAfter=8,
            ))

            styles.add(ParagraphStyle(
                name='ChineseBody',
                fontName=self._font_name,
                fontSize=10,
                leading=14,
                alignment=TA_JUSTIFY,
                spaceAfter=6,
            ))

            styles.add(ParagraphStyle(
                name='ChineseCode',
                fontName='Courier',
                fontSize=9,
                leading=12,
            ))

            self._styles = styles
            logger.info("PDF服务初始化完成")

        except Exception as e:
            logger.error(f"PDF服务初始化失败: {e}")
            raise

    def _find_chinese_font(self) -> "str | None":
        """Return the first existing candidate font path for this OS, or None."""
        system = platform.system()

        if system == "Windows":
            fonts = [
                "C:/Windows/Fonts/simhei.ttf",
                "C:/Windows/Fonts/simsun.ttc",
                "C:/Windows/Fonts/msyh.ttc",
                "C:/Windows/Fonts/simsun.ttf",
            ]
        elif system == "Darwin":
            fonts = [
                "/System/Library/Fonts/STHeiti Light.ttc",
                "/System/Library/Fonts/PingFang.ttc",
                "/Library/Fonts/Arial Unicode.ttf",
            ]
        else:
            fonts = [
                "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
                "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            ]

        for font in fonts:
            if Path(font).exists():
                return font
        return None

    def _sanitize_text(self, text: str) -> str:
        """Return ``text`` without NUL characters ("" for empty/None input)."""
        if not text:
            return ""
        return text.replace('\x00', '')

    async def convert_to_pdf(
        self,
        file_content: bytes,
        source_format: str,
        filename: str = "document"
    ) -> Tuple[bytes, str]:
        """Convert a document to PDF.

        Args:
            file_content: Raw bytes of the source document.
            source_format: One of ``supported_formats`` (case-insensitive).
            filename: Name used as the document title.

        Returns:
            ``(pdf_bytes, "")`` on success, ``(b"", error_message)`` on failure.
        """
        try:
            if source_format.lower() not in self.supported_formats:
                return b"", f"不支持的格式: {source_format}"

            # Step 1: render the source as Markdown.
            markdown_content, error = await self._convert_to_markdown(file_content, source_format, filename)
            if error:
                return b"", error

            # Step 2: lay the Markdown out as a PDF.
            return await self._convert_markdown_to_pdf(markdown_content, filename)

        except Exception as e:
            logger.error(f"PDF转换失败: {e}")
            import traceback
            logger.error(f"详细错误: {traceback.format_exc()}")
            return b"", f"转换失败: {str(e)}"

    async def _convert_to_markdown(
        self,
        file_content: bytes,
        source_format: str,
        filename: str
    ) -> Tuple[str, str]:
        """Dispatch to the per-format Markdown converter.

        Returns ``(markdown, "")`` on success or ``("", error_message)``.
        """
        converters = {
            "docx": self._convert_docx_to_markdown,
            "xlsx": self._convert_xlsx_to_markdown,
            "txt": self._convert_txt_to_markdown,
            "md": self._convert_md_to_markdown,
        }
        return await converters[source_format.lower()](file_content, filename)

    async def _convert_txt_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Decode plain text and prefix it with a ``# filename`` heading."""
        try:
            text = self._decode_content(file_content)
            text = self._sanitize_text(text)
            return f"# {filename}\n\n{text}", ""
        except Exception as e:
            logger.error(f"Txt转Markdown失败: {e}")
            return "", f"文本文件处理失败: {str(e)}"

    async def _convert_md_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Decode Markdown, strip NULs and prefix a ``# filename`` heading."""
        try:
            content = self._decode_content(file_content)
            content = self._sanitize_text(content)
            return f"# {filename}\n\n{content}", ""
        except Exception as e:
            logger.error(f"Markdown处理失败: {e}")
            return "", f"Markdown处理失败: {str(e)}"

    async def _convert_docx_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Extract the text of a DOCX file as Markdown lines.

        The archive is read with ``zipfile`` and ``word/document.xml`` is
        reduced to text with regular expressions (avoiding python-docx's
        strict validation). Table structure is not preserved; at most 500
        non-empty text lines are emitted.
        """
        try:
            import html
            import re
            import zipfile

            lines = [f"# {filename}", ""]

            try:
                with zipfile.ZipFile(io.BytesIO(file_content), 'r') as zf:
                    # Main document part.
                    xml_content = zf.read('word/document.xml').decode('utf-8')
            except zipfile.BadZipFile:
                return "", "文件不是有效的 DOCX 格式"
            except KeyError:
                return "", "DOCX 文件损坏:找不到 document.xml"

            # Turn line breaks and paragraph ends into newlines, then drop
            # every remaining tag to leave plain text.
            xml_content = re.sub(r'<w:br[^>]*>', '\n', xml_content)
            xml_content = re.sub(r'</w:p>', '\n', xml_content)
            xml_content = re.sub(r'<[^>]+>', '', xml_content)
            xml_content = re.sub(r'\n\s*\n', '\n\n', xml_content)

            # Decode entities in a single pass so that text such as
            # "&amp;lt;" becomes "&lt;" and is not decoded a second time.
            xml_content = html.unescape(xml_content)

            stripped = (candidate.strip() for candidate in xml_content.split('\n'))
            lines_text = [text for text in stripped if text]

            # Cap the output at 500 lines.
            lines.extend(lines_text[:500])

            return '\n'.join(lines), ""

        except Exception as e:
            logger.error(f"Word转Markdown失败: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return "", f"Word文档处理失败: {str(e)}"

    async def _convert_xlsx_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Render up to 10 sheets (50 rows each) of a workbook as Markdown tables."""
        try:
            import openpyxl

            wb = openpyxl.load_workbook(io.BytesIO(file_content))
            lines = [f"# {filename} - Excel数据", ""]

            for sheet_name in wb.sheetnames[:10]:
                ws = wb[sheet_name]
                lines.append(f"## 工作表: {sheet_name}")
                lines.append("")

                header_written = False
                for row in ws.iter_rows(max_row=50, values_only=True):
                    # Newlines inside a cell would split the Markdown row
                    # across several lines, so flatten them to spaces.
                    row_data = [
                        "" if cell is None else str(cell).replace('\r', ' ').replace('\n', ' ')
                        for cell in row
                    ]
                    if not any(row_data):
                        continue
                    lines.append("| " + " | ".join(row_data) + " |")
                    # The separator follows the first non-empty row, which
                    # acts as the table header.
                    if not header_written:
                        lines.append("| " + " | ".join(["---"] * len(row_data)) + " |")
                        header_written = True

                lines.append("")

            return "\n".join(lines), ""

        except Exception as e:
            logger.error(f"Excel转Markdown失败: {e}")
            return "", f"Excel处理失败: {str(e)}"

    @staticmethod
    def _parse_table_row(line: str) -> List[str]:
        """Split a Markdown table line (starting with ``|``) into stripped cells."""
        cells = line.split('|')[1:]
        # Only drop the last piece when it is the empty string after a
        # trailing pipe; otherwise it is a real cell.
        if line.endswith('|'):
            cells = cells[:-1]
        return [cell.strip() for cell in cells]

    @staticmethod
    def _is_blank_or_separator_row(cells: List[str]) -> bool:
        """True when every cell is empty or made only of ``-``, ``:`` and spaces."""
        return not any(cell.strip('-: ') for cell in cells)

    def _build_table(self, rows: List[List[str]]):
        """Build a styled ReportLab ``Table`` from parsed Markdown rows."""
        column_count = max(len(row) for row in rows)
        # Pad ragged rows so every row has the same number of cells.
        padded = [row + [""] * (column_count - len(row)) for row in rows]
        # Keep the 100pt column width unless the table would exceed the
        # printable width of the page.
        usable_width = self._page_width - 2 * self._PAGE_MARGIN
        column_width = min(100, usable_width / column_count)

        table = Table(padded, colWidths=[column_width] * column_count)
        table.setStyle(TableStyle([
            ('FONTNAME', (0, 0), (-1, -1), self._font_name),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('GRID', (0, 0), (-1, -1), 0.5, '#999999'),
            ('BACKGROUND', (0, 0), (-1, 0), '#4472C4'),
            ('TEXTCOLOR', (0, 0), (-1, 0), '#FFFFFF'),
        ]))
        return table

    async def _convert_markdown_to_pdf(self, markdown_content: str, filename: str) -> Tuple[bytes, str]:
        """Render a small Markdown subset to PDF.

        Supported constructs: ``#`` to ``####`` headings, ``-``/``*``
        bullets, fenced code blocks, pipe tables and plain paragraphs.

        Returns:
            ``(pdf_bytes, "")`` on success, ``(b"", error_message)`` on failure.
        """
        try:
            from xml.sax.saxutils import escape

            logger.info(f"Markdown转PDF开始 - filename={filename}, 字体={self._font_name}")
            logger.info(f"styles['ChineseTitle'].fontName={self._styles['ChineseTitle'].fontName}")

            def make_paragraph(raw_text, style_name):
                # Paragraph parses its text as XML-like markup, so literal
                # '<', '>' and '&' from user content must be escaped.
                return Paragraph(
                    text=escape(self._sanitize_text(raw_text)),
                    style=self._styles[style_name]
                )

            buffer = io.BytesIO()
            story = []

            safe_filename = self._sanitize_text(filename)
            logger.info(f"safe_filename={repr(safe_filename[:50])}")

            story.append(make_paragraph(safe_filename, 'ChineseTitle'))
            story.append(Spacer(1, 12))

            md_lines = [raw.strip() for raw in markdown_content.split('\n')]
            heading_prefixes = ('# ', '## ', '### ', '#### ')
            in_code = False
            idx = 0

            while idx < len(md_lines):
                line = md_lines[idx]
                idx += 1

                if line.startswith('```'):
                    in_code = not in_code
                    story.append(Spacer(1, 6))
                    continue

                if in_code:
                    story.append(make_paragraph(line, 'ChineseCode'))
                    continue

                if not line:
                    story.append(Spacer(1, 6))
                    continue

                # Headings: every level uses the same heading style.
                prefix = next((p for p in heading_prefixes if line.startswith(p)), None)
                if prefix:
                    story.append(make_paragraph(line[len(prefix):], 'ChineseHeading'))
                elif line.startswith('- ') or line.startswith('* '):
                    story.append(make_paragraph("• " + line[2:], 'ChineseBody'))
                elif line.startswith('|'):
                    # Gather the whole run of consecutive table lines
                    # (starting with the current one) into a single table.
                    idx -= 1
                    table_rows = []
                    while idx < len(md_lines) and md_lines[idx].startswith('|'):
                        cells = self._parse_table_row(md_lines[idx])
                        idx += 1
                        # Skip "| --- | --- |" separators and empty rows.
                        if self._is_blank_or_separator_row(cells):
                            continue
                        table_rows.append([self._sanitize_text(cell) for cell in cells])

                    if table_rows:
                        story.append(self._build_table(table_rows))
                        story.append(Spacer(1, 6))
                else:
                    story.append(make_paragraph(line, 'ChineseBody'))

            logger.info(f"准备构建PDF,story长度={len(story)}")

            pdf_doc = SimpleDocTemplate(
                buffer,
                pagesize=(self._page_width, self._page_height),
                rightMargin=self._PAGE_MARGIN,
                leftMargin=self._PAGE_MARGIN,
                topMargin=self._PAGE_MARGIN,
                bottomMargin=self._PAGE_MARGIN
            )
            logger.info("调用pdf_doc.build()")
            pdf_doc.build(story)
            logger.info("pdf_doc.build()完成")

            result = buffer.getvalue()
            buffer.close()
            return result, ""

        except Exception as e:
            logger.error(f"Markdown转PDF失败: {e}")
            import traceback
            logger.error(f"详细错误: {traceback.format_exc()}")
            return b"", f"Markdown转PDF失败: {str(e)}"

    def _decode_content(self, file_content: bytes) -> str:
        """Decode bytes by trying UTF-8 (BOM stripped), GBK-family, then latin-1."""
        # 'utf-8-sig' also decodes BOM-less UTF-8, and drops a leading BOM
        # instead of leaving U+FEFF at the start of the text.
        encodings = ['utf-8-sig', 'gbk', 'gb2312', 'gb18030', 'latin-1']
        for enc in encodings:
            try:
                return file_content.decode(enc)
            except (UnicodeDecodeError, LookupError):
                continue
        # Defensive fallback; latin-1 accepts any byte sequence, so this is
        # only reached if that codec is unavailable.
        return file_content.decode('utf-8', errors='replace')

    def get_supported_formats(self) -> List[str]:
        """Return the list of supported source formats."""
        return self.supported_formats
|
||||||
|
|
||||||
|
|
||||||
|
# 全局单例
|
||||||
|
pdf_converter_service = PDFConverterService()
|
||||||
@@ -669,7 +669,7 @@ class RAGService:
|
|||||||
# 按融合分数降序排序
|
# 按融合分数降序排序
|
||||||
fused_results.sort(key=lambda x: x["score"], reverse=True)
|
fused_results.sort(key=lambda x: x["score"], reverse=True)
|
||||||
|
|
||||||
logger.debug(f"混合融合: {len(fused_results)} 个文档, 向量:{len(vector_results)}, BM25:{len(bm25_results)}")
|
logger.info(f"RRF 混合融合: {len(fused_results)} 个文档参与融合, 向量检索命中:{len(vector_results)}, BM25命中:{len(bm25_results)}")
|
||||||
|
|
||||||
return fused_results[:top_k]
|
return fused_results[:top_k]
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ class TxtAIService:
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.parser = TxtParser()
|
self.parser = TxtParser()
|
||||||
|
self.llm = llm_service
|
||||||
|
|
||||||
async def analyze_txt_with_ai(
|
async def analyze_txt_with_ai(
|
||||||
self,
|
self,
|
||||||
@@ -114,7 +115,7 @@ class TxtAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content_text = self.llm.extract_message_content(response)
|
content_text = self.llm.extract_message_content(response)
|
||||||
@@ -220,7 +221,7 @@ class TxtAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content_text = self.llm.extract_message_content(response)
|
content_text = self.llm.extract_message_content(response)
|
||||||
|
|||||||
@@ -53,7 +53,11 @@ class VisualizationService:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 转换为 DataFrame
|
# 转换为 DataFrame
|
||||||
df = pd.DataFrame(rows, columns=columns)
|
# 过滤掉行数与列数不匹配的数据
|
||||||
|
valid_rows = [row for row in rows if len(row) == len(columns)]
|
||||||
|
if len(valid_rows) < len(rows):
|
||||||
|
logger.warning(f"过滤了 {len(rows) - len(valid_rows)} 行无效数据(列数不匹配)")
|
||||||
|
df = pd.DataFrame(valid_rows, columns=columns)
|
||||||
|
|
||||||
# 根据列类型分类
|
# 根据列类型分类
|
||||||
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
||||||
@@ -141,18 +145,18 @@ class VisualizationService:
|
|||||||
charts = {}
|
charts = {}
|
||||||
|
|
||||||
# 1. 数值型列的直方图
|
# 1. 数值型列的直方图
|
||||||
charts["histograms"] = []
|
charts["numeric_charts"] = []
|
||||||
for col in numeric_columns[:5]: # 限制最多 5 个数值列
|
for col in numeric_columns[:5]: # 限制最多 5 个数值列
|
||||||
chart_data = self._create_histogram(df[col], col)
|
chart_data = self._create_histogram(df[col], col)
|
||||||
if chart_data:
|
if chart_data:
|
||||||
charts["histograms"].append(chart_data)
|
charts["numeric_charts"].append(chart_data)
|
||||||
|
|
||||||
# 2. 分类型列的条形图
|
# 2. 分类型列的条形图
|
||||||
charts["bar_charts"] = []
|
charts["categorical_charts"] = []
|
||||||
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
|
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
|
||||||
chart_data = self._create_bar_chart(df[col], col)
|
chart_data = self._create_bar_chart(df[col], col)
|
||||||
if chart_data:
|
if chart_data:
|
||||||
charts["bar_charts"].append(chart_data)
|
charts["categorical_charts"].append(chart_data)
|
||||||
|
|
||||||
# 3. 数值型列的箱线图
|
# 3. 数值型列的箱线图
|
||||||
charts["box_plots"] = []
|
charts["box_plots"] = []
|
||||||
|
|||||||
@@ -184,7 +184,7 @@ class WordAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content = self.llm.extract_message_content(response)
|
content = self.llm.extract_message_content(response)
|
||||||
@@ -276,7 +276,7 @@ class WordAIService:
|
|||||||
response = await self.llm.chat(
|
response = await self.llm.chat(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=50000
|
max_tokens=8000
|
||||||
)
|
)
|
||||||
|
|
||||||
content = self.llm.extract_message_content(response)
|
content = self.llm.extract_message_content(response)
|
||||||
@@ -849,10 +849,12 @@ class WordAIService:
|
|||||||
|
|
||||||
# 提取可用于图表的数据
|
# 提取可用于图表的数据
|
||||||
chart_data = None
|
chart_data = None
|
||||||
|
logger.info(f"准备提取图表数据,structured_data type: {structured_data.get('type')}, keys: {list(structured_data.keys())}")
|
||||||
|
|
||||||
if structured_data.get("type") == "table_data":
|
if structured_data.get("type") == "table_data":
|
||||||
headers = structured_data.get("headers", [])
|
headers = structured_data.get("headers", [])
|
||||||
rows = structured_data.get("rows", [])
|
rows = structured_data.get("rows", [])
|
||||||
|
logger.info(f"table_data类型: headers数量={len(headers)}, rows数量={len(rows)}")
|
||||||
if headers and rows:
|
if headers and rows:
|
||||||
chart_data = {
|
chart_data = {
|
||||||
"columns": headers,
|
"columns": headers,
|
||||||
@@ -860,15 +862,19 @@ class WordAIService:
|
|||||||
}
|
}
|
||||||
elif structured_data.get("type") == "structured_text":
|
elif structured_data.get("type") == "structured_text":
|
||||||
tables_data = structured_data.get("tables", [])
|
tables_data = structured_data.get("tables", [])
|
||||||
|
logger.info(f"structured_text类型: tables数量={len(tables_data)}")
|
||||||
if tables_data and len(tables_data) > 0:
|
if tables_data and len(tables_data) > 0:
|
||||||
first_table = tables_data[0]
|
first_table = tables_data[0]
|
||||||
headers = first_table.get("headers", [])
|
headers = first_table.get("headers", [])
|
||||||
rows = first_table.get("rows", [])
|
rows = first_table.get("rows", [])
|
||||||
|
logger.info(f"第一个表格: headers={headers[:5]}, rows数量={len(rows)}")
|
||||||
if headers and rows:
|
if headers and rows:
|
||||||
chart_data = {
|
chart_data = {
|
||||||
"columns": headers,
|
"columns": headers,
|
||||||
"rows": rows
|
"rows": rows
|
||||||
}
|
}
|
||||||
|
else:
|
||||||
|
logger.warning(f"无法识别的structured_data类型: {structured_data.get('type')}")
|
||||||
|
|
||||||
# 生成可视化图表
|
# 生成可视化图表
|
||||||
if chart_data:
|
if chart_data:
|
||||||
@@ -904,3 +910,6 @@ class WordAIService:
|
|||||||
"success": False,
|
"success": False,
|
||||||
"error": str(e)
|
"error": str(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
word_ai_service = WordAIService()
|
||||||
|
|||||||
@@ -42,6 +42,9 @@ chardet==5.2.0
|
|||||||
Pillow>=10.0.0
|
Pillow>=10.0.0
|
||||||
pytesseract>=0.3.10
|
pytesseract>=0.3.10
|
||||||
|
|
||||||
|
# ==================== PDF 生成 ====================
|
||||||
|
reportlab>=4.0.0
|
||||||
|
|
||||||
# ==================== AI / LLM ====================
|
# ==================== AI / LLM ====================
|
||||||
httpx==0.25.2
|
httpx==0.25.2
|
||||||
|
|
||||||
|
|||||||
203
docker-compose.yml
Normal file
203
docker-compose.yml
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
# ============================================================
|
||||||
|
# FilesReadSystem Docker Compose
|
||||||
|
# 全栈 AI 文档理解与数据融合系统
|
||||||
|
# ============================================================
|
||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
services:
|
||||||
|
# ==================== 数据库服务 ====================
|
||||||
|
|
||||||
|
mongodb:
  image: mongo:7.0
  container_name: filesread_mongodb
  restart: unless-stopped
  ports:
    # Published on the loopback interface by default: the other services reach
    # MongoDB over filesread_network, so the host port is only a convenience
    # for local tooling. Set MONGO_BIND_ADDR=0.0.0.0 to expose it externally.
    - "${MONGO_BIND_ADDR:-127.0.0.1}:27017:27017"
  environment:
    MONGO_INITDB_ROOT_USERNAME: ${MONGO_ROOT_USER:-admin}
    # NOTE(review): this fallback password is committed to version control.
    # Always set MONGO_ROOT_PASSWORD in .env, and consider rotating this value.
    MONGO_INITDB_ROOT_PASSWORD: ${MONGO_ROOT_PASSWORD:-20060825fhy}
    MONGO_INITDB_DATABASE: ${MONGODB_DB_NAME:-document_system}
  volumes:
    - mongodb_data:/data/db
  networks:
    - filesread_network
  healthcheck:
    # Pings the server through mongosh; other services wait on this result.
    test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')", "--quiet"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s
|
||||||
|
|
||||||
|
mysql:
  image: mysql:8.0
  container_name: filesread_mysql
  restart: unless-stopped
  ports:
    # Published on the loopback interface by default: the other services reach
    # MySQL over filesread_network. Set MYSQL_BIND_ADDR=0.0.0.0 to expose the
    # port externally (only do so with a strong MYSQL_PASSWORD).
    - "${MYSQL_BIND_ADDR:-127.0.0.1}:3306:3306"
  environment:
    # NOTE(review): "123456" is a trivially guessable root password; it is kept
    # only as a local-development fallback. Always set MYSQL_PASSWORD in .env.
    MYSQL_ROOT_PASSWORD: ${MYSQL_PASSWORD:-123456}
    MYSQL_DATABASE: ${MYSQL_DATABASE:-document}
  volumes:
    - mysql_data:/var/lib/mysql
  networks:
    - filesread_network
  healthcheck:
    # Uses the same password expression as MYSQL_ROOT_PASSWORD above so the
    # probe authenticates with whatever root password the server was given.
    test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-p${MYSQL_PASSWORD:-123456}"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s
|
||||||
|
|
||||||
|
redis:
  image: redis:7-alpine
  container_name: filesread_redis
  restart: unless-stopped
  ports:
    # Published on the loopback interface by default: the other services reach
    # Redis over filesread_network. Set REDIS_BIND_ADDR=0.0.0.0 to expose it.
    - "${REDIS_BIND_ADDR:-127.0.0.1}:6379:6379"
  environment:
    # Passed into the container so the command and health check below can
    # decide at runtime whether authentication is enabled.
    REDIS_PASSWORD: ${REDIS_PASSWORD:-}
  volumes:
    - redis_data:/data
  networks:
    - filesread_network
  # `--requirepass` is appended only when REDIS_PASSWORD is non-empty. A bare
  # `--requirepass` with no value is expected to be rejected by redis-server
  # at startup. `$$` escapes Compose interpolation so the expansion happens in
  # the container shell. The image entrypoint is re-invoked explicitly so its
  # usual setup still runs.
  command:
    - sh
    - -c
    - 'exec docker-entrypoint.sh redis-server --appendonly yes $${REDIS_PASSWORD:+--requirepass "$$REDIS_PASSWORD"}'
  healthcheck:
    # Authenticates when a password is configured and checks for an actual
    # PONG reply, so an authentication error is never reported as healthy.
    test:
      - CMD-SHELL
      - 'redis-cli $${REDIS_PASSWORD:+-a "$$REDIS_PASSWORD"} --no-auth-warning ping | grep -q PONG'
    interval: 10s
    timeout: 5s
    retries: 5
|
||||||
|
|
||||||
|
# ==================== 应用服务 ====================
|
||||||
|
|
||||||
|
backend:
  build:
    context: ./backend
    dockerfile: Dockerfile
  container_name: filesread_backend
  restart: unless-stopped
  ports:
    - "8000:8000"
  environment:
    # Application settings
    APP_NAME: FilesReadSystem
    DEBUG: ${DEBUG:-false}
    API_V1_STR: /api/v1

    # MongoDB (addressed by its Compose service name)
    # NOTE(review): the fallback password is committed to version control;
    # set MONGO_ROOT_PASSWORD in .env.
    MONGODB_URL: mongodb://${MONGO_ROOT_USER:-admin}:${MONGO_ROOT_PASSWORD:-20060825fhy}@mongodb:27017/admin
    MONGODB_DB_NAME: ${MONGODB_DB_NAME:-document_system}

    # MySQL
    MYSQL_HOST: mysql
    MYSQL_PORT: 3306
    MYSQL_USER: root
    MYSQL_PASSWORD: ${MYSQL_PASSWORD:-123456}
    MYSQL_DATABASE: ${MYSQL_DATABASE:-document}
    MYSQL_CHARSET: utf8mb4

    # Redis (database 0)
    REDIS_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/0

    # LLM provider
    LLM_API_KEY: ${LLM_API_KEY}
    LLM_BASE_URL: ${LLM_BASE_URL:-https://api.deepseek.com}
    LLM_MODEL_NAME: ${LLM_MODEL_NAME:-deepseek-chat}

    # Supabase
    SUPABASE_URL: ${SUPABASE_URL}
    SUPABASE_ANON_KEY: ${SUPABASE_ANON_KEY}
    SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_KEY}

    # Embedding / RAG
    EMBEDDING_MODEL: ${EMBEDDING_MODEL:-all-MiniLM-L6-v2}
    FAISS_INDEX_DIR: /app/data/faiss

    # File storage (MAX_UPLOAD_SIZE is 100 MiB expressed in bytes)
    UPLOAD_DIR: /app/data/uploads
    MAX_UPLOAD_SIZE: 104857600

    # Celery (broker on Redis database 1, results on database 2)
    CELERY_BROKER_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/1
    CELERY_RESULT_BACKEND: redis://:${REDIS_PASSWORD:-}@redis:6379/2
  volumes:
    - backend_data:/app/data
  networks:
    - filesread_network
  depends_on:
    mongodb:
      condition: service_healthy
    mysql:
      condition: service_healthy
    redis:
      condition: service_healthy
  healthcheck:
    # httpx.get() does not raise on 4xx/5xx responses by itself, so
    # raise_for_status() is required for an error status from /health to make
    # the probe exit non-zero.
    test: ["CMD", "python", "-c", "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
|
||||||
|
|
||||||
|
celery_worker:
  build:
    context: ./backend
    dockerfile: Dockerfile
  container_name: filesread_celery
  restart: unless-stopped
  # One task is prefetched at a time per worker process.
  command: celery -A app.celery_app worker --loglevel=info --prefetch-multiplier=1
  environment:
    # Celery (broker on Redis database 1, results on database 2)
    CELERY_BROKER_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/1
    CELERY_RESULT_BACKEND: redis://:${REDIS_PASSWORD:-}@redis:6379/2

    # Same datastore settings as the backend service
    # NOTE(review): the MongoDB fallback password is committed to version
    # control; set MONGO_ROOT_PASSWORD in .env.
    MONGODB_URL: mongodb://${MONGO_ROOT_USER:-admin}:${MONGO_ROOT_PASSWORD:-20060825fhy}@mongodb:27017/admin
    MONGODB_DB_NAME: ${MONGODB_DB_NAME:-document_system}
    MYSQL_HOST: mysql
    MYSQL_PORT: 3306
    MYSQL_USER: root
    MYSQL_PASSWORD: ${MYSQL_PASSWORD:-123456}
    MYSQL_DATABASE: ${MYSQL_DATABASE:-document}
    REDIS_URL: redis://:${REDIS_PASSWORD:-}@redis:6379/0

    # LLM provider
    LLM_API_KEY: ${LLM_API_KEY}
    LLM_BASE_URL: ${LLM_BASE_URL:-https://api.deepseek.com}
    LLM_MODEL_NAME: ${LLM_MODEL_NAME:-deepseek-chat}

    # Embedding
    EMBEDDING_MODEL: ${EMBEDDING_MODEL:-all-MiniLM-L6-v2}
    FAISS_INDEX_DIR: /app/data/faiss
  volumes:
    # Shares the backend's data volume (uploads and FAISS index directory).
    - backend_data:/app/data
  networks:
    - filesread_network
  # Wait for the datastores to report healthy, as the backend service does.
  # The short list form only orders container start-up, so the worker could
  # come up before its broker or databases accept connections.
  depends_on:
    redis:
      condition: service_healthy
    mongodb:
      condition: service_healthy
    mysql:
      condition: service_healthy
|
||||||
|
|
||||||
|
frontend:
  container_name: filesread_frontend
  build: { context: ./frontend, dockerfile: Dockerfile }
  restart: unless-stopped
  # Started after the backend container it proxies API requests to.
  depends_on: [backend]
  networks: [filesread_network]
  ports: ["80:80"]
  # NOTE(review): these are runtime variables of the nginx container. The
  # image serves a bundle produced by `pnpm build` at image build time, so
  # presumably they do not reach the compiled front-end; confirm whether they
  # should be supplied as build arguments instead.
  environment:
    VITE_APP_ID: ${VITE_APP_ID:-}
    VITE_SUPABASE_URL: ${SUPABASE_URL}
    VITE_SUPABASE_ANON_KEY: ${SUPABASE_ANON_KEY}
    VITE_BACKEND_API_URL: /api/v1
|
||||||
|
|
||||||
|
networks:
|
||||||
|
filesread_network:
|
||||||
|
driver: bridge
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
mongodb_data:
|
||||||
|
mysql_data:
|
||||||
|
redis_data:
|
||||||
|
backend_data:
|
||||||
169
docs/architecture.drawio
Normal file
169
docs/architecture.drawio
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
<mxfile host="app.diagrams.net" modified="2026-04-16T14:00:00.000Z" agent="Claude" version="24.0.0">
|
||||||
|
<diagram name="系统架构图" id="architecture">
|
||||||
|
<mxGraphModel dx="1200" dy="800" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1920" pageHeight="1080" math="0" shadow="0">
|
||||||
|
<root>
|
||||||
|
<mxCell id="0" />
|
||||||
|
<mxCell id="1" parent="0" />
|
||||||
|
|
||||||
|
<!-- 用户访问层 -->
|
||||||
|
<mxCell id="layer1" value="用户访问层" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a1a2e;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="20" width="120" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="browser" value="浏览器
(Browser)" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e3f2fd;strokeColor=#1976d2;fontColor=#0d47a1;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="860" y="60" width="120" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 前端展示层 -->
|
||||||
|
<mxCell id="layer2" value="前端展示层" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a1a2e;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="140" width="120" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="frontend_box" value="" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f3e5f5;strokeColor=#7b1fa2;strokeWidth=2;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="200" y="180" width="1520" height="140" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="frontend_title" value="React 18 + TypeScript + Vite + shadcn/ui" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;fontStyle=1;fontColor=#4a148c;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="760" y="185" width="280" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="dashboard" value="Dashboard
首页仪表盘" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ce93d8;strokeColor=#8e24aa;fontColor=#fff;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="240" y="220" width="120" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="documents" value="Documents
文档管理" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ce93d8;strokeColor=#8e24aa;fontColor=#fff;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="400" y="220" width="120" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="template" value="TemplateFill
智能填表" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ce93d8;strokeColor=#8e24aa;fontColor=#fff;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="560" y="220" width="120" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="instruction" value="Instruction
指令助手" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ce93d8;strokeColor=#8e24aa;fontColor=#fff;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="720" y="220" width="120" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="taskhistory" value="TaskHistory
任务历史" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ce93d8;strokeColor=#8e24aa;fontColor=#fff;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="880" y="220" width="120" height="80" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="frontend_libs" value="Recharts + Lucide Icons + React Router" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=11;fontColor=#6a1b9a;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1040" y="250" width="280" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 连接线:浏览器到前端 -->
|
||||||
|
<mxCell id="conn1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#1976d2;strokeWidth=2;" edge="1" parent="1" source="browser" target="frontend_box">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 后端服务层 -->
|
||||||
|
<mxCell id="layer3" value="后端服务层" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a1a2e;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="350" width="120" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="backend_box" value="" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e8f5e9;strokeColor=#388e3c;strokeWidth=2;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="200" y="390" width="1520" height="180" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="backend_title" value="FastAPI + Uvicorn + Celery" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;fontStyle=1;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="395" width="200" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="upload" value="文档上传
/upload/*" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#81c784;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="240" y="430" width="140" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ai" value="AI分析
/ai/*" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#81c784;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="420" y="430" width="140" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="rag" value="RAG检索
/rag/*" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#81c784;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="600" y="430" width="140" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="template_api" value="模板填充
/templates/*" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#81c784;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="780" y="430" width="140" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="instruction_api" value="指令解析
/instruction/*" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#81c784;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="960" y="430" width="140" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="visualization" value="可视化
/visualization/*" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#81c784;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1140" y="430" width="140" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="celery" value="Celery
任务调度" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#a5d6a7;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1320" y="430" width="120" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="logging" value="监控日志" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#a5d6a7;strokeColor=#2e7d32;fontColor=#1b5e20;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1480" y="430" width="100" height="60" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 连接线:前端到后端 -->
|
||||||
|
<mxCell id="conn2" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#388e3c;strokeWidth=2;dashed=1;dashPattern=8 8;" edge="1" parent="1" source="frontend_box" target="backend_box">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- AI服务层 -->
|
||||||
|
<mxCell id="layer4" value="AI服务层" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a1a2e;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="600" width="120" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="ai_box" value="" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff3e0;strokeColor=#f57c00;strokeWidth=2;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="300" y="640" width="1320" height="120" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="llm_title" value="LLMService - 大模型服务" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;fontStyle=1;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="820" y="645" width="200" height="25" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="minimax" value="MiniMax-Text-01" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#ffcc80;strokeColor=#ef6c00;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="400" y="680" width="150" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="deepseek" value="DeepSeek-chat" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#ffcc80;strokeColor=#ef6c00;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="600" y="680" width="150" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="excel_ai" value="ExcelAIService" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ffe0b2;strokeColor=#f57c00;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="820" y="680" width="130" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="word_ai" value="WordAIService" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ffe0b2;strokeColor=#f57c00;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="980" y="680" width="130" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="md_ai" value="MarkdownAIService" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ffe0b2;strokeColor=#f57c00;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1140" y="680" width="130" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="txt_ai" value="TxtAIService" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#ffe0b2;strokeColor=#f57c00;fontColor=#e65100;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1300" y="680" width="130" height="50" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 连接线:后端到AI -->
|
||||||
|
<mxCell id="conn3" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#f57c00;strokeWidth=2;dashed=1;dashPattern=8 8;" edge="1" parent="1" source="backend_box" target="ai_box">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 数据存储层 -->
|
||||||
|
<mxCell id="layer5" value="数据存储层" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=16;fontStyle=1;fontColor=#1a1a2e;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="790" width="120" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="mongodb" value="MongoDB
文档数据库

• 原始文档内容
• 元数据信息
• 文档标签
• 处理状态" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e0e0e0;strokeColor=#616161;fontColor=#212121;align=left;spacingLeft=10;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="240" y="830" width="200" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="mysql" value="MySQL
关系数据库

• Excel表格数据
• 结构化数据
• 字段描述
• RAG索引" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e0e0e0;strokeColor=#616161;fontColor=#212121;align=left;spacingLeft=10;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="520" y="830" width="200" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="redis" value="Redis
缓存/队列

• 会话缓存
• 任务队列
• Celery broker
• 临时数据" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e0e0e0;strokeColor=#616161;fontColor=#212121;align=left;spacingLeft=10;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="800" y="830" width="200" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="faiss" value="FAISS
向量数据库

• 文档向量索引
• 语义相似度
• RAG检索
• sentence-transformers" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e0e0e0;strokeColor=#616161;fontColor=#212121;align=left;spacingLeft=10;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1080" y="830" width="240" height="160" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 连接线:AI到存储 -->
|
||||||
|
<mxCell id="conn4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#616161;strokeWidth=2;dashed=1;dashPattern=8 8;" edge="1" parent="1" source="ai_box" target="mongodb">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="conn5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#616161;strokeWidth=2;dashed=1;dashPattern=8 8;" edge="1" parent="1" source="ai_box" target="mysql">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="conn6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#616161;strokeWidth=2;dashed=1;dashPattern=8 8;" edge="1" parent="1" source="ai_box" target="redis">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="conn7" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;strokeColor=#616161;strokeWidth=2;dashed=1;dashPattern=8 8;" edge="1" parent="1" source="ai_box" target="faiss">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
<!-- 标注 -->
|
||||||
|
<mxCell id="arrow1" value="HTTP/HTTPS
WebSocket" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;fontColor=#1976d2;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1020" y="130" width="80" height="30" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="arrow2" value="API调用" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;fontColor=#388e3c;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1020" y="570" width="60" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="arrow3" value="数据读写" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=10;fontColor=#616161;" vertex="1" parent="1">
|
||||||
|
<mxGeometry x="1020" y="770" width="60" height="20" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
|
||||||
|
</root>
|
||||||
|
</mxGraphModel>
|
||||||
|
</diagram>
|
||||||
|
</mxfile>
|
||||||
36
frontend/Dockerfile
Normal file
36
frontend/Dockerfile
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# ============================================================
|
||||||
|
# FilesReadSystem Frontend - React + Vite
|
||||||
|
# 多阶段构建: Node 构建 -> Nginx 运行
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
# --- Stage 1: build the production bundle with Node ---
FROM node:20-alpine AS builder
WORKDIR /app

# Dependency manifests are copied on their own so the install layers below are
# reused from cache until package.json or the lockfile changes.
COPY package.json pnpm-lock.yaml* ./

# Install the package manager, then the exact dependency set from the lockfile.
RUN npm install -g pnpm
RUN pnpm install --frozen-lockfile

# Bring in the rest of the sources and compile them into /app/dist.
COPY . .
RUN pnpm build

# --- Stage 2: serve the compiled bundle with nginx ---
FROM nginx:alpine

# Static bundle produced by the builder stage.
COPY --from=builder /app/dist /usr/share/nginx/html

# Site configuration (SPA fallback and reverse proxy to the backend).
COPY nginx.conf /etc/nginx/conf.d/default.conf

EXPOSE 80

# Keep nginx in the foreground so it stays the container's main process.
CMD ["nginx", "-g", "daemon off;"]
|
||||||
47
frontend/nginx.conf
Normal file
47
frontend/nginx.conf
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# ============================================================
|
||||||
|
# FilesReadSystem Nginx 配置
|
||||||
|
# 反向代理 API 请求到后端
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
server {
    listen 80;
    server_name localhost;

    # Front-end static files
    root /usr/share/nginx/html;
    index index.html;

    # Accept request bodies up to 100 MB on every location. File uploads are
    # POSTed to API routes under /api/ as well, and nginx's built-in default
    # (1m) would reject them with 413 before they reach the backend.
    client_max_body_size 100M;

    # SPA support: unknown paths fall back to index.html so client-side
    # routing can resolve them.
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Long-lived caching for static assets
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # Reverse proxy for API requests.
    # `^~` makes this prefix win over the regex location above; without it a
    # proxied URL ending in e.g. ".png" or ".js" would be looked up in the
    # static root instead of being forwarded to the backend.
    location ^~ /api/ {
        proxy_pass http://backend:8000/api/;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 60s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
    }

    # Reverse proxy for uploaded files (same `^~` reasoning as /api/).
    location ^~ /uploads/ {
        proxy_pass http://backend:8000/uploads/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
    }
}
|
||||||
@@ -8,7 +8,8 @@ import {
|
|||||||
Menu,
|
Menu,
|
||||||
ChevronRight,
|
ChevronRight,
|
||||||
Sparkles,
|
Sparkles,
|
||||||
Clock
|
Clock,
|
||||||
|
FileDown
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
import { cn } from '@/lib/utils';
|
import { cn } from '@/lib/utils';
|
||||||
@@ -19,6 +20,7 @@ const navItems = [
|
|||||||
{ name: '文档中心', path: '/documents', icon: FileText },
|
{ name: '文档中心', path: '/documents', icon: FileText },
|
||||||
{ name: '智能填表', path: '/form-fill', icon: TableProperties },
|
{ name: '智能填表', path: '/form-fill', icon: TableProperties },
|
||||||
{ name: '智能助手', path: '/assistant', icon: MessageSquareCode },
|
{ name: '智能助手', path: '/assistant', icon: MessageSquareCode },
|
||||||
|
{ name: '文档转PDF', path: '/pdf-converter', icon: FileDown },
|
||||||
{ name: '任务历史', path: '/task-history', icon: Clock },
|
{ name: '任务历史', path: '/task-history', icon: Clock },
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -32,7 +34,7 @@ const MainLayout: React.FC = () => {
|
|||||||
<FileText size={24} />
|
<FileText size={24} />
|
||||||
</div>
|
</div>
|
||||||
<div className="flex flex-col">
|
<div className="flex flex-col">
|
||||||
<span className="font-bold text-lg tracking-tight text-sidebar-foreground">智联文档</span>
|
<span className="font-bold text-lg tracking-tight text-sidebar-foreground">表易智融</span>
|
||||||
<span className="text-xs text-muted-foreground">多源数据融合平台</span>
|
<span className="text-xs text-muted-foreground">多源数据融合平台</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -66,7 +68,7 @@ const MainLayout: React.FC = () => {
|
|||||||
<Sparkles size={20} className="text-primary" />
|
<Sparkles size={20} className="text-primary" />
|
||||||
</div>
|
</div>
|
||||||
<div className="flex flex-col overflow-hidden">
|
<div className="flex flex-col overflow-hidden">
|
||||||
<span className="font-semibold text-sm truncate">智联文档</span>
|
<span className="font-semibold text-sm truncate">表易智融</span>
|
||||||
<span className="text-[10px] uppercase tracking-wider text-muted-foreground">多源数据融合</span>
|
<span className="text-[10px] uppercase tracking-wider text-muted-foreground">多源数据融合</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1153,6 +1153,120 @@ export const backendApi = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// ==================== PDF 转换 API ====================
|
||||||
|
|
||||||
|
/**
 * Convert a file to PDF and save the result through the browser.
 *
 * The upload is delegated to `convertToPdf`, which performs the request with
 * XMLHttpRequest, so both entry points share one request and error path.
 *
 * @param file - Source document to convert.
 * @returns Resolves once the download has been triggered.
 * @throws Error when the request fails or the server answers with a non-2xx status.
 */
async convertAndDownloadPdf(file: File): Promise<void> {
  const blob = await backendApi.convertToPdf(file);

  const url = URL.createObjectURL(blob);
  const anchor = document.createElement('a');
  anchor.href = url;
  // Replace the original extension with ".pdf" for the suggested file name.
  anchor.download = `${file.name.replace(/\.[^.]+$/, '')}.pdf`;
  document.body.appendChild(anchor);
  try {
    anchor.click();
  } finally {
    document.body.removeChild(anchor);
    // Revoke on a later task rather than synchronously, so the browser has
    // started reading the blob before its URL is invalidated.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
},
|
||||||
|
|
||||||
|
/**
 * Convert a file to PDF and return the converted document.
 *
 * @param file - Source document, sent as the multipart field `file`.
 * @returns The response body as a Blob.
 * @throws Error carrying the server's `detail` message when the response is
 *   non-2xx and has a JSON body with a string `detail`; otherwise an Error
 *   with the HTTP status. A transport failure rejects with a network error.
 */
async convertToPdf(file: File): Promise<Blob> {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open('POST', `${BACKEND_BASE_URL}/pdf/convert`);
    xhr.responseType = 'blob';

    xhr.onload = () => {
      if (xhr.status >= 200 && xhr.status < 300) {
        resolve(xhr.response);
        return;
      }

      const fallback = new Error(`转换失败: ${xhr.status}`);
      // With responseType 'blob' the error payload also arrives as a Blob,
      // so it has to be read as text before `detail` can be extracted.
      const body = xhr.response as Blob | null;
      if (!body) {
        reject(fallback);
        return;
      }
      body
        .text()
        .then((text) => {
          const detail = JSON.parse(text)?.detail;
          reject(typeof detail === 'string' && detail ? new Error(detail) : fallback);
        })
        // Non-JSON error bodies (e.g. a proxy's HTML error page) keep the status message.
        .catch(() => reject(fallback));
    };

    xhr.onerror = () => {
      reject(new Error('网络错误'));
    };

    const formData = new FormData();
    formData.append('file', file);
    xhr.send(formData);
  });
},
|
||||||
|
|
||||||
|
/**
 * Convert several files to PDF in one request.
 *
 * @param files - Source documents, each sent as a multipart `files` field.
 * @returns The response body as a Blob.
 * @throws Error carrying the server's `detail` message when it is a string,
 *   otherwise a generic failure message. Failures are logged, then rethrown.
 */
async batchConvertToPdf(files: File[]): Promise<Blob> {
  const formData = new FormData();
  files.forEach((file) => formData.append('files', file));

  const url = `${BACKEND_BASE_URL}/pdf/convert/batch`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // The error body is not guaranteed to be JSON (a proxy may answer with
      // an HTML page); a parse failure must not replace the real error.
      const payload = await response.json().catch(() => null);
      const detail = payload?.detail;
      throw new Error(typeof detail === 'string' && detail ? detail : '批量PDF转换失败');
    }

    return await response.blob();
  } catch (error) {
    console.error('批量PDF转换失败:', error);
    throw error;
  }
},
|
||||||
|
|
||||||
|
/**
 * Fetch the list of source formats the PDF converter accepts.
 *
 * Never rejects: on any failure the error is logged and a built-in list is
 * returned with `success: false`.
 *
 * @returns The backend's JSON response, or the fallback list on failure.
 */
async getPdfSupportedFormats(): Promise<{
  success: boolean;
  formats: string[];
}> {
  const endpoint = `${BACKEND_BASE_URL}/pdf/formats`;

  try {
    const res = await fetch(endpoint);
    if (res.ok) {
      return await res.json();
    }
    throw new Error('获取支持的格式失败');
  } catch (err) {
    console.error('获取支持的格式失败:', err);
    // Fallback used when the backend cannot be queried.
    return { success: false, formats: ['docx', 'xlsx', 'txt', 'md'] };
  }
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// ==================== AI 分析 API ====================
|
// ==================== AI 分析 API ====================
|
||||||
@@ -1187,11 +1301,19 @@ export const aiApi = {
|
|||||||
* 上传并使用 AI 分析 Excel 文件
|
* 上传并使用 AI 分析 Excel 文件
|
||||||
*/
|
*/
|
||||||
async analyzeExcel(
|
async analyzeExcel(
|
||||||
file: File,
|
file: File | null,
|
||||||
options: AIAnalyzeOptions = {}
|
options: AIAnalyzeOptions = {},
|
||||||
|
docId: string | null = null
|
||||||
): Promise<AIExcelAnalyzeResult> {
|
): Promise<AIExcelAnalyzeResult> {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('file', file);
|
|
||||||
|
if (docId) {
|
||||||
|
formData.append('doc_id', docId);
|
||||||
|
} else if (file) {
|
||||||
|
formData.append('file', file);
|
||||||
|
} else {
|
||||||
|
throw new Error('必须提供文件或文档ID');
|
||||||
|
}
|
||||||
|
|
||||||
const params = new URLSearchParams();
|
const params = new URLSearchParams();
|
||||||
if (options.userPrompt) {
|
if (options.userPrompt) {
|
||||||
@@ -1268,7 +1390,9 @@ export const aiApi = {
|
|||||||
try {
|
try {
|
||||||
const response = await fetch(url);
|
const response = await fetch(url);
|
||||||
if (!response.ok) throw new Error('获取分析类型失败');
|
if (!response.ok) throw new Error('获取分析类型失败');
|
||||||
return await response.json();
|
const data = await response.json();
|
||||||
|
// 转换后端返回格式 {excel_types: [], markdown_types: []} 为前端期望的 {types: []}
|
||||||
|
return { types: data.excel_types || [] };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('获取分析类型失败:', error);
|
console.error('获取分析类型失败:', error);
|
||||||
throw error;
|
throw error;
|
||||||
@@ -1795,5 +1919,6 @@ export const aiApi = {
|
|||||||
console.error('获取会话列表失败:', error);
|
console.error('获取会话列表失败:', error);
|
||||||
return { success: false, conversations: [] };
|
return { success: false, conversations: [] };
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ const Assistant: React.FC = () => {
|
|||||||
{
|
{
|
||||||
id: '1',
|
id: '1',
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: '您好!我是智联文档 AI 助手。您可以告诉我您想对文档进行的操作,例如:\n- "帮我列出最近上传的所有 docx 文档"\n- "从 2026 财报文档中提取出关键的利润数据"\n- "帮我创建一个汇总各部门报销单的填表任务"\n\n请问有什么我可以帮您的?',
|
content: '您好!我是表易智融 AI 助手。您可以告诉我您想对文档进行的操作,例如:\n- "帮我列出最近上传的所有 docx 文档"\n- "从 2026 财报文档中提取出关键的利润数据"\n- "帮我创建一个汇总各部门报销单的填表任务"\n\n请问有什么我可以帮您的?',
|
||||||
created_at: new Date().toISOString()
|
created_at: new Date().toISOString()
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ const Dashboard: React.FC = () => {
|
|||||||
<section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
|
<section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
|
||||||
<div className="space-y-1">
|
<div className="space-y-1">
|
||||||
<h1 className="text-3xl font-extrabold tracking-tight">
|
<h1 className="text-3xl font-extrabold tracking-tight">
|
||||||
欢迎使用 <span className="text-primary">智联文档</span> 系统 👋
|
欢迎使用 <span className="text-primary">表易智融</span> 系统 👋
|
||||||
</h1>
|
</h1>
|
||||||
<p className="text-muted-foreground">基于大语言模型的文档理解与多源数据融合系统</p>
|
<p className="text-muted-foreground">基于大语言模型的文档理解与多源数据融合系统</p>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -472,11 +472,17 @@ const Documents: React.FC = () => {
|
|||||||
setAnalysisCharts(null);
|
setAnalysisCharts(null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await aiApi.analyzeExcel(uploadedFile, {
|
// 判断是从历史文档还是本地上传
|
||||||
userPrompt: aiOptions.userPrompt,
|
const docId = selectedDocument?.doc_id && uploadedFile.size === 0 ? selectedDocument.doc_id : null;
|
||||||
analysisType: aiOptions.analysisType,
|
const result = await aiApi.analyzeExcel(
|
||||||
parseAllSheets: aiOptions.parseAllSheetsForAI
|
uploadedFile.size > 0 ? uploadedFile : null,
|
||||||
});
|
{
|
||||||
|
userPrompt: aiOptions.userPrompt,
|
||||||
|
analysisType: aiOptions.analysisType,
|
||||||
|
parseAllSheets: aiOptions.parseAllSheetsForAI
|
||||||
|
},
|
||||||
|
docId
|
||||||
|
);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
toast.success('AI 分析完成');
|
toast.success('AI 分析完成');
|
||||||
@@ -706,6 +712,12 @@ const Documents: React.FC = () => {
|
|||||||
|
|
||||||
const handleSelectDocument = async (docId: string) => {
|
const handleSelectDocument = async (docId: string) => {
|
||||||
setLoadingDocument(true);
|
setLoadingDocument(true);
|
||||||
|
// 重置所有 AI 分析结果,避免显示上一个文档的分析
|
||||||
|
setAiAnalysis(null);
|
||||||
|
setAnalysisCharts(null);
|
||||||
|
setMdAnalysis(null);
|
||||||
|
setWordAnalysis(null);
|
||||||
|
setTxtAnalysis(null);
|
||||||
try {
|
try {
|
||||||
const result = await backendApi.getDocument(docId);
|
const result = await backendApi.getDocument(docId);
|
||||||
if (result.success && result.document) {
|
if (result.success && result.document) {
|
||||||
@@ -2264,39 +2276,57 @@ const Documents: React.FC = () => {
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
// 数据表格组件
|
// 数据表格组件 - 滑动窗口样式
|
||||||
const DataTable: React.FC<{ columns: string[]; rows: Record<string, any>[] }> = ({ columns, rows }) => {
|
const DataTable: React.FC<{ columns: string[]; rows: Record<string, any>[] }> = ({ columns, rows }) => {
|
||||||
if (!columns.length || !rows.length) {
|
if (!columns.length || !rows.length) {
|
||||||
return <div className="text-center py-8 text-muted-foreground text-sm">暂无数据</div>;
|
return <div className="text-center py-8 text-muted-foreground text-sm">暂无数据</div>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const displayRows = rows.slice(0, 500); // 限制最多显示500行
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="rounded-lg border overflow-x-auto">
|
<div className="rounded-lg border overflow-hidden">
|
||||||
<TableComponent>
|
{/* 表头 - 固定 */}
|
||||||
<TableHeader>
|
<div className="overflow-x-auto">
|
||||||
<TableRow>
|
<TableComponent>
|
||||||
<TableHead className="w-16 text-center text-muted-foreground">#</TableHead>
|
<TableHeader>
|
||||||
{columns.map((col, idx) => (
|
<TableRow className="bg-muted/50">
|
||||||
<TableHead key={idx} className="whitespace-nowrap">{col || `<列${idx + 1}>`}</TableHead>
|
<TableHead className="w-16 text-center text-muted-foreground">#</TableHead>
|
||||||
))}
|
{columns.map((col, idx) => (
|
||||||
</TableRow>
|
<TableHead key={idx} className="whitespace-nowrap">{col || `<列${idx + 1}>`}</TableHead>
|
||||||
</TableHeader>
|
|
||||||
<TableBody>
|
|
||||||
{rows.slice(0, 100).map((row, rowIdx) => (
|
|
||||||
<TableRow key={rowIdx}>
|
|
||||||
<TableCell className="text-center text-muted-foreground font-medium">{rowIdx + 1}</TableCell>
|
|
||||||
{columns.map((col, colIdx) => (
|
|
||||||
<TableCell key={colIdx} className="whitespace-nowrap">
|
|
||||||
{row[col] !== null && row[col] !== undefined ? String(row[col]) : '-'}
|
|
||||||
</TableCell>
|
|
||||||
))}
|
))}
|
||||||
</TableRow>
|
</TableRow>
|
||||||
))}
|
</TableHeader>
|
||||||
</TableBody>
|
</TableComponent>
|
||||||
</TableComponent>
|
</div>
|
||||||
{rows.length > 100 && (
|
{/* 表体 - 可滚动 */}
|
||||||
|
<div
|
||||||
|
className="overflow-y-auto"
|
||||||
|
style={{ maxHeight: '400px' }}
|
||||||
|
>
|
||||||
|
<TableComponent>
|
||||||
|
<TableBody>
|
||||||
|
{displayRows.map((row, rowIdx) => (
|
||||||
|
<TableRow key={rowIdx}>
|
||||||
|
<TableCell className="text-center text-muted-foreground font-medium w-16">{rowIdx + 1}</TableCell>
|
||||||
|
{columns.map((col, colIdx) => (
|
||||||
|
<TableCell key={colIdx} className="whitespace-nowrap">
|
||||||
|
{row[col] !== null && row[col] !== undefined ? String(row[col]) : '-'}
|
||||||
|
</TableCell>
|
||||||
|
))}
|
||||||
|
</TableRow>
|
||||||
|
))}
|
||||||
|
</TableBody>
|
||||||
|
</TableComponent>
|
||||||
|
</div>
|
||||||
|
{rows.length > 500 && (
|
||||||
<div className="p-3 text-center text-sm text-muted-foreground bg-muted/30">
|
<div className="p-3 text-center text-sm text-muted-foreground bg-muted/30">
|
||||||
仅显示前 100 行数据
|
仅显示前 500 行数据(共 {rows.length} 行)
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{rows.length > 100 && rows.length <= 500 && (
|
||||||
|
<div className="p-2 text-center text-xs text-muted-foreground bg-muted/20">
|
||||||
|
共 {rows.length} 行数据,向下滚动查看更多
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ const InstructionChat: React.FC = () => {
|
|||||||
{
|
{
|
||||||
id: 'welcome',
|
id: 'welcome',
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: `您好!我是智联文档 AI 助手。
|
content: `您好!我是表易智融 AI 助手。
|
||||||
|
|
||||||
**📄 文档智能操作**
|
**📄 文档智能操作**
|
||||||
- "提取文档中的医院数量和床位数"
|
- "提取文档中的医院数量和床位数"
|
||||||
|
|||||||
446
frontend/src/pages/PdfConverter.tsx
Normal file
446
frontend/src/pages/PdfConverter.tsx
Normal file
@@ -0,0 +1,446 @@
|
|||||||
|
/**
|
||||||
|
* PDF 转换页面
|
||||||
|
* 支持将 Word、Excel、Txt、Markdown 格式转换为 PDF
|
||||||
|
*/
|
||||||
|
import React, { useState, useCallback } from 'react';
|
||||||
|
import { useDropzone } from 'react-dropzone';
|
||||||
|
import {
|
||||||
|
FileText,
|
||||||
|
Upload,
|
||||||
|
Download,
|
||||||
|
FileSpreadsheet,
|
||||||
|
File as FileIcon,
|
||||||
|
Loader2,
|
||||||
|
CheckCircle,
|
||||||
|
AlertCircle,
|
||||||
|
Trash2,
|
||||||
|
FileDown,
|
||||||
|
X,
|
||||||
|
Copy
|
||||||
|
} from 'lucide-react';
|
||||||
|
import { Button } from '@/components/ui/button';
|
||||||
|
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card';
|
||||||
|
import { Badge } from '@/components/ui/badge';
|
||||||
|
import { Label } from '@/components/ui/label';
|
||||||
|
import { toast } from 'sonner';
|
||||||
|
import { cn } from '@/lib/utils';
|
||||||
|
import { backendApi } from '@/db/backend-api';
|
||||||
|
|
||||||
|
type FileState = {
|
||||||
|
file: File;
|
||||||
|
status: 'pending' | 'converting' | 'success' | 'failed';
|
||||||
|
progress: number;
|
||||||
|
pdfBlob?: Blob;
|
||||||
|
error?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const SUPPORTED_FORMATS = [
|
||||||
|
{ ext: 'docx', name: 'Word 文档', icon: FileText, color: 'blue' },
|
||||||
|
{ ext: 'xlsx', name: 'Excel 表格', icon: FileSpreadsheet, color: 'emerald' },
|
||||||
|
{ ext: 'txt', name: '文本文件', icon: FileIcon, color: 'gray' },
|
||||||
|
{ ext: 'md', name: 'Markdown', icon: FileText, color: 'purple' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const PdfConverter: React.FC = () => {
|
||||||
|
const [files, setFiles] = useState<FileState[]>([]);
|
||||||
|
const [converting, setConverting] = useState(false);
|
||||||
|
const [convertedCount, setConvertedCount] = useState(0);
|
||||||
|
|
||||||
|
/**
 * Dropzone callback: wrap every accepted file in a fresh `pending` entry
 * (progress 0) and append the entries to the end of the current list.
 */
const onDrop = useCallback((acceptedFiles: File[]) => {
  const queued: FileState[] = [];
  for (const file of acceptedFiles) {
    queued.push({ file, status: 'pending', progress: 0 });
  }
  // Functional update so consecutive drops never overwrite each other.
  setFiles(current => current.concat(queued));
}, []);
|
||||||
|
|
||||||
|
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||||
|
onDrop,
|
||||||
|
accept: {
|
||||||
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
|
||||||
|
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
||||||
|
'application/vnd.ms-excel': ['.xls'],
|
||||||
|
'text/markdown': ['.md'],
|
||||||
|
'text/plain': ['.txt'],
|
||||||
|
},
|
||||||
|
multiple: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Convert every pending or previously failed file to PDF, one file at a time,
 * and trigger a browser download for each successful conversion.
 *
 * List entries are matched by `File` identity rather than by array index, so
 * the correct entry is updated even when the list changes (for example a
 * queued file is removed) while conversions are still in flight.
 *
 * A failure of one file is recorded on that entry and does not stop the
 * remaining conversions. The final toast reports `succeeded/attempted` and is
 * shown as an error when nothing succeeded.
 */
const handleConvert = async () => {
  if (files.length === 0) {
    toast.error('请先上传文件');
    return;
  }

  const pendingFiles = files.filter(f => f.status === 'pending' || f.status === 'failed');
  if (pendingFiles.length === 0) {
    // Nothing left to convert; avoid flashing the busy state and a "0/0" toast.
    return;
  }

  // Merge `patch` into the entry that wraps `target`; all other entries are kept as-is.
  const patchFile = (target: File, patch: Partial<FileState>) => {
    setFiles(prev => prev.map(f => (f.file === target ? { ...f, ...patch } : f)));
  };

  setConverting(true);
  setConvertedCount(0);

  let successCount = 0;

  try {
    for (const { file } of pendingFiles) {
      // Drop the message of an earlier failed attempt so a retry starts clean.
      patchFile(file, { status: 'converting', progress: 10, error: undefined });

      try {
        const pdfBlob = await backendApi.convertToPdf(file);

        // Start the download through a temporary, programmatically clicked link.
        const url = URL.createObjectURL(pdfBlob);
        const a = document.createElement('a');
        a.href = url;
        a.download = `${file.name.replace(/\.[^.]+$/, '')}.pdf`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);

        // Keep the blob on the entry so it can be downloaded again later.
        patchFile(file, { status: 'success', progress: 100, pdfBlob, error: undefined });
        successCount++;
        setConvertedCount(successCount);
        toast.success(`${file.name} 下载已开始`);
      } catch (error: any) {
        patchFile(file, { status: 'failed', error: error?.message || '转换失败' });
      }
    }
  } finally {
    // Always leave the busy state, even if something unexpected is thrown above.
    setConverting(false);
  }

  const summary = `转换完成:${successCount}/${pendingFiles.length} 个文件`;
  if (successCount === 0) {
    toast.error(summary);
  } else {
    toast.success(summary);
  }
};
|
||||||
|
|
||||||
|
/**
 * Download the converted PDF stored on a list entry.
 *
 * Does nothing when the entry has no `pdfBlob`. The download name is the
 * source file name with its last extension replaced by `.pdf`.
 */
const handleDownload = (fileState: FileState) => {
  const { pdfBlob, file } = fileState;
  if (!pdfBlob) {
    return;
  }

  const objectUrl = URL.createObjectURL(pdfBlob);
  const baseName = file.name.replace(/\.[^.]+$/, '');

  // A temporary anchor is attached, clicked and detached to start the download.
  const anchor = document.createElement('a');
  anchor.href = objectUrl;
  anchor.download = `${baseName}.pdf`;
  document.body.appendChild(anchor);
  anchor.click();
  document.body.removeChild(anchor);

  URL.revokeObjectURL(objectUrl);
};
|
||||||
|
|
||||||
|
/**
 * Download every successfully converted file.
 *
 * - No converted file: show an error toast.
 * - Exactly one: delegate to `handleDownload`.
 * - Several: send the original source files to
 *   `backendApi.batchConvertToPdf` and download the returned blob as
 *   `converted_pdfs.zip`.
 *   NOTE(review): this presumably converts the files again on the server
 *   instead of reusing the stored PDF blobs — confirm this is intended.
 */
const handleDownloadAll = async () => {
  const converted = files.filter(entry => entry.status === 'success' && entry.pdfBlob);
  const total = converted.length;

  if (total === 0) {
    toast.error('没有可下载的文件');
    return;
  }

  if (total === 1) {
    handleDownload(converted[0]);
    return;
  }

  try {
    const sources = converted.map(entry => entry.file);
    const zipBlob = await backendApi.batchConvertToPdf(sources);

    // Same temporary-anchor download technique as for single files.
    const objectUrl = URL.createObjectURL(zipBlob);
    const anchor = document.createElement('a');
    anchor.href = objectUrl;
    anchor.download = 'converted_pdfs.zip';
    document.body.appendChild(anchor);
    anchor.click();
    document.body.removeChild(anchor);
    URL.revokeObjectURL(objectUrl);

    toast.success('ZIP 下载开始');
  } catch (error: any) {
    toast.error(error.message || '下载失败');
  }
};
|
||||||
|
|
||||||
|
/** Remove the list entry at position `index`; all other entries keep their order. */
const handleRemove = (index: number) => {
  setFiles(current => current.filter((_entry, position) => position !== index));
};
|
||||||
|
|
||||||
|
/** Reset the page: empty the file list and set the converted counter back to zero. */
const handleClear = () => {
  const noFiles: FileState[] = [];
  setFiles(noFiles);
  setConvertedCount(0);
};
|
||||||
|
|
||||||
|
/**
 * Pick the icon component for a file name.
 *
 * The text after the last `.` is lower-cased and looked up in
 * `SUPPORTED_FORMATS`; unknown extensions get the generic `FileIcon`.
 */
const getFileIcon = (filename: string) => {
  const extension = filename.split('.').pop()?.toLowerCase();
  const match = SUPPORTED_FORMATS.find(candidate => candidate.ext === extension);
  return match ? match.icon : FileIcon;
};
|
||||||
|
|
||||||
|
/**
 * Pick the colour key for a file name.
 *
 * The text after the last `.` is lower-cased and looked up in
 * `SUPPORTED_FORMATS`; unknown extensions (or an empty colour) yield `'gray'`.
 */
const getFileColor = (filename: string) => {
  const extension = filename.split('.').pop()?.toLowerCase();
  const match = SUPPORTED_FORMATS.find(candidate => candidate.ext === extension);
  if (!match || !match.color) {
    return 'gray';
  }
  return match.color;
};
|
||||||
|
|
||||||
|
const colorClasses: Record<string, string> = {
|
||||||
|
blue: 'bg-blue-500/10 text-blue-500',
|
||||||
|
emerald: 'bg-emerald-500/10 text-emerald-500',
|
||||||
|
purple: 'bg-purple-500/10 text-purple-500',
|
||||||
|
gray: 'bg-gray-500/10 text-gray-500',
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-8 pb-10">
|
||||||
|
<section className="flex flex-col md:flex-row md:items-center justify-between gap-4">
|
||||||
|
<div className="space-y-1">
|
||||||
|
<h1 className="text-3xl font-extrabold tracking-tight">文档转 PDF</h1>
|
||||||
|
<p className="text-muted-foreground">将 Word、Excel、文本、Markdown 文件转换为 PDF 格式</p>
|
||||||
|
</div>
|
||||||
|
{files.length > 0 && (
|
||||||
|
<div className="flex gap-2">
|
||||||
|
<Button variant="outline" onClick={handleClear}>
|
||||||
|
<Trash2 size={18} className="mr-2" />
|
||||||
|
清空
|
||||||
|
</Button>
|
||||||
|
<Button onClick={handleDownloadAll} disabled={files.filter(f => f.status === 'success').length === 0}>
|
||||||
|
<Download size={18} className="mr-2" />
|
||||||
|
打包下载 ({files.filter(f => f.status === 'success').length})
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
||||||
|
{/* 左侧:上传区域 */}
|
||||||
|
<div className="lg:col-span-1 space-y-6">
|
||||||
|
{/* 上传卡片 */}
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<Upload className="text-primary" size={20} />
|
||||||
|
上传文件
|
||||||
|
</CardTitle>
|
||||||
|
<CardDescription>拖拽或点击上传要转换的文件</CardDescription>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="space-y-4">
|
||||||
|
<div
|
||||||
|
{...getRootProps()}
|
||||||
|
className={cn(
|
||||||
|
"border-2 border-dashed rounded-2xl p-8 transition-all duration-300 flex flex-col items-center justify-center text-center cursor-pointer group",
|
||||||
|
isDragActive ? "border-primary bg-primary/5" : "border-muted-foreground/20 hover:border-primary/50 hover:bg-primary/5",
|
||||||
|
converting && "opacity-50 pointer-events-none"
|
||||||
|
)}
|
||||||
|
>
|
||||||
|
<input {...getInputProps()} />
|
||||||
|
<div className="w-14 h-14 rounded-xl bg-primary/10 text-primary flex items-center justify-center mb-4 group-hover:scale-110 transition-transform">
|
||||||
|
{converting ? <Loader2 className="animate-spin" size={28} /> : <Upload size={28} />}
|
||||||
|
</div>
|
||||||
|
<p className="font-semibold text-sm">
|
||||||
|
{isDragActive ? '释放以开始上传' : '点击或拖拽文件到这里'}
|
||||||
|
</p>
|
||||||
|
<div className="mt-4 flex flex-wrap justify-center gap-2">
|
||||||
|
{SUPPORTED_FORMATS.map(format => (
|
||||||
|
<Badge key={format.ext} variant="outline" className={cn("text-xs", colorClasses[format.color])}>
|
||||||
|
{format.name}
|
||||||
|
</Badge>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* 转换按钮 */}
|
||||||
|
{files.length > 0 && (
|
||||||
|
<Button
|
||||||
|
onClick={handleConvert}
|
||||||
|
disabled={converting || files.filter(f => f.status === 'pending' || f.status === 'failed').length === 0}
|
||||||
|
className="w-full bg-gradient-to-r from-primary to-purple-600 hover:from-primary/90 hover:to-purple-600/90"
|
||||||
|
>
|
||||||
|
{converting ? (
|
||||||
|
<>
|
||||||
|
<Loader2 className="mr-2 animate-spin" size={16} />
|
||||||
|
转换中... ({convertedCount}/{files.filter(f => f.status === 'pending' || f.status === 'failed').length})
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<FileDown className="mr-2" size={16} />
|
||||||
|
开始转换 ({files.filter(f => f.status === 'pending' || f.status === 'failed').length})
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</Button>
|
||||||
|
)}
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
|
||||||
|
{/* 格式说明 */}
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<FileText className="text-primary" size={20} />
|
||||||
|
支持的格式
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent>
|
||||||
|
<div className="space-y-3">
|
||||||
|
{SUPPORTED_FORMATS.map(format => {
|
||||||
|
const Icon = format.icon;
|
||||||
|
return (
|
||||||
|
<div key={format.ext} className="flex items-center gap-3 p-2 rounded-lg hover:bg-muted/30 transition-colors">
|
||||||
|
<div className={cn("w-8 h-8 rounded flex items-center justify-center", colorClasses[format.color])}>
|
||||||
|
<Icon size={16} />
|
||||||
|
</div>
|
||||||
|
<div className="flex-1">
|
||||||
|
<p className="text-sm font-medium">.{format.ext.toUpperCase()}</p>
|
||||||
|
<p className="text-xs text-muted-foreground">{format.name}</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* 右侧:文件列表 */}
|
||||||
|
<div className="lg:col-span-2 space-y-6">
|
||||||
|
<Card className="border-none shadow-md">
|
||||||
|
<CardHeader>
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="space-y-1">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<FileIcon className="text-primary" size={20} />
|
||||||
|
文件列表
|
||||||
|
</CardTitle>
|
||||||
|
<CardDescription>
|
||||||
|
共 {files.length} 个文件,已转换 {files.filter(f => f.status === 'success').length} 个
|
||||||
|
</CardDescription>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent>
|
||||||
|
{files.length === 0 ? (
|
||||||
|
<div className="text-center py-12 text-muted-foreground">
|
||||||
|
<FileIcon size={48} className="mx-auto mb-4 opacity-30" />
|
||||||
|
<p>暂无文件,上传文件开始转换</p>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="space-y-3">
|
||||||
|
{files.map((fileState, index) => {
|
||||||
|
const Icon = getFileIcon(fileState.file.name);
|
||||||
|
const color = getFileColor(fileState.file.name);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
key={index}
|
||||||
|
className="flex items-center gap-4 p-4 rounded-xl border bg-card hover:bg-muted/30 transition-colors"
|
||||||
|
>
|
||||||
|
<div className={cn("w-10 h-10 rounded-lg flex items-center justify-center shrink-0", colorClasses[color])}>
|
||||||
|
<Icon size={20} />
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="font-semibold truncate">{fileState.file.name}</p>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<span className="text-xs text-muted-foreground">
|
||||||
|
{(fileState.file.size / 1024).toFixed(1)} KB
|
||||||
|
</span>
|
||||||
|
{fileState.status === 'pending' && (
|
||||||
|
<Badge variant="secondary" className="text-xs">待转换</Badge>
|
||||||
|
)}
|
||||||
|
{fileState.status === 'converting' && (
|
||||||
|
<Badge variant="default" className="text-xs bg-blue-500">转换中</Badge>
|
||||||
|
)}
|
||||||
|
{fileState.status === 'success' && (
|
||||||
|
<Badge variant="default" className="text-xs bg-emerald-500">已转换</Badge>
|
||||||
|
)}
|
||||||
|
{fileState.status === 'failed' && (
|
||||||
|
<Badge variant="destructive" className="text-xs">失败</Badge>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{fileState.status === 'converting' && (
|
||||||
|
<div className="mt-1 h-1 bg-muted rounded-full overflow-hidden">
|
||||||
|
<div
|
||||||
|
className="h-full bg-primary transition-all duration-300"
|
||||||
|
style={{ width: `${fileState.progress}%` }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{fileState.error && (
|
||||||
|
<p className="text-xs text-destructive mt-1">{fileState.error}</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-2 shrink-0">
|
||||||
|
{fileState.status === 'success' && (
|
||||||
|
<>
|
||||||
|
<Button variant="ghost" size="icon" onClick={() => handleDownload(fileState)}>
|
||||||
|
<Download size={18} className="text-emerald-500" />
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="icon"
|
||||||
|
onClick={() => {
|
||||||
|
// 复制下载链接到剪贴板
|
||||||
|
if (fileState.pdfBlob) {
|
||||||
|
const url = URL.createObjectURL(fileState.pdfBlob);
|
||||||
|
navigator.clipboard.writeText(url);
|
||||||
|
toast.success('链接已复制');
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Copy size={18} />
|
||||||
|
</Button>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
{(fileState.status === 'pending' || fileState.status === 'failed') && (
|
||||||
|
<Button
|
||||||
|
variant="ghost"
|
||||||
|
size="icon"
|
||||||
|
onClick={() => handleRemove(index)}
|
||||||
|
className="text-destructive hover:bg-destructive/10"
|
||||||
|
>
|
||||||
|
<X size={18} />
|
||||||
|
</Button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
|
||||||
|
{/* 使用说明 */}
|
||||||
|
<Card className="border-none shadow-md bg-gradient-to-br from-primary/5 to-purple-500/5">
|
||||||
|
<CardHeader className="pb-4">
|
||||||
|
<CardTitle className="flex items-center gap-2">
|
||||||
|
<FileText className="text-primary" size={20} />
|
||||||
|
使用说明
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent>
|
||||||
|
<div className="space-y-3 text-sm text-muted-foreground">
|
||||||
|
<div className="flex gap-3">
|
||||||
|
<div className="w-6 h-6 rounded-full bg-primary/10 text-primary flex items-center justify-center shrink-0 text-xs font-bold">1</div>
|
||||||
|
<p>上传要转换的文件,支持 Word(.docx)、Excel(.xlsx)、文本(.txt)、Markdown(.md) 格式</p>
|
||||||
|
</div>
|
||||||
|
<div className="flex gap-3">
|
||||||
|
<div className="w-6 h-6 rounded-full bg-primary/10 text-primary flex items-center justify-center shrink-0 text-xs font-bold">2</div>
|
||||||
|
<p>点击「开始转换」按钮,系统将自动将文件转换为 PDF 格式</p>
|
||||||
|
</div>
|
||||||
|
<div className="flex gap-3">
|
||||||
|
<div className="w-6 h-6 rounded-full bg-primary/10 text-primary flex items-center justify-center shrink-0 text-xs font-bold">3</div>
|
||||||
|
<p>转换完成后,点击下载按钮获取 PDF 文件,或使用「打包下载」一次性下载所有文件</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
export default PdfConverter;
|
||||||
@@ -4,6 +4,7 @@ import Documents from '@/pages/Documents';
|
|||||||
import TemplateFill from '@/pages/TemplateFill';
|
import TemplateFill from '@/pages/TemplateFill';
|
||||||
import InstructionChat from '@/pages/InstructionChat';
|
import InstructionChat from '@/pages/InstructionChat';
|
||||||
import TaskHistory from '@/pages/TaskHistory';
|
import TaskHistory from '@/pages/TaskHistory';
|
||||||
|
import PdfConverter from '@/pages/PdfConverter';
|
||||||
import MainLayout from '@/components/layouts/MainLayout';
|
import MainLayout from '@/components/layouts/MainLayout';
|
||||||
|
|
||||||
export const routes = [
|
export const routes = [
|
||||||
@@ -31,6 +32,10 @@ export const routes = [
|
|||||||
path: '/task-history',
|
path: '/task-history',
|
||||||
element: <TaskHistory />,
|
element: <TaskHistory />,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
path: '/pdf-converter',
|
||||||
|
element: <PdfConverter />,
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -23,7 +23,6 @@
|
|||||||
"noUnusedParameters": true,
|
"noUnusedParameters": true,
|
||||||
"noFallthroughCasesInSwitch": true,
|
"noFallthroughCasesInSwitch": true,
|
||||||
"noUncheckedSideEffectImports": true,
|
"noUncheckedSideEffectImports": true,
|
||||||
"baseUrl": ".",
|
|
||||||
"paths": {
|
"paths": {
|
||||||
"@/*": ["./src/*"]
|
"@/*": ["./src/*"]
|
||||||
},
|
},
|
||||||
|
|||||||
BIN
屏幕截图 2026-04-18 002609.png
Normal file
BIN
屏幕截图 2026-04-18 002609.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 552 KiB |
Reference in New Issue
Block a user