diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..0a37a01 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "WebSearch" + ] + } +} diff --git a/backend/=4.0.0 b/backend/=4.0.0 new file mode 100644 index 0000000..7ce733b --- /dev/null +++ b/backend/=4.0.0 @@ -0,0 +1,7 @@ +Collecting reportlab + Using cached reportlab-4.4.10-py3-none-any.whl.metadata (1.7 kB) +Requirement already satisfied: pillow>=9.0.0 in d:\code\filesreadsystem\backend\venv\lib\site-packages (from reportlab) (12.1.1) +Requirement already satisfied: charset-normalizer in d:\code\filesreadsystem\backend\venv\lib\site-packages (from reportlab) (3.4.6) +Using cached reportlab-4.4.10-py3-none-any.whl (2.0 MB) +Installing collected packages: reportlab +Successfully installed reportlab-4.4.10 diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index a0c7f7a..00d4338 100644 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -15,6 +15,7 @@ from app.api.endpoints import ( health, instruction, # 智能指令 conversation, # 对话历史 + pdf_converter, # PDF转换 ) # 创建主路由 @@ -33,3 +34,4 @@ api_router.include_router(visualization.router) # 可视化 api_router.include_router(analysis_charts.router) # 分析图表 api_router.include_router(instruction.router) # 智能指令 api_router.include_router(conversation.router) # 对话历史 +api_router.include_router(pdf_converter.router) # PDF转换 diff --git a/backend/app/api/endpoints/pdf_converter.py b/backend/app/api/endpoints/pdf_converter.py new file mode 100644 index 0000000..483f697 --- /dev/null +++ b/backend/app/api/endpoints/pdf_converter.py @@ -0,0 +1,208 @@ +""" +PDF 转换 API 接口 + +提供将 Word、Excel、Txt、Markdown 转换为 PDF 的功能 +""" +import logging +import uuid +from typing import Optional + +from fastapi import APIRouter, UploadFile, File, Form, HTTPException +from fastapi.responses import StreamingResponse + +from app.services.pdf_converter_service import 
from app.services.pdf_converter_service import pdf_converter_service
from app.services.file_service import file_service

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/pdf", tags=["PDF转换"])

# Temporary store for converted PDFs awaiting download.
# key: download_id, value: (pdf_content, original_filename)
# NOTE(review): nothing visible in this module ever writes to this cache, so
# GET /pdf/download/{download_id} currently always answers 404 — confirm
# whether a producer exists elsewhere or the endpoint is vestigial.
_pdf_cache: dict = {}


# ==================== Request / response models ====================

class ConvertResponse:
    """Plain result holder for a conversion (success flag, message, filename)."""

    def __init__(self, success: bool, message: str = "", filename: str = ""):
        self.success = success
        self.message = message
        self.filename = filename


# ==================== Helpers ====================

def _split_filename(raw_name: Optional[str]) -> tuple:
    """Split a client-supplied filename into ``(name, stem, ext)``.

    Directory components are discarded because the name comes from the
    client and later ends up in response headers and ZIP entry names.
    ``ext`` is lower-cased and empty when the name contains no dot.
    """
    name = (raw_name or "document").replace("\\", "/").rsplit("/", 1)[-1]
    name = name or "document"
    if "." in name:
        stem, ext = name.rsplit(".", 1)
        return name, (stem or "document"), ext.lower()
    return name, name, ""


def _content_disposition(download_name: str) -> str:
    """Build an RFC 5987 ``Content-Disposition`` value for *download_name*."""
    from urllib.parse import quote

    # Percent-encoding keeps the header ASCII-only, which is what allows
    # non-ASCII (e.g. Chinese) filenames to survive transport.
    return f"attachment; filename*=UTF-8''{quote(download_name, safe='')}"


def _unique_entry_name(stem: str, taken: set) -> str:
    """Return ``<stem>.pdf``, suffixed with a counter if already in *taken*."""
    candidate = f"{stem}.pdf"
    counter = 1
    while candidate in taken:
        candidate = f"{stem} ({counter}).pdf"
        counter += 1
    taken.add(candidate)
    return candidate


# ==================== Endpoints ====================

@router.post("/convert")
async def convert_to_pdf(
    file: UploadFile = File(...),
):
    """
    Convert one uploaded file to PDF.

    Supported formats: docx, xlsx, txt, md

    Args:
        file: the uploaded file

    Returns:
        The PDF as a streamed attachment named after the uploaded file.

    Raises:
        HTTPException: 400 for an unsupported extension or an empty file,
            500 when the conversion itself fails.
    """
    try:
        filename, stem, file_ext = _split_filename(file.filename)

        if file_ext not in pdf_converter_service.supported_formats:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的格式: {file_ext},支持的格式: {', '.join(pdf_converter_service.supported_formats)}"
            )

        content = await file.read()
        if not content:
            raise HTTPException(status_code=400, detail="文件内容为空")

        logger.info(f"开始转换文件: {filename} ({file_ext})")

        pdf_content, error = await pdf_converter_service.convert_to_pdf(
            file_content=content,
            source_format=file_ext,
            filename=stem,
        )

        if error:
            raise HTTPException(status_code=500, detail=error)

        # Previously the header always said "converted.pdf"; use the real name.
        return StreamingResponse(
            iter([pdf_content]),
            media_type="application/pdf",
            headers={"Content-Disposition": _content_disposition(f"{stem}.pdf")},
        )

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception("PDF转换失败: %s", e)
        raise HTTPException(status_code=500, detail=f"转换失败: {str(e)}") from e


@router.get("/download/{download_id}")
async def download_pdf(download_id: str):
    """
    Download a previously converted PDF by its one-time download ID.

    The cache entry is removed as part of the lookup, so each ID can be
    used only once.

    Raises:
        HTTPException: 404 when the ID is unknown or already consumed.
    """
    # Single pop instead of "check, then pop": two concurrent requests for
    # the same ID can no longer both pass the membership test.
    entry = _pdf_cache.pop(download_id, None)
    if entry is None:
        raise HTTPException(status_code=404, detail="下载链接已过期或不存在")

    pdf_content, filename = entry

    return StreamingResponse(
        iter([pdf_content]),
        media_type="application/pdf",
        headers={"Content-Disposition": _content_disposition(f"{filename}.pdf")},
    )


@router.get("/formats")
async def get_supported_formats():
    """
    List the source formats that can be converted to PDF.

    Returns:
        ``{"success": True, "formats": [...]}``
    """
    return {
        "success": True,
        "formats": pdf_converter_service.get_supported_formats()
    }


@router.post("/convert/batch")
async def batch_convert_to_pdf(
    files: list[UploadFile] = File(...),
):
    """
    Convert several uploaded files and return the PDFs in one ZIP archive.

    Files that cannot be converted are skipped; the request only fails when
    no file at all could be converted.

    Args:
        files: the uploaded files

    Returns:
        A ZIP archive containing one PDF per successfully converted file.

    Raises:
        HTTPException: 400 when nothing could be converted, 500 on an
            unexpected failure.
    """
    try:
        import io
        import zipfile

        results = []
        errors = []

        for file in files:
            try:
                filename, stem, file_ext = _split_filename(file.filename)

                if file_ext not in pdf_converter_service.supported_formats:
                    errors.append(f"{filename}: 不支持的格式")
                    continue

                content = await file.read()
                # Same guard as the single-file endpoint.
                if not content:
                    errors.append(f"{filename}: 文件内容为空")
                    continue

                pdf_content, error = await pdf_converter_service.convert_to_pdf(
                    file_content=content,
                    source_format=file_ext,
                    filename=stem,
                )

                if error:
                    errors.append(f"{filename}: {error}")
                else:
                    results.append((stem, pdf_content))

            except Exception as e:
                # Best effort by design: one bad file must not sink the batch.
                errors.append(f"{file.filename}: {str(e)}")

        if not results:
            raise HTTPException(
                status_code=400,
                detail=f"没有可转换的文件。错误: {'; '.join(errors)}"
            )

        zip_buffer = io.BytesIO()
        taken_names: set = set()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for stem, pdf_content in results:
                # Two uploads may share a stem (a.docx + a.txt); keep both.
                zip_file.writestr(_unique_entry_name(stem, taken_names), pdf_content)

        return StreamingResponse(
            iter([zip_buffer.getvalue()]),
            media_type="application/zip",
            headers={"Content-Disposition": _content_disposition("converted_pdfs.zip")},
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception("批量PDF转换失败: %s", e)
        raise HTTPException(status_code=500, detail=f"批量转换失败: {str(e)}") from e
import io
import logging
import platform
import re
import zipfile
from pathlib import Path
from typing import List, Optional, Tuple
from xml.etree import ElementTree
from xml.sax.saxutils import escape

logger = logging.getLogger(__name__)

# Clark-notation prefix of the WordprocessingML namespace used in document.xml.
_WORD_NS = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
# ATX heading: one to six '#', whitespace, then the heading text.
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")
# A '|' that is not escaped as '\|'.
_UNESCAPED_PIPE_RE = re.compile(r"(?<!\\)\|")
# A table line made only of these characters is a header rule or an empty row.
_TABLE_RULE_CHARS = frozenset("|-: ")


class PDFConverterService:
    """Convert docx / xlsx / txt / md content to PDF.

    Every source format is first reduced to a small Markdown subset
    (headings, bullets, fenced code, pipe tables, plain paragraphs), which is
    then rendered with reportlab.

    reportlab is imported inside the methods that use it, and a failed font
    setup at construction time is retried on the first conversion. This keeps
    an optional PDF feature from breaking import of the whole API package.
    """

    # Page margin in points, applied on all four sides.
    _PAGE_MARGIN = 72
    # Upper bound for a table column width in points.
    _MAX_COL_WIDTH = 100
    # Caps that keep the generated PDF bounded for very large inputs.
    _MAX_DOCX_LINES = 500
    _MAX_SHEETS = 10
    _MAX_SHEET_ROWS = 50

    # Maps a parsed block kind to the paragraph style used to render it.
    _STYLE_BY_KIND = {
        "heading": "ChineseHeading",
        "bullet": "ChineseBody",
        "body": "ChineseBody",
        "code": "ChineseCode",
    }

    def __init__(self):
        self.supported_formats = ["docx", "xlsx", "txt", "md"]
        self._font_name = None
        self._styles = None
        self._page_width = None
        self._page_height = None
        try:
            self._setup_fonts()
        except Exception:
            # _setup_fonts has already logged the cause; the first conversion
            # retries and reports the error to the caller.
            logger.warning("PDF服务初始化失败,将在首次转换时重试")

    # ------------------------------------------------------------------
    # Fonts and styles
    # ------------------------------------------------------------------

    def _setup_fonts(self):
        """Register a CJK-capable font if available and build paragraph styles.

        Raises:
            Exception: re-raised after logging when reportlab is unavailable
                or the stylesheet cannot be built.
        """
        try:
            from reportlab.lib.enums import TA_CENTER, TA_JUSTIFY
            from reportlab.lib.pagesizes import A4
            from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
            from reportlab.pdfbase import pdfmetrics
            from reportlab.pdfbase.pdfmetrics import registerFontFamily
            from reportlab.pdfbase.ttfonts import TTFont

            self._page_width, self._page_height = A4

            font_name = 'Helvetica'
            found_any = False
            # Try every installed candidate, not only the first one: some
            # system fonts exist on disk but cannot be embedded.
            for font_path in self._chinese_font_candidates():
                if not Path(font_path).exists():
                    continue
                found_any = True
                try:
                    pdfmetrics.registerFont(TTFont('ChineseFont', font_path))
                    registerFontFamily('ChineseFont', normal='ChineseFont')
                except Exception as e:
                    logger.warning(f"字体注册失败: {e} ({font_path})")
                    continue
                font_name = 'ChineseFont'
                logger.info(f"成功注册中文字体: {font_path}")
                break

            if not found_any:
                logger.warning("未找到中文字体,使用 Helvetica(不支持中文)")
            elif font_name == 'Helvetica':
                logger.warning("所有中文字体注册失败,使用 Helvetica(不支持中文)")
            self._font_name = font_name

            # CJK text has no spaces, so reportlab needs the CJK wrapping
            # mode or long lines run off the page.
            wrap_mode = 'CJK' if font_name == 'ChineseFont' else None

            styles = getSampleStyleSheet()
            styles.add(ParagraphStyle(
                name='ChineseTitle',
                fontName=font_name,
                fontSize=16,
                leading=22,
                alignment=TA_CENTER,
                spaceAfter=12,
                wordWrap=wrap_mode,
            ))
            styles.add(ParagraphStyle(
                name='ChineseHeading',
                fontName=font_name,
                fontSize=14,
                leading=20,
                spaceBefore=10,
                spaceAfter=8,
                wordWrap=wrap_mode,
            ))
            styles.add(ParagraphStyle(
                name='ChineseBody',
                fontName=font_name,
                fontSize=10,
                leading=14,
                alignment=TA_JUSTIFY,
                spaceAfter=6,
                wordWrap=wrap_mode,
            ))
            styles.add(ParagraphStyle(
                name='ChineseCode',
                fontName='Courier',
                fontSize=9,
                leading=12,
            ))

            self._styles = styles
            logger.info("PDF服务初始化完成")

        except Exception as e:
            logger.error(f"PDF服务初始化失败: {e}")
            raise

    def _chinese_font_candidates(self) -> List[str]:
        """Return well-known CJK font paths for the current platform."""
        system = platform.system()
        if system == "Windows":
            return [
                "C:/Windows/Fonts/simhei.ttf",
                "C:/Windows/Fonts/simsun.ttc",
                "C:/Windows/Fonts/msyh.ttc",
                "C:/Windows/Fonts/simsun.ttf",
            ]
        if system == "Darwin":
            return [
                "/System/Library/Fonts/STHeiti Light.ttc",
                "/System/Library/Fonts/PingFang.ttc",
                "/Library/Fonts/Arial Unicode.ttf",
            ]
        return [
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        ]

    def _find_chinese_font(self) -> Optional[str]:
        """Return the first candidate font path that exists, or ``None``."""
        for font in self._chinese_font_candidates():
            if Path(font).exists():
                return font
        return None

    # ------------------------------------------------------------------
    # Text helpers
    # ------------------------------------------------------------------

    def _sanitize_text(self, text: str) -> str:
        """Return *text* without NUL characters; falsy input becomes ``""``."""
        if not text:
            return ""
        return text.replace('\x00', '')

    def _escape_markup(self, text: str) -> str:
        """Sanitize *text* and escape ``& < >`` for use inside a Paragraph.

        reportlab's Paragraph parses its text as XML-like markup, so raw
        document text must be escaped or it breaks the build.
        """
        return escape(self._sanitize_text(text))

    def _markdown_cell(self, value) -> str:
        """Render one spreadsheet value as a single-line Markdown table cell."""
        if value is None:
            return ""
        # Line breaks and bare pipes would corrupt the row structure.
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    def _decode_content(self, file_content: bytes) -> str:
        """Decode *file_content*, trying UTF-8 first and then Chinese codecs.

        ``utf-8-sig`` is used instead of ``utf-8`` so a leading BOM is
        dropped rather than leaking into the text.
        """
        encodings = ['utf-8-sig', 'gbk', 'gb2312', 'gb18030', 'latin-1']
        for enc in encodings:
            try:
                return file_content.decode(enc)
            except (UnicodeDecodeError, LookupError):
                continue
        # latin-1 accepts any byte sequence, so this is only a safety net.
        return file_content.decode('utf-8', errors='replace')

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def convert_to_pdf(
        self,
        file_content: bytes,
        source_format: str,
        filename: str = "document"
    ) -> Tuple[bytes, str]:
        """Convert a document to PDF.

        Args:
            file_content: raw bytes of the source document.
            source_format: one of ``supported_formats`` (case-insensitive).
            filename: title shown at the top of the PDF.

        Returns:
            ``(pdf_bytes, "")`` on success, ``(b"", error_message)`` on
            failure. This method does not raise.
        """
        try:
            if source_format.lower() not in self.supported_formats:
                return b"", f"不支持的格式: {source_format}"

            # Step 1: reduce the source to Markdown.
            markdown_content, error = await self._convert_to_markdown(file_content, source_format, filename)
            if error:
                return b"", error

            # Step 2: render the Markdown as PDF.
            return await self._convert_markdown_to_pdf(markdown_content, filename)

        except Exception as e:
            logger.exception("PDF转换失败: %s", e)
            return b"", f"转换失败: {str(e)}"

    def get_supported_formats(self) -> List[str]:
        """Return a copy of the supported source formats."""
        return list(self.supported_formats)

    # ------------------------------------------------------------------
    # Source format -> Markdown
    # ------------------------------------------------------------------

    async def _convert_to_markdown(
        self,
        file_content: bytes,
        source_format: str,
        filename: str
    ) -> Tuple[str, str]:
        """Dispatch to the Markdown converter for *source_format*."""
        converters = {
            "docx": self._convert_docx_to_markdown,
            "xlsx": self._convert_xlsx_to_markdown,
            "txt": self._convert_txt_to_markdown,
            "md": self._convert_md_to_markdown,
        }
        return await converters[source_format.lower()](file_content, filename)

    async def _convert_txt_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Wrap plain text under a ``# filename`` heading."""
        try:
            text = self._sanitize_text(self._decode_content(file_content))
            return f"# {filename}\n\n{text}", ""
        except Exception as e:
            logger.error(f"Txt转Markdown失败: {e}")
            return "", f"文本文件处理失败: {str(e)}"

    async def _convert_md_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Decode Markdown and prepend a ``# filename`` heading."""
        try:
            content = self._sanitize_text(self._decode_content(file_content))
            return f"# {filename}\n\n{content}", ""
        except Exception as e:
            logger.error(f"Markdown处理失败: {e}")
            return "", f"Markdown处理失败: {str(e)}"

    @staticmethod
    def _collect_docx_text(node, parts: List[str]) -> None:
        """Append the visible text below *node* to *parts*, in document order.

        Nested paragraphs are skipped because the caller visits every
        paragraph itself. Property subtrees are skipped because ``w:tab``
        there is a tab-stop definition, not a tab character.
        """
        for child in node:
            tag = child.tag
            if tag in (_WORD_NS + "p", _WORD_NS + "pPr", _WORD_NS + "rPr"):
                continue
            if tag == _WORD_NS + "t":
                parts.append(child.text or "")
            elif tag in (_WORD_NS + "br", _WORD_NS + "cr"):
                parts.append("\n")
            elif tag == _WORD_NS + "tab":
                parts.append("\t")
            else:
                PDFConverterService._collect_docx_text(child, parts)

    async def _convert_docx_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Extract paragraph text from a DOCX as Markdown lines.

        The archive is read directly (no python-docx) and ``document.xml``
        is parsed as XML. Stripping tags with regexes would split a
        paragraph at inline elements such as ``w:proofErr``. Output is capped
        at ``_MAX_DOCX_LINES`` text lines.
        """
        try:
            try:
                with zipfile.ZipFile(io.BytesIO(file_content), 'r') as zf:
                    xml_bytes = zf.read('word/document.xml')
            except zipfile.BadZipFile:
                return "", "文件不是有效的 DOCX 格式"
            except KeyError:
                return "", "DOCX 文件损坏:找不到 document.xml"

            # document.xml never needs a DTD; refusing one blocks
            # entity-expansion attacks from untrusted uploads.
            if b"<!DOCTYPE" in xml_bytes or b"<!ENTITY" in xml_bytes:
                return "", "DOCX 文件损坏:document.xml 包含不允许的 DTD"

            try:
                root = ElementTree.fromstring(xml_bytes)
            except ElementTree.ParseError:
                return "", "DOCX 文件损坏:document.xml 无法解析"

            lines = [f"# {filename}", ""]
            remaining = self._MAX_DOCX_LINES
            for paragraph in root.iter(_WORD_NS + "p"):
                if remaining <= 0:
                    break
                parts: List[str] = []
                self._collect_docx_text(paragraph, parts)
                for piece in "".join(parts).split("\n"):
                    piece = self._sanitize_text(piece).strip()
                    if piece and remaining > 0:
                        lines.append(piece)
                        remaining -= 1

            return '\n'.join(lines), ""

        except Exception as e:
            logger.exception("Word转Markdown失败: %s", e)
            return "", f"Word文档处理失败: {str(e)}"

    async def _convert_xlsx_to_markdown(self, file_content: bytes, filename: str) -> Tuple[str, str]:
        """Render the first sheets of a workbook as Markdown pipe tables.

        At most ``_MAX_SHEETS`` worksheets and ``_MAX_SHEET_ROWS`` rows per
        sheet are emitted; fully empty rows are skipped.
        """
        try:
            import openpyxl

            wb = openpyxl.load_workbook(io.BytesIO(file_content))
            lines = [f"# {filename} - Excel数据", ""]

            # wb.worksheets excludes chartsheets, which have no iter_rows().
            for ws in wb.worksheets[:self._MAX_SHEETS]:
                lines.append(f"## 工作表: {ws.title}")
                lines.append("")

                header_written = False
                for row in ws.iter_rows(max_row=self._MAX_SHEET_ROWS, values_only=True):
                    cells = [self._markdown_cell(value) for value in row]
                    if not any(cells):
                        continue
                    lines.append("| " + " | ".join(cells) + " |")
                    # The rule goes after the first emitted row, even when
                    # the sheet starts with blank rows.
                    if not header_written:
                        lines.append("| " + " | ".join(["---"] * len(cells)) + " |")
                        header_written = True

                lines.append("")

            return "\n".join(lines), ""

        except Exception as e:
            logger.error(f"Excel转Markdown失败: {e}")
            return "", f"Excel处理失败: {str(e)}"

    # ------------------------------------------------------------------
    # Markdown -> PDF
    # ------------------------------------------------------------------

    def _split_table_row(self, line: str) -> List[str]:
        """Split one pipe-table line into stripped cell strings.

        A missing trailing pipe no longer drops the last cell, and ``\\|`` is
        treated as a literal pipe inside a cell.
        """
        pieces = _UNESCAPED_PIPE_RE.split(line.strip())
        cells = pieces[1:]  # text before the leading pipe is never a cell
        if cells and cells[-1] == "":
            cells = cells[:-1]  # the line ended with a closing pipe
        return [cell.strip().replace("\\|", "|") for cell in cells]

    def _parse_markdown_blocks(self, markdown_content: str) -> List[Tuple[str, object]]:
        """Parse the supported Markdown subset into ``(kind, payload)`` blocks.

        Kinds: ``"heading"``, ``"bullet"``, ``"body"`` and ``"code"`` carry a
        text payload. ``"spacer"`` carries ``""``. ``"table"`` carries a list
        of rows, padded so every row has the same number of cells.
        Consecutive table lines are grouped into one table.
        """
        blocks: List[Tuple[str, object]] = []
        table_rows: List[List[str]] = []
        in_code = False

        def flush_table() -> None:
            if not table_rows:
                return
            width = max(len(row) for row in table_rows)
            blocks.append(("table", [row + [""] * (width - len(row)) for row in table_rows]))
            table_rows.clear()

        for raw_line in markdown_content.split('\n'):
            line = raw_line.strip()

            if line.startswith('```'):
                flush_table()
                in_code = not in_code
                blocks.append(("spacer", ""))
                continue

            if in_code:
                blocks.append(("code", line))
                continue

            if line.startswith('|'):
                # Header rule ("| --- | --- |") or a row of empty cells.
                if set(line) <= _TABLE_RULE_CHARS:
                    continue
                table_rows.append(self._split_table_row(line))
                continue

            flush_table()

            if not line:
                blocks.append(("spacer", ""))
                continue

            heading = _HEADING_RE.match(line)
            if heading:
                blocks.append(("heading", heading.group(2)))
            elif line.startswith('- ') or line.startswith('* '):
                blocks.append(("bullet", line[2:]))
            else:
                blocks.append(("body", line))

        flush_table()
        return blocks

    async def _convert_markdown_to_pdf(self, markdown_content: str, filename: str) -> Tuple[bytes, str]:
        """Render the Markdown subset as an A4 PDF.

        Returns:
            ``(pdf_bytes, "")`` on success, ``(b"", error_message)`` on
            failure. This method does not raise.
        """
        try:
            from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle

            if self._styles is None:
                # Construction-time setup failed; retry and let the error
                # reach the caller through the tuple below.
                self._setup_fonts()

            logger.info(f"Markdown转PDF开始 - filename={filename}, 字体={self._font_name}")

            styles = self._styles
            usable_width = self._page_width - 2 * self._PAGE_MARGIN

            story = [
                Paragraph(self._escape_markup(filename), styles['ChineseTitle']),
                Spacer(1, 12),
            ]

            for kind, payload in self._parse_markdown_blocks(markdown_content):
                if kind == "spacer":
                    story.append(Spacer(1, 6))
                elif kind == "table":
                    if not payload or not payload[0]:
                        continue
                    column_count = len(payload[0])
                    # Shrink columns so wide tables stay inside the margins.
                    column_width = min(self._MAX_COL_WIDTH, usable_width / column_count)
                    table = Table(payload, colWidths=[column_width] * column_count)
                    table.setStyle(TableStyle([
                        ('FONTNAME', (0, 0), (-1, -1), self._font_name),
                        ('FONTSIZE', (0, 0), (-1, -1), 9),
                        ('GRID', (0, 0), (-1, -1), 0.5, '#999999'),
                        ('BACKGROUND', (0, 0), (-1, 0), '#4472C4'),
                        ('TEXTCOLOR', (0, 0), (-1, 0), '#FFFFFF'),
                    ]))
                    story.append(table)
                    story.append(Spacer(1, 6))
                else:
                    prefix = "• " if kind == "bullet" else ""
                    story.append(Paragraph(
                        prefix + self._escape_markup(payload),
                        styles[self._STYLE_BY_KIND[kind]],
                    ))

            logger.info(f"准备构建PDF,story长度={len(story)}")

            buffer = io.BytesIO()
            SimpleDocTemplate(
                buffer,
                pagesize=(self._page_width, self._page_height),
                rightMargin=self._PAGE_MARGIN,
                leftMargin=self._PAGE_MARGIN,
                topMargin=self._PAGE_MARGIN,
                bottomMargin=self._PAGE_MARGIN,
            ).build(story)

            return buffer.getvalue(), ""

        except Exception as e:
            logger.exception("Markdown转PDF失败: %s", e)
            return b"", f"Markdown转PDF失败: {str(e)}"


# Module-level singleton shared by the API layer.
pdf_converter_service = PDFConverterService()
b/frontend/src/db/backend-api.ts index b5ecdcb..be74b44 100644 --- a/frontend/src/db/backend-api.ts +++ b/frontend/src/db/backend-api.ts @@ -1153,6 +1153,120 @@ export const backendApi = { } }, + // ==================== PDF 转换 API ==================== + + /** + * 将文件转换为 PDF + */ + /** + * PDF转换并直接下载(使用XHR,支持IDM拦截) + */ + async convertAndDownloadPdf(file: File): Promise { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + xhr.open('POST', `${BACKEND_BASE_URL}/pdf/convert`); + + xhr.onload = function() { + if (xhr.status >= 200 && xhr.status < 300) { + // 创建 blob 并触发下载 + const blob = xhr.response; + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `${file.name.replace(/\.[^.]+$/, '')}.pdf`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + resolve(); + } else { + reject(new Error(`转换失败: ${xhr.status}`)); + } + }; + + xhr.onerror = function() { + reject(new Error('网络错误')); + }; + + const formData = new FormData(); + formData.append('file', file); + xhr.responseType = 'blob'; + xhr.send(formData); + }); + }, + + /** + * PDF转换(返回Blob) + */ + async convertToPdf(file: File): Promise { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + xhr.open('POST', `${BACKEND_BASE_URL}/pdf/convert`); + + xhr.onload = function() { + if (xhr.status >= 200 && xhr.status < 300) { + resolve(xhr.response); + } else { + reject(new Error(`转换失败: ${xhr.status}`)); + } + }; + + xhr.onerror = function() { + reject(new Error('网络错误')); + }; + + const formData = new FormData(); + formData.append('file', file); + xhr.responseType = 'blob'; + xhr.send(formData); + }); + }, + + /** + * 批量将文件转换为 PDF + */ + async batchConvertToPdf(files: File[]): Promise { + const formData = new FormData(); + files.forEach(file => formData.append('files', file)); + + const url = `${BACKEND_BASE_URL}/pdf/convert/batch`; + + try { + const 
response = await fetch(url, { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail || '批量PDF转换失败'); + } + + return await response.blob(); + } catch (error) { + console.error('批量PDF转换失败:', error); + throw error; + } + }, + + /** + * 获取支持的 PDF 转换格式 + */ + async getPdfSupportedFormats(): Promise<{ + success: boolean; + formats: string[]; + }> { + const url = `${BACKEND_BASE_URL}/pdf/formats`; + + try { + const response = await fetch(url); + if (!response.ok) throw new Error('获取支持的格式失败'); + return await response.json(); + } catch (error) { + console.error('获取支持的格式失败:', error); + return { success: false, formats: ['docx', 'xlsx', 'txt', 'md'] }; + } + } }; // ==================== AI 分析 API ==================== @@ -1805,5 +1919,6 @@ export const aiApi = { console.error('获取会话列表失败:', error); return { success: false, conversations: [] }; } - } + }, + }; diff --git a/frontend/src/pages/PdfConverter.tsx b/frontend/src/pages/PdfConverter.tsx new file mode 100644 index 0000000..b444f7a --- /dev/null +++ b/frontend/src/pages/PdfConverter.tsx @@ -0,0 +1,446 @@ +/** + * PDF 转换页面 + * 支持将 Word、Excel、Txt、Markdown 格式转换为 PDF + */ +import React, { useState, useCallback } from 'react'; +import { useDropzone } from 'react-dropzone'; +import { + FileText, + Upload, + Download, + FileSpreadsheet, + File as FileIcon, + Loader2, + CheckCircle, + AlertCircle, + Trash2, + FileDown, + X, + Copy +} from 'lucide-react'; +import { Button } from '@/components/ui/button'; +import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card'; +import { Badge } from '@/components/ui/badge'; +import { Label } from '@/components/ui/label'; +import { toast } from 'sonner'; +import { cn } from '@/lib/utils'; +import { backendApi } from '@/db/backend-api'; + +type FileState = { + file: File; + status: 'pending' | 'converting' | 'success' | 'failed'; + progress: number; + pdfBlob?: Blob; + error?: 
string; +}; + +const SUPPORTED_FORMATS = [ + { ext: 'docx', name: 'Word 文档', icon: FileText, color: 'blue' }, + { ext: 'xlsx', name: 'Excel 表格', icon: FileSpreadsheet, color: 'emerald' }, + { ext: 'txt', name: '文本文件', icon: FileIcon, color: 'gray' }, + { ext: 'md', name: 'Markdown', icon: FileText, color: 'purple' }, +]; + +const PdfConverter: React.FC = () => { + const [files, setFiles] = useState([]); + const [converting, setConverting] = useState(false); + const [convertedCount, setConvertedCount] = useState(0); + + const onDrop = useCallback((acceptedFiles: File[]) => { + const newFiles: FileState[] = acceptedFiles.map(file => ({ + file, + status: 'pending', + progress: 0, + })); + setFiles(prev => [...prev, ...newFiles]); + }, []); + + const { getRootProps, getInputProps, isDragActive } = useDropzone({ + onDrop, + accept: { + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], + 'application/vnd.ms-excel': ['.xls'], + 'text/markdown': ['.md'], + 'text/plain': ['.txt'], + }, + multiple: true, + }); + + const handleConvert = async () => { + if (files.length === 0) { + toast.error('请先上传文件'); + return; + } + + setConverting(true); + setConvertedCount(0); + + const pendingFiles = files.filter(f => f.status === 'pending' || f.status === 'failed'); + let successCount = 0; + + for (let i = 0; i < pendingFiles.length; i++) { + const fileState = pendingFiles[i]; + const fileIndex = files.findIndex(f => f.file === fileState.file); + + // 更新状态为转换中 + setFiles(prev => prev.map((f, idx) => + idx === fileIndex ? 
{ ...f, status: 'converting', progress: 10 } : f + )); + + try { + // 获取 PDF blob + const pdfBlob = await backendApi.convertToPdf(fileState.file); + + // 触发下载 + const url = URL.createObjectURL(pdfBlob); + const a = document.createElement('a'); + a.href = url; + a.download = `${fileState.file.name.replace(/\.[^.]+$/, '')}.pdf`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + + // 保存 blob 以便批量下载 + setFiles(prev => prev.map((f, idx) => + idx === fileIndex ? { ...f, status: 'success', progress: 100, pdfBlob } : f + )); + successCount++; + setConvertedCount(successCount); + toast.success(`${fileState.file.name} 下载已开始`); + } catch (error: any) { + setFiles(prev => prev.map((f, idx) => + idx === fileIndex ? { ...f, status: 'failed', error: error.message || '转换失败' } : f + )); + } + } + + setConverting(false); + toast.success(`转换完成:${successCount}/${pendingFiles.length} 个文件`); + }; + + const handleDownload = (fileState: FileState) => { + if (!fileState.pdfBlob) return; + + const url = URL.createObjectURL(fileState.pdfBlob); + const link = document.createElement('a'); + link.href = url; + link.download = `${fileState.file.name.replace(/\.[^.]+$/, '')}.pdf`; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + }; + + const handleDownloadAll = async () => { + const successFiles = files.filter(f => f.status === 'success' && f.pdfBlob); + + if (successFiles.length === 0) { + toast.error('没有可下载的文件'); + return; + } + + if (successFiles.length === 1) { + handleDownload(successFiles[0]); + return; + } + + // 多个文件,下载 ZIP + try { + const zipBlob = await backendApi.batchConvertToPdf( + successFiles.map(f => f.file) + ); + const url = URL.createObjectURL(zipBlob); + const link = document.createElement('a'); + link.href = url; + link.download = 'converted_pdfs.zip'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + 
URL.revokeObjectURL(url); + toast.success('ZIP 下载开始'); + } catch (error: any) { + toast.error(error.message || '下载失败'); + } + }; + + const handleRemove = (index: number) => { + setFiles(prev => prev.filter((_, i) => i !== index)); + }; + + const handleClear = () => { + setFiles([]); + setConvertedCount(0); + }; + + const getFileIcon = (filename: string) => { + const ext = filename.split('.').pop()?.toLowerCase(); + const format = SUPPORTED_FORMATS.find(f => f.ext === ext); + if (!format) return FileIcon; + return format.icon; + }; + + const getFileColor = (filename: string) => { + const ext = filename.split('.').pop()?.toLowerCase(); + const format = SUPPORTED_FORMATS.find(f => f.ext === ext); + return format?.color || 'gray'; + }; + + const colorClasses: Record = { + blue: 'bg-blue-500/10 text-blue-500', + emerald: 'bg-emerald-500/10 text-emerald-500', + purple: 'bg-purple-500/10 text-purple-500', + gray: 'bg-gray-500/10 text-gray-500', + }; + + return ( +
+
+
+

文档转 PDF

+

将 Word、Excel、文本、Markdown 文件转换为 PDF 格式

+
+ {files.length > 0 && ( +
+ + +
+ )} +
+ +
+ {/* 左侧:上传区域 */} +
+ {/* 上传卡片 */} + + + + + 上传文件 + + 拖拽或点击上传要转换的文件 + + +
+ +
+ {converting ? : } +
+

+ {isDragActive ? '释放以开始上传' : '点击或拖拽文件到这里'} +

+
+ {SUPPORTED_FORMATS.map(format => ( + + {format.name} + + ))} +
+
+ + {/* 转换按钮 */} + {files.length > 0 && ( + + )} +
+
+ + {/* 格式说明 */} + + + + + 支持的格式 + + + +
+ {SUPPORTED_FORMATS.map(format => { + const Icon = format.icon; + return ( +
+
+ +
+
+

.{format.ext.toUpperCase()}

+

{format.name}

+
+
+ ); + })} +
+
+
+
+ + {/* 右侧:文件列表 */} +
+ + +
+
+ + + 文件列表 + + + 共 {files.length} 个文件,已转换 {files.filter(f => f.status === 'success').length} 个 + +
+
+
+ + {files.length === 0 ? ( +
+ +

暂无文件,上传文件开始转换

+
+ ) : ( +
+ {files.map((fileState, index) => { + const Icon = getFileIcon(fileState.file.name); + const color = getFileColor(fileState.file.name); + + return ( +
+
+ +
+
+

{fileState.file.name}

+
+ + {(fileState.file.size / 1024).toFixed(1)} KB + + {fileState.status === 'pending' && ( + 待转换 + )} + {fileState.status === 'converting' && ( + 转换中 + )} + {fileState.status === 'success' && ( + 已转换 + )} + {fileState.status === 'failed' && ( + 失败 + )} +
+ {fileState.status === 'converting' && ( +
+
+
+ )} + {fileState.error && ( +

{fileState.error}

+ )} +
+
+ {fileState.status === 'success' && ( + <> + + + + )} + {(fileState.status === 'pending' || fileState.status === 'failed') && ( + + )} +
+
+ ); + })} +
+ )} + + + + {/* 使用说明 */} + + + + + 使用说明 + + + +
+
+
1
+

上传要转换的文件,支持 Word(.docx)、Excel(.xlsx)、文本(.txt)、Markdown(.md) 格式

+
+
+
2
+

点击「开始转换」按钮,系统将自动将文件转换为 PDF 格式

+
+
+
3
+

转换完成后,点击下载按钮获取 PDF 文件,或使用「打包下载」一次性下载所有文件

+
+
+
+
+
+
+
+ ); +}; + +export default PdfConverter; diff --git a/frontend/src/routes.tsx b/frontend/src/routes.tsx index 2b26b87..c815735 100644 --- a/frontend/src/routes.tsx +++ b/frontend/src/routes.tsx @@ -4,6 +4,7 @@ import Documents from '@/pages/Documents'; import TemplateFill from '@/pages/TemplateFill'; import InstructionChat from '@/pages/InstructionChat'; import TaskHistory from '@/pages/TaskHistory'; +import PdfConverter from '@/pages/PdfConverter'; import MainLayout from '@/components/layouts/MainLayout'; export const routes = [ @@ -31,6 +32,10 @@ export const routes = [ path: '/task-history', element: , }, + { + path: '/pdf-converter', + element: , + }, ], }, { diff --git a/frontend/tsconfig.app.json b/frontend/tsconfig.app.json index d6f64d5..30c8423 100644 --- a/frontend/tsconfig.app.json +++ b/frontend/tsconfig.app.json @@ -23,7 +23,6 @@ "noUnusedParameters": true, "noFallthroughCasesInSwitch": true, "noUncheckedSideEffectImports": true, - "baseUrl": ".", "paths": { "@/*": ["./src/*"] }, diff --git a/屏幕截图 2026-04-18 002609.png b/屏幕截图 2026-04-18 002609.png new file mode 100644 index 0000000..6a36ecb Binary files /dev/null and b/屏幕截图 2026-04-18 002609.png differ