- 后端添加 PDF 转换服务,支持 Word(docx)、Excel(xlsx)、文本(txt)、Markdown(md) 格式转换为 PDF - 使用 reportlab 库,支持中文字体(simhei.ttf) - 添加 FastAPI 接口:POST /api/v1/pdf/convert 单文件转换,POST /api/v1/pdf/convert/batch 批量转换 - 前端添加 PdfConverter 页面,支持拖拽上传、转换进度显示、批量下载 - 转换流程:所有格式先转为 Markdown,再通过 Markdown 转 PDF,保证输出一致性 - DOCX 解析使用 zipfile 直接读取 XML,避免 python-docx 的兼容性问题
209 lines
6.0 KiB
Python
209 lines
6.0 KiB
Python
"""
|
||
PDF 转换 API 接口
|
||
|
||
提供将 Word、Excel、Txt、Markdown 转换为 PDF 的功能
|
||
"""
|
||
import logging
|
||
import uuid
|
||
from typing import Optional
|
||
|
||
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
|
||
from fastapi.responses import StreamingResponse
|
||
|
||
from app.services.pdf_converter_service import pdf_converter_service
|
||
from app.services.file_service import file_service
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for every endpoint in this module; routes are registered under the
# "/pdf" prefix.
router = APIRouter(tags=["PDF转换"], prefix="/pdf")

# Temporary in-memory store for converted PDFs.
#   key:   download_id
#   value: (pdf_content, original_filename)
# NOTE(review): entries are only removed when downloaded; nothing in the code
# shown here adds entries or expires them -- confirm where it is populated.
_pdf_cache: dict = {}
|
||
|
||
|
||
# ==================== 请求/响应模型 ====================
|
||
|
||
class ConvertResponse:
    """
    Simple value holder describing the outcome of a conversion.

    Attributes:
        success: Whether the conversion succeeded.
        message: Optional human-readable message (defaults to "").
        filename: Optional file name associated with the result
            (defaults to "").
    """

    def __init__(self, success: bool, message: str = "", filename: str = ""):
        # The three values are stored verbatim; no validation is performed.
        self.success, self.message, self.filename = success, message, filename
|
||
|
||
|
||
# ==================== 接口 ====================
|
||
|
||
@router.post("/convert")
async def convert_to_pdf(
    file: UploadFile = File(...),
):
    """
    Convert a single uploaded file to PDF and stream the result back.

    The source format is taken from the upload's file extension and must be
    one of ``pdf_converter_service.supported_formats``.

    Args:
        file: The uploaded source file.

    Returns:
        A ``StreamingResponse`` with media type ``application/pdf``. The
        attachment is named after the uploaded file (extension replaced by
        ``.pdf``), RFC 5987 percent-encoded so non-ASCII names survive.

    Raises:
        HTTPException: 400 for an unsupported extension or an empty upload;
            500 when the converter reports an error or an unexpected
            exception occurs.
    """
    from urllib.parse import quote

    try:
        # Split "name.ext" once; a name without a dot has no extension.
        filename = file.filename or "document"
        if '.' in filename:
            base_name, file_ext = filename.rsplit('.', 1)
            file_ext = file_ext.lower()
        else:
            base_name, file_ext = filename, ''

        if file_ext not in pdf_converter_service.supported_formats:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的格式: {file_ext},支持的格式: {', '.join(pdf_converter_service.supported_formats)}"
            )

        content = await file.read()
        if not content:
            raise HTTPException(status_code=400, detail="文件内容为空")

        logger.info("开始转换文件: %s (%s)", filename, file_ext)

        # The service returns a (pdf_content, error) pair; a truthy error
        # means the conversion failed.
        pdf_content, error = await pdf_converter_service.convert_to_pdf(
            file_content=content,
            source_format=file_ext,
            filename=base_name
        )

        if error:
            raise HTTPException(status_code=500, detail=error)

        # Name the download after the upload instead of a fixed
        # "converted.pdf". safe="" percent-encodes every reserved character
        # (including "/"), which keeps the header value pure ASCII.
        download_name = quote(f"{base_name or 'converted'}.pdf", safe="")

        return StreamingResponse(
            iter([pdf_content]),
            media_type="application/pdf",
            headers={
                "Content-Disposition": f"attachment; filename*=UTF-8''{download_name}"
            }
        )

    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error did not.
        logger.exception("PDF转换失败: %s", e)
        raise HTTPException(status_code=500, detail=f"转换失败: {str(e)}") from e
|
||
|
||
|
||
@router.get("/download/{download_id}")
async def download_pdf(download_id: str):
    """
    Serve a previously converted PDF by its download ID.

    This is a plain GET endpoint (the original note says this lets download
    managers such as IDM intercept the request). The cache entry is removed
    when it is served, so each download ID works only once.

    Args:
        download_id: Key of the entry in the module-level ``_pdf_cache``.

    Returns:
        A ``StreamingResponse`` with media type ``application/pdf`` whose
        attachment name is ``<cached filename>.pdf``, RFC 5987
        percent-encoded so non-ASCII file names are supported.

    Raises:
        HTTPException: 404 when the ID is not (or no longer) in the cache.
    """
    import urllib.parse

    # Single lookup that also removes the entry (one-shot download link).
    cached = _pdf_cache.pop(download_id, None)
    if cached is None:
        raise HTTPException(status_code=404, detail="下载链接已过期或不存在")
    pdf_content, filename = cached

    # safe="" also escapes "/", which quote() leaves untouched by default and
    # which is not a valid character in an RFC 5987 ext-value.
    encoded_filename = urllib.parse.quote(f"{filename}.pdf", safe="")

    return StreamingResponse(
        iter([pdf_content]),
        media_type="application/pdf",
        headers={
            "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
        }
    )
|
||
|
||
|
||
@router.get("/formats")
async def get_supported_formats():
    """
    List the source file formats the converter accepts.

    Returns:
        A dict with ``success`` set to ``True`` and ``formats`` holding
        whatever ``pdf_converter_service.get_supported_formats()`` returns.
    """
    formats = pdf_converter_service.get_supported_formats()
    return dict(success=True, formats=formats)
|
||
|
||
|
||
@router.post("/convert/batch")
async def batch_convert_to_pdf(
    files: list[UploadFile] = File(...),
):
    """
    Convert several uploaded files to PDF and return them in one ZIP archive.

    Conversion is best-effort per file: a file with an unsupported extension,
    empty content, a converter error or an unexpected exception is skipped and
    recorded. The request fails only when no file could be converted;
    otherwise the skipped files are logged as a warning.

    Args:
        files: The uploaded source files. Each extension must be one of
            ``pdf_converter_service.supported_formats``.

    Returns:
        A ``StreamingResponse`` with media type ``application/zip`` containing
        one ``<name>.pdf`` entry per successfully converted file. Entry names
        have client-supplied directory parts removed and are made unique
        (``name.pdf``, ``name_1.pdf``, ...).

    Raises:
        HTTPException: 400 when no file could be converted (the detail lists
            the per-file errors); 500 on an unexpected failure.
    """
    import io
    import zipfile

    try:
        results = []
        errors = []

        for file in files:
            filename = file.filename or "document"
            try:
                file_ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''

                if file_ext not in pdf_converter_service.supported_formats:
                    errors.append(f"{filename}: 不支持的格式")
                    continue

                content = await file.read()
                # Same empty-upload rule as the single-file endpoint.
                if not content:
                    errors.append(f"{filename}: 文件内容为空")
                    continue

                pdf_content, error = await pdf_converter_service.convert_to_pdf(
                    file_content=content,
                    source_format=file_ext,
                    filename=filename.rsplit('.', 1)[0] if '.' in filename else filename
                )

                if error:
                    errors.append(f"{filename}: {error}")
                else:
                    results.append((filename, pdf_content))

            except Exception as e:
                # Best effort: one broken file must not abort the whole batch.
                errors.append(f"{filename}: {str(e)}")

        if not results:
            raise HTTPException(
                status_code=400,
                detail=f"没有可转换的文件。错误: {'; '.join(errors)}"
            )

        if errors:
            # Partial failures are not part of the ZIP response, so at least
            # leave a trace of them in the log.
            logger.warning("批量转换部分文件失败: %s", "; ".join(errors))

        zip_buffer = io.BytesIO()
        used_names = set()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for original_name, pdf_content in results:
                # Drop any directory part sent by the client so entries such
                # as "../x.pdf" can never end up in the archive.
                leaf = original_name.replace('\\', '/').rsplit('/', 1)[-1]
                stem = leaf.rsplit('.', 1)[0] if '.' in leaf else leaf
                stem = stem or "document"

                # "a.docx" and "a.txt" would both map to "a.pdf"; add a
                # numeric suffix instead of writing a duplicate entry.
                # Compared case-insensitively for case-insensitive file systems.
                pdf_name = f"{stem}.pdf"
                suffix = 1
                while pdf_name.casefold() in used_names:
                    pdf_name = f"{stem}_{suffix}.pdf"
                    suffix += 1
                used_names.add(pdf_name.casefold())

                zip_file.writestr(pdf_name, pdf_content)

        return StreamingResponse(
            iter([zip_buffer.getvalue()]),
            media_type="application/zip",
            headers={
                "Content-Disposition": "attachment; filename*=UTF-8''converted_pdfs.zip"
            }
        )

    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error did not.
        logger.exception("批量PDF转换失败: %s", e)
        raise HTTPException(status_code=500, detail=f"批量转换失败: {str(e)}") from e
|