前后端基本架构和完整excel表的解析及统计图表的生成以及excel表的导出
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
API 路由注册模块
|
||||
"""
|
||||
from fastapi import APIRouter
|
||||
from app.api.endpoints import upload, ai_analyze, visualization, analysis_charts
|
||||
|
||||
# 创建主路由
|
||||
api_router = APIRouter()
|
||||
|
||||
# 注册各模块路由
|
||||
api_router.include_router(upload.router)
|
||||
api_router.include_router(ai_analyze.router)
|
||||
api_router.include_router(visualization.router)
|
||||
api_router.include_router(analysis_charts.router)
|
||||
|
||||
BIN
backend/app/api/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
backend/app/api/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/api/endpoints/__pycache__/ai_analyze.cpython-312.pyc
Normal file
BIN
backend/app/api/endpoints/__pycache__/ai_analyze.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-312.pyc
Normal file
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-313.pyc
Normal file
BIN
backend/app/api/endpoints/__pycache__/upload.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
144
backend/app/api/endpoints/ai_analyze.py
Normal file
144
backend/app/api/endpoints/ai_analyze.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
AI 分析 API 接口
|
||||
"""
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query, Body
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
from app.services.excel_ai_service import excel_ai_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/ai", tags=["AI 分析"])
|
||||
|
||||
|
||||
@router.post("/analyze/excel")
|
||||
async def analyze_excel(
|
||||
file: UploadFile = File(...),
|
||||
user_prompt: str = Query("", description="用户自定义提示词"),
|
||||
analysis_type: str = Query("general", description="分析类型: general, summary, statistics, insights"),
|
||||
parse_all_sheets: bool = Query(False, description="是否分析所有工作表")
|
||||
):
|
||||
"""
|
||||
上传并使用 AI 分析 Excel 文件
|
||||
|
||||
Args:
|
||||
file: 上传的 Excel 文件
|
||||
user_prompt: 用户自定义提示词
|
||||
analysis_type: 分析类型
|
||||
parse_all_sheets: 是否分析所有工作表
|
||||
|
||||
Returns:
|
||||
dict: 分析结果,包含 Excel 数据和 AI 分析结果
|
||||
"""
|
||||
# 检查文件类型
|
||||
if not file.filename:
|
||||
raise HTTPException(status_code=400, detail="文件名为空")
|
||||
|
||||
file_ext = file.filename.split('.')[-1].lower()
|
||||
if file_ext not in ['xlsx', 'xls']:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"不支持的文件类型: {file_ext},仅支持 .xlsx 和 .xls"
|
||||
)
|
||||
|
||||
# 验证分析类型
|
||||
supported_types = ['general', 'summary', 'statistics', 'insights']
|
||||
if analysis_type not in supported_types:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"不支持的分析类型: {analysis_type},支持的类型: {', '.join(supported_types)}"
|
||||
)
|
||||
|
||||
try:
|
||||
# 读取文件内容
|
||||
content = await file.read()
|
||||
|
||||
logger.info(f"开始分析文件: {file.filename}, 分析类型: {analysis_type}")
|
||||
|
||||
# 调用 AI 分析服务
|
||||
if parse_all_sheets:
|
||||
result = await excel_ai_service.batch_analyze_sheets(
|
||||
content,
|
||||
file.filename,
|
||||
user_prompt=user_prompt,
|
||||
analysis_type=analysis_type
|
||||
)
|
||||
else:
|
||||
# 解析选项
|
||||
parse_options = {"header_row": 0}
|
||||
|
||||
result = await excel_ai_service.analyze_excel_file(
|
||||
content,
|
||||
file.filename,
|
||||
user_prompt=user_prompt,
|
||||
analysis_type=analysis_type,
|
||||
parse_options=parse_options
|
||||
)
|
||||
|
||||
logger.info(f"文件分析完成: {file.filename}, 成功: {result['success']}")
|
||||
|
||||
return result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"AI 分析过程中出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/analysis/types")
|
||||
async def get_analysis_types():
|
||||
"""
|
||||
获取支持的分析类型列表
|
||||
|
||||
Returns:
|
||||
list: 支持的分析类型
|
||||
"""
|
||||
return {
|
||||
"types": excel_ai_service.get_supported_analysis_types()
|
||||
}
|
||||
|
||||
|
||||
@router.post("/analyze/text")
|
||||
async def analyze_text(
|
||||
excel_data: dict = Body(..., description="Excel 解析后的数据"),
|
||||
user_prompt: str = Body("", description="用户提示词"),
|
||||
analysis_type: str = Body("general", description="分析类型")
|
||||
):
|
||||
"""
|
||||
对已解析的 Excel 数据进行 AI 分析
|
||||
|
||||
Args:
|
||||
excel_data: Excel 数据
|
||||
user_prompt: 用户提示词
|
||||
analysis_type: 分析类型
|
||||
|
||||
Returns:
|
||||
dict: 分析结果
|
||||
"""
|
||||
try:
|
||||
logger.info(f"开始文本分析, 分析类型: {analysis_type}")
|
||||
|
||||
# 调用 LLM 服务
|
||||
from app.services.llm_service import llm_service
|
||||
|
||||
if user_prompt and user_prompt.strip():
|
||||
result = await llm_service.analyze_with_template(
|
||||
excel_data,
|
||||
user_prompt
|
||||
)
|
||||
else:
|
||||
result = await llm_service.analyze_excel_data(
|
||||
excel_data,
|
||||
user_prompt,
|
||||
analysis_type
|
||||
)
|
||||
|
||||
logger.info(f"文本分析完成, 成功: {result['success']}")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"文本分析失败: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}")
|
||||
105
backend/app/api/endpoints/analysis_charts.py
Normal file
105
backend/app/api/endpoints/analysis_charts.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
分析结果图表 API - 根据文本分析结果生成图表
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
from app.services.text_analysis_service import text_analysis_service
|
||||
from app.services.chart_generator_service import chart_generator_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/analysis", tags=["分析结果图表"])
|
||||
|
||||
|
||||
class AnalysisChartRequest(BaseModel):
    """Request body for chart generation from an analysis text."""

    # The AI analysis text to extract chart data from.
    analysis_text: str
    # Name of the file the analysis came from, if any.
    original_filename: Optional[str] = ""
    # Kind of the original source; defaults to plain text.
    file_type: Optional[str] = "text"
|
||||
|
||||
|
||||
@router.post("/extract-and-chart")
|
||||
async def extract_and_generate_charts(request: AnalysisChartRequest):
|
||||
"""
|
||||
从 AI 分析结果中提取数据并生成图表
|
||||
|
||||
Args:
|
||||
request: 包含分析文本的请求
|
||||
|
||||
Returns:
|
||||
dict: 包含图表数据的结果
|
||||
"""
|
||||
if not request.analysis_text or not request.analysis_text.strip():
|
||||
raise HTTPException(status_code=400, detail="分析文本不能为空")
|
||||
|
||||
try:
|
||||
logger.info("开始从分析结果中提取结构化数据...")
|
||||
|
||||
# 1. 使用 LLM 提取结构化数据
|
||||
extract_result = await text_analysis_service.extract_structured_data(
|
||||
analysis_text=request.analysis_text,
|
||||
original_filename=request.original_filename or "unknown",
|
||||
file_type=request.file_type or "text"
|
||||
)
|
||||
|
||||
if not extract_result.get("success"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"提取结构化数据失败: {extract_result.get('error', '未知错误')}"
|
||||
)
|
||||
|
||||
logger.info("结构化数据提取成功,开始生成图表...")
|
||||
|
||||
# 2. 根据提取的数据生成图表
|
||||
chart_result = chart_generator_service.generate_charts_from_analysis(extract_result)
|
||||
|
||||
if not chart_result.get("success"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"生成图表失败: {chart_result.get('error', '未知错误')}"
|
||||
)
|
||||
|
||||
logger.info("图表生成成功")
|
||||
|
||||
return chart_result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"分析结果图表生成失败: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"图表生成失败: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/analyze-text")
|
||||
async def analyze_text_only(request: AnalysisChartRequest):
|
||||
"""
|
||||
仅提取结构化数据(不生成图表),用于调试
|
||||
|
||||
Args:
|
||||
request: 包含分析文本的请求
|
||||
|
||||
Returns:
|
||||
dict: 提取的结构化数据
|
||||
"""
|
||||
if not request.analysis_text or not request.analysis_text.strip():
|
||||
raise HTTPException(status_code=400, detail="分析文本不能为空")
|
||||
|
||||
try:
|
||||
result = await text_analysis_service.extract_structured_data(
|
||||
analysis_text=request.analysis_text,
|
||||
original_filename=request.original_filename or "unknown",
|
||||
file_type=request.file_type or "text"
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"文本分析失败: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"文本分析失败: {str(e)}"
|
||||
)
|
||||
205
backend/app/api/endpoints/upload.py
Normal file
205
backend/app/api/endpoints/upload.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""
|
||||
文件上传 API 接口
|
||||
"""
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
from typing import Optional
|
||||
import logging
|
||||
import pandas as pd
|
||||
import io
|
||||
|
||||
from app.services.file_service import file_service
|
||||
from app.core.document_parser import XlsxParser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/upload", tags=["文件上传"])
|
||||
|
||||
# 初始化解析器
|
||||
excel_parser = XlsxParser()
|
||||
|
||||
|
||||
@router.post("/excel")
|
||||
async def upload_excel(
|
||||
file: UploadFile = File(...),
|
||||
parse_all_sheets: bool = Query(False, description="是否解析所有工作表"),
|
||||
sheet_name: Optional[str] = Query(None, description="指定解析的工作表名称"),
|
||||
header_row: int = Query(0, description="表头所在的行索引")
|
||||
):
|
||||
"""
|
||||
上传并解析 Excel 文件
|
||||
|
||||
Args:
|
||||
file: 上传的 Excel 文件
|
||||
parse_all_sheets: 是否解析所有工作表
|
||||
sheet_name: 指定解析的工作表名称
|
||||
header_row: 表头所在的行索引
|
||||
|
||||
Returns:
|
||||
dict: 解析结果
|
||||
"""
|
||||
# 检查文件类型
|
||||
if not file.filename:
|
||||
raise HTTPException(status_code=400, detail="文件名为空")
|
||||
|
||||
file_ext = file.filename.split('.')[-1].lower()
|
||||
if file_ext not in ['xlsx', 'xls']:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"不支持的文件类型: {file_ext},仅支持 .xlsx 和 .xls"
|
||||
)
|
||||
|
||||
try:
|
||||
# 读取文件内容
|
||||
content = await file.read()
|
||||
|
||||
# 保存文件
|
||||
saved_path = file_service.save_uploaded_file(
|
||||
content,
|
||||
file.filename,
|
||||
subfolder="excel"
|
||||
)
|
||||
|
||||
logger.info(f"文件已保存: {saved_path}")
|
||||
|
||||
# 解析文件
|
||||
if parse_all_sheets:
|
||||
result = excel_parser.parse_all_sheets(saved_path)
|
||||
else:
|
||||
# 如果指定了 sheet_name,使用指定的,否则使用默认的第一个
|
||||
if sheet_name:
|
||||
result = excel_parser.parse(saved_path, sheet_name=sheet_name, header_row=header_row)
|
||||
else:
|
||||
result = excel_parser.parse(saved_path, header_row=header_row)
|
||||
|
||||
# 添加文件路径到元数据
|
||||
if result.metadata:
|
||||
result.metadata['saved_path'] = saved_path
|
||||
result.metadata['original_filename'] = file.filename
|
||||
|
||||
return result.to_dict()
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"解析 Excel 文件时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"解析失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/excel/preview/{file_path:path}")
|
||||
async def get_excel_preview(
|
||||
file_path: str,
|
||||
sheet_name: Optional[str] = Query(None, description="工作表名称"),
|
||||
max_rows: int = Query(10, description="最多返回的行数", ge=1, le=100)
|
||||
):
|
||||
"""
|
||||
获取 Excel 文件的预览数据
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
sheet_name: 工作表名称
|
||||
max_rows: 最多返回的行数
|
||||
|
||||
Returns:
|
||||
dict: 预览数据
|
||||
"""
|
||||
try:
|
||||
# 解析工作表名称参数
|
||||
sheet_param = sheet_name if sheet_name else 0
|
||||
|
||||
result = excel_parser.get_sheet_preview(
|
||||
file_path,
|
||||
sheet_name=sheet_param,
|
||||
max_rows=max_rows
|
||||
)
|
||||
|
||||
return result.to_dict()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"获取预览数据时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取预览失败: {str(e)}")
|
||||
|
||||
|
||||
@router.delete("/file")
|
||||
async def delete_uploaded_file(file_path: str = Query(..., description="要删除的文件路径")):
|
||||
"""
|
||||
删除已上传的文件
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
dict: 删除结果
|
||||
"""
|
||||
try:
|
||||
success = file_service.delete_file(file_path)
|
||||
|
||||
if success:
|
||||
return {"success": True, "message": "文件删除成功"}
|
||||
else:
|
||||
return {"success": False, "message": "文件不存在或删除失败"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"删除文件时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/excel/export/{file_path:path}")
|
||||
async def export_excel(
|
||||
file_path: str,
|
||||
sheet_name: Optional[str] = Query(None, description="工作表名称"),
|
||||
columns: Optional[str] = Query(None, description="要导出的列,逗号分隔")
|
||||
):
|
||||
"""
|
||||
导出 Excel 文件(可选择工作表和列)
|
||||
|
||||
Args:
|
||||
file_path: 原始文件路径
|
||||
sheet_name: 工作表名称(可选)
|
||||
columns: 要导出的列名,逗号分隔(可选)
|
||||
|
||||
Returns:
|
||||
StreamingResponse: Excel 文件
|
||||
"""
|
||||
try:
|
||||
# 读取 Excel 文件
|
||||
if sheet_name:
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
else:
|
||||
df = pd.read_excel(file_path)
|
||||
|
||||
# 如果指定了列,只选择这些列
|
||||
if columns:
|
||||
column_list = [col.strip() for col in columns.split(',')]
|
||||
# 过滤掉不存在的列
|
||||
available_columns = [col for col in column_list if col in df.columns]
|
||||
if available_columns:
|
||||
df = df[available_columns]
|
||||
|
||||
# 创建 Excel 文件
|
||||
output = io.BytesIO()
|
||||
with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
||||
df.to_excel(writer, index=False, sheet_name=sheet_name or 'Sheet1')
|
||||
|
||||
output.seek(0)
|
||||
|
||||
# 生成文件名
|
||||
original_name = file_path.split('/')[-1] if '/' in file_path else file_path
|
||||
if columns:
|
||||
export_name = f"export_{sheet_name or 'data'}_{len(column_list) if columns else 'all'}_cols.xlsx"
|
||||
else:
|
||||
export_name = f"export_{original_name}"
|
||||
|
||||
# 返回文件流
|
||||
return StreamingResponse(
|
||||
io.BytesIO(output.getvalue()),
|
||||
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
headers={"Content-Disposition": f"attachment; filename={export_name}"}
|
||||
)
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.error(f"文件不存在: {file_path}")
|
||||
raise HTTPException(status_code=404, detail="文件不存在")
|
||||
except Exception as e:
|
||||
logger.error(f"导出 Excel 文件时出错: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"导出失败: {str(e)}")
|
||||
90
backend/app/api/endpoints/visualization.py
Normal file
90
backend/app/api/endpoints/visualization.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
可视化 API 接口 - 生成统计图表
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Body
|
||||
from typing import Dict, Any
|
||||
import logging
|
||||
|
||||
from app.services.visualization_service import visualization_service
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/visualization", tags=["数据可视化"])
|
||||
|
||||
|
||||
class StatisticsRequest(BaseModel):
    """Request body for statistical chart generation."""

    # Parsed Excel payload to analyze.
    excel_data: Dict[str, Any]
    # Analysis mode forwarded to the visualization service.
    analysis_type: str = "statistics"
|
||||
|
||||
|
||||
@router.post("/statistics")
|
||||
async def generate_statistics(request: StatisticsRequest):
|
||||
"""
|
||||
生成统计信息和可视化图表
|
||||
|
||||
Args:
|
||||
request: 包含 excel_data 和 analysis_type 的请求体
|
||||
|
||||
Returns:
|
||||
dict: 包含统计信息和图表数据的结果
|
||||
"""
|
||||
excel_data = request.excel_data
|
||||
analysis_type = request.analysis_type
|
||||
|
||||
if not excel_data:
|
||||
raise HTTPException(status_code=400, detail="未提供 Excel 数据")
|
||||
|
||||
try:
|
||||
result = visualization_service.analyze_and_visualize(
|
||||
excel_data,
|
||||
analysis_type
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
raise HTTPException(status_code=500, detail=result.get("error", "分析失败"))
|
||||
|
||||
logger.info("统计图表生成成功")
|
||||
|
||||
return result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"统计图表生成失败: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"图表生成失败: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/chart-types")
|
||||
async def get_chart_types():
|
||||
"""
|
||||
获取支持的图表类型
|
||||
|
||||
Returns:
|
||||
dict: 支持的图表类型列表
|
||||
"""
|
||||
return {
|
||||
"chart_types": [
|
||||
{
|
||||
"value": "histogram",
|
||||
"label": "直方图",
|
||||
"description": "显示数值型列的分布情况"
|
||||
},
|
||||
{
|
||||
"value": "bar_chart",
|
||||
"label": "条形图",
|
||||
"description": "显示分类列的频次分布"
|
||||
},
|
||||
{
|
||||
"value": "box_plot",
|
||||
"label": "箱线图",
|
||||
"description": "显示数值列的四分位数和异常值"
|
||||
},
|
||||
{
|
||||
"value": "correlation_heatmap",
|
||||
"label": "相关性热力图",
|
||||
"description": "显示数值列之间的相关性"
|
||||
}
|
||||
]
|
||||
}
|
||||
7
backend/app/core/document_parser/__init__.py
Normal file
7
backend/app/core/document_parser/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
文档解析模块 - 支持多种文件格式的解析
|
||||
"""
|
||||
from .base import BaseParser
|
||||
from .xlsx_parser import XlsxParser
|
||||
|
||||
__all__ = ['BaseParser', 'XlsxParser']
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
87
backend/app/core/document_parser/base.py
Normal file
87
backend/app/core/document_parser/base.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""
|
||||
解析器基类 - 定义所有解析器的通用接口
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class ParseResult:
    """Outcome of a document parse: payload, error text, and metadata."""

    def __init__(
        self,
        success: bool,
        data: Optional[Dict[str, Any]] = None,
        error: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ):
        # Missing data/metadata default to fresh empty dicts so callers can
        # always treat them as mappings.
        self.success = success
        self.data = {} if data is None else data
        self.error = error
        self.metadata = {} if metadata is None else metadata

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result into a plain dictionary."""
        return {
            "success": self.success,
            "data": self.data,
            "error": self.error,
            "metadata": self.metadata,
        }
|
||||
|
||||
|
||||
class BaseParser(ABC):
    """Abstract base class for all document parsers."""

    def __init__(self):
        # Subclasses populate the extensions they accept (e.g. ['.xlsx']).
        self.supported_extensions: List[str] = []
        # Human-readable parser identifier, reported in file info.
        self.parser_name: str = "base_parser"

    @abstractmethod
    def parse(self, file_path: str, **kwargs) -> ParseResult:
        """
        Parse the file at ``file_path``.

        Args:
            file_path: path of the file to parse.
            **kwargs: parser-specific options.

        Returns:
            ParseResult: the parse outcome.
        """
        pass

    def can_parse(self, file_path: str) -> bool:
        """
        Report whether this parser accepts the file's extension.

        Args:
            file_path: path of the candidate file.

        Returns:
            bool: True when the suffix is in ``supported_extensions``.
        """
        return Path(file_path).suffix.lower() in self.supported_extensions

    def get_file_info(self, file_path: str) -> Dict[str, Any]:
        """
        Return basic information about a file on disk.

        Args:
            file_path: path of the file.

        Returns:
            Dict[str, Any]: name/extension/size/parser, or an error entry
            when the file does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            return {"error": "File not found"}
        return {
            "filename": path.name,
            "extension": path.suffix.lower(),
            "size": path.stat().st_size,
            "parser": self.parser_name,
        }
|
||||
120
backend/app/core/document_parser/utils.py
Normal file
120
backend/app/core/document_parser/utils.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
文档解析工具函数
|
||||
"""
|
||||
import re
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
|
||||
def clean_text(text: str) -> str:
    """
    Clean a text string: trim it, collapse runs of whitespace, and drop
    non-printable characters (newlines/CR/tab are kept by the filter but
    have already been collapsed to spaces).

    Args:
        text: the raw text (falsy input yields "").

    Returns:
        str: the cleaned text.
    """
    if not text:
        return ""

    # Trim, then collapse any whitespace run into a single space.
    collapsed = re.sub(r'\s+', ' ', text.strip())

    # Keep printable characters plus the whitelisted control characters.
    return ''.join(ch for ch in collapsed if ch.isprintable() or ch in '\n\r\t')
|
||||
|
||||
|
||||
def chunk_text(
    text: str,
    chunk_size: int = 1000,
    overlap: int = 100
) -> List[str]:
    """
    Split text into overlapping chunks.

    Args:
        text: the source text (falsy input yields []).
        chunk_size: size of each chunk in characters.
        overlap: number of characters shared between consecutive chunks.

    Returns:
        List[str]: the chunks, in order.
    """
    if not text:
        return []

    # Fix: the original advanced by ``end - overlap``; with
    # overlap >= chunk_size the start never moved and the loop ran forever.
    # Clamp the advance to at least one character. For the normal case
    # (overlap < chunk_size) the behavior is unchanged.
    step = max(chunk_size - overlap, 1)

    chunks: List[str] = []
    start = 0
    text_length = len(text)
    while start < text_length:
        chunks.append(text[start:start + chunk_size])
        start += step

    return chunks
|
||||
|
||||
|
||||
def normalize_string(s: Any) -> str:
    """
    Normalize an arbitrary value into a string.

    None becomes "", numbers are stringified, strings are cleaned via
    :func:`clean_text`, and anything else falls back to ``str()``.

    Args:
        s: the input value.

    Returns:
        str: the normalized string.
    """
    if s is None:
        return ""
    if isinstance(s, str):
        return clean_text(s)
    # Numbers and everything else share the same str() fallback.
    return str(s)
|
||||
|
||||
|
||||
def detect_encoding(file_path: str) -> Optional[str]:
    """
    Detect a file's character encoding (best-effort, via chardet).

    Args:
        file_path: path of the file to inspect.

    Returns:
        Optional[str]: the detected encoding, or None when detection fails.
    """
    import chardet

    try:
        with open(file_path, 'rb') as fh:
            # Sample only the first 10000 bytes; enough for detection.
            sample = fh.read(10000)
        return chardet.detect(sample).get('encoding')
    except Exception:
        # Best-effort helper: any I/O or detection failure yields None.
        return None
|
||||
|
||||
|
||||
def safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any:
    """
    Fetch a dictionary value without raising.

    Args:
        d: the mapping to read (anything without ``.get`` yields ``default``).
        key: the key to look up.
        default: value returned on a missing key or any failure.

    Returns:
        Any: the stored value, or ``default``.
    """
    try:
        return d.get(key, default)
    except Exception:
        # Covers non-mapping inputs such as None.
        return default
|
||||
288
backend/app/core/document_parser/xlsx_parser.py
Normal file
288
backend/app/core/document_parser/xlsx_parser.py
Normal file
@@ -0,0 +1,288 @@
|
||||
"""
|
||||
Excel 文件解析器 - 解析 .xlsx 和 .xls 文件
|
||||
"""
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import logging
|
||||
|
||||
from .base import BaseParser, ParseResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XlsxParser(BaseParser):
    """Parser for Excel workbooks (.xlsx / .xls)."""

    def __init__(self):
        super().__init__()
        self.supported_extensions = ['.xlsx', '.xls']
        self.parser_name = "excel_parser"

    def _reject_invalid(self, file_path: str, path: Path) -> Optional[ParseResult]:
        """Return a failure ParseResult for an unusable path, else None."""
        if not path.exists():
            return ParseResult(
                success=False,
                error=f"File not found: {file_path}"
            )
        if path.suffix.lower() not in self.supported_extensions:
            return ParseResult(
                success=False,
                error=f"Unsupported file type: {path.suffix}"
            )
        if path.stat().st_size == 0:
            return ParseResult(
                success=False,
                error=f"File is empty: {file_path}"
            )
        return None

    def _sheet_metadata(
        self,
        path: Path,
        sheet_names: List[str],
        current_sheet: str,
        df: pd.DataFrame
    ) -> Dict[str, Any]:
        """Build the metadata dictionary for a single-sheet parse."""
        return {
            "filename": path.name,
            "extension": path.suffix.lower(),
            "sheet_count": len(sheet_names),
            "sheet_names": sheet_names,
            "current_sheet": current_sheet,
            "row_count": len(df),
            "column_count": len(df.columns) if not df.empty else 0,
            "columns": df.columns.tolist() if not df.empty else [],
            "file_size": path.stat().st_size
        }

    def parse(
        self,
        file_path: str,
        sheet_name: Optional[str | int] = 0,
        header_row: int = 0,
        **kwargs
    ) -> ParseResult:
        """
        Parse one worksheet of an Excel file.

        Args:
            file_path: path of the workbook.
            sheet_name: sheet name or index; invalid selections fall back
                to the first sheet.
            header_row: row index holding the column headers.
            **kwargs: forwarded to ``pandas.read_excel``.

        Returns:
            ParseResult: the parse outcome.
        """
        path = Path(file_path)
        rejection = self._reject_invalid(file_path, path)
        if rejection is not None:
            return rejection

        try:
            sheet_names = pd.ExcelFile(file_path).sheet_names
            if not sheet_names:
                return ParseResult(
                    success=False,
                    error=f"Excel 文件没有找到任何工作表: {file_path}"
                )

            # Resolve the requested sheet; unknown names or out-of-range
            # indices (and None) all fall back to the first sheet.
            target_sheet = sheet_names[0]
            if isinstance(sheet_name, int) and sheet_name < len(sheet_names):
                target_sheet = sheet_names[sheet_name]
            elif isinstance(sheet_name, str) and sheet_name in sheet_names:
                target_sheet = sheet_name

            df = pd.read_excel(
                file_path,
                sheet_name=target_sheet,
                header=header_row,
                **kwargs
            )
            if df.empty:
                return ParseResult(
                    success=False,
                    error=f"工作表 '{target_sheet}' 为空,请检查 Excel 文件内容"
                )

            return ParseResult(
                success=True,
                data=self._df_to_dict(df),
                metadata=self._sheet_metadata(path, sheet_names, target_sheet, df)
            )

        except IndexError as e:
            logger.error(f"工作表索引错误: {str(e)}")
            # Index out of range: retry once against the first sheet.
            return self._retry_first_sheet(file_path, path, header_row, str(e), **kwargs)
        except Exception as e:
            logger.error(f"解析 Excel 文件时出错: {str(e)}")
            return ParseResult(
                success=False,
                error=f"Failed to parse Excel file: {str(e)}"
            )

    def _retry_first_sheet(
        self,
        file_path: str,
        path: Path,
        header_row: int,
        original_error: str,
        **kwargs
    ) -> ParseResult:
        """Fallback used after an IndexError: parse the first sheet."""
        try:
            sheet_names = pd.ExcelFile(file_path).sheet_names
            if not sheet_names:
                return ParseResult(
                    success=False,
                    error="Excel 文件没有有效的工作表"
                )

            df = pd.read_excel(
                file_path,
                sheet_name=sheet_names[0],
                header=header_row,
                **kwargs
            )
            return ParseResult(
                success=True,
                data=self._df_to_dict(df),
                metadata=self._sheet_metadata(path, sheet_names, sheet_names[0], df)
            )
        except Exception as e2:
            logger.error(f"重试解析失败: {str(e2)}")
            # Report the ORIGINAL error, as the first failure is the root cause.
            return ParseResult(
                success=False,
                error=f"无法解析 Excel 文件: {original_error}"
            )

    def parse_all_sheets(self, file_path: str, **kwargs) -> ParseResult:
        """
        Parse every worksheet of an Excel file.

        Args:
            file_path: path of the workbook.
            **kwargs: forwarded to ``pandas.read_excel``.

        Returns:
            ParseResult: outcome with data {"sheets": {name: sheet_dict}}.
        """
        path = Path(file_path)
        rejection = self._reject_invalid(file_path, path)
        if rejection is not None:
            return rejection

        try:
            # sheet_name=None makes pandas return {sheet_name: DataFrame}.
            all_data = pd.read_excel(file_path, sheet_name=None, **kwargs)
            if not all_data:
                return ParseResult(
                    success=False,
                    error=f"无法读取 Excel 文件或文件为空: {file_path}"
                )

            sheets_data = {
                name: self._df_to_dict(df) for name, df in all_data.items()
            }
            all_sheets = list(all_data.keys())

            metadata = {
                "filename": path.name,
                "extension": path.suffix.lower(),
                "sheet_count": len(all_sheets),
                "sheet_names": all_sheets,
                "total_rows": sum(len(df) for df in all_data.values()),
                "file_size": path.stat().st_size
            }

            return ParseResult(
                success=True,
                data={"sheets": sheets_data},
                metadata=metadata
            )

        except Exception as e:
            logger.error(f"Failed to parse Excel file: {str(e)}")
            return ParseResult(
                success=False,
                error=f"Failed to parse Excel file: {str(e)}"
            )

    def _get_sheet_names(self, file_path: str) -> List[str]:
        """Return every worksheet name in the workbook, or [] on failure."""
        try:
            return pd.ExcelFile(file_path).sheet_names or []
        except Exception as e:
            logger.error(f"获取工作表名称失败: {str(e)}")
            return []

    def _df_to_dict(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Convert a DataFrame into a JSON-serializable dictionary, mapping
        NaN/NA cells to None.

        Args:
            df: the frame to convert.

        Returns:
            Dict[str, Any]: columns, row records, and counts.
        """
        cleaned = df.replace({pd.NA: None, float('nan'): None})
        rows = cleaned.to_dict(orient='records')
        return {
            "columns": cleaned.columns.tolist(),
            "rows": rows,
            "row_count": len(rows),
            "column_count": len(cleaned.columns) if not cleaned.empty else 0
        }
|
||||
@@ -1,18 +1,61 @@
|
||||
"""
|
||||
FastAPI 应用主入口
|
||||
"""
|
||||
from fastapi import FastAPI
|
||||
from config import settings
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from app.config import settings
|
||||
from app.api import api_router
|
||||
|
||||
# 创建 FastAPI 应用实例
|
||||
app = FastAPI(
|
||||
title=settings.APP_NAME,
|
||||
openapi_url=f"{settings.API_V1_STR}/openapi.json"
|
||||
description="基于大语言模型的文档理解与多源数据融合系统",
|
||||
version="1.0.0",
|
||||
openapi_url=f"{settings.API_V1_STR}/openapi.json",
|
||||
docs_url=f"{settings.API_V1_STR}/docs",
|
||||
redoc_url=f"{settings.API_V1_STR}/redoc"
|
||||
)
|
||||
|
||||
# 配置 CORS 中间件
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# 注册 API 路由
|
||||
app.include_router(api_router, prefix=settings.API_V1_STR)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""根路径"""
|
||||
return {
|
||||
"message": f"Welcome to {settings.APP_NAME}",
|
||||
"status": "online",
|
||||
"debug_mode": settings.DEBUG
|
||||
"version": "1.0.0",
|
||||
"debug_mode": settings.DEBUG,
|
||||
"api_docs": f"{settings.API_V1_STR}/docs"
|
||||
}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""健康检查接口"""
|
||||
return {
|
||||
"status": "healthy",
|
||||
"service": settings.APP_NAME
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run("main:app", host="127.0.0.1", port=8000, reload=True)
|
||||
|
||||
uvicorn.run(
|
||||
"app.main:app",
|
||||
host="127.0.0.1",
|
||||
port=8000,
|
||||
reload=settings.DEBUG
|
||||
)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
backend/app/services/__pycache__/file_service.cpython-312.pyc
Normal file
BIN
backend/app/services/__pycache__/file_service.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/services/__pycache__/font_helper.cpython-312.pyc
Normal file
BIN
backend/app/services/__pycache__/font_helper.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/services/__pycache__/font_helper.cpython-313.pyc
Normal file
BIN
backend/app/services/__pycache__/font_helper.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/app/services/__pycache__/llm_service.cpython-312.pyc
Normal file
BIN
backend/app/services/__pycache__/llm_service.cpython-312.pyc
Normal file
Binary file not shown.
BIN
backend/app/services/__pycache__/llm_service.cpython-313.pyc
Normal file
BIN
backend/app/services/__pycache__/llm_service.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
349
backend/app/services/chart_generator_service.py
Normal file
349
backend/app/services/chart_generator_service.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""
|
||||
图表生成服务 - 根据结构化数据生成图表
|
||||
"""
|
||||
import io
|
||||
import base64
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib
|
||||
import numpy as np
|
||||
|
||||
# 使用字体辅助模块配置中文字体
|
||||
from app.services.font_helper import configure_matplotlib_fonts
|
||||
|
||||
configure_matplotlib_fonts()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ChartGeneratorService:
    """Chart generation service.

    Renders matplotlib charts (as base64-encoded PNG data URIs) from the
    structured data extracted out of an AI analysis result.
    """

    def __init__(self):
        # Output directory <backend>/data/charts, created on demand.
        self.output_dir = Path(__file__).resolve().parent.parent.parent / "data" / "charts"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_charts_from_analysis(
        self,
        structured_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate charts from AI-extracted structured data.

        Args:
            structured_data: structured data extracted from the AI analysis
                result; expected shape: {"success": bool, "data": {...}}.

        Returns:
            Dict[str, Any]: {"success", "charts", "statistics", "metadata",
            "data_source"} on success, or {"success": False, "error"} on failure.
        """
        if not structured_data.get("success"):
            return {
                "success": False,
                "error": structured_data.get("error", "数据提取失败")
            }

        data = structured_data.get("data", {})
        charts = {}
        statistics = {}

        try:
            # 1. Numeric data charts (bar + optional pie) and summary statistics
            numeric_data = data.get("numeric_data", [])
            if numeric_data:
                charts["numeric_charts"] = self._create_numeric_charts(numeric_data)
                statistics["numeric_summary"] = self._create_numeric_summary(numeric_data)

            # 2. Categorical data charts
            categorical_data = data.get("categorical_data", [])
            if categorical_data:
                charts["categorical_charts"] = self._create_categorical_charts(categorical_data)

            # 3. Time-series chart
            time_series_data = data.get("time_series_data", [])
            if time_series_data:
                charts["time_series_chart"] = self._create_time_series_chart(time_series_data)

            # 4. Comparison chart
            comparison_data = data.get("comparison_data", [])
            if comparison_data:
                charts["comparison_chart"] = self._create_comparison_chart(comparison_data)

            # 5. Table preview (truncated to 50 rows)
            table_data = data.get("table_data")
            if table_data:
                charts["table_preview"] = self._create_table_preview(table_data)

            metadata = data.get("metadata", {})

            return {
                "success": True,
                "charts": charts,
                "statistics": statistics,
                "metadata": metadata,
                "data_source": "ai_analysis"
            }

        except Exception as e:
            logger.error(f"生成图表失败: {str(e)}", exc_info=True)
            return {
                "success": False,
                "error": str(e)
            }

    def _create_numeric_charts(self, numeric_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Create numeric-data charts: a bar chart, plus a pie chart when <= 10 items."""
        charts = []

        # Extract labels and values; items without a name get a positional default.
        names = [item.get("name", f"项{i}") for i, item in enumerate(numeric_data)]
        values = [item.get("value", 0) for item in numeric_data]

        if not values:
            return charts

        # 1. Bar chart
        try:
            fig, ax = plt.subplots(figsize=(12, 7))
            colors = plt.cm.Set3(np.linspace(0, 1, len(values)))
            bars = ax.bar(names, values, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)

            # Annotate each bar with its value.
            for bar, value in zip(bars, values):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2., height,
                        f'{value:,.0f}',
                        ha='center', va='bottom', fontsize=9, fontweight='bold')

            ax.set_xlabel('项目', fontsize=10, labelpad=10, fontweight='bold')
            ax.set_ylabel('数值', fontsize=10, labelpad=10, fontweight='bold')
            ax.set_title('数值型数据对比', fontsize=12, fontweight='bold', pad=15)
            # Fix: pin tick locations before setting labels; calling
            # set_xticklabels alone triggers matplotlib's
            # "FixedFormatter without FixedLocator" warning/misalignment.
            ax.set_xticks(range(len(names)))
            ax.set_xticklabels(names, rotation=30, ha='right', fontsize=9)
            ax.tick_params(axis='both', which='major', labelsize=9)
            ax.grid(axis='y', alpha=0.3)
            plt.tight_layout(pad=1.5)

            img_base64 = self._figure_to_base64(fig)
            charts.append({
                "type": "bar",
                "title": "数值型数据对比",
                "image": img_base64,
                "data": [{"name": n, "value": v} for n, v in zip(names, values)]
            })
        except Exception as e:
            logger.error(f"创建柱状图失败: {str(e)}")

        # 2. Pie chart — only sensible for a small number of slices.
        if len(values) > 0 and len(values) <= 10:
            try:
                fig, ax = plt.subplots(figsize=(10, 10))
                wedges, texts, autotexts = ax.pie(values, labels=names, autopct='%1.1f%%',
                                                  startangle=90, colors=plt.cm.Set3.colors[:len(values)])

                for autotext in autotexts:
                    autotext.set_color('white')
                    autotext.set_fontsize(9)
                    autotext.set_fontweight('bold')

                ax.set_title('数值型数据占比', fontsize=12, fontweight='bold', pad=15)

                img_base64 = self._figure_to_base64(fig)
                charts.append({
                    "type": "pie",
                    "title": "数值型数据占比",
                    "image": img_base64,
                    "data": [{"name": n, "value": v} for n, v in zip(names, values)]
                })
            except Exception as e:
                logger.error(f"创建饼图失败: {str(e)}")

        return charts

    def _create_categorical_charts(self, categorical_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Create a horizontal bar chart for categorical counts."""
        charts = []

        names = [item.get("name", f"类{i}") for i, item in enumerate(categorical_data)]
        counts = [item.get("count", 1) for item in categorical_data]

        if not names or not counts:
            return charts

        try:
            # Height scales with the number of categories so labels stay readable.
            fig, ax = plt.subplots(figsize=(10, max(6, len(names) * 0.8)))
            y_pos = np.arange(len(names))

            bars = ax.barh(y_pos, counts, align='center', color='#10b981', alpha=0.8, edgecolor='black', linewidth=0.5)

            # Annotate each bar with its count.
            for bar, count in zip(bars, counts):
                width = bar.get_width()
                ax.text(width, bar.get_y() + bar.get_height() / 2.,
                        f'{count}',
                        ha='left', va='center', fontsize=10, fontweight='bold')

            ax.set_yticks(y_pos)
            ax.set_yticklabels(names, fontsize=10)
            ax.invert_yaxis()  # largest category at the top
            ax.set_xlabel('数量', fontsize=10, labelpad=10, fontweight='bold')
            ax.set_title('分类数据分布', fontsize=12, fontweight='bold', pad=15)
            ax.tick_params(axis='both', which='major', labelsize=9)
            ax.grid(axis='x', alpha=0.3)
            plt.tight_layout(pad=1.5)

            img_base64 = self._figure_to_base64(fig)
            charts.append({
                "type": "barh",
                "title": "分类数据分布",
                "image": img_base64,
                "data": [{"name": n, "count": c} for n, c in zip(names, counts)]
            })
        except Exception as e:
            logger.error(f"创建分类图表失败: {str(e)}")

        return charts

    def _create_time_series_chart(self, time_series_data: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create a combined bar + line chart for time-series data; None if < 2 points."""
        if not time_series_data:
            return None

        try:
            names = [item.get("name", f"时间{i}") for i, item in enumerate(time_series_data)]
            values = [item.get("value", 0) for item in time_series_data]

            # A trend needs at least two points.
            if len(values) < 2:
                return None

            fig, ax = plt.subplots(figsize=(14, 7))

            # Bars for the raw values, overlaid with a trend line.
            x_pos = np.arange(len(names))
            bars = ax.bar(x_pos, values, width=0.4, label='数值', color='#3b82f6', alpha=0.7)

            line = ax.plot(x_pos, values, 'o-', color='#ef4444', linewidth=2.5, markersize=8, label='趋势')

            ax.set_xticks(x_pos)
            ax.set_xticklabels(names, rotation=30, ha='right', fontsize=9)
            ax.set_ylabel('数值', fontsize=10, labelpad=10, fontweight='bold')
            ax.set_title('时间序列数据', fontsize=12, fontweight='bold', pad=15)
            ax.legend(loc='best', fontsize=9)
            ax.tick_params(axis='both', which='major', labelsize=9)
            ax.grid(True, alpha=0.3)
            plt.tight_layout(pad=1.5)

            img_base64 = self._figure_to_base64(fig)
            return {
                "type": "time_series",
                "title": "时间序列数据",
                "image": img_base64,
                "data": [{"name": n, "value": v} for n, v in zip(names, values)]
            }
        except Exception as e:
            logger.error(f"创建时间序列图表失败: {str(e)}")
            return None

    def _create_comparison_chart(self, comparison_data: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create a signed bar chart (green positive / red negative) for comparison data."""
        if not comparison_data:
            return None

        try:
            names = [item.get("name", f"对比{i}") for i, item in enumerate(comparison_data)]
            values = [item.get("value", 0) for item in comparison_data]

            fig, ax = plt.subplots(figsize=(10, 7))

            # Color encodes the sign of each value.
            colors = ['#10b981' if v >= 0 else '#ef4444' for v in values]
            bars = ax.bar(names, values, color=colors, alpha=0.8, edgecolor='black', linewidth=0.8)

            # Annotate each bar; negative bars get the label below the baseline.
            for bar, value in zip(bars, values):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2., height,
                        f'{value:,.1f}',
                        ha='center', va='bottom' if value >= 0 else 'top',
                        fontsize=10, fontweight='bold')

            # Zero reference line.
            ax.axhline(y=0, color='black', linestyle='-', linewidth=1)

            ax.set_ylabel('值', fontsize=10, labelpad=10, fontweight='bold')
            ax.set_title('对比数据', fontsize=12, fontweight='bold', pad=15)
            # Fix: pin tick locations before setting labels (see _create_numeric_charts).
            ax.set_xticks(range(len(names)))
            ax.set_xticklabels(names, rotation=30, ha='right', fontsize=9)
            ax.tick_params(axis='both', which='major', labelsize=9)
            ax.grid(axis='y', alpha=0.3)
            plt.tight_layout(pad=1.5)

            img_base64 = self._figure_to_base64(fig)
            return {
                "type": "comparison",
                "title": "对比数据",
                "image": img_base64,
                "data": [{"name": n, "value": v} for n, v in zip(names, values)]
            }
        except Exception as e:
            logger.error(f"创建对比图表失败: {str(e)}")
            return None

    def _create_table_preview(self, table_data: Dict[str, Any]) -> Dict[str, Any]:
        """Build a preview payload for table data, truncated to the first 50 rows."""
        if not table_data:
            return {}

        columns = table_data.get("columns", [])
        rows = table_data.get("rows", [])

        return {
            "columns": columns,
            "rows": rows[:50],  # cap the payload size sent to the client
            "total_rows": len(rows),
            "preview_rows": min(50, len(rows))
        }

    def _create_numeric_summary(self, numeric_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Compute descriptive statistics over the numeric entries (non-numbers skipped)."""
        values = [item.get("value", 0) for item in numeric_data if isinstance(item.get("value"), (int, float))]

        if not values:
            return {}

        return {
            "count": len(values),
            "sum": float(sum(values)),
            "mean": float(np.mean(values)),
            "median": float(np.median(values)),
            "min": float(min(values)),
            "max": float(max(values)),
            # std of a single value is meaningless; report 0 instead
            "std": float(np.std(values)) if len(values) > 1 else 0
        }

    def _figure_to_base64(self, fig) -> str:
        """Serialize a matplotlib figure to a PNG data URI and close the figure."""
        buf = io.BytesIO()
        fig.savefig(
            buf,
            format='png',
            dpi=120,
            bbox_inches='tight',
            pad_inches=0.3,
            facecolor='white',
            edgecolor='none',
            transparent=False
        )
        plt.close(fig)  # release figure memory — these run server-side in bulk
        buf.seek(0)
        img_base64 = base64.b64encode(buf.read()).decode('utf-8')
        return f"data:image/png;base64,{img_base64}"


# Module-level singleton
chart_generator_service = ChartGeneratorService()
|
||||
253
backend/app/services/excel_ai_service.py
Normal file
253
backend/app/services/excel_ai_service.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
Excel AI 分析服务 - 集成 Excel 解析和 LLM 分析
|
||||
"""
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List
|
||||
|
||||
from app.core.document_parser import XlsxParser
|
||||
from app.services.file_service import file_service
|
||||
from app.services.llm_service import llm_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExcelAIService:
|
||||
"""Excel AI 分析服务"""
|
||||
|
||||
def __init__(self):
|
||||
self.parser = XlsxParser()
|
||||
self.file_service = file_service
|
||||
self.llm_service = llm_service
|
||||
|
||||
async def analyze_excel_file(
|
||||
self,
|
||||
file_content: bytes,
|
||||
filename: str,
|
||||
user_prompt: str = "",
|
||||
analysis_type: str = "general",
|
||||
parse_options: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
分析 Excel 文件
|
||||
|
||||
Args:
|
||||
file_content: 文件内容字节
|
||||
filename: 文件名
|
||||
user_prompt: 用户自定义提示词
|
||||
analysis_type: 分析类型
|
||||
parse_options: 解析选项
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: 分析结果
|
||||
"""
|
||||
# 1. 保存文件
|
||||
try:
|
||||
saved_path = self.file_service.save_uploaded_file(
|
||||
file_content,
|
||||
filename,
|
||||
subfolder="excel"
|
||||
)
|
||||
logger.info(f"文件已保存: {saved_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"文件保存失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"文件保存失败: {str(e)}",
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
# 2. 解析 Excel 文件
|
||||
try:
|
||||
parse_options = parse_options or {}
|
||||
parse_result = self.parser.parse(saved_path, **parse_options)
|
||||
|
||||
if not parse_result.success:
|
||||
return {
|
||||
"success": False,
|
||||
"error": parse_result.error,
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
excel_data = parse_result.data
|
||||
logger.info(f"Excel 解析成功: {parse_result.metadata}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Excel 解析失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Excel 解析失败: {str(e)}",
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
# 3. 调用 LLM 进行分析
|
||||
try:
|
||||
# 如果有自定义提示词,使用模板分析
|
||||
if user_prompt and user_prompt.strip():
|
||||
llm_result = await self.llm_service.analyze_with_template(
|
||||
excel_data,
|
||||
user_prompt
|
||||
)
|
||||
else:
|
||||
# 否则使用标准分析
|
||||
llm_result = await self.llm_service.analyze_excel_data(
|
||||
excel_data,
|
||||
user_prompt,
|
||||
analysis_type
|
||||
)
|
||||
|
||||
logger.info(f"AI 分析完成: {llm_result['success']}")
|
||||
|
||||
# 4. 组合结果
|
||||
return {
|
||||
"success": True,
|
||||
"excel": {
|
||||
"data": excel_data,
|
||||
"metadata": parse_result.metadata,
|
||||
"saved_path": saved_path
|
||||
},
|
||||
"analysis": llm_result
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"AI 分析失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"AI 分析失败: {str(e)}",
|
||||
"excel": {
|
||||
"data": excel_data,
|
||||
"metadata": parse_result.metadata
|
||||
},
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
async def batch_analyze_sheets(
|
||||
self,
|
||||
file_content: bytes,
|
||||
filename: str,
|
||||
user_prompt: str = "",
|
||||
analysis_type: str = "general"
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
批量分析 Excel 文件的所有工作表
|
||||
|
||||
Args:
|
||||
file_content: 文件内容字节
|
||||
filename: 文件名
|
||||
user_prompt: 用户自定义提示词
|
||||
analysis_type: 分析类型
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: 分析结果
|
||||
"""
|
||||
# 1. 保存文件
|
||||
try:
|
||||
saved_path = self.file_service.save_uploaded_file(
|
||||
file_content,
|
||||
filename,
|
||||
subfolder="excel"
|
||||
)
|
||||
logger.info(f"文件已保存: {saved_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"文件保存失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"文件保存失败: {str(e)}",
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
# 2. 解析所有工作表
|
||||
try:
|
||||
parse_result = self.parser.parse_all_sheets(saved_path)
|
||||
|
||||
if not parse_result.success:
|
||||
return {
|
||||
"success": False,
|
||||
"error": parse_result.error,
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
sheets_data = parse_result.data.get("sheets", {})
|
||||
logger.info(f"Excel 解析成功,共 {len(sheets_data)} 个工作表")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Excel 解析失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Excel 解析失败: {str(e)}",
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
# 3. 批量分析每个工作表
|
||||
sheet_analyses = {}
|
||||
errors = {}
|
||||
|
||||
for sheet_name, sheet_data in sheets_data.items():
|
||||
try:
|
||||
# 调用 LLM 分析
|
||||
if user_prompt and user_prompt.strip():
|
||||
llm_result = await self.llm_service.analyze_with_template(
|
||||
sheet_data,
|
||||
user_prompt
|
||||
)
|
||||
else:
|
||||
llm_result = await self.llm_service.analyze_excel_data(
|
||||
sheet_data,
|
||||
user_prompt,
|
||||
analysis_type
|
||||
)
|
||||
|
||||
sheet_analyses[sheet_name] = llm_result
|
||||
|
||||
if not llm_result["success"]:
|
||||
errors[sheet_name] = llm_result.get("error", "未知错误")
|
||||
|
||||
logger.info(f"工作表 '{sheet_name}' 分析完成")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"工作表 '{sheet_name}' 分析失败: {str(e)}")
|
||||
errors[sheet_name] = str(e)
|
||||
|
||||
# 4. 组合结果
|
||||
return {
|
||||
"success": len(errors) == 0,
|
||||
"excel": {
|
||||
"sheets": sheets_data,
|
||||
"metadata": parse_result.metadata,
|
||||
"saved_path": saved_path
|
||||
},
|
||||
"analysis": {
|
||||
"sheets": sheet_analyses,
|
||||
"total_sheets": len(sheets_data),
|
||||
"successful": len(sheet_analyses) - len(errors),
|
||||
"errors": errors
|
||||
}
|
||||
}
|
||||
|
||||
def get_supported_analysis_types(self) -> List[str]:
|
||||
"""获取支持的分析类型"""
|
||||
return [
|
||||
{
|
||||
"value": "general",
|
||||
"label": "综合分析",
|
||||
"description": "提供数据概览、关键发现、质量评估和建议"
|
||||
},
|
||||
{
|
||||
"value": "summary",
|
||||
"label": "数据摘要",
|
||||
"description": "快速了解数据的结构、范围和主要内容"
|
||||
},
|
||||
{
|
||||
"value": "statistics",
|
||||
"label": "统计分析",
|
||||
"description": "数值型列的统计信息和分类列的分布"
|
||||
},
|
||||
{
|
||||
"value": "insights",
|
||||
"label": "深度洞察",
|
||||
"description": "深入挖掘数据,提供异常值和业务建议"
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
# 全局单例
|
||||
excel_ai_service = ExcelAIService()
|
||||
132
backend/app/services/file_service.py
Normal file
132
backend/app/services/file_service.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
文件服务模块 - 处理文件存储和读取
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
import uuid
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class FileService:
|
||||
"""文件服务类,负责文件的存储、读取和管理"""
|
||||
|
||||
def __init__(self):
|
||||
self.upload_dir = Path(settings.UPLOAD_DIR)
|
||||
self._ensure_upload_dir()
|
||||
|
||||
def _ensure_upload_dir(self):
|
||||
"""确保上传目录存在"""
|
||||
self.upload_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def save_uploaded_file(
|
||||
self,
|
||||
file_content: bytes,
|
||||
filename: str,
|
||||
subfolder: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
保存上传的文件
|
||||
|
||||
Args:
|
||||
file_content: 文件内容字节
|
||||
filename: 原始文件名
|
||||
subfolder: 可选的子文件夹名称
|
||||
|
||||
Returns:
|
||||
str: 保存后的文件路径
|
||||
"""
|
||||
# 生成唯一文件名,避免覆盖
|
||||
file_ext = Path(filename).suffix
|
||||
unique_name = f"{uuid.uuid4().hex}{file_ext}"
|
||||
|
||||
# 确定保存路径
|
||||
if subfolder:
|
||||
save_dir = self.upload_dir / subfolder
|
||||
save_dir.mkdir(parents=True, exist_ok=True)
|
||||
else:
|
||||
save_dir = self.upload_dir
|
||||
|
||||
file_path = save_dir / unique_name
|
||||
|
||||
# 写入文件
|
||||
with open(file_path, 'wb') as f:
|
||||
f.write(file_content)
|
||||
|
||||
return str(file_path)
|
||||
|
||||
def read_file(self, file_path: str) -> bytes:
|
||||
"""
|
||||
读取文件内容
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
bytes: 文件内容
|
||||
"""
|
||||
with open(file_path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
def delete_file(self, file_path: str) -> bool:
|
||||
"""
|
||||
删除文件
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
bool: 是否删除成功
|
||||
"""
|
||||
try:
|
||||
file = Path(file_path)
|
||||
if file.exists():
|
||||
file.unlink()
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def get_file_info(self, file_path: str) -> dict:
|
||||
"""
|
||||
获取文件信息
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
dict: 文件信息
|
||||
"""
|
||||
file = Path(file_path)
|
||||
if not file.exists():
|
||||
return {}
|
||||
|
||||
stat = file.stat()
|
||||
return {
|
||||
"filename": file.name,
|
||||
"filepath": str(file),
|
||||
"size": stat.st_size,
|
||||
"created": datetime.fromtimestamp(stat.st_ctime).isoformat(),
|
||||
"modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
|
||||
"extension": file.suffix.lower()
|
||||
}
|
||||
|
||||
def get_file_size(self, file_path: str) -> int:
|
||||
"""
|
||||
获取文件大小(字节)
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
|
||||
Returns:
|
||||
int: 文件大小,文件不存在返回 0
|
||||
"""
|
||||
file = Path(file_path)
|
||||
return file.stat().st_size if file.exists() else 0
|
||||
|
||||
|
||||
# 全局单例
|
||||
file_service = FileService()
|
||||
105
backend/app/services/font_helper.py
Normal file
105
backend/app/services/font_helper.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
字体辅助模块 - 处理中文字体检测和配置
|
||||
"""
|
||||
import matplotlib
|
||||
import matplotlib.font_manager as fm
|
||||
import platform
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def get_chinese_font() -> str:
    """
    Pick a Chinese-capable font installed on this system.

    Tries a platform-specific priority list first, then generic fallbacks,
    then any installed font whose name hints at CJK coverage.

    Returns:
        str: a usable font family name, or 'sans-serif' as last resort
    """
    # Names of every font matplotlib can see on this machine.
    installed = {font.name for font in fm.fontManager.ttflist}

    system = platform.system()
    if system == 'Windows':
        candidates = [
            'Microsoft YaHei',   # Microsoft YaHei
            'SimHei',            # SimHei
            'SimSun',            # SimSun
            'KaiTi',             # KaiTi
            'FangSong',          # FangSong
            'STXihei',           # STXihei
            'STKaiti',           # STKaiti
            'STSong',            # STSong
            'STFangsong',        # STFangsong
        ]
    elif system == 'Darwin':
        candidates = [
            'PingFang SC',        # PingFang (Simplified)
            'PingFang TC',        # PingFang (Traditional)
            'Heiti SC',           # Heiti (Simplified)
            'Heiti TC',           # Heiti (Traditional)
            'STHeiti',            # STHeiti
            'STSong',             # STSong
            'STKaiti',            # STKaiti
            'Arial Unicode MS',   # Arial Unicode MS
        ]
    else:
        candidates = [
            'Noto Sans CJK SC',      # Noto Sans CJK (Simplified Chinese)
            'WenQuanYi Micro Hei',   # WenQuanYi Micro Hei
            'AR PL UMing CN',        # AR PL UMing
            'AR PL UKai CN',         # AR PL UKai
            'ZCOOL XiaoWei',         # ZCOOL XiaoWei
        ]

    # Cross-platform fallbacks appended after the platform-specific list.
    candidates += [
        'SimHei',
        'Microsoft YaHei',
        'Arial Unicode MS',
        'Droid Sans Fallback',
    ]

    # First candidate actually present wins.
    for candidate in candidates:
        if candidate in installed:
            logger.info(f"找到中文字体: {candidate}")
            return candidate

    # Nothing from the list: scan for any font whose name suggests CJK support.
    for font in fm.fontManager.ttflist:
        if 'CJK' in font.name or 'SC' in font.name or 'TC' in font.name:
            logger.info(f"使用找到的中文字体: {font.name}")
            return font.name

    # Last resort: generic family (CJK glyphs may render as boxes).
    logger.warning("未找到合适的中文字体,使用默认字体")
    return 'sans-serif'
|
||||
|
||||
|
||||
def configure_matplotlib_fonts():
    """
    Configure matplotlib globally so charts render Chinese text correctly.

    Returns:
        str: the font family that was selected and applied
    """
    chinese_font = get_chinese_font()

    rc = matplotlib.rcParams
    rc['font.sans-serif'] = [chinese_font]
    rc['axes.unicode_minus'] = False  # keep minus signs rendering with CJK fonts
    rc['figure.dpi'] = 100
    rc['savefig.dpi'] = 120

    # Default text sizes for every generated chart.
    sizes = {
        'font.size': 10,
        'axes.labelsize': 10,
        'axes.titlesize': 11,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,
    }
    for key, value in sizes.items():
        rc[key] = value

    logger.info(f"配置完成,使用字体: {chinese_font}")
    return chinese_font
|
||||
268
backend/app/services/llm_service.py
Normal file
268
backend/app/services/llm_service.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
LLM 服务模块 - 封装大模型 API 调用
|
||||
"""
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LLMService:
|
||||
"""大语言模型服务类"""
|
||||
|
||||
def __init__(self):
|
||||
self.api_key = settings.LLM_API_KEY
|
||||
self.base_url = settings.LLM_BASE_URL
|
||||
self.model_name = settings.LLM_MODEL_NAME
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: List[Dict[str, str]],
|
||||
temperature: float = 0.7,
|
||||
max_tokens: Optional[int] = None,
|
||||
**kwargs
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
调用聊天 API
|
||||
|
||||
Args:
|
||||
messages: 消息列表,格式为 [{"role": "user", "content": "..."}]
|
||||
temperature: 温度参数,控制随机性
|
||||
max_tokens: 最大生成 token 数
|
||||
**kwargs: 其他参数
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: API 响应结果
|
||||
"""
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"messages": messages,
|
||||
"temperature": temperature
|
||||
}
|
||||
|
||||
if max_tokens:
|
||||
payload["max_tokens"] = max_tokens
|
||||
|
||||
# 添加其他参数
|
||||
payload.update(kwargs)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/chat/completions",
|
||||
headers=headers,
|
||||
json=payload
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"LLM API 请求失败: {e.response.status_code} - {e.response.text}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"LLM API 调用异常: {str(e)}")
|
||||
raise
|
||||
|
||||
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
||||
"""
|
||||
从 API 响应中提取消息内容
|
||||
|
||||
Args:
|
||||
response: API 响应
|
||||
|
||||
Returns:
|
||||
str: 消息内容
|
||||
"""
|
||||
try:
|
||||
return response["choices"][0]["message"]["content"]
|
||||
except (KeyError, IndexError) as e:
|
||||
logger.error(f"解析 API 响应失败: {str(e)}")
|
||||
raise
|
||||
|
||||
async def analyze_excel_data(
|
||||
self,
|
||||
excel_data: Dict[str, Any],
|
||||
user_prompt: str,
|
||||
analysis_type: str = "general"
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
分析 Excel 数据
|
||||
|
||||
Args:
|
||||
excel_data: Excel 解析后的数据
|
||||
user_prompt: 用户提示词
|
||||
analysis_type: 分析类型 (general, summary, statistics, insights)
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: 分析结果
|
||||
"""
|
||||
# 构建 Prompt
|
||||
system_prompt = self._get_system_prompt(analysis_type)
|
||||
user_message = self._format_user_message(excel_data, user_prompt)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_message}
|
||||
]
|
||||
|
||||
try:
|
||||
response = await self.chat(
|
||||
messages=messages,
|
||||
temperature=0.3, # 较低的温度以获得更稳定的输出
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
content = self.extract_message_content(response)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"analysis": content,
|
||||
"model": self.model_name,
|
||||
"analysis_type": analysis_type
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Excel 数据分析失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
def _get_system_prompt(self, analysis_type: str) -> str:
|
||||
"""获取系统提示词"""
|
||||
prompts = {
|
||||
"general": """你是一个专业的数据分析师。请分析用户提供的 Excel 数据,提供有价值的见解和建议。
|
||||
|
||||
请按照以下格式输出:
|
||||
1. 数据概览
|
||||
2. 关键发现
|
||||
3. 数据质量评估
|
||||
4. 建议
|
||||
|
||||
输出语言:中文""",
|
||||
"summary": """你是一个专业的数据分析师。请对用户提供的 Excel 数据进行简洁的总结。
|
||||
|
||||
输出格式:
|
||||
- 数据行数和列数
|
||||
- 主要列的说明
|
||||
- 数据范围概述
|
||||
|
||||
输出语言:中文""",
|
||||
"statistics": """你是一个专业的数据分析师。请对用户提供的 Excel 数据进行统计分析。
|
||||
|
||||
请分析:
|
||||
- 数值型列的统计信息(平均值、中位数、最大值、最小值)
|
||||
- 分类列的分布情况
|
||||
- 数据相关性
|
||||
|
||||
输出语言:中文,使用表格或结构化格式展示""",
|
||||
"insights": """你是一个专业的数据分析师。请深入挖掘用户提供的 Excel 数据,提供有价值的洞察。
|
||||
|
||||
请分析:
|
||||
1. 数据中的异常值或特殊模式
|
||||
2. 数据之间的潜在关联
|
||||
3. 基于数据的业务建议
|
||||
4. 数据趋势分析(如适用)
|
||||
|
||||
输出语言:中文,提供详细且可操作的建议"""
|
||||
}
|
||||
|
||||
return prompts.get(analysis_type, prompts["general"])
|
||||
|
||||
def _format_user_message(self, excel_data: Dict[str, Any], user_prompt: str) -> str:
|
||||
"""格式化用户消息"""
|
||||
columns = excel_data.get("columns", [])
|
||||
rows = excel_data.get("rows", [])
|
||||
row_count = excel_data.get("row_count", 0)
|
||||
column_count = excel_data.get("column_count", 0)
|
||||
|
||||
# 构建数据描述
|
||||
data_info = f"""
|
||||
Excel 数据概览:
|
||||
- 行数: {row_count}
|
||||
- 列数: {column_count}
|
||||
- 列名: {', '.join(columns)}
|
||||
|
||||
数据样例(前 5 行):
|
||||
"""
|
||||
|
||||
# 添加数据样例
|
||||
for i, row in enumerate(rows[:5], 1):
|
||||
row_str = " | ".join([f"{col}: {row.get(col, '')}" for col in columns])
|
||||
data_info += f"第 {i} 行: {row_str}\n"
|
||||
|
||||
if row_count > 5:
|
||||
data_info += f"\n(还有 {row_count - 5} 行数据...)\n"
|
||||
|
||||
# 添加用户自定义提示
|
||||
if user_prompt and user_prompt.strip():
|
||||
data_info += f"\n用户需求:\n{user_prompt}"
|
||||
else:
|
||||
data_info += "\n用户需求: 请对上述数据进行分析"
|
||||
|
||||
return data_info
|
||||
|
||||
async def analyze_with_template(
|
||||
self,
|
||||
excel_data: Dict[str, Any],
|
||||
template_prompt: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
使用自定义模板分析 Excel 数据
|
||||
|
||||
Args:
|
||||
excel_data: Excel 解析后的数据
|
||||
template_prompt: 自定义提示词模板
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: 分析结果
|
||||
"""
|
||||
system_prompt = """你是一个专业的数据分析师。请根据用户提供的自定义提示词分析 Excel 数据。
|
||||
|
||||
请严格按照用户的要求进行分析,输出清晰、有条理的结果。
|
||||
|
||||
输出语言:中文"""
|
||||
|
||||
user_message = self._format_user_message(excel_data, template_prompt)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_message}
|
||||
]
|
||||
|
||||
try:
|
||||
response = await self.chat(
|
||||
messages=messages,
|
||||
temperature=0.5,
|
||||
max_tokens=3000
|
||||
)
|
||||
|
||||
content = self.extract_message_content(response)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"analysis": content,
|
||||
"model": self.model_name,
|
||||
"is_template": True
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"自定义模板分析失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"analysis": None
|
||||
}
|
||||
|
||||
|
||||
# 全局单例
|
||||
llm_service = LLMService()
|
||||
218
backend/app/services/text_analysis_service.py
Normal file
218
backend/app/services/text_analysis_service.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
文本分析服务 - 从 AI 分析结果中提取结构化数据用于可视化
|
||||
"""
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
import re
|
||||
import json
|
||||
|
||||
from app.services.llm_service import llm_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TextAnalysisService:
|
||||
"""文本分析服务类"""
|
||||
|
||||
def __init__(self):
|
||||
self.llm_service = llm_service
|
||||
|
||||
async def extract_structured_data(
|
||||
self,
|
||||
analysis_text: str,
|
||||
original_filename: str = "",
|
||||
file_type: str = "text"
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
从 AI 分析结果文本中提取结构化数据
|
||||
|
||||
Args:
|
||||
analysis_text: AI 分析结果文本
|
||||
original_filename: 原始文件名
|
||||
file_type: 文件类型
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: 提取的结构化数据
|
||||
"""
|
||||
# 限制分析的文本长度,避免 token 超限
|
||||
max_text_length = 8000
|
||||
truncated_text = analysis_text[:max_text_length]
|
||||
|
||||
system_prompt = """你是一个专业的数据提取助手。你的任务是从AI分析结果中提取结构化数据,用于生成图表。
|
||||
|
||||
请按照以下要求提取数据:
|
||||
|
||||
1. 数值型数据:
|
||||
- 提取所有的数值、统计信息、百分比等
|
||||
- 为每个数值创建一个条目,包含:名称、值、单位(如果有)
|
||||
- 格式示例:{"name": "销售额", "value": 123456.78, "unit": "元"}
|
||||
|
||||
2. 分类数据:
|
||||
- 提取所有的类别、状态、枚举值等
|
||||
- 为每个类别创建一个条目,包含:名称、值、数量(如果有)
|
||||
- 格式示例:{"name": "产品类别", "value": "电子产品", "count": 25}
|
||||
|
||||
3. 时间序列数据:
|
||||
- 提取所有的时间相关数据(年月、季度、日期等)
|
||||
- 格式示例:{"name": "2025年1月", "value": 12345}
|
||||
|
||||
4. 对比数据:
|
||||
- 提取所有的对比、排名、趋势等数据
|
||||
- 格式示例:{"name": "同比增长", "value": 15.3, "unit": "%"}
|
||||
|
||||
5. 表格数据:
|
||||
- 如果分析结果中包含表格或列表形式的数据,提取出来
|
||||
- 格式:{"columns": ["列1", "列2"], "rows": [{"列1": "值1", "列2": "值2"}]}
|
||||
|
||||
重要规则:
|
||||
- 只提取明确提到的数据和数值
|
||||
- 如果某种类型的数据不存在,返回空数组 []
|
||||
- 确保所有数值都是有效的数字类型
|
||||
- 保持数据的原始精度
|
||||
- 返回的 JSON 必须完整且格式正确
|
||||
- 表格数据最多提取 20 行
|
||||
|
||||
请以 JSON 格式返回,不要添加任何 Markdown 标记或解释文字,只返回纯 JSON:
|
||||
{
|
||||
"success": true,
|
||||
"data": {
|
||||
"numeric_data": [
|
||||
{"name": string, "value": number, "unit": string|null}
|
||||
],
|
||||
"categorical_data": [
|
||||
{"name": string, "value": string, "count": number|null}
|
||||
],
|
||||
"time_series_data": [
|
||||
{"name": string, "value": number}
|
||||
],
|
||||
"comparison_data": [
|
||||
{"name": string, "value": number, "unit": string|null}
|
||||
],
|
||||
"table_data": {
|
||||
"columns": string[],
|
||||
"rows": object[]
|
||||
} | null
|
||||
},
|
||||
"metadata": {
|
||||
"total_items": number,
|
||||
"data_types": string[]
|
||||
}
|
||||
}"""
|
||||
|
||||
user_message = f"""请从以下 AI 分析结果中提取结构化数据:
|
||||
|
||||
原始文件名:{original_filename}
|
||||
文件类型:{file_type}
|
||||
|
||||
AI 分析结果:
|
||||
{truncated_text}
|
||||
|
||||
请按照系统提示的要求提取数据并返回纯 JSON 格式。"""
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_message}
|
||||
]
|
||||
|
||||
try:
|
||||
logger.info(f"开始提取结构化数据,文本长度: {len(truncated_text)}")
|
||||
|
||||
response = await self.llm_service.chat(
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=4000
|
||||
)
|
||||
|
||||
content = self.llm_service.extract_message_content(response)
|
||||
logger.info(f"LLM 返回内容长度: {len(content)}")
|
||||
|
||||
# 使用简单的方法提取 JSON
|
||||
result = self._extract_json_simple(content)
|
||||
|
||||
if not result:
|
||||
logger.error("无法从 LLM 响应中提取有效的 JSON")
|
||||
return {
|
||||
"success": False,
|
||||
"error": "AI 返回的数据格式不正确或被截断",
|
||||
"raw_content": content[:500]
|
||||
}
|
||||
|
||||
logger.info(f"成功提取结构化数据")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"提取结构化数据失败: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _extract_json_simple(self, content: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
简化的 JSON 提取方法
|
||||
|
||||
Args:
|
||||
content: LLM 返回的内容
|
||||
|
||||
Returns:
|
||||
Optional[Dict[str, Any]]: 解析后的 JSON,失败返回 None
|
||||
"""
|
||||
try:
|
||||
# 方法 1: 查找 ```json 代码块
|
||||
code_block_match = re.search(r'```json\n{[\s\S]*?}[\s\S]*?}\n```', content, re.DOTALL)
|
||||
if code_block_match:
|
||||
json_str = code_block_match.group(1)
|
||||
logger.info("从代码块中提取 JSON")
|
||||
return json.loads(json_str)
|
||||
|
||||
# 方法 2: 查找第一个完整的 { } 对象
|
||||
brace_count = 0
|
||||
json_start = -1
|
||||
|
||||
for i in range(len(content)):
|
||||
if content[i] == '{':
|
||||
if brace_count == 0:
|
||||
json_start = i
|
||||
brace_count += 1
|
||||
elif content[i] == '}':
|
||||
brace_count -= 1
|
||||
if brace_count == 0:
|
||||
# 找到了完整的 JSON 对象
|
||||
json_end = i + 1
|
||||
json_str = content[json_start:json_end]
|
||||
logger.info(f"从大括号中提取 JSON")
|
||||
return json.loads(json_str)
|
||||
|
||||
# 方法 3: 尝试直接解析
|
||||
logger.info("尝试直接解析整个内容")
|
||||
return json.loads(content)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"JSON 解析失败: {str(e)}")
|
||||
logger.error(f"原始内容(前 500 字符): {content[:500]}...")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"提取 JSON 失败: {str(e)}")
|
||||
return None
|
||||
|
||||
def detect_data_types(self, data: Dict[str, Any]) -> List[str]:
|
||||
"""检测数据中包含的类型"""
|
||||
types = []
|
||||
d = data.get("data", {})
|
||||
|
||||
if d.get("numeric_data") and len(d["numeric_data"]) > 0:
|
||||
types.append("numeric")
|
||||
if d.get("categorical_data") and len(d["categorical_data"]) > 0:
|
||||
types.append("categorical")
|
||||
if d.get("time_series_data") and len(d["time_series_data"]) > 0:
|
||||
types.append("time_series")
|
||||
if d.get("comparison_data") and len(d["comparison_data"]) > 0:
|
||||
types.append("comparison")
|
||||
if d.get("table_data") and d["table_data"]:
|
||||
types.append("table")
|
||||
|
||||
return types
|
||||
|
||||
|
||||
# 全局单例
|
||||
text_analysis_service = TextAnalysisService()
|
||||
0
backend/app/services/text_analysis_service_fixed.py
Normal file
0
backend/app/services/text_analysis_service_fixed.py
Normal file
388
backend/app/services/visualization_service.py
Normal file
388
backend/app/services/visualization_service.py
Normal file
@@ -0,0 +1,388 @@
|
||||
"""
|
||||
数据可视化服务 - 使用 matplotlib/plotly 生成统计图表
|
||||
"""
|
||||
import io
|
||||
import base64
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib
|
||||
import numpy as np
|
||||
|
||||
# 使用字体辅助模块配置中文字体
|
||||
from app.services.font_helper import configure_matplotlib_fonts
|
||||
|
||||
configure_matplotlib_fonts()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VisualizationService:
|
||||
"""数据可视化服务类"""
|
||||
|
||||
def __init__(self):
|
||||
self.output_dir = Path(__file__).resolve().parent.parent.parent / "data" / "charts"
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def analyze_and_visualize(
|
||||
self,
|
||||
excel_data: Dict[str, Any],
|
||||
analysis_type: str = "statistics"
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
分析数据并生成可视化图表
|
||||
|
||||
Args:
|
||||
excel_data: Excel 解析后的数据
|
||||
analysis_type: 分析类型
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: 包含图表数据和统计信息的结果
|
||||
"""
|
||||
try:
|
||||
columns = excel_data.get("columns", [])
|
||||
rows = excel_data.get("rows", [])
|
||||
|
||||
if not columns or not rows:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "没有数据可用于分析"
|
||||
}
|
||||
|
||||
# 转换为 DataFrame
|
||||
df = pd.DataFrame(rows, columns=columns)
|
||||
|
||||
# 根据列类型分类
|
||||
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
||||
categorical_columns = df.select_dtypes(exclude=[np.number]).columns.tolist()
|
||||
|
||||
# 生成统计信息
|
||||
statistics = self._generate_statistics(df, numeric_columns, categorical_columns)
|
||||
|
||||
# 生成图表
|
||||
charts = self._generate_charts(df, numeric_columns, categorical_columns)
|
||||
|
||||
# 生成数据分布信息
|
||||
distributions = self._generate_distributions(df, categorical_columns)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"statistics": statistics,
|
||||
"charts": charts,
|
||||
"distributions": distributions,
|
||||
"row_count": len(df),
|
||||
"column_count": len(columns)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"可视化分析失败: {str(e)}", exc_info=True)
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _generate_statistics(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
numeric_columns: List[str],
|
||||
categorical_columns: List[str]
|
||||
) -> Dict[str, Any]:
|
||||
"""生成统计信息"""
|
||||
statistics = {
|
||||
"numeric": {},
|
||||
"categorical": {}
|
||||
}
|
||||
|
||||
# 数值型列统计
|
||||
for col in numeric_columns:
|
||||
try:
|
||||
stats = {
|
||||
"count": int(df[col].count()),
|
||||
"mean": float(df[col].mean()),
|
||||
"median": float(df[col].median()),
|
||||
"std": float(df[col].std()) if df[col].count() > 1 else 0,
|
||||
"min": float(df[col].min()),
|
||||
"max": float(df[col].max()),
|
||||
"q25": float(df[col].quantile(0.25)),
|
||||
"q75": float(df[col].quantile(0.75)),
|
||||
"missing": int(df[col].isna().sum())
|
||||
}
|
||||
statistics["numeric"][col] = stats
|
||||
except Exception as e:
|
||||
logger.warning(f"列 {col} 统计失败: {str(e)}")
|
||||
|
||||
# 分类型列统计
|
||||
for col in categorical_columns:
|
||||
try:
|
||||
value_counts = df[col].value_counts()
|
||||
stats = {
|
||||
"unique": int(df[col].nunique()),
|
||||
"most_common": str(value_counts.index[0]) if len(value_counts) > 0 else "",
|
||||
"most_common_count": int(value_counts.iloc[0]) if len(value_counts) > 0 else 0,
|
||||
"missing": int(df[col].isna().sum()),
|
||||
"distribution": {str(k): int(v) for k, v in value_counts.items()}
|
||||
}
|
||||
statistics["categorical"][col] = stats
|
||||
except Exception as e:
|
||||
logger.warning(f"列 {col} 统计失败: {str(e)}")
|
||||
|
||||
return statistics
|
||||
|
||||
def _generate_charts(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
numeric_columns: List[str],
|
||||
categorical_columns: List[str]
|
||||
) -> Dict[str, Any]:
|
||||
"""生成图表"""
|
||||
charts = {}
|
||||
|
||||
# 1. 数值型列的直方图
|
||||
charts["histograms"] = []
|
||||
for col in numeric_columns[:5]: # 限制最多 5 个数值列
|
||||
chart_data = self._create_histogram(df[col], col)
|
||||
if chart_data:
|
||||
charts["histograms"].append(chart_data)
|
||||
|
||||
# 2. 分类型列的条形图
|
||||
charts["bar_charts"] = []
|
||||
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
|
||||
chart_data = self._create_bar_chart(df[col], col)
|
||||
if chart_data:
|
||||
charts["bar_charts"].append(chart_data)
|
||||
|
||||
# 3. 数值型列的箱线图
|
||||
charts["box_plots"] = []
|
||||
if len(numeric_columns) > 0:
|
||||
chart_data = self._create_box_plot(df[numeric_columns[:5]], numeric_columns[:5])
|
||||
if chart_data:
|
||||
charts["box_plots"].append(chart_data)
|
||||
|
||||
# 4. 相关性热力图
|
||||
if len(numeric_columns) >= 2:
|
||||
chart_data = self._create_correlation_heatmap(df[numeric_columns], numeric_columns)
|
||||
if chart_data:
|
||||
charts["correlation"] = chart_data
|
||||
|
||||
return charts
|
||||
|
||||
def _create_histogram(self, series: pd.Series, column_name: str) -> Optional[Dict[str, Any]]:
|
||||
"""创建直方图"""
|
||||
try:
|
||||
fig, ax = plt.subplots(figsize=(11, 7))
|
||||
ax.hist(series.dropna(), bins=20, edgecolor='black', alpha=0.7, color='#3b82f6')
|
||||
ax.set_xlabel(column_name, fontsize=10, labelpad=10)
|
||||
ax.set_ylabel('频数', fontsize=10, labelpad=10)
|
||||
ax.set_title(f'{column_name} 分布', fontsize=12, fontweight='bold', pad=15)
|
||||
ax.grid(True, alpha=0.3, axis='y')
|
||||
ax.tick_params(axis='both', which='major', labelsize=9)
|
||||
|
||||
# 改进布局
|
||||
plt.tight_layout(pad=1.5, w_pad=1.0, h_pad=1.0)
|
||||
|
||||
# 转换为 base64
|
||||
img_base64 = self._figure_to_base64(fig)
|
||||
|
||||
return {
|
||||
"type": "histogram",
|
||||
"column": column_name,
|
||||
"image": img_base64,
|
||||
"stats": {
|
||||
"mean": float(series.mean()),
|
||||
"median": float(series.median()),
|
||||
"std": float(series.std()) if len(series) > 1 else 0
|
||||
}
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"创建直方图失败 ({column_name}): {str(e)}")
|
||||
return None
|
||||
|
||||
def _create_bar_chart(self, series: pd.Series, column_name: str) -> Optional[Dict[str, Any]]:
|
||||
"""创建条形图"""
|
||||
try:
|
||||
value_counts = series.value_counts().head(10) # 只显示前 10 个
|
||||
fig, ax = plt.subplots(figsize=(12, 7))
|
||||
|
||||
# 处理标签显示
|
||||
labels = [str(x)[:15] + '...' if len(str(x)) > 15 else str(x) for x in value_counts.index]
|
||||
x_pos = range(len(value_counts))
|
||||
bars = ax.bar(x_pos, value_counts.values, color='#10b981', alpha=0.8, edgecolor='black', linewidth=0.5)
|
||||
|
||||
ax.set_xticks(x_pos)
|
||||
ax.set_xticklabels(labels, rotation=30, ha='right', fontsize=8)
|
||||
ax.set_xlabel(column_name, fontsize=10, labelpad=10)
|
||||
ax.set_ylabel('数量', fontsize=10, labelpad=10)
|
||||
ax.set_title(f'{column_name} 分布 (Top 10)', fontsize=12, fontweight='bold', pad=15)
|
||||
ax.grid(True, alpha=0.3, axis='y')
|
||||
ax.tick_params(axis='both', which='major', labelsize=9)
|
||||
|
||||
# 添加数值标签(位置稍微上移)
|
||||
max_val = value_counts.values.max()
|
||||
y_offset = max_val * 0.02 if max_val > 0 else 0.5
|
||||
for bar, value in zip(bars, value_counts.values):
|
||||
ax.text(bar.get_x() + bar.get_width() / 2., value + y_offset,
|
||||
f'{int(value)}',
|
||||
ha='center', va='bottom', fontsize=8, fontweight='bold')
|
||||
|
||||
# 改进布局
|
||||
plt.tight_layout(pad=1.5, w_pad=1.0, h_pad=1.0)
|
||||
|
||||
# 转换为 base64
|
||||
img_base64 = self._figure_to_base64(fig)
|
||||
|
||||
return {
|
||||
"type": "bar_chart",
|
||||
"column": column_name,
|
||||
"image": img_base64,
|
||||
"categories": {str(k): int(v) for k, v in value_counts.items()}
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"创建条形图失败 ({column_name}): {str(e)}")
|
||||
return None
|
||||
|
||||
def _create_box_plot(self, df: pd.DataFrame, columns: List[str]) -> Optional[Dict[str, Any]]:
|
||||
"""创建箱线图"""
|
||||
try:
|
||||
fig, ax = plt.subplots(figsize=(14, 7))
|
||||
|
||||
# 准备数据
|
||||
box_data = [df[col].dropna() for col in columns]
|
||||
bp = ax.boxplot(box_data, labels=columns, patch_artist=True,
|
||||
notch=True, showcaps=True, showfliers=True)
|
||||
|
||||
# 美化箱线图
|
||||
box_colors = ['#3b82f6', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6']
|
||||
for patch, color in zip(bp['boxes'], box_colors[:len(bp['boxes'])]):
|
||||
patch.set_facecolor(color)
|
||||
patch.set_alpha(0.6)
|
||||
patch.set_linewidth(1.5)
|
||||
|
||||
# 设置其他元素样式
|
||||
for element in ['whiskers', 'fliers', 'means', 'medians', 'caps']:
|
||||
plt.setp(bp[element], linewidth=1.5)
|
||||
|
||||
ax.set_ylabel('值', fontsize=10, labelpad=10)
|
||||
ax.set_title('数值型列分布对比', fontsize=12, fontweight='bold', pad=15)
|
||||
ax.grid(True, alpha=0.3, axis='y')
|
||||
|
||||
# 旋转 x 轴标签以避免重叠
|
||||
plt.setp(ax.get_xticklabels(), rotation=30, ha='right', fontsize=9)
|
||||
ax.tick_params(axis='both', which='major', labelsize=9)
|
||||
|
||||
# 改进布局
|
||||
plt.tight_layout(pad=1.5, w_pad=1.5, h_pad=1.0)
|
||||
|
||||
# 转换为 base64
|
||||
img_base64 = self._figure_to_base64(fig)
|
||||
|
||||
return {
|
||||
"type": "box_plot",
|
||||
"columns": columns,
|
||||
"image": img_base64
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"创建箱线图失败: {str(e)}")
|
||||
return None
|
||||
|
||||
def _create_correlation_heatmap(self, df: pd.DataFrame, columns: List[str]) -> Optional[Dict[str, Any]]:
|
||||
"""创建相关性热力图"""
|
||||
try:
|
||||
# 计算相关系数
|
||||
corr = df.corr()
|
||||
|
||||
fig, ax = plt.subplots(figsize=(11, 9))
|
||||
im = ax.imshow(corr, cmap='RdBu_r', aspect='auto', vmin=-1, vmax=1)
|
||||
|
||||
# 设置刻度
|
||||
n_cols = len(corr)
|
||||
ax.set_xticks(np.arange(n_cols))
|
||||
ax.set_yticks(np.arange(n_cols))
|
||||
|
||||
# 处理过长的列名
|
||||
x_labels = [str(col)[:10] + '...' if len(str(col)) > 10 else str(col) for col in corr.columns]
|
||||
y_labels = [str(col)[:10] + '...' if len(str(col)) > 10 else str(col) for col in corr.columns]
|
||||
|
||||
ax.set_xticklabels(x_labels, rotation=30, ha='right', fontsize=9)
|
||||
ax.set_yticklabels(y_labels, fontsize=9)
|
||||
|
||||
# 添加数值标签,根据相关性值选择颜色
|
||||
for i in range(n_cols):
|
||||
for j in range(n_cols):
|
||||
value = corr.iloc[i, j]
|
||||
# 根据背景色深浅选择文字颜色
|
||||
text_color = 'white' if abs(value) > 0.5 else 'black'
|
||||
ax.text(j, i, f'{value:.2f}',
|
||||
ha="center", va="center", color=text_color,
|
||||
fontsize=8, fontweight='bold' if abs(value) > 0.7 else 'normal')
|
||||
|
||||
ax.set_title('数值型列相关性热力图', fontsize=12, fontweight='bold', pad=15)
|
||||
ax.tick_params(axis='both', which='major', labelsize=9)
|
||||
|
||||
# 添加颜色条
|
||||
cbar = plt.colorbar(im, ax=ax)
|
||||
cbar.set_label('相关系数', rotation=270, labelpad=20, fontsize=10)
|
||||
cbar.ax.tick_params(labelsize=9)
|
||||
|
||||
# 改进布局
|
||||
plt.tight_layout(pad=2.0, w_pad=1.0, h_pad=1.0)
|
||||
|
||||
# 转换为 base64
|
||||
img_base64 = self._figure_to_base64(fig)
|
||||
|
||||
return {
|
||||
"type": "correlation_heatmap",
|
||||
"columns": columns,
|
||||
"image": img_base64,
|
||||
"correlation_matrix": corr.to_dict()
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"创建相关性热力图失败: {str(e)}")
|
||||
return None
|
||||
|
||||
def _generate_distributions(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
categorical_columns: List[str]
|
||||
) -> Dict[str, Any]:
|
||||
"""生成数据分布信息"""
|
||||
distributions = {}
|
||||
|
||||
for col in categorical_columns[:5]:
|
||||
try:
|
||||
value_counts = df[col].value_counts()
|
||||
total = len(df)
|
||||
|
||||
distributions[col] = {
|
||||
"categories": {str(k): int(v) for k, v in value_counts.items()},
|
||||
"percentages": {str(k): round(v / total * 100, 2) for k, v in value_counts.items()},
|
||||
"unique_count": len(value_counts)
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(f"列 {col} 分布生成失败: {str(e)}")
|
||||
|
||||
return distributions
|
||||
|
||||
def _figure_to_base64(self, fig) -> str:
|
||||
"""将 matplotlib 图形转换为 base64 字符串"""
|
||||
buf = io.BytesIO()
|
||||
fig.savefig(
|
||||
buf,
|
||||
format='png',
|
||||
dpi=120,
|
||||
bbox_inches='tight',
|
||||
pad_inches=0.3,
|
||||
facecolor='white',
|
||||
edgecolor='none',
|
||||
transparent=False
|
||||
)
|
||||
plt.close(fig)
|
||||
buf.seek(0)
|
||||
img_base64 = base64.b64encode(buf.read()).decode('utf-8')
|
||||
return f"data:image/png;base64,{img_base64}"
|
||||
|
||||
|
||||
# 全局单例
|
||||
visualization_service = VisualizationService()
|
||||
@@ -103,6 +103,21 @@ git config user.email #同上
|
||||
#如果想看全局的,可以加上 --global,例如 git config --global user.name
|
||||
```
|
||||
|
||||
## 启动后端项目
|
||||
在终端输入以下命令:
|
||||
```bash
|
||||
cd backend #确保启动时在后端跟目录下
|
||||
./venv/Scripts/python.exe -m uvicorn app.main:app --host 127.0.0.1 --port 8000
|
||||
--reload #启动后端项目
|
||||
```
|
||||
先启动后端项目,再启动前端项目
|
||||
|
||||
记得在你的.gitignore中添加:
|
||||
```
|
||||
/backend/data/uploads
|
||||
/backend/data/charts
|
||||
```
|
||||
|
||||
## 预计项目结构:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
fastapi[all]==0.104.1
|
||||
fastapi[all]==0.104.1
|
||||
uvicorn[standard]==0.24.0
|
||||
pydantic==2.5.0
|
||||
pydantic-settings==2.1.0
|
||||
python-multipart==0.0.6
|
||||
pymongo==4.5.0
|
||||
redis==5.0.0
|
||||
@@ -10,6 +11,8 @@ faiss-cpu==1.8.0
|
||||
python-docx==0.8.11
|
||||
pandas==2.1.4
|
||||
openpyxl==3.1.2
|
||||
matplotlib==3.8.2
|
||||
numpy==1.26.2
|
||||
markdown==3.5.1
|
||||
langchain==0.1.0
|
||||
langchain-community==0.0.10
|
||||
@@ -18,5 +21,4 @@ httpx==0.25.2
|
||||
python-dotenv==1.0.0
|
||||
loguru==0.7.2
|
||||
tqdm==4.66.1
|
||||
numpy==1.26.2
|
||||
PyYAML==6.0.1
|
||||
PyYAML==6.0.1
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
print("Hello,World")
|
||||
@@ -1 +0,0 @@
|
||||
print("Hello world!")
|
||||
@@ -1 +0,0 @@
|
||||
print("hello,world!")
|
||||
71
backend/test_font_config.py
Normal file
71
backend/test_font_config.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
测试字体配置是否正常工作
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib
|
||||
import numpy as np
|
||||
from app.services.font_helper import configure_matplotlib_fonts
|
||||
import io
|
||||
import base64
|
||||
|
||||
# 配置字体
|
||||
font_name = configure_matplotlib_fonts()
|
||||
|
||||
print(f"当前使用字体: {font_name}")
|
||||
print(f"matplotlib 中文字体设置: {matplotlib.rcParams['font.sans-serif']}")
|
||||
|
||||
# 创建测试图表
|
||||
fig, ax = plt.subplots(figsize=(10, 6))
|
||||
|
||||
# 测试数据
|
||||
x = ['销售', '库存', '采购', '退货', '其他']
|
||||
y = [150, 200, 180, 50, 30]
|
||||
|
||||
bars = ax.bar(x, y, color='#3b82f6', alpha=0.8)
|
||||
ax.set_xlabel('类别', fontsize=12, labelpad=10)
|
||||
ax.set_ylabel('数值', fontsize=12, labelpad=10)
|
||||
ax.set_title('测试图表 - 中文显示', fontsize=14, fontweight='bold', pad=15)
|
||||
ax.tick_params(axis='both', which='major', labelsize=10)
|
||||
|
||||
# 添加数值标签
|
||||
for bar, value in zip(bars, y):
|
||||
height = bar.get_height()
|
||||
ax.text(bar.get_x() + bar.get_width() / 2., height,
|
||||
f'{value}',
|
||||
ha='center', va='bottom', fontsize=10, fontweight='bold')
|
||||
|
||||
plt.grid(axis='y', alpha=0.3)
|
||||
plt.tight_layout(pad=1.5)
|
||||
|
||||
# 转换为 base64
|
||||
buf = io.BytesIO()
|
||||
fig.savefig(buf, format='png', dpi=120, bbox_inches='tight', pad_inches=0.3, facecolor='white')
|
||||
plt.close(fig)
|
||||
|
||||
buf.seek(0)
|
||||
img_base64 = base64.b64encode(buf.read()).decode('utf-8')
|
||||
data_url = f"data:image/png;base64,{img_base64}"
|
||||
|
||||
print("\n=== 测试完成 ===")
|
||||
print(f"图表大小: {len(img_base64)} 字符")
|
||||
print("如果看到字体警告,请检查系统是否有安装中文字体")
|
||||
|
||||
# 尝试获取所有可用字体
|
||||
import matplotlib.font_manager as fm
|
||||
available_fonts = set([f.name for f in fm.fontManager.ttflist])
|
||||
|
||||
print(f"\n=== 可用字体列表(部分)===")
|
||||
chinese_fonts = [f for f in available_fonts if 'CJK' in f or 'Chinese' in f or 'YaHei' in f or 'SimHei' in f or 'PingFang' in f]
|
||||
for font in sorted(chinese_fonts)[:10]:
|
||||
print(f" - {font}")
|
||||
|
||||
if not chinese_fonts:
|
||||
print(" 未找到中文字体!")
|
||||
|
||||
print("\n=== 推荐安装的中文字体 ===")
|
||||
print("Windows: Microsoft YaHei (系统自带)")
|
||||
print("macOS: PingFang SC (系统自带)")
|
||||
print("Linux: fonts-noto-cjk 或 fonts-wqy-zenhei")
|
||||
|
||||
print("\n=== 生成的 base64 数据(前100字符)===")
|
||||
print(data_url[:100] + "...")
|
||||
Reference in New Issue
Block a user