添加系统架构图
This commit is contained in:
@@ -223,6 +223,177 @@ class ExcelAIService:
|
||||
}
|
||||
}
|
||||
|
||||
async def analyze_excel_file_from_path(
    self,
    file_path: str,
    filename: str,
    user_prompt: str = "",
    analysis_type: str = "general",
    parse_options: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Analyze an Excel file from a file path (for documents loaded from the database).

    The file is parsed with ``self.parser.parse`` and the parsed data is then
    handed to ``self.llm_service``. A non-blank ``user_prompt`` selects
    ``analyze_with_template``; otherwise ``analyze_excel_data`` is used.

    Args:
        file_path: Path of the Excel file.
        filename: File name. Accepted for interface compatibility; the body
            does not read it.
        user_prompt: Custom prompt supplied by the user.
        analysis_type: Analysis type, forwarded to ``analyze_excel_data``.
        parse_options: Extra keyword arguments forwarded to the parser.

    Returns:
        Dict[str, Any]: Analysis result. ``"success"`` is False when parsing
        fails or when the LLM call raises. It is True whenever the LLM call
        returns, so callers must inspect ``"analysis"`` for the LLM's own
        status.
    """
    # 1. Parse the Excel file
    excel_data = None
    parse_result_metadata = None
    try:
        parse_options = parse_options or {}
        parse_result = self.parser.parse(file_path, **parse_options)

        if not parse_result.success:
            return {
                "success": False,
                "error": parse_result.error,
                "analysis": None
            }

        excel_data = parse_result.data
        parse_result_metadata = parse_result.metadata
        logger.info(f"Excel 解析成功: {parse_result_metadata}")

    except Exception as e:
        logger.error(f"Excel 解析失败: {str(e)}")
        return {
            "success": False,
            "error": f"Excel 解析失败: {str(e)}",
            "analysis": None
        }

    # 2. Run the LLM analysis
    try:
        if user_prompt and user_prompt.strip():
            llm_result = await self.llm_service.analyze_with_template(
                excel_data,
                user_prompt
            )
        else:
            llm_result = await self.llm_service.analyze_excel_data(
                excel_data,
                user_prompt,
                analysis_type
            )

        # Use .get() so that a result without a "success" key cannot raise
        # here and turn a completed analysis into a reported failure.
        logger.info(f"AI 分析完成: {llm_result.get('success')}")

        return {
            "success": True,
            "excel": {
                "data": excel_data,
                "metadata": parse_result_metadata,
                "saved_path": file_path
            },
            "analysis": llm_result
        }

    except Exception as e:
        logger.error(f"AI 分析失败: {str(e)}")
        # Parsed data is still returned so the caller can use it without
        # the AI analysis.
        return {
            "success": False,
            "error": f"AI 分析失败: {str(e)}",
            "excel": {
                "data": excel_data,
                "metadata": parse_result_metadata
            },
            "analysis": None
        }
|
||||
|
||||
async def batch_analyze_sheets_from_path(
    self,
    file_path: str,
    filename: str,
    user_prompt: str = "",
    analysis_type: str = "general"
) -> Dict[str, Any]:
    """
    Analyze every worksheet of an Excel file from a file path
    (for documents loaded from the database).

    All sheets are parsed with ``self.parser.parse_all_sheets`` and then
    analyzed one after another. A non-blank ``user_prompt`` selects
    ``analyze_with_template``; otherwise ``analyze_excel_data`` is used.
    A failure on one sheet is recorded and does not stop the remaining sheets.

    Args:
        file_path: Path of the Excel file.
        filename: File name. Accepted for interface compatibility; the body
            does not read it.
        user_prompt: Custom prompt supplied by the user.
        analysis_type: Analysis type, forwarded to ``analyze_excel_data``.

    Returns:
        Dict[str, Any]: Analysis result. ``"success"`` is True only when no
        sheet failed. ``"analysis"`` holds the per-sheet results, the total
        sheet count, the number of successful sheets, and an ``"errors"``
        mapping of sheet name to error message.
    """
    # 1. Parse all worksheets
    try:
        parse_result = self.parser.parse_all_sheets(file_path)

        if not parse_result.success:
            return {
                "success": False,
                "error": parse_result.error,
                "analysis": None
            }

        sheets_data = parse_result.data.get("sheets", {})
        logger.info(f"Excel 解析成功,共 {len(sheets_data)} 个工作表")

    except Exception as e:
        logger.error(f"Excel 解析失败: {str(e)}")
        return {
            "success": False,
            "error": f"Excel 解析失败: {str(e)}",
            "analysis": None
        }

    # 2. Analyze each worksheet in turn
    sheet_analyses = {}
    errors = {}

    for sheet_name, sheet_data in sheets_data.items():
        try:
            if user_prompt and user_prompt.strip():
                llm_result = await self.llm_service.analyze_with_template(
                    sheet_data,
                    user_prompt
                )
            else:
                llm_result = await self.llm_service.analyze_excel_data(
                    sheet_data,
                    user_prompt,
                    analysis_type
                )

            sheet_analyses[sheet_name] = llm_result

            # A result without a "success" key is treated as a failure
            # instead of raising KeyError.
            if not llm_result.get("success"):
                errors[sheet_name] = llm_result.get("error", "未知错误")

            logger.info(f"工作表 '{sheet_name}' 分析完成")

        except Exception as e:
            logger.error(f"工作表 '{sheet_name}' 分析失败: {str(e)}")
            errors[sheet_name] = str(e)

    # 3. Assemble the combined result
    # A sheet whose LLM call raised is in `errors` but not in
    # `sheet_analyses`, so the success count is derived from the total
    # number of sheets; using len(sheet_analyses) would undercount and
    # could go negative.
    successful = len(sheets_data) - len(errors)

    return {
        "success": len(errors) == 0,
        "excel": {
            "sheets": sheets_data,
            "metadata": parse_result.metadata,
            "saved_path": file_path
        },
        "analysis": {
            "sheets": sheet_analyses,
            "total_sheets": len(sheets_data),
            "successful": successful,
            "errors": errors
        }
    }
|
||||
|
||||
def get_supported_analysis_types(self) -> List[str]:
|
||||
"""获取支持的分析类型"""
|
||||
return [
|
||||
|
||||
@@ -58,7 +58,7 @@ class LLMService:
|
||||
_start_time = time.time()
|
||||
logger.info(f"🤖 [LLM] 正在调用 DeepSeek API... 模型: {self.model_name}")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/chat/completions",
|
||||
headers=headers,
|
||||
@@ -84,7 +84,7 @@ class LLMService:
|
||||
pass
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"LLM API 调用异常: {str(e)}")
|
||||
logger.error(f"LLM API 调用异常: {repr(e)} - {str(e)}")
|
||||
raise
|
||||
|
||||
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
||||
|
||||
@@ -19,6 +19,7 @@ class TxtAIService:
|
||||
|
||||
def __init__(self):
|
||||
self.parser = TxtParser()
|
||||
self.llm = llm_service
|
||||
|
||||
async def analyze_txt_with_ai(
|
||||
self,
|
||||
@@ -114,7 +115,7 @@ class TxtAIService:
|
||||
response = await self.llm.chat(
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=50000
|
||||
max_tokens=8000
|
||||
)
|
||||
|
||||
content_text = self.llm.extract_message_content(response)
|
||||
@@ -220,7 +221,7 @@ class TxtAIService:
|
||||
response = await self.llm.chat(
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=50000
|
||||
max_tokens=8000
|
||||
)
|
||||
|
||||
content_text = self.llm.extract_message_content(response)
|
||||
|
||||
@@ -53,7 +53,11 @@ class VisualizationService:
|
||||
}
|
||||
|
||||
# 转换为 DataFrame
|
||||
df = pd.DataFrame(rows, columns=columns)
|
||||
# 过滤掉行数与列数不匹配的数据
|
||||
valid_rows = [row for row in rows if len(row) == len(columns)]
|
||||
if len(valid_rows) < len(rows):
|
||||
logger.warning(f"过滤了 {len(rows) - len(valid_rows)} 行无效数据(列数不匹配)")
|
||||
df = pd.DataFrame(valid_rows, columns=columns)
|
||||
|
||||
# 根据列类型分类
|
||||
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
|
||||
@@ -141,18 +145,18 @@ class VisualizationService:
|
||||
charts = {}
|
||||
|
||||
# 1. 数值型列的直方图
|
||||
charts["histograms"] = []
|
||||
charts["numeric_charts"] = []
|
||||
for col in numeric_columns[:5]: # 限制最多 5 个数值列
|
||||
chart_data = self._create_histogram(df[col], col)
|
||||
if chart_data:
|
||||
charts["histograms"].append(chart_data)
|
||||
charts["numeric_charts"].append(chart_data)
|
||||
|
||||
# 2. 分类型列的条形图
|
||||
charts["bar_charts"] = []
|
||||
charts["categorical_charts"] = []
|
||||
for col in categorical_columns[:5]: # 限制最多 5 个分类型列
|
||||
chart_data = self._create_bar_chart(df[col], col)
|
||||
if chart_data:
|
||||
charts["bar_charts"].append(chart_data)
|
||||
charts["categorical_charts"].append(chart_data)
|
||||
|
||||
# 3. 数值型列的箱线图
|
||||
charts["box_plots"] = []
|
||||
|
||||
@@ -184,7 +184,7 @@ class WordAIService:
|
||||
response = await self.llm.chat(
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=50000
|
||||
max_tokens=8000
|
||||
)
|
||||
|
||||
content = self.llm.extract_message_content(response)
|
||||
@@ -276,7 +276,7 @@ class WordAIService:
|
||||
response = await self.llm.chat(
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=50000
|
||||
max_tokens=8000
|
||||
)
|
||||
|
||||
content = self.llm.extract_message_content(response)
|
||||
@@ -849,10 +849,12 @@ class WordAIService:
|
||||
|
||||
# 提取可用于图表的数据
|
||||
chart_data = None
|
||||
logger.info(f"准备提取图表数据,structured_data type: {structured_data.get('type')}, keys: {list(structured_data.keys())}")
|
||||
|
||||
if structured_data.get("type") == "table_data":
|
||||
headers = structured_data.get("headers", [])
|
||||
rows = structured_data.get("rows", [])
|
||||
logger.info(f"table_data类型: headers数量={len(headers)}, rows数量={len(rows)}")
|
||||
if headers and rows:
|
||||
chart_data = {
|
||||
"columns": headers,
|
||||
@@ -860,15 +862,19 @@ class WordAIService:
|
||||
}
|
||||
elif structured_data.get("type") == "structured_text":
|
||||
tables_data = structured_data.get("tables", [])
|
||||
logger.info(f"structured_text类型: tables数量={len(tables_data)}")
|
||||
if tables_data and len(tables_data) > 0:
|
||||
first_table = tables_data[0]
|
||||
headers = first_table.get("headers", [])
|
||||
rows = first_table.get("rows", [])
|
||||
logger.info(f"第一个表格: headers={headers[:5]}, rows数量={len(rows)}")
|
||||
if headers and rows:
|
||||
chart_data = {
|
||||
"columns": headers,
|
||||
"rows": rows
|
||||
}
|
||||
else:
|
||||
logger.warning(f"无法识别的structured_data类型: {structured_data.get('type')}")
|
||||
|
||||
# 生成可视化图表
|
||||
if chart_data:
|
||||
@@ -904,3 +910,6 @@ class WordAIService:
|
||||
"success": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
|
||||
word_ai_service = WordAIService()
|
||||
|
||||
Reference in New Issue
Block a user