Compare commits
3 Commits
5fca4eb094
...
4a53be7eeb
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a53be7eeb | |||
| 8b5b24fa2a | |||
| ed66aa346d |
@@ -10,6 +10,7 @@ import os
|
||||
|
||||
from app.services.excel_ai_service import excel_ai_service
|
||||
from app.services.markdown_ai_service import markdown_ai_service
|
||||
from app.services.template_fill_service import template_fill_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -340,3 +341,74 @@ async def get_markdown_outline(
|
||||
except Exception as e:
|
||||
logger.error(f"获取 Markdown 大纲失败: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取大纲失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/analyze/txt")
async def analyze_txt(
    file: UploadFile = File(...),
):
    """
    Upload a TXT file and use AI to extract structured data from it.

    Turns unstructured text into structured table data that can later be
    used for template filling.

    Args:
        file: The uploaded text file. The last dot-separated segment of its
            name must be ``txt`` or ``text`` (case-insensitive).

    Returns:
        dict: When the analysis yields a truthy result,
        ``{"success": True, "filename": ..., "structured_data": result}``.
        Otherwise ``success`` is False, ``structured_data`` is None and
        ``error`` carries a message.

    Raises:
        HTTPException: 400 when the filename is empty or the extension is
            not accepted; 500 when reading or analysing the file fails.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="文件名为空")

    file_ext = file.filename.split('.')[-1].lower()
    if file_ext not in ('txt', 'text'):
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型: {file_ext},仅支持 .txt"
        )

    try:
        content = await file.read()

        logger.info(f"开始 AI 分析 TXT 文件: {file.filename}")

        # The service receives the decoded text in memory, so the upload is
        # not staged on disk: a temporary file would never be read and would
        # only add blocking I/O to this async handler.
        result = await template_fill_service.analyze_txt_with_ai(
            content=content.decode('utf-8', errors='replace'),
            filename=file.filename
        )

        if not result:
            logger.warning(f"TXT AI 分析返回空结果: {file.filename}")
            return {
                "success": False,
                "filename": file.filename,
                "error": "AI 分析未能提取到结构化数据",
                "structured_data": None
            }

        logger.info(f"TXT AI 分析成功: {file.filename}")
        return {
            "success": True,
            "filename": file.filename,
            "structured_data": result
        }

    except HTTPException:
        # Let HTTP errors raised further down keep their own status code.
        raise
    except Exception as e:
        # exc_info keeps the traceback in the log; the client only sees the message.
        logger.error(f"TXT AI 分析过程中出错: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"分析失败: {str(e)}") from e
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
"""
|
||||
FastAPI 应用主入口
|
||||
"""
|
||||
# ========== Silence MongoDB's excessively noisy log output ==========
|
||||
import logging
|
||||
# Raise these loggers to WARNING so their lower-severity records are dropped.
logging.getLogger("pymongo").setLevel(logging.WARNING)
|
||||
logging.getLogger("pymongo.topology").setLevel(logging.WARNING)
|
||||
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
||||
# =====================================================================
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import sys
|
||||
|
||||
@@ -42,33 +42,86 @@ class LLMService:
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
# DeepSeek API temperature 范围: (0, 2]
|
||||
if temperature < 0.01:
|
||||
temperature = 0.01
|
||||
elif temperature > 2.0:
|
||||
temperature = 2.0
|
||||
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"messages": messages,
|
||||
"temperature": temperature
|
||||
}
|
||||
|
||||
# DeepSeek API 限制 max_tokens 范围
|
||||
if max_tokens:
|
||||
if max_tokens > 8192:
|
||||
max_tokens = 8192
|
||||
payload["max_tokens"] = max_tokens
|
||||
|
||||
# 移除不兼容的参数
|
||||
for key in ["stream", "stop", "presence_penalty", "frequency_penalty", "logit_bias"]:
|
||||
kwargs.pop(key, None)
|
||||
|
||||
# 添加其他参数
|
||||
payload.update(kwargs)
|
||||
|
||||
# 验证消息格式
|
||||
validated_messages = []
|
||||
for i, msg in enumerate(messages):
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content", "")
|
||||
|
||||
# 确保 content 是字符串
|
||||
if not isinstance(content, str):
|
||||
logger.warning(f"消息[{i}] content 不是字符串类型: {type(content)},转换为字符串")
|
||||
content = str(content)
|
||||
|
||||
# 确保 role 有效
|
||||
if role not in ["system", "user", "assistant"]:
|
||||
logger.warning(f"消息[{i}] role 无效: {role},跳过")
|
||||
continue
|
||||
|
||||
validated_messages.append({"role": role, "content": content})
|
||||
|
||||
payload["messages"] = validated_messages
|
||||
logger.info(f"验证后消息数量: {len(validated_messages)}")
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
logger.info(f"LLM API 请求: model={self.model_name}, base_url={self.base_url}, temperature={temperature}, max_tokens={max_tokens}")
|
||||
logger.info(f"消息数量: {len(messages)}")
|
||||
total_content_len = sum(len(msg.get('content', '')) for msg in messages)
|
||||
logger.info(f"总内容长度: {total_content_len}")
|
||||
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/chat/completions",
|
||||
headers=headers,
|
||||
json=payload
|
||||
)
|
||||
|
||||
logger.info(f"LLM API 响应状态: {response.status_code}")
|
||||
|
||||
if response.status_code != 200:
|
||||
error_text = response.text
|
||||
logger.error(f"LLM API 错误响应: {error_text}")
|
||||
# 尝试解析错误详情
|
||||
try:
|
||||
error_json = response.json()
|
||||
error_msg = error_json.get("error", {}).get("message", error_text)
|
||||
logger.error(f"错误详情: {error_msg}")
|
||||
except:
|
||||
pass
|
||||
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"LLM API 请求失败: {e.response.status_code} - {e.response.text}")
|
||||
logger.error(f"LLM API HTTP 错误: {e.response.status_code} - {e.response.text}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"LLM API 调用异常: {str(e)}")
|
||||
logger.error(f"LLM API 调用异常: {str(e)}", exc_info=True)
|
||||
raise
|
||||
|
||||
def extract_message_content(self, response: Dict[str, Any]) -> str:
|
||||
@@ -111,6 +164,10 @@ class LLMService:
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
# DeepSeek API 限制
|
||||
if max_tokens and max_tokens > 8192:
|
||||
max_tokens = 8192
|
||||
|
||||
payload = {
|
||||
"model": self.model_name,
|
||||
"messages": messages,
|
||||
@@ -121,9 +178,14 @@ class LLMService:
|
||||
if max_tokens:
|
||||
payload["max_tokens"] = max_tokens
|
||||
|
||||
# 移除不兼容的参数
|
||||
for key in ["stop", "presence_penalty", "frequency_penalty", "logit_bias"]:
|
||||
kwargs.pop(key, None)
|
||||
payload.update(kwargs)
|
||||
|
||||
try:
|
||||
logger.info(f"LLM 流式 API 请求: model={self.model_name}, max_tokens={max_tokens}")
|
||||
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
async with client.stream(
|
||||
"POST",
|
||||
@@ -131,9 +193,14 @@ class LLMService:
|
||||
headers=headers,
|
||||
json=payload
|
||||
) as response:
|
||||
if response.status_code != 200:
|
||||
error_text = await response.aread()
|
||||
logger.error(f"LLM 流式 API 错误: {response.status_code} - {error_text}")
|
||||
response.raise_for_status()
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if line.startswith("data: "):
|
||||
data = line[6:] # Remove "data: " prefix
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
break
|
||||
try:
|
||||
@@ -149,7 +216,7 @@ class LLMService:
|
||||
logger.error(f"LLM 流式 API 请求失败: {e.response.status_code}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"LLM 流式 API 调用异常: {str(e)}")
|
||||
logger.error(f"LLM 流式 API 调用异常: {str(e)}", exc_info=True)
|
||||
raise
|
||||
|
||||
async def analyze_excel_data(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1204,6 +1204,48 @@ export const aiApi = {
|
||||
}
|
||||
},
|
||||
|
||||
/**
 * Upload a TXT file and have the backend AI extract structured data from it.
 *
 * @param file - The text file to upload; sent as the multipart form field `file`.
 * @returns The parsed JSON body of the backend response.
 * @throws Error when the response status is not OK. The message is the
 *   backend's string `detail` field when one is available, otherwise a
 *   generic failure message.
 */
async analyzeTxt(
  file: File
): Promise<{
  success: boolean;
  filename?: string;
  structured_data?: {
    table?: {
      columns?: string[];
      rows?: string[][];
    };
    summary?: string;
    key_value_pairs?: Array<{ key: string; value: string }>;
    numeric_data?: Array<{ name: string; value: number; unit?: string }>;
  };
  error?: string;
}> {
  const formData = new FormData();
  formData.append('file', file);

  const url = `${BACKEND_BASE_URL}/ai/analyze/txt`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // Error bodies are not guaranteed to be JSON (e.g. an HTML error page
      // from a proxy). Fall back to null so a parse failure does not mask
      // the actual request failure.
      const errorBody = await response.json().catch(() => null);
      const detail = errorBody && errorBody.detail;
      // Only a string detail is usable as a message; a structured detail
      // would otherwise render as "[object Object]".
      throw new Error(
        typeof detail === 'string' && detail ? detail : 'TXT AI 分析失败'
      );
    }

    return await response.json();
  } catch (error) {
    console.error('TXT AI 分析失败:', error);
    throw error;
  }
},
|
||||
|
||||
/**
|
||||
* 生成统计信息和图表
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user