添加XML回退解析机制支持复杂Excel文件

当 pandas 无法解析某些包含非标准元素的 Excel 文件时,
自动回退到直接解析 XML,以提取工作表名称和数据。

- 实现了`_extract_sheet_names_from_xml`方法从XML提取工作表名称
- 实现了`_read_excel_sheet_xml`方法直接解析Excel XML数据
- 添加多种命名空间支持以处理不同Excel格式
- 在pandas解析失败时自动回退到XML解析方式

fix(excel-storage-service): 修复XML解析中的命名空间问题

改进了XML解析逻辑,添加对多种命名空间的支持,
使用通配符查找元素以兼容不同Excel文件格式。

refactor(table-rag-service): 优化XML解析逻辑提高兼容性

统一了XML解析的命名空间处理方式,
改进了元素查找逻辑以更好地支持不同Excel格式。

feat(frontend): 添加RAG向量检索和索引重建功能

- 实现了RAG状态查看、搜索和索引重建接口
- 添加了前端RAG检索界面组件
- 增加了错误处理和加载状态提示
This commit is contained in:
2026-04-08 19:21:40 +08:00
parent 41e5eaaa2d
commit 3b82103e87
6 changed files with 523 additions and 145 deletions

View File

@@ -563,6 +563,30 @@ export const backendApi = {
}
},
/**
 * Rebuild the RAG index by sending a POST request to
 * `${BACKEND_BASE_URL}/rag/rebuild`.
 *
 * @returns The parsed JSON body of a successful response.
 * @throws Error whose message is the string `detail` field of the error body
 *   when the response is not OK, or a generic fallback message when the body
 *   is not JSON or carries no string `detail`. Failures raised by `fetch`
 *   itself are logged and rethrown unchanged.
 */
async rebuildRAGIndex(): Promise<{
  success: boolean;
  message: string;
}> {
  const url = `${BACKEND_BASE_URL}/rag/rebuild`;
  try {
    const response = await fetch(url, {
      method: 'POST',
    });
    if (!response.ok) {
      // The error body is not guaranteed to be JSON (e.g. a gateway error
      // page), so a parse failure must not mask the HTTP failure itself.
      const body = await response.json().catch(() => null);
      // Only use `detail` when it is a string; anything else would be
      // stringified into an unhelpful message.
      const detail = typeof body?.detail === 'string' ? body.detail : '';
      throw new Error(detail || '重建索引失败');
    }
    return await response.json();
  } catch (error) {
    console.error('重建 RAG 索引失败:', error);
    throw error;
  }
},
// ==================== 表格填写 ====================
/**

View File

@@ -91,6 +91,13 @@ const Documents: React.FC = () => {
const [mdStreaming, setMdStreaming] = useState(false);
const [mdStreamingContent, setMdStreamingContent] = useState('');
// State for the RAG vector-retrieval panel.
// Index summary from the backend; stays null until a status request succeeds.
const [ragStatus, setRagStatus] = useState<{ vector_count: number; collections: string[] } | null>(null);
// Current text of the RAG search input.
const [ragSearchQuery, setRagSearchQuery] = useState('');
// True while a search request is in flight.
const [ragSearching, setRagSearching] = useState(false);
// Results of the most recent search; cleared when a new search starts.
const [ragResults, setRagResults] = useState<any[]>([]);
// True while an index rebuild is in flight.
const [ragRebuilding, setRagRebuilding] = useState(false);
// 解析选项
const [parseOptions, setParseOptions] = useState({
parseAllSheets: false,
@@ -146,6 +153,61 @@ const Documents: React.FC = () => {
loadDocuments();
}, [loadDocuments]);
// Load the RAG index status once, right after the component mounts.
useEffect(() => {
  // Effects cannot be async themselves, so run the request in an inline
  // async function and discard the resulting promise.
  void (async () => {
    try {
      const ragInfo = await backendApi.getRAGStatus();
      if (!ragInfo.success) {
        return;
      }
      const { vector_count, collections } = ragInfo;
      setRagStatus({ vector_count, collections });
    } catch (err) {
      // A failed status fetch is only logged; the panel keeps its null state.
      console.error('获取 RAG 状态失败:', err);
    }
  })();
}, []);
// RAG search: send the current query to the backend and display the results.
const handleRagSearch = async () => {
  // Enter in the input can fire while a request is still in flight (only the
  // button is disabled during a search), so ignore re-entrant calls to avoid
  // overlapping requests overwriting each other's results.
  if (ragSearching) {
    return;
  }
  if (!ragSearchQuery.trim()) {
    toast.error('请输入搜索内容');
    return;
  }
  setRagSearching(true);
  // Drop the previous results so stale hits are not shown during the request.
  setRagResults([]);
  try {
    // NOTE(review): the second argument is presumably the maximum number of
    // results to return — confirm against backendApi.searchRAG.
    const result = await backendApi.searchRAG(ragSearchQuery, 5);
    if (result.success) {
      setRagResults(result.results || []);
    } else {
      // Surface an unsuccessful response instead of silently showing nothing.
      toast.error('搜索失败');
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : '';
    toast.error(message || '搜索失败');
  } finally {
    setRagSearching(false);
  }
};
// Rebuild the RAG index, then refresh the index status shown in the panel.
const handleRebuildRag = async () => {
  setRagRebuilding(true);
  try {
    const result = await backendApi.rebuildRAGIndex();
    if (!result.success) {
      // The request completed but the backend reported failure; tell the user
      // instead of finishing silently.
      toast.error(result.message || '重建索引失败');
      return;
    }
    toast.success(result.message || '索引重建成功');
    // Refresh the status. This has its own try/catch so that a failed status
    // request is not reported to the user as a failed rebuild.
    try {
      const status = await backendApi.getRAGStatus();
      if (status.success) {
        setRagStatus({ vector_count: status.vector_count, collections: status.collections });
      }
    } catch (statusErr) {
      console.error('获取 RAG 状态失败:', statusErr);
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : '';
    toast.error(message || '重建索引失败');
  } finally {
    // Runs on every path, including the early return above.
    setRagRebuilding(false);
  }
};
// 文件上传处理
const onDrop = async (acceptedFiles: File[]) => {
const file = acceptedFiles[0];
@@ -688,7 +750,7 @@ const Documents: React.FC = () => {
<SelectValue />
</SelectTrigger>
<SelectContent>
{analysisTypes.map(type => (
{(analysisTypes || []).map(type => (
<SelectItem key={type.value} value={type.value}>
<div className="flex items-center gap-2">
{getAnalysisIcon(type.value)}
@@ -851,9 +913,9 @@ const Documents: React.FC = () => {
</div>
</CardHeader>
<CardContent className="max-h-[400px] overflow-y-auto">
{aiAnalysis.analysis?.sheets ? (
{aiAnalysis.analysis?.sheets && typeof aiAnalysis.analysis.sheets === 'object' ? (
<div className="space-y-4">
{Object.entries(aiAnalysis.analysis.sheets).map(([sheetName, result]: [string, any]) => (
{Object.entries(aiAnalysis.analysis.sheets || {}).map(([sheetName, result]: [string, any]) => (
<div key={sheetName} className="p-4 bg-muted/30 rounded-xl">
<div className="flex items-center gap-2 mb-2">
<FileSpreadsheet size={16} className="text-primary" />
@@ -940,7 +1002,7 @@ const Documents: React.FC = () => {
<Table className="text-primary" size={20} />
</CardTitle>
<CardDescription>{parseResult.data.sheets ? '所有工作表数据' : '工作表数据'}</CardDescription>
<CardDescription>{parseResult?.data?.sheets ? '所有工作表数据' : '工作表数据'}</CardDescription>
</div>
<Button variant="outline" size="sm" onClick={openExportDialog} className="gap-2">
<Download size={14} />
@@ -948,9 +1010,9 @@ const Documents: React.FC = () => {
</div>
</CardHeader>
<CardContent>
{parseResult.data.sheets ? (
{parseResult?.data?.sheets && typeof parseResult.data.sheets === 'object' ? (
<div className="space-y-4">
{Object.entries(parseResult.data.sheets).map(([sheetName, sheetData]: [string, any]) => (
{Object.entries(parseResult.data.sheets || {}).map(([sheetName, sheetData]: [string, any]) => (
<div key={sheetName} className="border rounded-xl overflow-hidden">
<button
onClick={() => setExpandedSheet(expandedSheet === sheetName ? null : sheetName)}
@@ -972,12 +1034,89 @@ const Documents: React.FC = () => {
))}
</div>
) : (
<DataTable columns={parseResult.data.columns || []} rows={parseResult.data.rows || []} />
<DataTable columns={parseResult?.data?.columns || []} rows={parseResult?.data?.rows || []} />
)}
</CardContent>
</Card>
)}
{/* RAG 向量检索 */}
<Card className="border-none shadow-md bg-gradient-to-br from-violet-500/5 to-cyan-500/5">
<CardHeader className="pb-4">
<div className="flex items-center justify-between">
<div className="space-y-1">
<CardTitle className="flex items-center gap-2">
<Brain className="text-violet-500" size={20} />
RAG
</CardTitle>
<CardDescription>
: {(ragStatus?.vector_count) || 0}
{ragStatus?.collections && ragStatus.collections.length > 0 && ` | 集合: ${ragStatus.collections.join(', ')}`}
</CardDescription>
</div>
<Button
variant="outline"
size="sm"
onClick={handleRebuildRag}
disabled={ragRebuilding}
>
{ragRebuilding ? <Loader2 className="mr-2 h-4 w-4 animate-spin" /> : <RefreshCcw className="mr-2 h-4 w-4" />}
</Button>
</div>
</CardHeader>
<CardContent className="space-y-4">
{/* 搜索框 */}
<div className="flex gap-2">
<Input
placeholder="输入查询内容,例如:查询去年销售额最高的客户..."
value={ragSearchQuery}
onChange={(e) => setRagSearchQuery(e.target.value)}
onKeyDown={(e) => e.key === 'Enter' && handleRagSearch()}
className="flex-1"
/>
<Button onClick={handleRagSearch} disabled={ragSearching}>
{ragSearching ? <Loader2 className="h-4 w-4 animate-spin" /> : <Search className="h-4 w-4" />}
</Button>
</div>
{/* 搜索结果 */}
{(ragResults?.length ?? 0) > 0 && (
<div className="space-y-3">
<Label className="text-sm font-medium"></Label>
{(ragResults || []).map((result, index) => (
<div key={index} className="p-4 rounded-xl border bg-card hover:bg-muted/30 transition-colors">
<div className="flex items-start justify-between gap-2 mb-2">
<Badge variant="outline" className="text-xs">
: {(result.score * 100).toFixed(1)}%
</Badge>
{result.metadata?.table_name && (
<Badge variant="secondary" className="text-xs">
{result.metadata.table_name}
</Badge>
)}
</div>
<p className="text-sm whitespace-pre-wrap">{result.content}</p>
{result.metadata && (
<div className="flex gap-2 mt-2 flex-wrap">
{result.metadata.field_name && (
<span className="text-xs text-muted-foreground">
: {result.metadata.field_name}
</span>
)}
{result.metadata.filename && (
<span className="text-xs text-muted-foreground">
: {result.metadata.filename}
</span>
)}
</div>
)}
</div>
))}
</div>
)}
</CardContent>
</Card>
{/* 文档列表 */}
<Card className="border-none shadow-md">
<CardHeader>
@@ -1002,9 +1141,9 @@ const Documents: React.FC = () => {
{/* 文档列表 */}
{loading ? (
<div className="space-y-3">{[1, 2, 3].map(i => <Skeleton key={i} className="h-16 w-full rounded-xl" />)}</div>
) : filteredDocs.length > 0 ? (
) : (filteredDocs?.length ?? 0) > 0 ? (
<div className="space-y-3">
{filteredDocs.map(doc => (
{(filteredDocs || []).map(doc => (
<div key={doc.doc_id} className="flex items-center gap-4 p-4 rounded-xl border border-transparent hover:bg-muted/30 transition-all group">
<div className={cn(
"w-10 h-10 rounded-lg flex items-center justify-center shrink-0",