From d37edfa5d0577f2f0f87e07249a33fa1d4fd8e6e Mon Sep 17 00:00:00 2001 From: jimmyken Date: Mon, 5 Jan 2026 10:40:39 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84=E5=90=91=E9=87=8F?= =?UTF-8?q?=E5=AD=98=E5=82=A8=E6=9E=B6=E6=9E=84=E5=92=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E6=80=9D=E7=BB=B4=E5=AF=BC=E5=9B=BE=E7=94=9F=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要改进: **向量存储架构优化** - 实现每个笔记本独立的向量表(vec_{notebookId}),替代全局 vec_embeddings - 添加 vec_metadata 表追踪每个笔记本的向量维度 - 支持动态向量维度(768, 1024, 1536等),自动检测嵌入模型输出维度 - 修复维度不匹配错误(Expected 1024 dimensions but received 768) **AI 提供商兼容性** - 修复 Qwen 提供商与 AI SDK v5 的兼容性问题 - 将 Qwen 从 qwen-ai-provider 迁移到 @ai-sdk/openai-compatible - 解决 UnsupportedModelVersionError 错误 **思维导图生成改进** - 优化提示词结构,添加明确的格式要求和示例 - 修复 schema 验证错误:chunkIds 和 keywords 字段支持 null 值 - 增强内容聚合:MAX_CHUNKS_PER_DOC 从 10 提升到 30 - 添加详细的调试日志(输入提示词、模型输出、错误信息) **技术细节** - 修改文件:9个核心文件 - 新增功能:动态向量表管理、维度自动检测 - 性能优化:提升内容聚合效率 - 调试改进:完整的输入输出日志追踪 此次重构解决了多个关键问题,提升了系统的灵活性和稳定性。 --- .vscode/settings.json | 6 +- src/main/config/defaults.ts | 178 ++++++++++++++++----- src/main/db/index.ts | 77 ++++++++- src/main/providers/base/AISDKProvider.ts | 10 +- src/main/services/KnowledgeService.ts | 8 +- src/main/services/MindMapService.ts | 79 ++++++++- src/main/vectorstore/SQLiteVectorStore.ts | 47 +++--- src/main/vectorstore/VectorStoreManager.ts | 2 +- src/shared/types/mindmap.ts | 4 +- 9 files changed, 318 insertions(+), 93 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 4c05394..39b8c6b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,5 +7,9 @@ }, "[json]": { "editor.defaultFormatter": "esbenp.prettier-vscode" - } + }, + "i18n-ally.localesPaths": [ + "src/renderer/src/i18n", + "src/renderer/src/locales" + ] } diff --git a/src/main/config/defaults.ts b/src/main/config/defaults.ts index 10d9969..00c6009 100644 --- a/src/main/config/defaults.ts +++ b/src/main/config/defaults.ts @@ -12,78 +12,172 @@ export const defaultSettings: AppSettings = { defaultEmbeddingModel: undefined, prompts: { mindMap: { - 'zh-CN': `你是知识结构分析专家,负责从笔记本内容中提炼核心知识结构。 + 'zh-CN': `你是专业的知识结构分析专家。请仔细分析笔记本内容,提炼核心知识结构,生成层次清晰的思维导图。 -**重要:请用中文回复,所有节点标签必须使用中文。** +## 核心要求 -**输出格式要求(必须严格遵守):** -你必须返回一个包含 rootNode 和 metadata 的 JSON 对象: +**1. 必须使用中文** +- 所有节点标签(label)必须使用中文 +- 每个节点标签严格限制在 12 个汉字以内(包含标点符号) +- 标签要简洁有力,突出核心概念 + +**2. 结构层次要求** +- 深度:最多 4 层(根节点 level=0,最深子节点 level=3) +- 广度:每个父节点必须有 2-5 个子节点 +- 平衡:尽量保持树形结构的平衡,避免某一分支过深或过浅 + +**3. 节点设计原则** +- 根节点:概括笔记本的整体主题 +- 一级子节点:主要知识领域或章节 +- 二级子节点:具体知识点或子主题 +- 三级子节点:详细概念或实例 + +**4. 数据关联要求** +- chunkIds:如果节点内容来源于特定的文档片段,必须在 metadata.chunkIds 中列出相关的 chunk ID(不是推测,而是从提供的内容中实际存在的) +- 如果无法确定具体的 chunk ID,设置为空数组 [] 而非 null +- keywords:可选,提取该节点的 2-3 个关键词,设置为空数组 [] 而非 null + +## 严格的输出格式(JSON) + +\`\`\`json { "rootNode": { - "id": "节点唯一ID(字符串)", - "label": "节点标签(必须≤12字)", + "id": "0", + "label": "主题名称", "metadata": { "level": 0, - "chunkIds": ["相关chunk ID数组"], - "keywords": ["关键词数组(可选)"] + "chunkIds": [], + "keywords": ["关键词1", "关键词2"] }, - "children": [子节点数组,每个子节点结构相同] + "children": [ + { + "id": "1", + "label": "子主题", + "metadata": { + "level": 1, + "chunkIds": [], + "keywords": [] + }, + "children": [] + } + ] }, "metadata": { - "totalNodes": 总节点数(数字), - "maxDepth": 最大深度(数字) + "totalNodes": 实际节点总数, + "maxDepth": 实际最大深度 } } +\`\`\` -**内容要求:** -1. **所有节点标签必须用中文,且严格 ≤ 12字**(非常重要!) -2. 层级深度 ≤ 4层(根节点level=0, 最深level=3) -3. 每个父节点必须有 2-5 个子节点 -4. 每个节点的 id 必须唯一 -5. 尽可能在 metadata.chunkIds 中关联相关的 chunk ID -6. totalNodes 必须等于实际节点总数 -7. maxDepth 必须等于实际最大层级深度 +## 字段说明 + +- **id**: 字符串,唯一标识符,建议使用数字编号 +- **label**: 字符串,节点显示文本,≤12 个汉字 +- **level**: 数字,0-3,表示层级深度 +- **chunkIds**: 字符串数组,相关文档片段 ID,无关联时使用 [] +- **keywords**: 字符串数组,可选关键词,不需要时使用 [] +- **children**: 数组,子节点列表,叶子节点可省略或设为 [] +- **totalNodes**: 数字,必须等于实际生成的节点总数 +- **maxDepth**: 数字,必须等于实际的最大层级(0-3) + +## 笔记本内容 -**笔记本内容:** {{CONTENT}} -请基于以上内容生成思维导图结构,严格按照格式要求返回 JSON。`, - 'en-US': `You are a knowledge structure analysis expert, responsible for extracting core knowledge structures from notebook content. +## 生成指导 + +1. 先通读全部内容,识别主要主题和知识结构 +2. 设计根节点,用一个精炼的短语概括整体 +3. 将内容分解为 2-5 个主要领域作为一级子节点 +4. 继续细化每个主要领域为 2-5 个知识点 +5. 如需更深层次,再细化到具体概念(但不超过 3 级子节点) +6. 确保节点 ID 唯一且连续 +7. 统计总节点数和最大深度,填入 metadata + +请严格按照上述格式返回 JSON 对象。`, + 'en-US': `You are a professional knowledge structure analysis expert. Please carefully analyze the notebook content, extract the core knowledge structure, and generate a well-organized mind map. -**IMPORTANT: Please respond in English. All node labels must be in English.** +## Core Requirements -**Output Format Requirements (MUST strictly follow):** -You must return a JSON object with rootNode and metadata: +**1. Language Requirement** +- All node labels must be in English +- Each node label strictly limited to 24 characters (including punctuation) +- Labels should be concise and highlight core concepts + +**2. Structural Hierarchy Requirements** +- Depth: Maximum 4 levels (root node level=0, deepest child level=3) +- Breadth: Each parent node must have 2-5 child nodes +- Balance: Maintain balanced tree structure, avoid overly deep or shallow branches + +**3. Node Design Principles** +- Root node: Summarize the overall theme of the notebook +- Level-1 children: Main knowledge domains or chapters +- Level-2 children: Specific knowledge points or sub-topics +- Level-3 children: Detailed concepts or examples + +**4. Data Association Requirements** +- chunkIds: If node content comes from specific document fragments, must list relevant chunk IDs in metadata.chunkIds (from actual provided content, not speculation) +- If unable to determine specific chunk IDs, set to empty array [] instead of null +- keywords: Optional, extract 2-3 keywords for the node, set to [] instead of null when not needed + +## Strict Output Format (JSON) + +\`\`\`json { "rootNode": { - "id": "unique node ID (string)", - "label": "node label (must be ≤24 characters)", + "id": "0", + "label": "Topic Name", "metadata": { "level": 0, - "chunkIds": ["array of related chunk IDs"], - "keywords": ["array of keywords (optional)"] + "chunkIds": [], + "keywords": ["keyword1", "keyword2"] }, - "children": [array of child nodes, each with same structure] + "children": [ + { + "id": "1", + "label": "Sub-topic", + "metadata": { + "level": 1, + "chunkIds": [], + "keywords": [] + }, + "children": [] + } + ] }, "metadata": { - "totalNodes": total number of nodes (number), - "maxDepth": maximum depth (number) + "totalNodes": actual_total_node_count, + "maxDepth": actual_max_depth } } +\`\`\` -**Content Requirements:** -1. **All node labels must be in English and strictly ≤ 24 characters** (VERY IMPORTANT!) -2. Hierarchy depth ≤ 4 levels (root node level=0, deepest level=3) -3. Each parent node must have 2-5 child nodes -4. Each node's id must be unique -5. Associate relevant chunk IDs in metadata.chunkIds whenever possible -6. totalNodes must equal the actual total number of nodes -7. maxDepth must equal the actual maximum hierarchy depth +## Field Descriptions + +- **id**: String, unique identifier, suggest using numeric sequence +- **label**: String, node display text, ≤24 characters +- **level**: Number, 0-3, indicates hierarchy depth +- **chunkIds**: String array, related document fragment IDs, use [] when no association +- **keywords**: String array, optional keywords, use [] when not needed +- **children**: Array, child node list, can be omitted or set to [] for leaf nodes +- **totalNodes**: Number, must equal actual generated node count +- **maxDepth**: Number, must equal actual maximum level (0-3) + +## Notebook Content -**Notebook Content:** {{CONTENT}} -Please generate a mind map structure based on the above content, strictly following the format requirements to return JSON.` +## Generation Guidelines + +1. Read through all content, identify main themes and knowledge structure +2. Design root node, summarize overall theme in a concise phrase +3. Break down content into 2-5 main domains as level-1 children +4. Continue refining each main domain into 2-5 knowledge points +5. If deeper levels needed, refine to specific concepts (but not exceeding level-3 children) +6. Ensure node IDs are unique and sequential +7. Count total nodes and max depth, fill into metadata + +Please strictly return JSON object following the above format.` } } } diff --git a/src/main/db/index.ts b/src/main/db/index.ts index 052d367..753fd89 100644 --- a/src/main/db/index.ts +++ b/src/main/db/index.ts @@ -195,20 +195,83 @@ export function initVectorStore() { console.log('[Database] Initializing vector store...') try { - // 创建向量索引虚拟表(如果不存在) - // 使用 cosine 距离度量,1024 维度(BAAI/bge-m3 默认维度) + // 创建向量表元数据表,用于记录每个笔记本的向量维度 sqlite.exec(` - CREATE VIRTUAL TABLE IF NOT EXISTS vec_embeddings USING vec0( + CREATE TABLE IF NOT EXISTS vec_metadata ( + notebook_id TEXT PRIMARY KEY, + table_name TEXT NOT NULL, + dimensions INTEGER NOT NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + `) + + console.log('[Database] Vector store initialization completed (tables will be created per notebook)') + } catch (error) { + console.error('[Database] Failed to initialize vector store:', error) + throw error + } +} + +/** + * 为指定笔记本创建向量表 + * @param notebookId 笔记本 ID + * @param dimensions 向量维度 + */ +export function createNotebookVectorTable(notebookId: string, dimensions: number) { + if (!sqlite) { + throw new Error('[Database] Database not initialized. Call initDatabase() first.') + } + + const tableName = `vec_${notebookId.replace(/[^a-zA-Z0-9]/g, '_')}` + + try { + // 检查元数据表中是否有记录 + const metadata = sqlite + .prepare(`SELECT table_name, dimensions FROM vec_metadata WHERE notebook_id = ?`) + .get(notebookId) as { table_name: string; dimensions: number } | undefined + + if (metadata) { + // 表已存在,检查维度是否匹配 + if (metadata.dimensions !== dimensions) { + console.warn( + `[Database] Vector table ${metadata.table_name} exists with dimensions ${metadata.dimensions}, ` + + `but requested ${dimensions}. Dropping and recreating table.` + ) + + // 删除旧表 + try { + sqlite.exec(`DROP TABLE IF EXISTS ${metadata.table_name}`) + } catch (err) { + console.error(`[Database] Failed to drop old table ${metadata.table_name}:`, err) + } + + // 删除元数据 + sqlite.prepare(`DELETE FROM vec_metadata WHERE notebook_id = ?`).run(notebookId) + } else { + console.log(`[Database] Vector table ${metadata.table_name} already exists with correct dimensions: ${dimensions}`) + return metadata.table_name + } + } + + // 创建向量表 + sqlite.exec(` + CREATE VIRTUAL TABLE ${tableName} USING vec0( embedding_id TEXT PRIMARY KEY, chunk_id TEXT, - notebook_id TEXT, - embedding FLOAT[1024] distance_metric=cosine + embedding FLOAT[${dimensions}] distance_metric=cosine ); `) - console.log('[Database] Vector store initialized successfully') + // 记录元数据 + sqlite + .prepare(`INSERT INTO vec_metadata (notebook_id, table_name, dimensions) VALUES (?, ?, ?)`) + .run(notebookId, tableName, dimensions) + + console.log(`[Database] Created vector table ${tableName} with dimensions: ${dimensions}`) + return tableName } catch (error) { - console.error('[Database] Failed to initialize vector store:', error) + console.error(`[Database] Failed to create vector table for notebook ${notebookId}:`, error) throw error } } diff --git a/src/main/providers/base/AISDKProvider.ts b/src/main/providers/base/AISDKProvider.ts index bc16324..ab093c1 100644 --- a/src/main/providers/base/AISDKProvider.ts +++ b/src/main/providers/base/AISDKProvider.ts @@ -7,7 +7,6 @@ import { createOpenAI } from '@ai-sdk/openai' import { createOpenAICompatible } from '@ai-sdk/openai-compatible' import { createDeepSeek } from '@ai-sdk/deepseek' -import { createQwen } from 'qwen-ai-provider' import { createOllama } from 'ollama-ai-provider-v2' import { streamText, embed, embedMany } from 'ai' import type { BaseProvider, LLMProviderConfig } from '../capabilities/BaseProvider' @@ -43,7 +42,6 @@ export class AISDKProvider implements BaseProvider { | ReturnType | ReturnType | ReturnType - | ReturnType | ReturnType | null = null @@ -82,19 +80,13 @@ export class AISDKProvider implements BaseProvider { baseURL: this.config.baseUrl, apiKey: config.apiKey }) - } else if (this.name === 'qwen') { - // Qwen 使用社区 provider - this.aiProvider = createQwen({ - baseURL: this.config.baseUrl, - apiKey: config.apiKey - }) } else if (this.name === 'ollama') { // Ollama 使用社区 provider this.aiProvider = createOllama({ baseURL: this.config.baseUrl || 'http://localhost:11434/api' }) } else { - // 其他所有 provider 都使用 OpenAI Compatible (Kimi, SiliconFlow) + // 其他所有 provider 都使用 OpenAI Compatible (Qwen, Kimi, SiliconFlow 等) this.aiProvider = createOpenAICompatible({ name: this.name, baseURL: this.config.baseUrl || '', diff --git a/src/main/services/KnowledgeService.ts b/src/main/services/KnowledgeService.ts index 5cef6a0..aaec9cd 100644 --- a/src/main/services/KnowledgeService.ts +++ b/src/main/services/KnowledgeService.ts @@ -214,7 +214,7 @@ export class KnowledgeService { onProgress?.('generating_embeddings', 30) const embeddingResults = await this.embeddingService.embedBatch( chunkContents, - { dimensions: 1024 }, // 显式指定 1024 维 + {}, // 使用模型的原生维度 (completed, total) => { const progress = 30 + (completed / total) * 50 onProgress?.('generating_embeddings', Math.round(progress)) @@ -225,7 +225,7 @@ export class KnowledgeService { const detectedDimensions = embeddingResults.length > 0 ? embeddingResults[0].dimensions : 1536 if (embeddingResults.length > 0) { vectorStoreManager.setDefaultDimensions(detectedDimensions) - Logger.debug('KnowledgeService', `Detected embedding dimensions: ${detectedDimensions}`) + Logger.info('KnowledgeService', `Detected embedding dimensions: ${detectedDimensions}`) } // 5. 保存嵌入元数据并添加到向量存储 @@ -398,7 +398,7 @@ export class KnowledgeService { onProgress?.('generating_embeddings', 30) const embeddingResults = await this.embeddingService.embedBatch( chunkContents, - { dimensions: 1024 }, // 显式指定 1024 维 + {}, // 使用模型的原生维度 (completed, total) => { const progress = 30 + (completed / total) * 50 onProgress?.('generating_embeddings', Math.round(progress)) @@ -409,7 +409,7 @@ export class KnowledgeService { const detectedDimensions = embeddingResults.length > 0 ? embeddingResults[0].dimensions : 1536 if (embeddingResults.length > 0) { vectorStoreManager.setDefaultDimensions(detectedDimensions) - Logger.debug('KnowledgeService', `Detected embedding dimensions: ${detectedDimensions}`) + Logger.info('KnowledgeService', `Detected embedding dimensions: ${detectedDimensions}`) } // 5. 保存嵌入元数据并添加到向量存储 diff --git a/src/main/services/MindMapService.ts b/src/main/services/MindMapService.ts index db2e1d6..bb1d92c 100644 --- a/src/main/services/MindMapService.ts +++ b/src/main/services/MindMapService.ts @@ -32,8 +32,8 @@ const MindMapNodeSchema: z.ZodType = z.lazy(() => metadata: z .object({ level: z.number().min(0).max(3).describe('层级深度 0-3'), - chunkIds: z.array(z.string()).describe('关联的chunk ID列表'), - keywords: z.array(z.string()).optional().describe('关键词') + chunkIds: z.array(z.string()).nullable().optional().describe('关联的chunk ID列表,可为空'), + keywords: z.array(z.string()).nullable().optional().describe('关键词,可为空') }) .optional() }) @@ -78,6 +78,7 @@ export class MindMapService { /** * 聚合笔记本内容 + * 收集笔记本中所有已索引文档的内容片段 */ private async aggregateNotebookContent(notebookId: string): Promise { const db = getDatabase() @@ -97,19 +98,25 @@ export class MindMapService { Logger.info('MindMapService', `Found ${docs.length} indexed documents`) - // 2. 聚合文档chunks (每个文档最多10个chunks) + // 2. 聚合文档chunks + // 配置:每个文档最多取多少个 chunks(可根据需要调整) + const MAX_CHUNKS_PER_DOC = 30 // 从 10 增加到 30,提供更完整的上下文 + for (const doc of docs) { const docChunks = db .select() .from(chunks) .where(eq(chunks.documentId, doc.id)) .orderBy(chunks.chunkIndex) - .limit(10) + .limit(MAX_CHUNKS_PER_DOC) .all() if (docChunks.length > 0) { const chunkContent = docChunks.map((c) => c.content).join('\n') contentParts.push(`[文档: ${doc.title}]\n${chunkContent}`) + + // 记录实际使用的 chunks 数量 + Logger.info('MindMapService', `Document "${doc.title}": ${docChunks.length} chunks (max ${MAX_CHUNKS_PER_DOC})`) } } @@ -117,7 +124,10 @@ export class MindMapService { throw new Error('笔记本没有可用内容生成思维导图') } - return contentParts.join('\n\n---\n\n') + const aggregatedContent = contentParts.join('\n\n---\n\n') + Logger.info('MindMapService', `Aggregated content: ${aggregatedContent.length} characters from ${docs.length} documents`) + + return aggregatedContent } catch (error) { Logger.error('MindMapService', 'Error aggregating content:', error) throw error @@ -146,6 +156,21 @@ export class MindMapService { const promptTemplate = await getMindMapPrompt() const prompt = promptTemplate.replace('{{CONTENT}}', content) + // ===== 调试日志:记录发送给模型的提示词 ===== + Logger.info('MindMapService', '==================== 发送给模型的提示词 ====================') + Logger.info('MindMapService', `Provider: ${provider.name}`) + Logger.info('MindMapService', `Prompt length: ${prompt.length} characters`) + Logger.info('MindMapService', `Content length: ${content.length} characters`) + // 记录提示词的前1000字符和后500字符,避免日志过长 + if (prompt.length > 1500) { + Logger.info('MindMapService', `Prompt preview (first 1000 chars):\n${prompt.substring(0, 1000)}`) + Logger.info('MindMapService', `...(truncated)...`) + Logger.info('MindMapService', `Prompt end (last 500 chars):\n${prompt.substring(prompt.length - 500)}`) + } else { + Logger.info('MindMapService', `Full prompt:\n${prompt}`) + } + Logger.info('MindMapService', '==================== 提示词结束 ====================') + try { onProgress?.('generating_mindmap', 30) @@ -178,6 +203,31 @@ export class MindMapService { // 等待完整对象 const result = await object + + // ===== 调试日志:记录模型返回的原始数据 ===== + Logger.info('MindMapService', '==================== 模型返回数据开始 ====================') + Logger.info('MindMapService', `Provider: ${provider.name}`) + Logger.info('MindMapService', `Raw result (stringified):`) + try { + const resultJson = JSON.stringify(result, null, 2) + Logger.info('MindMapService', resultJson) + // 同时记录详细的结构信息 + Logger.info('MindMapService', `Root node structure:`) + Logger.info('MindMapService', `- ID: ${result.rootNode?.id}`) + Logger.info('MindMapService', `- Label: ${result.rootNode?.label}`) + Logger.info('MindMapService', `- Children count: ${result.rootNode?.children?.length ?? 0}`) + Logger.info('MindMapService', `- Metadata:`, result.rootNode?.metadata) + if (result.rootNode?.children) { + result.rootNode.children.forEach((child, idx) => { + Logger.info('MindMapService', ` Child ${idx}: id=${child.id}, label=${child.label}, level=${child.metadata?.level}, chunkIds=${child.metadata?.chunkIds?.length ?? 'null'}, keywords=${child.metadata?.keywords?.length ?? 'null'}`) + }) + } + Logger.info('MindMapService', `Overall metadata: totalNodes=${result.metadata.totalNodes}, maxDepth=${result.metadata.maxDepth}`) + } catch (logError) { + Logger.error('MindMapService', 'Failed to stringify result:', logError) + } + Logger.info('MindMapService', '==================== 模型返回数据结束 ====================') + Logger.info( 'MindMapService', `Generated mind map: ${result.metadata.totalNodes} nodes, depth ${result.metadata.maxDepth}` @@ -204,7 +254,24 @@ export class MindMapService { metadata: result.metadata } } catch (error) { - Logger.error('MindMapService', 'Error calling LLM:', error) + // ===== 调试日志:记录错误详情 ===== + Logger.error('MindMapService', '==================== 生成失败详情 ====================') + Logger.error('MindMapService', `Provider: ${provider.name}`) + Logger.error('MindMapService', 'Error details:', error) + if (error && typeof error === 'object') { + // 尝试记录错误对象的所有属性 + Logger.error('MindMapService', 'Error properties:', Object.keys(error)) + if ('cause' in error) { + Logger.error('MindMapService', 'Error cause:', error.cause) + } + if ('text' in error) { + Logger.error('MindMapService', 'Error text:', error.text) + } + if ('value' in error) { + Logger.error('MindMapService', 'Error value:', JSON.stringify(error.value, null, 2)) + } + } + Logger.error('MindMapService', '==================== 错误详情结束 ====================') throw new Error(`生成思维导图失败: ${(error as Error).message}`) } } diff --git a/src/main/vectorstore/SQLiteVectorStore.ts b/src/main/vectorstore/SQLiteVectorStore.ts index a40ff8c..cbccc75 100644 --- a/src/main/vectorstore/SQLiteVectorStore.ts +++ b/src/main/vectorstore/SQLiteVectorStore.ts @@ -3,24 +3,30 @@ * 基于 sqlite-vec 扩展的向量存储实现 */ -import { getSqlite } from '../db' +import { getSqlite, createNotebookVectorTable } from '../db' import type { VectorStore, VectorItem, QueryResult, QueryOptions, VectorStoreConfig } from './types' import Logger from '../../shared/utils/logger' /** * SQLite 向量存储实现 * 使用 sqlite-vec 的 vec0 虚拟表进行高性能向量检索 + * 每个笔记本使用独立的向量表,支持不同的向量维度 */ export class SQLiteVectorStore implements VectorStore { private notebookId: string = '' - private dimensions: number = 1024 + private dimensions: number = 768 + private tableName: string = '' private initialized: boolean = false async initialize(config: VectorStoreConfig): Promise { this.notebookId = config.notebookId - this.dimensions = config.dimensions || 1024 + this.dimensions = config.dimensions || 768 + + // 为笔记本创建独立的向量表 + this.tableName = createNotebookVectorTable(this.notebookId, this.dimensions) + this.initialized = true - Logger.info('SQLiteVectorStore', `Initialized for notebook: ${this.notebookId}`) + Logger.info('SQLiteVectorStore', `Initialized for notebook: ${this.notebookId}, table: ${this.tableName}, dimensions: ${this.dimensions}`) } async upsert(items: VectorItem[]): Promise { @@ -34,8 +40,8 @@ export class SQLiteVectorStore implements VectorStore { } const insertStmt = sqlite.prepare(` - INSERT OR REPLACE INTO vec_embeddings (embedding_id, chunk_id, notebook_id, embedding) - VALUES (?, ?, ?, ?) + INSERT OR REPLACE INTO ${this.tableName} (embedding_id, chunk_id, embedding) + VALUES (?, ?, ?) `) const insertMany = sqlite.transaction((items: VectorItem[]) => { @@ -49,13 +55,13 @@ export class SQLiteVectorStore implements VectorStore { } // sqlite-vec 可以直接接受 Float32Array - insertStmt.run(item.id, item.chunkId, this.notebookId, item.vector) + insertStmt.run(item.id, item.chunkId, item.vector) } }) try { insertMany(items) - Logger.debug('SQLiteVectorStore', `Upserted ${items.length} vectors`) + Logger.debug('SQLiteVectorStore', `Upserted ${items.length} vectors to ${this.tableName}`) } catch (error) { Logger.error('SQLiteVectorStore', 'Failed to upsert vectors:', error) throw error @@ -76,12 +82,12 @@ export class SQLiteVectorStore implements VectorStore { const placeholders = ids.map(() => '?').join(',') const deleteStmt = sqlite.prepare(` - DELETE FROM vec_embeddings WHERE embedding_id IN (${placeholders}) + DELETE FROM ${this.tableName} WHERE embedding_id IN (${placeholders}) `) try { deleteStmt.run(...ids) - Logger.debug('SQLiteVectorStore', `Deleted ${ids.length} vectors`) + Logger.debug('SQLiteVectorStore', `Deleted ${ids.length} vectors from ${this.tableName}`) } catch (error) { Logger.error('SQLiteVectorStore', 'Failed to delete vectors:', error) throw error @@ -102,12 +108,12 @@ export class SQLiteVectorStore implements VectorStore { const placeholders = chunkIds.map(() => '?').join(',') const deleteStmt = sqlite.prepare(` - DELETE FROM vec_embeddings WHERE chunk_id IN (${placeholders}) + DELETE FROM ${this.tableName} WHERE chunk_id IN (${placeholders}) `) try { deleteStmt.run(...chunkIds) - Logger.debug('SQLiteVectorStore', `Deleted vectors for ${chunkIds.length} chunks`) + Logger.debug('SQLiteVectorStore', `Deleted vectors for ${chunkIds.length} chunks from ${this.tableName}`) } catch (error) { Logger.error('SQLiteVectorStore', 'Failed to delete vectors by chunk IDs:', error) throw error @@ -135,15 +141,14 @@ export class SQLiteVectorStore implements VectorStore { embedding_id, chunk_id, distance - FROM vec_embeddings + FROM ${this.tableName} WHERE embedding MATCH ? AND k = ? - AND notebook_id = ? ORDER BY distance ASC `) // sqlite-vec 可以直接接受 Float32Array - const results = queryStmt.all(queryVector, topK, this.notebookId) as Array<{ + const results = queryStmt.all(queryVector, topK) as Array<{ embedding_id: string chunk_id: string distance: number @@ -169,7 +174,7 @@ export class SQLiteVectorStore implements VectorStore { Logger.debug( 'SQLiteVectorStore', - `Query returned ${filteredResults.length} results (threshold: ${threshold})` + `Query returned ${filteredResults.length} results from ${this.tableName} (threshold: ${threshold})` ) return filteredResults @@ -191,11 +196,11 @@ export class SQLiteVectorStore implements VectorStore { try { const deleteStmt = sqlite.prepare(` - DELETE FROM vec_embeddings WHERE notebook_id = ? + DELETE FROM ${this.tableName} `) - deleteStmt.run(this.notebookId) + deleteStmt.run() - Logger.info('SQLiteVectorStore', `Cleared all vectors for notebook: ${this.notebookId}`) + Logger.info('SQLiteVectorStore', `Cleared all vectors from ${this.tableName}`) } catch (error) { Logger.error('SQLiteVectorStore', 'Failed to clear vectors:', error) throw error @@ -214,9 +219,9 @@ export class SQLiteVectorStore implements VectorStore { try { const countStmt = sqlite.prepare(` - SELECT COUNT(*) as count FROM vec_embeddings WHERE notebook_id = ? + SELECT COUNT(*) as count FROM ${this.tableName} `) - const result = countStmt.get(this.notebookId) as { count: number } + const result = countStmt.get() as { count: number } return result?.count || 0 } catch (error) { diff --git a/src/main/vectorstore/VectorStoreManager.ts b/src/main/vectorstore/VectorStoreManager.ts index d40e826..3ab1ab6 100644 --- a/src/main/vectorstore/VectorStoreManager.ts +++ b/src/main/vectorstore/VectorStoreManager.ts @@ -14,7 +14,7 @@ import Logger from '../../shared/utils/logger' export class VectorStoreManager { private stores: Map = new Map() private defaultType: VectorStoreType = 'sqlite' - private defaultDimensions: number = 1024 + private defaultDimensions: number = 768 // 默认使用 768 维,兼容大多数嵌入模型 /** * 获取或创建 notebook 的 VectorStore diff --git a/src/shared/types/mindmap.ts b/src/shared/types/mindmap.ts index 3c77c68..57357e7 100644 --- a/src/shared/types/mindmap.ts +++ b/src/shared/types/mindmap.ts @@ -7,8 +7,8 @@ export interface MindMapTreeNode { children?: MindMapTreeNode[] metadata?: { level: number // 层级: 0-3 (根节点为0) - chunkIds: string[] // 关联的chunk IDs - keywords?: string[] + chunkIds?: string[] | null // 关联的chunk IDs,可为空 + keywords?: string[] | null // 关键词,可为空 } }