Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/memos-local-openclaw/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"posthog-node": "^5.28.0",
"puppeteer": "^24.38.0",
"semver": "^7.7.4",
"sqlite-vec": "^0.1.9",
"uuid": "^10.0.0"
},
"devDependencies": {
Expand Down
169 changes: 169 additions & 0 deletions apps/memos-local-openclaw/src/embedding/cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import type { Logger } from "../types";

// One cached embedding plus the time it was stored (used for TTL checks in get/has).
interface CacheEntry {
  vector: number[];
  timestamp: number;
}

// Tuning knobs for EmbeddingCache: capacity bound and entry lifetime.
interface CacheOptions {
  maxSize: number;
  ttlMs: number;
}

/**
 * LRU Cache for embedding vectors
 *
 * - maxSize: maximum number of cached entries
 * - ttlMs: time-to-live in milliseconds
 *
 * Uses SHA-256 hash of query text as key for fast lookup.
 *
 * Recency is tracked with the Map's own insertion order (a hit re-inserts
 * its key at the back), so lookups, inserts and LRU eviction are all O(1) —
 * no auxiliary access-order array with O(n) indexOf/splice scans.
 */
export class EmbeddingCache {
  // Iteration order doubles as recency order: first key = LRU, last key = MRU.
  private cache: Map<string, CacheEntry>;
  private readonly maxSize: number;
  private readonly ttlMs: number;

  constructor(options: CacheOptions, private log?: Logger) {
    this.maxSize = options.maxSize;
    this.ttlMs = options.ttlMs;
    this.cache = new Map();
  }

  /**
   * Generate SHA-256 hash of text.
   *
   * Input is trimmed and lowercased first so trivially different spellings
   * of the same query share one cache entry.
   */
  private async hashText(text: string): Promise<string> {
    const encoder = new TextEncoder();
    const data = encoder.encode(text.trim().toLowerCase());
    const hashBuffer = await crypto.subtle.digest("SHA-256", data);
    const hashArray = Array.from(new Uint8Array(hashBuffer));
    return hashArray.map((b) => b.toString(16).padStart(2, "0")).join("");
  }

  /**
   * Get cached embedding if available and not expired.
   *
   * A hit refreshes the entry's recency; an expired entry is deleted and
   * reported as a miss.
   */
  async get(text: string): Promise<number[] | null> {
    const key = await this.hashText(text);
    const entry = this.cache.get(key);

    if (!entry) {
      return null;
    }

    // Check TTL
    if (Date.now() - entry.timestamp > this.ttlMs) {
      this.cache.delete(key);
      this.log?.debug(`[EmbeddingCache] Entry expired for key: ${key.slice(0, 16)}...`);
      return null;
    }

    // Delete + re-insert moves the key to the most-recently-used position.
    this.cache.delete(key);
    this.cache.set(key, entry);
    this.log?.debug(`[EmbeddingCache] Cache hit for key: ${key.slice(0, 16)}...`);
    return entry.vector;
  }

  /**
   * Store embedding in cache.
   *
   * Evicts the least-recently-used entry when adding a NEW key would exceed
   * maxSize; overwriting an existing key never evicts.
   */
  async set(text: string, vector: number[]): Promise<void> {
    const key = await this.hashText(text);

    if (this.cache.has(key)) {
      // Delete before re-inserting so the key moves to the MRU position.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      this.evictLRU();
    }

    this.cache.set(key, {
      vector,
      timestamp: Date.now(),
    });
    this.log?.debug(`[EmbeddingCache] Cached embedding for key: ${key.slice(0, 16)}...`);
  }

  /**
   * Check if text is cached and valid.
   *
   * Prunes an expired entry as a side effect; does NOT refresh recency —
   * only get() does that.
   */
  async has(text: string): Promise<boolean> {
    const key = await this.hashText(text);
    const entry = this.cache.get(key);

    if (!entry) return false;

    // Check TTL
    if (Date.now() - entry.timestamp > this.ttlMs) {
      this.cache.delete(key);
      return false;
    }

    return true;
  }

  /**
   * Get cache statistics: current entry count plus configured limits.
   */
  getStats(): { size: number; maxSize: number; ttlMs: number } {
    return {
      size: this.cache.size,
      maxSize: this.maxSize,
      ttlMs: this.ttlMs,
    };
  }

  /**
   * Clear all cached entries.
   */
  clear(): void {
    this.cache.clear();
    this.log?.debug("[EmbeddingCache] Cache cleared");
  }

  /**
   * Evict the least-recently-used entry — the Map's first key, since
   * insertion order is maintained as recency order.
   */
  private evictLRU(): void {
    const oldestKey = this.cache.keys().next().value as string | undefined;
    if (oldestKey !== undefined) {
      this.cache.delete(oldestKey);
      this.log?.debug(`[EmbeddingCache] Evicted LRU entry: ${oldestKey.slice(0, 16)}...`);
    }
  }
}

// Default cache configuration: up to 1000 entries, each valid for one hour.
export const DEFAULT_CACHE_OPTIONS: CacheOptions = {
  maxSize: 1000,
  ttlMs: 60 * 60 * 1000, // 1 hour
};

// Lazily-created module-wide cache instance (singleton pattern).
let globalCache: EmbeddingCache | null = null;

/**
 * Return the shared cache, creating it with DEFAULT_CACHE_OPTIONS on first
 * use. The logger passed on that first call is the one the cache keeps.
 */
export function getGlobalCache(log?: Logger): EmbeddingCache {
  globalCache ??= new EmbeddingCache(DEFAULT_CACHE_OPTIONS, log);
  return globalCache;
}

/** Drop the shared cache so the next getGlobalCache() builds a fresh one. */
export function resetGlobalCache(): void {
  globalCache = null;
}
48 changes: 47 additions & 1 deletion apps/memos-local-openclaw/src/embedding/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,59 @@ import { embedGemini } from "./providers/gemini";
import { embedCohere, embedCohereQuery } from "./providers/cohere";
import { embedVoyage } from "./providers/voyage";
import { embedMistral } from "./providers/mistral";
import { embedOllama } from "./providers/ollama";
import { embedLocal } from "./local";
import { modelHealth } from "../ingest/providers";
import { EmbeddingCache, DEFAULT_CACHE_OPTIONS, getGlobalCache } from "./cache";

export class Embedder {
private cache: EmbeddingCache;

constructor(
private cfg: EmbeddingConfig | undefined,
private log: Logger,
private openclawAPI?: OpenClawAPI,
) {}
) {
// Use global cache singleton to share cache across instances
this.cache = getGlobalCache(log);
}

/**
 * Get embedding for query with caching support.
 *
 * Consults the shared LRU cache first; on a miss, computes the embedding
 * via embedQuery() and stores it for subsequent calls.
 */
async embedQueryWithCache(text: string): Promise<number[]> {
  // Serve from cache when a fresh (non-expired) entry exists.
  const hit = await this.cache.get(text);
  if (hit) {
    this.log.debug(`[Embedder] Cache hit for query: "${text.slice(0, 50)}..."`);
    return hit;
  }

  // Miss: compute the embedding, timing the provider call for the log line.
  const t0 = Date.now();
  const embedding = await this.embedQuery(text);
  const elapsed = Date.now() - t0;

  // Populate the cache before returning.
  await this.cache.set(text, embedding);
  this.log.debug(`[Embedder] Cached embedding (${elapsed}ms) for query: "${text.slice(0, 50)}..."`);

  return embedding;
}

/**
 * Clear embedding cache.
 *
 * Note: the cache comes from getGlobalCache(), a process-wide singleton,
 * so this clears cached embeddings for every Embedder instance, not just
 * this one.
 */
clearCache(): void {
  this.cache.clear();
}

/**
 * Get cache statistics: current entry count plus the configured capacity
 * and TTL of the shared embedding cache.
 */
getCacheStats(): { size: number; maxSize: number; ttlMs: number } {
  const { size, maxSize, ttlMs } = this.cache.getStats();
  return { size, maxSize, ttlMs };
}

get provider(): string {
if (this.cfg?.provider === "openclaw" && this.cfg.capabilities?.hostEmbedding !== true) {
Expand Down Expand Up @@ -70,6 +114,8 @@ export class Embedder {
result = await embedMistral(texts, cfg!, this.log); break;
case "voyage":
result = await embedVoyage(texts, cfg!, this.log); break;
case "ollama":
result = await embedOllama(texts, cfg!, this.log); break;
case "local":
default:
result = await embedLocal(texts, this.log); break;
Expand Down
50 changes: 50 additions & 0 deletions apps/memos-local-openclaw/src/embedding/providers/ollama.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import type { EmbeddingConfig, Logger } from "../../types";

/**
 * Embed a batch of texts with a local Ollama server.
 *
 * Sends one POST per text to `<endpoint>/api/embed` and returns the vectors
 * in the same order as `texts`.
 *
 * @param texts texts to embed
 * @param cfg   provider config (endpoint, model, headers, timeoutMs)
 * @param log   logger for debug output
 * @throws Error when an HTTP request fails or a response carries no embedding
 */
export async function embedOllama(
  texts: string[],
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[][]> {
  const endpoint = cfg.endpoint ?? "http://localhost:11434";
  // NOTE(review): "qwen" is a general chat-model tag, not a dedicated
  // embedding model — confirm this default is intentional.
  const model = cfg.model ?? "qwen";

  // Ollama embedding API endpoint (trailing slashes stripped to avoid "//api/embed").
  const url = `${endpoint.replace(/\/+$/, "")}/api/embed`;

  log.debug(`[embedOllama] embedding ${texts.length} text(s) with model "${model}" via ${url}`);

  const results: number[][] = [];

  // Ollama supports batched embedding, but some models limit batch size,
  // so each text is sent individually for maximum compatibility.
  for (const text of texts) {
    const resp = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...cfg.headers,
      },
      body: JSON.stringify({
        model,
        input: text,
      }),
      signal: AbortSignal.timeout(cfg.timeoutMs ?? 60_000),
    });

    if (!resp.ok) {
      const body = await resp.text();
      throw new Error(`Ollama embedding failed (${resp.status}): ${body}`);
    }

    const json = (await resp.json()) as {
      embeddings?: number[][] | number[];
    };

    // /api/embed normally returns { embeddings: number[][] }; some builds
    // return a flat number[]. Reject a missing or empty field explicitly
    // instead of crashing on undefined[0] or silently storing an empty vector.
    const raw = json.embeddings;
    if (!Array.isArray(raw) || raw.length === 0) {
      throw new Error(`Ollama embedding response had no embeddings (model "${model}")`);
    }

    const embedding = Array.isArray(raw[0]) ? (raw as number[][])[0] : (raw as number[]);
    results.push(embedding);
  }

  return results;
}
Loading