diff --git a/apps/memos-local-openclaw/README.md b/apps/memos-local-openclaw/README.md
index f2a9df64f..1886a8818 100644
--- a/apps/memos-local-openclaw/README.md
+++ b/apps/memos-local-openclaw/README.md
@@ -519,7 +519,8 @@ All optional — shown with defaults:
       "rrfK": 60,                 // RRF fusion constant
       "mmrLambda": 0.7,           // MMR relevance vs diversity (0-1)
       "recencyHalfLifeDays": 14,  // Time decay half-life
-      "vectorSearchMaxChunks": 0  // 0 = search all (default). Set 200000–300000 only if search is slow on huge DBs
+      "vectorSearchMaxChunks": 0, // 0 = search all (default). Set 200000–300000 only if search is slow on huge DBs
+      "autoRecallMinQueryLength": 2 // Auto-recall skips shorter normalized prompts; set 10 to ignore short acknowledgements
     },
     "dedup": {
       "similarityThreshold": 0.75,  // Cosine similarity for smart-dedup candidates (Top-5)
diff --git a/apps/memos-local-openclaw/index.ts b/apps/memos-local-openclaw/index.ts
index 2f46052ee..346020301 100644
--- a/apps/memos-local-openclaw/index.ts
+++ b/apps/memos-local-openclaw/index.ts
@@ -1882,8 +1882,9 @@ Groups: ${groupNames.length > 0 ? groupNames.join(", ") : "(none)"}`,
         const query = normalizeAutoRecallQuery(rawPrompt);
         recallQuery = query;
 
-        if (query.length < 2) {
-          ctx.log.debug("auto-recall: extracted query too short, skipping");
+        const autoRecallMinQueryLength = ctx.config.recall?.autoRecallMinQueryLength ?? DEFAULTS.autoRecallMinQueryLength;
+        if (query.length < autoRecallMinQueryLength) {
+          ctx.log.debug(`auto-recall: extracted query shorter than autoRecallMinQueryLength=${autoRecallMinQueryLength}, skipping`);
           return;
         }
         ctx.log.debug(`auto-recall: query="${query.slice(0, 80)}"`);
diff --git a/apps/memos-local-openclaw/src/config.ts b/apps/memos-local-openclaw/src/config.ts
index 150b09cc4..6e588f10c 100644
--- a/apps/memos-local-openclaw/src/config.ts
+++ b/apps/memos-local-openclaw/src/config.ts
@@ -66,6 +66,7 @@ export function resolveConfig(raw: Partial<MemosLocalConfig> | undefined, stateD
       mmrLambda: cfg.recall?.mmrLambda ?? DEFAULTS.mmrLambda,
       recencyHalfLifeDays: cfg.recall?.recencyHalfLifeDays ?? DEFAULTS.recencyHalfLifeDays,
       vectorSearchMaxChunks: cfg.recall?.vectorSearchMaxChunks ?? DEFAULTS.vectorSearchMaxChunks,
+      autoRecallMinQueryLength: cfg.recall?.autoRecallMinQueryLength ?? DEFAULTS.autoRecallMinQueryLength,
     },
     dedup: {
       similarityThreshold: cfg.dedup?.similarityThreshold ?? DEFAULTS.dedupSimilarityThreshold,
diff --git a/apps/memos-local-openclaw/src/types.ts b/apps/memos-local-openclaw/src/types.ts
index cb08eb1cf..31d80b001 100644
--- a/apps/memos-local-openclaw/src/types.ts
+++ b/apps/memos-local-openclaw/src/types.ts
@@ -312,6 +312,8 @@ export interface MemosLocalConfig {
     recencyHalfLifeDays?: number;
     /** Cap vector search to this many most recent chunks. 0 = no cap (search all; may get slower with 200k+ chunks). If you set a cap for performance, use a large value (e.g. 200000–300000) so older memories are still in the window; FTS always searches all. */
     vectorSearchMaxChunks?: number;
+    /** Auto-recall skips normalized prompts shorter than this many characters. */
+    autoRecallMinQueryLength?: number;
   };
   dedup?: {
     similarityThreshold?: number;
@@ -337,6 +339,7 @@ export const DEFAULTS = {
   mmrLambda: 0.7,
   recencyHalfLifeDays: 14,
   vectorSearchMaxChunks: 0,
+  autoRecallMinQueryLength: 2,
   dedupSimilarityThreshold: 0.80,
   evidenceWrapperTag: "STORED_MEMORY",
   excerptMinChars: 200,
diff --git a/apps/memos-local-openclaw/tests/auto-recall-min-query-length.test.ts b/apps/memos-local-openclaw/tests/auto-recall-min-query-length.test.ts
new file mode 100644
index 000000000..d756eefd1
--- /dev/null
+++ b/apps/memos-local-openclaw/tests/auto-recall-min-query-length.test.ts
@@ -0,0 +1,160 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import type { MemosLocalConfig } from "../src/types";
+
+type AutoRecallHook = (
+  event: { prompt?: string; messages?: unknown[] },
+  hookCtx?: { agentId?: string; sessionKey?: string },
+) => Promise<unknown>;
+
+const noopLog = {
+  debug() {},
+  info() {},
+  warn() {},
+  error() {},
+};
+
+async function registerPluginAndGetAutoRecallHook(opts: {
+  config: Partial<MemosLocalConfig>;
+  engineSearch: ReturnType<typeof vi.fn>;
+}): Promise<AutoRecallHook> {
+  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memos-auto-recall-min-query-"));
+  const handlers = new Map<string, AutoRecallHook>();
+
+  vi.doMock("../src/config", () => ({
+    buildContext: () => ({
+      stateDir: tmpDir,
+      workspaceDir: path.join(tmpDir, "workspace"),
+      config: {
+        storage: { dbPath: path.join(tmpDir, "memos.db") },
+        capture: { evidenceWrapperTag: "STORED_MEMORY" },
+        telemetry: {},
+        sharing: {
+          enabled: false,
+          role: "client",
+          hub: { port: 18800, teamName: "", teamToken: "" },
+          client: { hubAddress: "", userToken: "" },
+          capabilities: { hostEmbedding: false, hostCompletion: false, hostSkill: false },
+        },
+        skillEvolution: { autoRecallSkills: false },
+        ...opts.config,
+      },
+      log: noopLog,
+    }),
+  }));
+  vi.doMock("../src/storage/ensure-binding", () => ({ ensureSqliteBinding: () => {} }));
+  vi.doMock("../src/storage/sqlite", () => ({ SqliteStore: class {
+    recordToolCall() {}
+    recordApiLog() {}
+    close() {}
+  } }));
+  vi.doMock("../src/embedding", () => ({ Embedder: class { provider = "mock"; } }));
+  vi.doMock("../src/ingest/worker", () => ({ IngestWorker: class {
+    getTaskProcessor() { return { onTaskCompleted() {} }; }
+    enqueue() {}
+    async flush() {}
+  } }));
+  vi.doMock("../src/recall/engine", () => ({ RecallEngine: class {
+    search = opts.engineSearch;
+    async searchSkills() { return []; }
+  } }));
+  vi.doMock("../src/ingest/providers", () => ({ Summarizer: class {
+    async filterRelevant() { return null; }
+  } }));
+  vi.doMock("../src/viewer/server", () => ({ ViewerServer: class {
+    async start() { return "http://127.0.0.1:18799"; }
+    stop() {}
+    getResetToken() { return "token"; }
+  } }));
+  vi.doMock("../src/hub/server", () => ({ HubServer: class {
+    async start() { return "http://127.0.0.1:18800"; }
+    async stop() {}
+  } }));
+  vi.doMock("../src/client/hub", () => ({
+    hubGetMemoryDetail: async () => ({}),
+    hubRequestJson: async () => ({}),
+    hubSearchMemories: async () => ({ hits: [], meta: {} }),
+    hubSearchSkills: async () => ({ hits: [] }),
+    resolveHubClient: async () => ({ hubUrl: "", userToken: "", userId: "" }),
+  }));
+  vi.doMock("../src/client/connector", () => ({
+    connectToHub: async () => ({ connected: false }),
+    getHubStatus: async () => ({ connected: false }),
+  }));
+  vi.doMock("../src/client/skill-sync", () => ({
+    fetchHubSkillBundle: async () => ({}),
+    publishSkillBundleToHub: async () => ({}),
+    restoreSkillBundleFromHub: () => ({}),
+    unpublishSkillBundleFromHub: async () => ({}),
+  }));
+  vi.doMock("../src/skill/evolver", () => ({ SkillEvolver: class { async onTaskCompleted() {} } }));
+  vi.doMock("../src/skill/installer", () => ({ SkillInstaller: class {
+    getCompanionManifest() { return null; }
+    install() { return { message: "ok" }; }
+  } }));
+  vi.doMock("../src/skill/bundled-memory-guide", () => ({ MEMORY_GUIDE_SKILL_MD: "# mock" }));
+  vi.doMock("../src/telemetry", () => ({ Telemetry: class {
+    trackToolCalled() {}
+    trackAutoRecall() {}
+    trackMemoryIngested() {}
+    trackSkillInstalled() {}
+    trackSkillEvolved() {}
+    trackPluginStarted() {}
+    trackError() {}
+    async shutdown() {}
+  } }));
+
+  const pluginModule = await import("../plugin-impl");
+  pluginModule.default.register({
+    id: "memos-local-openclaw-plugin",
+    pluginConfig: {},
+    config: { plugins: { entries: { "memos-local-openclaw-plugin": {} } } },
+    resolvePath: (p: string) => path.join(tmpDir, p.replace(/^~[\\/]/, "")),
+    logger: { info() {}, warn() {} },
+    registerTool: () => {},
+    registerMemoryCapability: () => {},
+    registerService: () => {},
+    on: (name: string, handler: AutoRecallHook) => {
+      handlers.set(name, handler);
+    },
+  } as any);
+
+  const hook = handlers.get("before_prompt_build");
+  if (!hook) throw new Error("before_prompt_build hook was not registered");
+  return hook;
+}
+
+afterEach(() => {
+  vi.resetModules();
+  vi.clearAllMocks();
+});
+
+describe("auto-recall min query length", () => {
+  it("skips auto-recall search when query is shorter than configured threshold", async () => {
+    const search = vi.fn(async () => ({ hits: [], meta: {} }));
+    const hook = await registerPluginAndGetAutoRecallHook({
+      config: { recall: { autoRecallMinQueryLength: 10 } },
+      engineSearch: search,
+    });
+
+    await hook({ prompt: "继续吧" }, { agentId: "main" });
+
+    expect(search).not.toHaveBeenCalled();
+  });
+
+  it("runs auto-recall search when query reaches configured threshold", async () => {
+    const search = vi.fn(async () => ({ hits: [], meta: {} }));
+    const hook = await registerPluginAndGetAutoRecallHook({
+      config: { recall: { autoRecallMinQueryLength: 10 } },
+      engineSearch: search,
+    });
+
+    await hook({ prompt: "remember deployment rollback preference" }, { agentId: "main" });
+
+    expect(search).toHaveBeenCalledWith(expect.objectContaining({
+      query: "remember deployment rollback preference",
+    }));
+  });
+});
diff --git a/apps/memos-local-openclaw/tests/config.test.ts b/apps/memos-local-openclaw/tests/config.test.ts
index 072728b9d..65cb774f2 100644
--- a/apps/memos-local-openclaw/tests/config.test.ts
+++ b/apps/memos-local-openclaw/tests/config.test.ts
@@ -2,6 +2,21 @@ import { describe, expect, it } from "vitest";
 import { resolveConfig } from "../src/config";
 
 describe("resolveConfig", () => {
+  it("defaults autoRecallMinQueryLength to the existing two-character threshold", () => {
+    const resolved = resolveConfig(undefined, "/tmp/memos-config-test");
+
+    expect(resolved.recall?.autoRecallMinQueryLength).toBe(2);
+  });
+
+  it("preserves configured autoRecallMinQueryLength", () => {
+    const resolved = resolveConfig(
+      { recall: { autoRecallMinQueryLength: 10 } },
+      "/tmp/memos-config-test",
+    );
+
+    expect(resolved.recall?.autoRecallMinQueryLength).toBe(10);
+  });
+
   it("injects openclaw providers into existing blocks when host capabilities are enabled", () => {
     const resolved = resolveConfig(
       {
diff --git a/docs/cn/open_source/evaluation/openai_memory_locomo_eval_guide.md b/docs/cn/open_source/evaluation/openai_memory_locomo_eval_guide.md
new file mode 100644
index 000000000..26340394d
--- /dev/null
+++ b/docs/cn/open_source/evaluation/openai_memory_locomo_eval_guide.md
@@ -0,0 +1,115 @@
+# OpenAI Memory 在 LoCoMo 上的评估指南
+
+本文档简要概述了使用 LoCoMo 数据集对 OpenAI 的 Memory 功能进行评估的整体流程。
+
+## 1. 简介
+
+由于 OpenAI 的 [Memory 功能](https://openai.com/index/memory-and-new-controls-for-chatgpt/) 没有公开 API，因此评估需要手动进行。LoCoMo 数据集中的对话会被格式化并手动输入到 ChatGPT 网页界面中。生成的记忆随后从账号的记忆管理页面中获取并保存到本地。
+
+为了评估这些记忆的质量，我们将通过 API 使用 `gpt-4o-mini` 模型。模型将被问及 LoCoMo 数据集中的问题，并提供相关对话的完整记忆历史作为上下文。这模拟了一个完美的记忆检索系统，为模型提供了最佳的回答信息。
+
+## 2. 工作流程
+
+### 步骤 2.1：生成用于记忆提取的输入上下文
+
+运行以下 Python 脚本，为每个对话中的每个会话生成输入提示。该脚本将为每个会话创建一个单独的 `.txt` 文件，包含格式化的对话历史和提取提示。
+
+**脚本：**
+```python
+import json
+import os
+
+# 确保数据集路径正确
+LOCOMO_DATA_PATH = "data/locomo/locomo10.json"
+SAVE_DIR = "openai_inputs"
+
+os.makedirs(SAVE_DIR, exist_ok=True)
+
+TEMPLATE = """Can you please extract relevant information from this conversation and create memory entries for each user mentioned? Please store these memories in your knowledge base in addition to the timestamp provided for future reference and personalized interactions.
+
+{context}
+"""
+
+with open(LOCOMO_DATA_PATH, "r", encoding="utf-8") as f:
+    data = json.load(f)
+
+for conv_idx, item in enumerate(data):
+    conv = item["conversation"]
+
+    for i in range(1, 35):
+        session_key = f"session_{i}"
+        session_dt_key = f"session_{i}_date_time"
+        if session_key not in conv:
+            continue
+
+        session = conv[session_key]
+        session_dt = conv[session_dt_key]
+
+        session_context = ""
+        for chat in session:
+            chat_str = f"({session_dt}) {chat['speaker']}: {chat['text']}\n"
+            session_context += chat_str
+
+        input_string = TEMPLATE.format(context=session_context)
+
+        output_filename = os.path.join(SAVE_DIR, f"{conv_idx}-D{i}.txt")
+        with open(output_filename, "w", encoding="utf-8") as f:
+            f.write(input_string)
+
+print(f"Generated {len(os.listdir(SAVE_DIR))} input files in '{SAVE_DIR}' directory.")
+```
+
+**输入示例（`0-D9.txt`）：**
+```plaintext
+Can you please extract relevant information from this conversation and create memory entries for each user mentioned? Please store these memories in your knowledge base in addition to the timestamp provided for future reference and personalized interactions.
+
+(2:31 pm on 17 July, 2023) Melanie: Hey Caroline, hope all's good! I had a quiet weekend after we went camping with my fam two weekends ago. It was great to unplug and hang with the kids. What've you been up to? Anything fun over the weekend?
+(2:31 pm on 17 July, 2023) Caroline: Hey Melanie! That sounds great! Last weekend I joined a mentorship program for LGBTQ youth - it's really rewarding to help the community.
+... (rest of the conversation)
+```
+
+### 步骤 2.2：从 ChatGPT 中提取并保存记忆
+
+1.  **启用记忆功能：** 在 ChatGPT 中，前往 **设置（Settings） -> 个性化（Personalization）**，确保 **记忆（Memory）** 功能已开启。
+2.  **清除已有记忆：** 在处理新对话之前，点击 **管理（Manage）** -> **清除全部（Clear all）**，确保清除已有记忆。
+3.  **输入并验证：**
+    * 开启一个新的聊天。
+    * 确保模型设置为 **GPT-4o**。
+    * 复制生成的 `.txt` 文件的内容（例如 `0-D1.txt`）并粘贴到聊天中。
+    * 模型回复后，确认看到"记忆已更新"(Memory updated)的提示。
+4.  **保存记忆：**
+    * 点击记忆确认中的 **管理(Manage)**，查看新生成的记忆。
+    * 创建一个与输入文件同名的新本地 `.txt` 文件（例如 `0-D1.txt`）。
+    * 从 ChatGPT 中复制每条记忆并粘贴到新文件中，每条记忆占一行。
+5.  **为下一个对话重置记忆：**
+    * 一个对话的所有会话完成后，务必**删除所有记忆，以确保下一个对话从干净状态开始**。前往设置(Settings) -> 个性化(Personalization) -> 管理(Manage)，点击删除全部(Delete all)。
+
+**记忆输出示例（`0-D9.txt`）：**
+```plaintext
+As of November 17, 2023, Dave has taken up photography and enjoys capturing nature scenes like sunsets, beaches, waves, rocks, and waterfalls.
+Dave recently purchased a vintage camera that takes high-quality photos.
+Dave discovered a serene park nearby with a peaceful spot featuring a bench under a tree with pink flowers.
+As of November 17, 2023, Calvin attended a fancy gala in Boston where he had an inspiring conversation with an artist about music and art.
+Calvin finds music a powerful connector and source of creativity.
+Calvin took a photo in a Japanese garden that he shared with Dave.
+Calvin accepted an invitation to perform at an upcoming show in Boston, expressing excitement about the musical experience.
+```
+
+### 步骤 2.3：合并记忆
+
+记忆目前按会话分别保存。你需要编写一个简单的脚本，将同一对话的所有记忆合并到一个文件中。例如，`0-D1.txt`、`0-D2.txt` 等文件中的所有记忆应合并为一个 `conversation_0_memories.txt` 文件。
+
+
+### 步骤 2.4：自动化评估
+
+所有对话的记忆提取并保存完成后，可以运行自动化[评估脚本](../../../../evaluation/scripts/run_openai_eval.sh)。该脚本将处理生成答案、评估答案和计算指标的过程。
+
+```bash
+# 编辑 evaluation/scripts/run_openai_eval.sh 中的配置
+evaluation/scripts/run_openai_eval.sh
+```
+
+## 3. 注意事项
+
+-   **账号差异：** 请注意免费账号和 Plus 账号之间可能存在差异，例如上下文长度限制和可存储的记忆数量。
+-   **粒度：** 评估过程在会话级别添加记忆。为确保高质量的记忆提取，应遵循相同的原则。一次性将整个对话提供给模型已被证明效果不佳，通常会导致模型忽略重要细节，从而造成大量信息丢失。
diff --git a/docs/cn/open_source/evaluation/overview.md b/docs/cn/open_source/evaluation/overview.md
new file mode 100644
index 000000000..070ddf018
--- /dev/null
+++ b/docs/cn/open_source/evaluation/overview.md
@@ -0,0 +1,94 @@
+# 记忆评估框架
+
+本仓库提供了使用各种模型和 API 对 `LoCoMo`、`LongMemEval`、`PrefEval`、`PersonaMem` 数据集进行评估的工具和脚本。
+
+
+## 环境安装
+
+1. 设置 `PYTHONPATH` 环境变量：
+   ```bash
+   export PYTHONPATH=../src
+   cd evaluation  # 请在仓库根目录执行
+   ```
+
+2. 安装依赖：
+   ```bash
+   poetry install --extras all --with eval
+   ```
+
+## 配置说明
+将 .env-example 文件复制一份并重命名为 .env，然后根据你的环境和 API 密钥，填写所需的环境变量。
+
+## 安装 MemOS
+### 本地服务
+```bash
+# 修改 {project_dir}/.env 文件并启动服务器
+uvicorn memos.api.server_api:app --host 0.0.0.0 --port 8001 --workers 8
+
+# 配置 {project_dir}/evaluation/.env 文件
+MEMOS_URL="http://127.0.0.1:8001"
+```
+### 在线服务
+```bash
+# 请访问 https://memos-dashboard.openmem.net/cn/quickstart/ 获取您的 API 密钥
+# 获取到API密钥后，将密钥配置到 {project_dir}/evaluation/.env 文件中
+MEMOS_KEY="Token mpg-xxxxx"
+MEMOS_ONLINE_URL="https://memos.memtensor.cn/api/openmem/v1"
+
+```
+## 支持的框架
+
+脚本支持 `memos-api` 和 `memos-api-online`。同时，我们为以下记忆框架提供了非官方实现：`zep`、`mem0`、`memobase`、`supermemory`、`memu`。
+
+## 评估脚本
+
+### LoCoMo 评估
+
+⚙️ 使用支持的记忆框架之一评估 **LoCoMo** 数据集 —— 运行以下脚本：
+
+```bash
+# 编辑 ./scripts/run_locomo_eval.sh 中的配置
+# 指定要使用的模型和记忆后端（例如 mem0、zep 等）
+evaluation/scripts/run_locomo_eval.sh
+```
+
+✍️ 如需使用 OpenAI 的原生记忆功能评估 LoCoMo 数据集，请参考详细指南：[OpenAI Memory on LoCoMo - 评估指南](./openai_memory_locomo_eval_guide.md)。
+
+### LongMemEval 评估
+
+首先从 https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned 下载数据集 `longmemeval_s`，并将其保存为 `data/longmemeval/longmemeval_s.json`
+
+```bash
+# 编辑 evaluation/scripts/run_lme_eval.sh 中的配置
+# 指定要使用的模型和记忆后端（例如 mem0、zep 等）
+evaluation/scripts/run_lme_eval.sh
+```
+
+#### 问题日期与 `reference_time`
+
+LongMemEval 为每个问题提供了一个**问题日期**；评估时应以该日期作为“当前时间”参考，而不是运行脚本时的实际时间。LongMemEval 搜索脚本会将 `question_date` 作为 **`reference_time`** 传递给支持该参数的后端。
+
+**MemOS Cloud** 目前不支持在搜索时提供问题日期，因此在该平台上的 LongMemEval 得分可能与完全遵循规范的运行结果存在差异。**如果需要获得可比较的数值，建议使用开源的 MemOS 服务器来评估 LongMemEval。**
+
+### PrefEval 评估
+
+从 https://github.com/amazon-science/PrefEval/blob/main/benchmark_dataset/filtered_inter_turns.json 下载 `benchmark_dataset/filtered_inter_turns.json`，并将其保存为 `./data/prefeval/filtered_inter_turns.json`。
+
+要评估 **PrefEval** 数据集 —— 请运行以下脚本：
+
+```bash
+# 编辑 evaluation/scripts/run_prefeval_eval.sh 中的配置
+# 指定要使用的模型和记忆后端（例如 mem0、zep 等）
+evaluation/scripts/run_prefeval_eval.sh
+```
+
+### PersonaMem 评估
+
+从 https://huggingface.co/datasets/bowen-upenn/PersonaMem 获取 `questions_32k.csv` 和 `shared_contexts_32k.jsonl`，并将其保存到 `data/personamem/` 目录下。
+
+```bash
+# 编辑 evaluation/scripts/run_pm_eval.sh 中的配置
+# 指定要使用的模型和记忆后端（例如 mem0、zep 等）
+# 如需使用 MIRIX，请编辑 evaluation/scripts/personamem/config.yaml 中的配置
+evaluation/scripts/run_pm_eval.sh
+```
diff --git a/docs/cn/open_source/modules/api_deployment.md b/docs/cn/open_source/modules/api_deployment.md
new file mode 100644
index 000000000..917cca69f
--- /dev/null
+++ b/docs/cn/open_source/modules/api_deployment.md
@@ -0,0 +1,13 @@
+# MemOS API
+
+## 默认入口与部署方式
+
+- **公开开源使用**时，请使用 **`server_api.py`** 作为 API 服务的入口。
+- 您可以通过 **`docker/Dockerfile`** 进行部署。
+
+以上是运行和部署 API 的默认通用方式。
+
+## 扩展功能与参考实现
+
+- **`server_api_ext.py`** 和 **`Dockerfile.krolik`** 是某位开发者扩展后的 API 及部署配置，**仅供参考**。这些内容尚未与云服务集成，仍处于测试阶段。
+- 如需扩展或自定义行为，可参考上述文件，按需使用或改造。
diff --git a/docs/cn/open_source/modules/model_backend.md b/docs/cn/open_source/modules/model_backend.md
new file mode 100644
index 000000000..d586ab2cf
--- /dev/null
+++ b/docs/cn/open_source/modules/model_backend.md
@@ -0,0 +1,104 @@
+---
+title: LLMs and Embeddings
+desc: "在 **MemOS** 中配置和使用大型语言模型（LLM）及嵌入器的实用指南。"
+---
+
+## 概述 <a id="overview"></a>
+MemOS 通过两个 Pydantic 工厂类将**模型逻辑**与**运行时配置**解耦：
+
+| 工厂类 | 产出 | 典型后端 |
+|---------|----------|------------------|
+| `LLMFactory` | 对话模型 | `ollama`, `openai`, `azure`, `qwen`, `deepseek`, `huggingface`, `huggingface_singleton`, `vllm`, `openai_new` |
+| `EmbedderFactory` | 文本嵌入器 | `ollama`, `sentence_transformer`, `ark`, `universal_api` |
+
+两个工厂类均接受 `*_ConfigFactory.model_validate(...)` 配置对象，因此只需修改 `backend=` 参数即可切换服务提供商。
+
+
+## LLM 模块 <a id="llm-module"></a>
+
+### 支持的 LLM 后端 <a id="supported-llm-backends"></a>
+| Backend | 说明 | 示例 model_name_or_path |
+|---|---|---|
+| `ollama` | 本地 Ollama 服务器 | `qwen3:0.6b` |
+| `openai` | 兼容 OpenAI 的 Chat Completions 接口 | `gpt-4.1-nano` |
+| `azure` | Azure OpenAI Chat Completions | `<your-deployment-name>` |
+| `qwen` | DashScope 兼容 OpenAI 的 API | `qwen-plus` |
+| `deepseek` | DeepSeek 兼容 OpenAI 的 API | `deepseek-chat` / `deepseek-reasoner` |
+| `huggingface` | 本地 transformers pipeline | `Qwen/Qwen3-1.7B` |
+| `huggingface_singleton` | 与 `huggingface` 相同，但启用单例复用 | `Qwen/Qwen3-1.7B` |
+| `vllm` | 兼容 OpenAI 的 vLLM 服务器 | `Qwen/Qwen2.5-7B-Instruct` |
+| `openai_new` | OpenAI Responses API 封装 | `gpt-4.1` |
+
+### LLM 配置模式 <a id="llm-config-schema"></a>
+
+
+常用字段：
+
+| 字段 | 类型 | 默认值 | 描述 |
+|-------|------|---------|-------------|
+| `model_name_or_path` | str | – | 模型 ID 或本地标签 |
+| `temperature` | float | 0.7 | |
+| `max_tokens` | int | 8192 | |
+| `top_p` / `top_k` | float / int | 0.95 / 50 | |
+| *API 专用字段* | 如 `api_key`, `api_base` | – | 兼容 OpenAI 的认证信息 |
+| `remove_think_prefix` | bool | False | 从生成文本中移除思考标签内的内容 |
+
+
+### 工厂用法 <a id="llm-factory-usage"></a>
+```python
+from memos.configs.llm import LLMConfigFactory
+from memos.llms.factory import LLMFactory
+
+cfg = LLMConfigFactory.model_validate({
+    "backend": "ollama",
+    "config": {"model_name_or_path": "qwen3:0.6b"}
+})
+llm = LLMFactory.from_config(cfg)
+```
+
+### LLM 核心 API <a id="llm-core-apis"></a>
+| 方法 | 用途 |
+|--------|---------|
+| `generate(messages: list)` | 返回完整的字符串响应 |
+| `generate_stream(messages)` | 以流式方式逐块生成内容 |
+
+### 流式输出与思维链（CoT） <a id="streaming--cot"></a>
+```python
+messages = [{"role": "user", "content": "Let's think step by step: …"}]
+for chunk in llm.generate_stream(messages):
+    print(chunk, end="")
+```
+
+::note
+**完整代码**
+所有使用场景示例请参见 `examples/basic_modules/llm.py`。
+::
+
+### 性能建议 <a id="llm-performance-tips"></a>
+- 在本地原型开发时，使用 `qwen3:0.6b` 可将内存占用控制在 2 GB 以内。
+- 结合 KV Cache（参见 *KVCacheMemory* 文档）可降低首个 token 的生成延迟（TTFT）。
+
+## 嵌入模块 <a id="embedding-module"></a>
+
+### 支持的嵌入器后端 <a id="supported-embedder-backends"></a>
+| Backend | 说明 | 示例 model_name_or_path |
+|---|---|---|
+| `ollama` | 本地 Ollama 服务器 | `nomic-embed-text:latest` |
+| `sentence_transformer` | 本地 sentence-transformers | `nomic-ai/nomic-embed-text-v1.5` |
+| `ark` | 火山引擎 Ark 嵌入服务 | `<ark-model-id>` |
+| `universal_api` | 通用服务提供商封装（如 OpenAI） | `text-embedding-3-large` |
+
+### 嵌入器配置模式 <a id="embedder-config-schema"></a>
+共享字段：`model_name_or_path`，可选的 API 认证信息（`api_key`、`base_url`）等。
+
+### 工厂用法 <a id="embedder-factory-usage"></a>
+```python
+from memos.configs.embedder import EmbedderConfigFactory
+from memos.embedders.factory import EmbedderFactory
+
+cfg = EmbedderConfigFactory.model_validate({
+    "backend": "ollama",
+    "config": {"model_name_or_path": "nomic-embed-text:latest"}
+})
+embedder = EmbedderFactory.from_config(cfg)
+```
diff --git a/docs/cn/open_source/modules/mos/memos_mcp.md b/docs/cn/open_source/modules/mos/memos_mcp.md
new file mode 100644
index 000000000..abb061393
--- /dev/null
+++ b/docs/cn/open_source/modules/mos/memos_mcp.md
@@ -0,0 +1,110 @@
+---
+title: MCP (Model Context Protocol) Setup Guide
+desc: Model Context Protocol（MCP）是一种标准协议，使 AI 助手能够安全地访问本地及远程资源并与之交互。在 MemOS 项目中，MCP 为记忆操作提供了标准化接口，使外部应用程序能够通过定义良好的工具和资源与记忆系统进行交互。
+---
+
+
+## 配置
+
+### 环境变量
+
+在项目根目录下创建 `.env` 文件，并填写以下配置：
+
+```bash
+# OpenAI Configuration
+OPENAI_API_KEY=your_openai_api_key_here
+OPENAI_API_BASE=https://api.openai.com/v1
+
+# Memory System Configuration
+MOS_TEXT_MEM_TYPE=tree_text
+
+# Neo4j Configuration (required for tree_text memory type)
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_neo4j_password
+```
+
+## 启动 MCP 服务器
+
+### 方式一：使用内置服务器脚本
+
+```bash
+# Navigate to the project root
+cd /path/to/MemOS
+
+# Run with default stdio transport
+python src/memos/api/mcp_serve.py
+
+# Run with HTTP transport
+python src/memos/api/mcp_serve.py --transport http --host localhost --port 8000
+
+# Run with SSE transport (deprecated but supported)
+python src/memos/api/mcp_serve.py --transport sse --host localhost --port 8000
+```
+
+### 方式二：使用示例脚本
+
+```bash
+# Navigate to the examples directory
+cd examples/mem_mcp
+
+# Run the server
+python simple_fastmcp_serve.py --transport http --port 8000
+```
+
+### 传输方式选项
+
+MCP 服务器支持三种传输方式：
+
+1. **stdio**（默认）：面向本地应用的标准输入/输出
+2. **http**：面向 Web 应用的 HTTP 传输
+3. **sse**：Server-Sent Events（已弃用，但仍受支持）
+
+### 命令行参数
+
+- `--transport`：选择传输方式（`stdio`、`http`、`sse`）
+- `--host`：HTTP/SSE 传输的主机地址（默认：`localhost`）
+- `--port`：HTTP/SSE 传输的端口号（默认：`8000`）
+
+## MCP 客户端使用
+
+### 基础客户端示例
+
+项目包含一个示例客户端，演示如何与 MCP 服务器进行交互：
+
+```bash
+# Ensure the MCP server is running on HTTP transport
+cd examples/mem_mcp
+python simple_fastmcp_serve.py --transport http --port 8000
+
+# In another terminal, run the client
+cd examples/mem_mcp
+python simple_fastmcp_client.py
+```
+
+## MCP 配置
+
+如需将 Cursor IDE 与 MemOS MCP 服务器集成，请将以下配置添加到 `desktop_config.json` 及其他本地 MCP 配置文件中：
+
+```json
+{
+  "mcpServers": {
+    "memos-fastmcp": {
+      "command": "/path/to/your/conda/envs/memos/bin/python",
+      "args": [
+        "-m", "memos.api.mcp_serve",
+        "--transport", "stdio"
+      ],
+    //   "cwd": "/path/to/your/MemOS",  (optional — not needed when memos is installed via pip)
+      "env": {
+        "OPENAI_API_KEY": "sk-your-openai-key-here",
+        "OPENAI_API_BASE": "https://api.openai.com/v1",
+        "MOS_TEXT_MEM_TYPE": "tree_text",
+        "NEO4J_URI": "bolt://localhost:7687",
+        "NEO4J_USER": "neo4j",
+        "NEO4J_PASSWORD": "your-neo4j-password"
+      }
+    }
+  }
+}
+```
diff --git a/docs/cn/open_source/modules/mos/memos_neo.md b/docs/cn/open_source/modules/mos/memos_neo.md
new file mode 100644
index 000000000..0ca261571
--- /dev/null
+++ b/docs/cn/open_source/modules/mos/memos_neo.md
@@ -0,0 +1,171 @@
+---
+title: MemOS NEO Version
+desc: 使用 `MOS.simple()` 在几分钟内快速上手 MemOS —— 构建具备记忆增强能力的应用程序的最快方式。
+---
+
+## 快速设置
+
+### 环境变量
+
+设置您的 API 凭据：
+
+```bash
+export OPENAI_API_KEY="sk-your-api-key-here"
+export OPENAI_API_BASE="https://api.openai.com/v1"  # Optional
+export MOS_TEXT_MEM_TYPE="general_text"  #or "tree_text" for advanced
+
+# Tip: general_text supports only a single user when MOS is initialized
+```
+
+### 一行代码完成设置
+
+```python
+from memos.mem_os.main import MOS
+
+# Auto-configured instance
+memory = MOS.simple()
+```
+::note
+**警告：**<br>`MOS.simple()` 将使用默认的嵌入模型 text-embedding-3-large（向量维度为 3072）。如果您之前使用过其他版本的 memos，需要删除 `~/.memos` 目录以重置 qdrant，或清空 neo4j 数据库。
+::
+
+## 基础用法
+
+```python
+#!/usr/bin/env python3
+import os
+from memos.mem_os.main import MOS
+
+# Set environment variables
+os.environ["OPENAI_API_KEY"] = "sk-your-api-key"
+os.environ["MOS_TEXT_MEM_TYPE"] = "general_text"
+
+# Create memory system
+memory = MOS.simple()
+
+# Add memories
+memory.add("My favorite color is blue")
+memory.add("I work as a software engineer")
+memory.add("I live in San Francisco")
+
+# Chat with memory context
+response = memory.chat("What is user favorite color?")
+print(response)  # "favorite color is blue!"
+
+response = memory.chat("Tell me about user job and location")
+print(response)  # Uses stored memories to respond
+```
+
+## 记忆类型
+
+### 通用文本记忆（推荐新手使用）
+- **存储方式**：本地 JSON 文件 + Qdrant 向量数据库
+- **初始配置**：无需外部依赖
+- **适用场景**：大多数用例、快速原型开发
+
+```bash
+export MOS_TEXT_MEM_TYPE="general_text"
+```
+
+### 树形文本记忆（进阶）
+- **存储方式**：Neo4j 图数据库
+- **初始配置**：需要 Neo4j 服务器
+- **适用场景**：复杂关系推理
+
+```bash
+export MOS_TEXT_MEM_TYPE="tree_text"
+export NEO4J_URI="bolt://localhost:7687"  # Optional
+export NEO4J_PASSWORD="your-password"     # Optional
+```
+
+## Neo 版本概述
+
+`MOS.simple()` 会自动使用合理的默认值创建完整配置：
+
+### 默认设置
+- **LLM**：GPT-4o-mini，temperature 为 0.8
+- **嵌入器**：OpenAI text-embedding-3-large
+- **分块策略**：512 个 token，重叠 128 个
+- **图数据库**：Neo4j 图数据库
+
+### 默认配置工具
+
+MemOS 在 `default_config.py` 中提供了三个主要配置工具：
+
+- **`get_default_config()`**：使用合理默认值创建完整的 MOS 配置
+- **`get_default_cube_config()`**：创建用于记忆存储的 MemCube 配置
+- **`get_default()`**：同时返回 MOS 配置和 MemCube 实例
+
+```python
+from memos.mem_os.utils.default_config import get_default, get_default_cube_config
+
+# Get both MOS config and MemCube instance
+mos_config, default_cube = get_default(
+    openai_api_key="sk-your-key",
+    text_mem_type="general_text"
+)
+
+# Or create just MemCube config
+cube_config = get_default_cube_config(
+    openai_api_key="sk-your-key",
+    text_mem_type="general_text"
+)
+```
+
+### 手动配置（可选）
+
+如需更精细的控制，可使用配置工具：
+
+```python
+from memos.mem_os.main import MOS
+from memos.mem_os.utils.default_config import get_default_config
+
+# Custom configuration
+config = get_default_config(
+    openai_api_key="sk-your-key",
+    text_mem_type="general_text",
+    user_id="my_user",
+    model_name="gpt-4",           # Different model
+    temperature=0.5,              # Lower creativity
+    chunk_size=256,               # Smaller chunks
+    top_k=10                      # More search results
+)
+
+memory = MOS(config)
+```
+
+### 高级功能
+
+启用附加能力：
+
+```python
+config = get_default_config(
+    openai_api_key="sk-your-key",
+    enable_activation_memory=True,    # KV-cache memory
+    enable_mem_scheduler=True,        # Background processing
+)
+```
+
+
+## 其他使用建议
+
+1. **从简开始**：初始时使用 `general_text` 记忆类型
+2. **环境配置**：将 API 密钥存储在环境变量中
+3. **记忆质量**：添加具体、事实性的信息以获得最佳效果
+4. **批量操作**：将多条相关记忆一起添加
+5. **用户上下文**：多用户场景下仅在使用 `tree_text` 时使用 `user_id` 参数
+
+## 故障排查
+
+### 常见问题
+
+**缺少 API 密钥错误**：
+```bash
+# Ensure environment variable is set
+echo $OPENAI_API_KEY
+```
+
+**Neo4j 连接错误**（tree_text 模式）：
+```bash
+# Check that Neo4j is running (Neo4j Desktop for local use, or your Enterprise Neo4j server)
+```
diff --git a/docs/en/open_source/open_source_api/core/add_memory.md b/docs/en/open_source/open_source_api/core/add_memory.md
new file mode 100644
index 000000000..1b26ea00f
--- /dev/null
+++ b/docs/en/open_source/open_source_api/core/add_memory.md
@@ -0,0 +1,71 @@
+---
+title: Add Memory
+desc: The core production interface for MemOS. Enables asynchronous memory production for personal memory, knowledge bases, and multi-tenant scenarios through the MemCube isolation mechanism.
+---
+
+**Endpoint**: `POST /product/add`
+**Description**: This is the primary entry point for storing unstructured data in the system. It supports converting raw data into structured memory fragments via conversation lists, plain text, or metadata. In the open-source version, the system uses **MemCube** to achieve physical isolation and dynamic organization of memories.
+
+## 1. Core Mechanism: MemCube and Isolation
+
+In the open-source architecture, understanding MemCube is essential for effective use of this interface:
+
+* **Isolation Unit**: MemCube is the atomic unit of memory production. Cubes are completely independent of each other — deduplication and conflict resolution only occur within a single Cube.
+* **Flexible Mapping**:
+    * **Personal Mode**: Pass `user_id` as `writable_cube_ids` to establish a private personal memory store.
+    * **Knowledge Base Mode**: Pass the unique identifier (QID) of a knowledge base as `writable_cube_ids` to store content in that knowledge base.
+* **Multi-Target Write**: The interface supports writing memories to multiple Cubes simultaneously, enabling cross-domain synchronization.
+
+
+## 2. Key Parameters
+
+Core parameters are defined as follows:
+
+| Parameter | Type | Required | Default | Description |
+| :--- | :--- | :--- | :--- | :--- |
+| **`user_id`** | `str` | Yes | - | Unique user identifier, used for permission validation. |
+| **`messages`** | `list/str`| Yes | - | A list of messages or plain text content to be stored. |
+| **`writable_cube_ids`** | `list[str]`| Yes | - | **Core parameter**: Specifies the list of target Cube IDs to write to. |
+| **`async_mode`** | `str` | No | `async` | Processing mode: `async` (background queue processing) or `sync` (blocks the current request). |
+| **`is_feedback`** | `bool` | No | `false` | If `true`, the system automatically routes to the feedback handler to perform memory correction. |
+| `session_id` | `str` | No | `default` | Session identifier, used to track conversation context. |
+| `custom_tags` | `list[str]`| No | - | Custom tags that can be used as filter conditions in subsequent searches. |
+| `info` | `dict` | No | - | Extended metadata. All key-value pairs support subsequent filtered retrieval. |
+| `mode` | `str` | No | - | Only takes effect when `async_mode='sync'`. Options: `fast` or `fine`. |
+
+## 3. How It Works (Component & Handler)
+
+When a request reaches the backend, the system dispatches the **AddHandler** to execute the following logic using core components:
+
+1. **Multimodal Parsing**: The `MemReader` component converts `messages` into internal memory objects.
+2. **Feedback Routing**: If `is_feedback=True`, the Handler extracts the end of the conversation as feedback and directly corrects existing memories without generating new facts.
+3. **Async Dispatch**: In `async` mode, `MemScheduler` pushes the task into the task queue and the interface immediately returns a `task_id`.
+4. **Internal Organization**: The algorithm executes organization logic within the target Cube, optimizing memory quality through deduplication and merging.
+
+## 4. Quick Start
+
+The recommended way to interact with this interface is via the `MemOSClient` SDK:
+
+```python
+from memos.api.client import MemOSClient
+
+# Initialize the client
+client = MemOSClient(api_key="...", base_url="...")
+
+# Scenario 1: Add memory for a personal user
+client.add_message(
+    user_id="sde_dev_01",
+    writable_cube_ids=["user_01_private"],
+    messages=[{"role": "user", "content": "I am learning ggplot2 in R."}],
+    async_mode="async",
+    custom_tags=["Programming", "R"]
+)
+# Scenario 2: Import content into a knowledge base with feedback enabled
+client.add_message(
+    user_id="admin_01",
+    writable_cube_ids=["kb_finance_2026"],
+    messages="The 2026 financial audit process has been updated. Please refer to the attachment.",
+    is_feedback=True, # Mark as feedback to correct the old process
+    info={"source": "Internal_Portal"}
+)
+```
diff --git a/docs/en/open_source/open_source_api/core/delete_memory.md b/docs/en/open_source/open_source_api/core/delete_memory.md
new file mode 100644
index 000000000..fe8a43820
--- /dev/null
+++ b/docs/en/open_source/open_source_api/core/delete_memory.md
@@ -0,0 +1,62 @@
+---
+title: Delete Memory
+desc: Permanently removes memory entries, associated files, or a collection of memories matching specific filter conditions from a designated MemCube.
+---
+
+**Endpoint**: `POST /product/delete_memory`
+**Description**: This interface is used to maintain the accuracy and compliance of the memory store. When a user requests that specific information be forgotten, when data becomes outdated, or when a specific uploaded file needs to be purged, this interface performs a physical deletion that is synchronized across both the vector database and the graph database.
+
+## 1. Core Mechanism: Cube-Level Physical Cleanup
+
+In the open-source version, delete operations follow strict **MemCube** isolation logic:
+
+* **Scope Restriction**: Via the `writable_cube_ids` parameter, delete operations are strictly confined to the specified memory stores and will never accidentally delete content from other Cubes.
+* **Multi-Dimensional Deletion**: Supports concurrent cleanup across three dimensions: **Memory ID** (precise), **File ID** (associated deletion), and **Filter** (conditional logic).
+* **Atomic Synchronization**: Delete operations are triggered by **MemoryHandler**, ensuring that the underlying vector index and entity nodes in the graph database are removed synchronously, preventing retrieval "hallucinations".
+
+
+
+## 2. Key Parameters
+Core parameters are defined as follows:
+
+| Parameter | Type | Required | Description |
+| :--- | :--- | :--- | :--- |
+| **`writable_cube_ids`** | `list[str]` | Yes | Specifies the list of target Cubes on which to perform the delete operation. |
+| **`memory_ids`** | `list[str]` | No | A list of unique memory identifiers to be deleted. |
+| **`file_ids`** | `list[str]` | No | A list of source file identifiers to be deleted. All memories derived from those files will be cleaned up as well. |
+| **`filter`** | `object` | No | A logical filter. Supports bulk deletion of memories matching conditions based on tags, metadata, or timestamps. |
+
+## 3. How It Works (MemoryHandler)
+
+1. **Permission & Routing**: The system validates operation permissions via `user_id` and routes the request to **MemoryHandler**.
+2. **Storage Location**: Locates the underlying **naive_mem_cube** component based on `writable_cube_ids`.
+3. **Dispatch Cleanup Tasks**:
+    * **Cleanup by ID**: Directly erases records from the primary database and vector store based on UUID.
+    * **Cleanup by Filter**: First retrieves the set of memory IDs matching the conditions, then performs bulk physical removal.
+4. **Status Feedback**: Returns a success status upon completion. The deleted content will immediately disappear from the recall scope of the [**Search interface**](./search_memory.md).
+
+## 4. Quick Start
+
+Use `MemOSClient` to perform deletions across different dimensions:
+
+```python
+# Initialize the client
+client = MemOSClient(api_key="...", base_url="...")
+
+# Scenario 1: Precisely delete a single known incorrect memory
+client.delete_memory(
+    writable_cube_ids=["user_01_private"],
+    memory_ids=["2f40be8f-736c-4a5f-aada-9489037769e0"]
+)
+
+# Scenario 2: Bulk-clean all outdated memories under a specific tag
+client.delete_memory(
+    writable_cube_ids=["kb_finance_2026"],
+    filter={"tags": {"contains": "deprecated_policy"}}
+)
+```
+## 5. Important Notes
+
+**Irreversibility**: Delete operations are physical deletions. Once executed successfully, the memory can no longer be recalled via the search interface.
+
+**File Association**: When deleting via `file_ids`, the system automatically traces and cleans up the factual memories and summaries extracted from those files.
diff --git a/docs/en/open_source/open_source_api/core/get_memory.md b/docs/en/open_source/open_source_api/core/get_memory.md
new file mode 100644
index 000000000..e041dbcd5
--- /dev/null
+++ b/docs/en/open_source/open_source_api/core/get_memory.md
@@ -0,0 +1,82 @@
+---
+title: Get Memories
+desc: Paginated query or full export of the memory collection within a specified Cube, with support for type-based filtering and subgraph extraction.
+---
+
+**Endpoint**:
+* **Paginated Query**: `POST /product/get_memory`
+* **Full Export**: `POST /product/get_all`
+
+**Description**: Used to list or export memory assets from a specified **MemCube**. Through these two interfaces, you can retrieve raw memory fragments, user preferences, or tool usage records generated by the system, with support for paginated display and structured export.
+
+## 1. Core Mechanism: Paginated vs. Full Export
+
+In the open-source version, the system provides two different collection access modes via **MemoryHandler**:
+
+* **Paginated Business Mode (`/get_memory`)**:
+    * **Design Intent**: Designed for frontend UI lists. Supports `page` and `page_size` parameters.
+    * **Features**: Includes preference memories by default (`include_preference`), supports lightweight data loading.
+* **Full Export Mode (`/get_all`)**:
+    * **Design Intent**: Designed for data migration or complex relationship analysis.
+    * **Core Capability**: Supports passing a `search_query` to extract relevant **subgraphs**, or exporting all data by `memory_type` (`text_mem` / `act_mem` / `para_mem`, i.e. textual, activation, or parametric memory).
+
+
+## 2. Key Parameters
+
+### 2.1 Paginated Query Parameters (`/get_memory`)
+
+| Parameter | Type | Required | Description |
+| :--- | :--- | :--- | :--- |
+| **`mem_cube_id`** | `str` | Yes | Target MemCube ID. |
+| **`user_id`** | `str` | No | Unique user identifier. |
+| **`page`** | `int` | No | Page number (starting from 1). If set to `None`, attempts a full export. |
+| **`page_size`** | `int` | No | Number of entries per page. |
+| `include_preference` | `bool` | No | Whether to include preference memories. |
+
+### 2.2 Full/Subgraph Export Parameters (`/get_all`)
+
+| Parameter | Type | Required | Description |
+| :--- | :--- | :--- | :--- |
+| **`user_id`** | `str` | Yes | User ID. |
+| **`memory_type`** | `str` | Yes | Memory type: `text_mem`, `act_mem`, `para_mem`. |
+| `mem_cube_ids` | `list` | No | List of Cube IDs to export. |
+| `search_query` | `str` | No | If provided, recalls and returns the relevant memory subgraph based on this query. |
+
+## 3. Quick Start
+
+### 3.1 Frontend Paginated Display (SDK Call)
+
+```python
+# Retrieve the first page with 10 memories per page
+res = client.get_memory(
+    user_id="sde_dev_01",
+    mem_cube_id="cube_research_01",
+    page=1,
+    page_size=10
+)
+
+for mem in res.data:
+    print(f"[{mem['type']}] {mem['memory_value']}")
+```
+### 3.2 Export a Specific Factual Memory Subgraph
+```python
+# Extract all factual memories related to "R language"
+res = client.get_all(
+    user_id="sde_dev_01",
+    memory_type="text_mem",
+    search_query="R language visualization"
+)
+```
+
+## 4. Response Structure
+
+The interface returns a standard business response, where `data` contains an array of memory objects. Each memory typically contains the following core fields:
+
+`id`: The unique memory identifier, used for **Get Detail** or **Delete** operations.
+
+`memory_value`: The memory text after algorithmic processing.
+
+`tags`: Associated custom tags.
+
+::note
+Developer Tip: If you already know the memory ID and want to view its complete metadata (such as `confidence` or `usage` records), use the **Get Memory Detail** (`get_memory_by_id`) interface.
+::
diff --git a/docs/en/open_source/open_source_api/core/get_memory_by_id.md b/docs/en/open_source/open_source_api/core/get_memory_by_id.md
new file mode 100644
index 000000000..c691e88f3
--- /dev/null
+++ b/docs/en/open_source/open_source_api/core/get_memory_by_id.md
@@ -0,0 +1,58 @@
+---
+title: Get Memory Detail
+desc: Retrieves the complete metadata of a single memory entry via its unique identifier (ID), including confidence score, background context, and usage records.
+---
+
+**Endpoint**: `GET /product/get_memory/{memory_id}`
+**Description**: This interface allows developers to retrieve all underlying details of a single memory entry. Unlike the search interface which returns summary information, this interface exposes the full lifecycle data of the memory (such as vector synchronization status and AI extraction context), making it a core tool for system management and troubleshooting.
+
+## 1. Why Fetch Memory Details?
+
+* **Metadata Inspection**: View the `confidence` score and `background` context that the AI used when extracting this memory entry.
+* **Lifecycle Verification**: Confirm whether the memory's `vector_sync` (vector synchronization) succeeded and check its `updated_at` timestamp.
+* **Usage Tracking**: Use `usage` records to trace which sessions recalled this memory and used it to assist generation.
+
+
+## 2. Key Parameters
+
+This interface uses the standard RESTful path parameter format:
+
+| Parameter | Location | Type | Required | Description |
+| :--- | :--- | :--- | :--- | :--- |
+| **`memory_id`** | Path | `str` | Yes | The unique identifier (UUID) of the memory. You can obtain this ID from the results of the [**Get Memories**](./get_memory.md) or [**Search**](./search_memory.md) interfaces. |
+
+## 3. How It Works (MemoryHandler)
+
+1. **Direct Query**: The **MemoryHandler** bypasses the business orchestration layer and interacts directly with the underlying core component **naive_mem_cube**.
+2. **Data Completion**: The system fetches the complete `metadata` dictionary from the persistent database and returns it without any semantic truncation.
+
+## 4. Response Data Reference
+
+The `data` object in the response body contains the following core fields:
+
+| Field | Description |
+| :--- | :--- |
+| **`id`** | Unique memory identifier. |
+| **`memory`** | The text content of the memory, typically including annotations (e.g., `[user opinion]`). |
+| **`metadata.confidence`** | The AI's confidence score when extracting this memory (0.0 - 1.0). |
+| **`metadata.type`** | Memory classification, such as `fact` or `preference`. |
+| **`metadata.background`** | Detailed description of why the AI extracted this memory and its contextual background. |
+| **`metadata.usage`** | A list recording the historical times and contexts in which this memory was used by the model. |
+| **`metadata.vector_sync`**| Vector database synchronization status, typically `success`. |
+
+## 5. Quick Start
+
+Use the SDK to fetch memory details:
+
+```python
+# Assume the ID of a memory is already known
+mem_id = "2f40be8f-736c-4a5f-aada-9489037769e0"
+
+# Fetch the complete details
+res = client.get_memory_by_id(memory_id=mem_id)
+
+if res and res.code == 200:
+    metadata = res.data.get('metadata', {})
+    print(f"Memory Background: {metadata.get('background')}")
+    print(f"Sync Status: {metadata.get('vector_sync')}")
+```
diff --git a/docs/en/open_source/open_source_api/core/search_memory.md b/docs/en/open_source/open_source_api/core/search_memory.md
new file mode 100644
index 000000000..5adfae0e9
--- /dev/null
+++ b/docs/en/open_source/open_source_api/core/search_memory.md
@@ -0,0 +1,95 @@
+---
+title: Search Memory
+desc: Recalls the most relevant contextual information from the memory store using semantic retrieval and logical filtering, based on the MemCube isolation mechanism.
+---
+
+**Endpoint**: `POST /product/search`
+**Description**: This interface is the core of MemOS's Retrieval-Augmented Generation (RAG) capability. It performs semantic matching across multiple isolated **MemCubes**, automatically recalling relevant facts, user preferences, and tool invocation records.
+
+## 1. Core Mechanism: Readable Cubes
+
+Unlike the single-user perspective in cloud services, the open-source interface achieves highly flexible retrieval scope control through **`readable_cube_ids`**:
+
+* **Cross-Cube Retrieval**: You can specify multiple Cube IDs simultaneously (e.g., `[user_private_cube, enterprise_public_kb_cube]`), and the algorithm will recall the most relevant content from these isolated memory stores in parallel.
+* **Soft Signal Weighting**: By passing a `session_id`, the system will prioritize content from that session during recall. This acts only as a "weight" to improve relevance, not as a hard filter.
+* **Absolute Isolation**: Content from Cubes not included in the `readable_cube_ids` list is completely invisible at the algorithm level, ensuring data security in multi-tenant environments.
+
+
+
+## 2. Key Parameters
+
+Core retrieval parameters are defined as follows:
+
+### Retrieval Basics
+| Parameter | Type | Required | Description |
+| :--- | :--- | :--- | :--- |
+| **`query`** | `str` | Yes | The user's search query string. The system will perform semantic matching based on this. |
+| **`user_id`** | `str` | Yes | The unique identifier of the requester, used for authentication and context tracking. |
+| **`readable_cube_ids`**| `list[str]`| Yes | **Core parameter**: Specifies the list of Cube IDs that this search can read. |
+| **`mode`** | `str` | No | **Search strategy**: Options are `fast`, `fine`, or `mixture`. |
+
+### Recall Control
+| Parameter | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| **`top_k`** | `int` | `10` | Maximum number of text memories to recall. |
+| **`include_preference`**| `bool` | `true` | Whether to recall relevant user preference memories (explicit/implicit preferences). |
+| **`search_tool_memory`**| `bool` | `true` | Whether to recall relevant tool invocation records. |
+| **`filter`** | `dict` | - | Logical filter supporting precise filtering by tags or metadata. |
+| **`dedup`** | `str` | - | Deduplication strategy: `no` (no deduplication), `sim` (semantic deduplication), `None` (default exact text deduplication). |
+
+## 3. How It Works (SearchHandler Strategy)
+
+When a request reaches the backend, **SearchHandler** calls different components based on the specified `mode`:
+
+1. **Query Rewriting**: Uses an LLM to semantically enhance the user's `query`, improving match accuracy.
+2. **Multi-Mode Matching**:
+    * **Fast Mode**: Performs quick recall via the vector index. Suitable for latency-sensitive scenarios.
+    * **Fine Mode**: Adds a reranking step to improve the relevance of recalled content.
+    * **Mixture Mode**: Combines semantic search with graph-based search to recall memories with greater depth and association.
+3. **Multi-Dimensional Aggregation**: The system retrieves facts, preferences (`pref_top_k`), and tool memories (`tool_mem_top_k`) in parallel and aggregates the results for return.
+4. **Post-Processing Deduplication**: Compresses highly similar memory entries based on the `dedup` configuration.
+
+## 4. Quick Start
+
+Perform a multi-Cube joint search via SDK:
+
+```python
+from memos.api.client import MemOSClient
+
+client = MemOSClient(api_key="...", base_url="...")
+
+# Scenario: Search user memories and two specialized knowledge bases simultaneously
+res = client.search_memory(
+    user_id="sde_dev_01",
+    query="Based on my previous preferences, recommend some R language visualization solutions",
+    # Pass the list of readable Cubes, including personal space and two knowledge bases
+    readable_cube_ids=["user_01_private", "kb_r_lang", "kb_data_viz"],
+    mode="fine",             # Use fine mode for more accurate recommendations
+    include_preference=True,  # Recall preferences such as "user prefers a minimalist style"
+    top_k=5
+)
+
+if res:
+    # Results are contained in memory_detail_list
+    print(f"Recall results: {res.data}")
+```
+
+## 5. Advanced: Using Filters
+SearchHandler supports complex filters to meet more granular business requirements:
+```python
+
+# Example: Search only for memories tagged "Programming" and created after 2026-01-01
+search_filter = {
+    "and": [
+        {"tags": {"contains": "Programming"}},
+        {"created_at": {"gt": "2026-01-01"}}
+    ]
+}
+
+res = client.search_memory(
+    query="data cleaning logic",
+    user_id="sde_dev_01",
+    readable_cube_ids=["user_01_private"],
+    filter=search_filter
+)
+```
diff --git a/packages/memos-core/src/config.ts b/packages/memos-core/src/config.ts
index b2316d78c..7d255015b 100644
--- a/packages/memos-core/src/config.ts
+++ b/packages/memos-core/src/config.ts
@@ -65,6 +65,7 @@ export function resolveConfig(raw: Partial<MemosLocalConfig> | undefined, stateD
       mmrLambda: cfg.recall?.mmrLambda ?? DEFAULTS.mmrLambda,
       recencyHalfLifeDays: cfg.recall?.recencyHalfLifeDays ?? DEFAULTS.recencyHalfLifeDays,
       vectorSearchMaxChunks: cfg.recall?.vectorSearchMaxChunks ?? DEFAULTS.vectorSearchMaxChunks,
+      autoRecallMinQueryLength: cfg.recall?.autoRecallMinQueryLength ?? DEFAULTS.autoRecallMinQueryLength,
     },
     dedup: {
       similarityThreshold: cfg.dedup?.similarityThreshold ?? DEFAULTS.dedupSimilarityThreshold,
diff --git a/packages/memos-core/src/types.ts b/packages/memos-core/src/types.ts
index cb08eb1cf..31d80b001 100644
--- a/packages/memos-core/src/types.ts
+++ b/packages/memos-core/src/types.ts
@@ -312,6 +312,8 @@ export interface MemosLocalConfig {
     recencyHalfLifeDays?: number;
     /** Cap vector search to this many most recent chunks. 0 = no cap (search all; may get slower with 200k+ chunks). If you set a cap for performance, use a large value (e.g. 200000–300000) so older memories are still in the window; FTS always searches all. */
     vectorSearchMaxChunks?: number;
+    /** Auto-recall skips normalized prompts shorter than this many characters. */
+    autoRecallMinQueryLength?: number;
   };
   dedup?: {
     similarityThreshold?: number;
@@ -337,6 +339,7 @@ export const DEFAULTS = {
   mmrLambda: 0.7,
   recencyHalfLifeDays: 14,
   vectorSearchMaxChunks: 0,
+  autoRecallMinQueryLength: 2,
   dedupSimilarityThreshold: 0.80,
   evidenceWrapperTag: "STORED_MEMORY",
   excerptMinChars: 200,
diff --git a/src/memos/api/mcp_serve.py b/src/memos/api/mcp_serve.py
index 8f8e70311..9cfa02820 100644
--- a/src/memos/api/mcp_serve.py
+++ b/src/memos/api/mcp_serve.py
@@ -270,7 +270,10 @@ async def unregister_cube(cube_id: str, user_id: str | None = None) -> str:
 
         @self.mcp.tool()
         async def search_memories(
-            query: str, user_id: str | None = None, cube_ids: list[str] | None = None
+            query: str,
+            user_id: str | None = None,
+            cube_ids: list[str] | None = None,
+            filter: dict[str, Any] | None = None,
         ) -> dict[str, Any]:
             """
             Search for memories across user's accessible memory cubes.
@@ -282,11 +285,15 @@ async def search_memories(
                 query (str): Search query to find relevant memories
                 user_id (str, optional): User ID whose cubes to search. If not provided, uses default user
                 cube_ids (list[str], optional): Specific cube IDs to search. If not provided, searches all user's cubes
+                filter (dict, optional): Filter conditions for the search. An empty dict is treated as no filter.
 
             Returns:
                 dict: Search results containing text_mem, act_mem, and para_mem categories with relevant memories
             """
             try:
+                # Some MCP clients always send filter:{} in conversation mode; treat it as no filter
+                if not filter:
+                    filter = None
                 result = self.mos_core.search(query, user_id, cube_ids)
                 return result
             except Exception as e:
diff --git a/tests/api/test_mcp_serve.py b/tests/api/test_mcp_serve.py
new file mode 100644
index 000000000..5920fbb3f
--- /dev/null
+++ b/tests/api/test_mcp_serve.py
@@ -0,0 +1,80 @@
+"""
+Unit tests for MOSMCPServer — specifically the search_memories tool.
+"""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+
+@pytest.fixture
+def mock_mos():
+    """Return a MagicMock standing in for a MOS instance."""
+    mos = MagicMock()
+    mos.search.return_value = {"text_mem": [], "act_mem": [], "para_mem": [], "pref_mem": []}
+    return mos
+
+
+@pytest.fixture
+def mcp_server(mock_mos):
+    """Create a MOSMCPServer with a pre-built MOS mock (skips heavy init)."""
+    from memos.api.mcp_serve import MOSMCPServer
+
+    server = MOSMCPServer.__new__(MOSMCPServer)
+    server.mos_core = mock_mos
+    server.mcp = MagicMock()
+
+    # Collect the registered tool functions by intercepting mcp.tool()
+    registered_tools: dict = {}
+
+    def fake_tool():
+        def decorator(fn):
+            registered_tools[fn.__name__] = fn
+            return fn
+
+        return decorator
+
+    server.mcp.tool = fake_tool
+    server._setup_tools()
+    server._tools = registered_tools
+    return server
+
+
+@pytest.mark.asyncio
+async def test_search_memories_empty_filter_treated_as_none(mcp_server, mock_mos):
+    """search_memories with filter={} must not raise and must call mos_core.search."""
+    search_fn = mcp_server._tools["search_memories"]
+    result = await search_fn(query="test query", filter={})
+
+    mock_mos.search.assert_called_once_with("test query", None, None)
+    assert "error" not in result
+
+
+@pytest.mark.asyncio
+async def test_search_memories_none_filter(mcp_server, mock_mos):
+    """search_memories with filter=None behaves identically to filter={}."""
+    search_fn = mcp_server._tools["search_memories"]
+    result = await search_fn(query="test query", filter=None)
+
+    mock_mos.search.assert_called_once_with("test query", None, None)
+    assert "error" not in result
+
+
+@pytest.mark.asyncio
+async def test_search_memories_no_filter_arg(mcp_server, mock_mos):
+    """search_memories without filter kwarg uses the default (None)."""
+    search_fn = mcp_server._tools["search_memories"]
+    result = await search_fn(query="test query")
+
+    mock_mos.search.assert_called_once_with("test query", None, None)
+    assert "error" not in result
+
+
+@pytest.mark.asyncio
+async def test_search_memories_passes_user_and_cube_ids(mcp_server, mock_mos):
+    """search_memories forwards user_id and cube_ids to mos_core.search."""
+    search_fn = mcp_server._tools["search_memories"]
+    result = await search_fn(query="q", user_id="u1", cube_ids=["c1", "c2"], filter={})
+
+    mock_mos.search.assert_called_once_with("q", "u1", ["c1", "c2"])
+    assert "error" not in result