diff --git a/docs/deepseek-vs-claude-pricing.md b/docs/deepseek-vs-claude-pricing.md
new file mode 100644
index 0000000..e4632ba
--- /dev/null
+++ b/docs/deepseek-vs-claude-pricing.md
@@ -0,0 +1,98 @@
+# DeepSeek vs Claude API Pricing
+
+Captured 2026-05-12. Sources:
+- DeepSeek: https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+- Anthropic: https://platform.claude.com/docs/en/about-claude/pricing
+- FX assumed: $1 ≈ ¥7.15
+
+## Per-model prices (per 1M tokens)
+
+### Flagship tier
+
+| Item | Claude Opus 4.7 | Claude Sonnet 4.6 | DeepSeek V4 Pro (discount) | DeepSeek V4 Pro (full) |
+|---|---:|---:|---:|---:|
+| Input (cache miss) | $5.00 / ¥35.75 | $3.00 / ¥21.45 | ¥3 | ¥12 |
+| Cache write (5m) | $6.25 / ¥44.69 | $3.75 / ¥26.81 | ¥3 | ¥12 |
+| Cache read | $0.50 / ¥3.58 | $0.30 / ¥2.15 | ¥0.025 | ¥0.10 |
+| Output | $25.00 / ¥178.75 | $15.00 / ¥107.25 | ¥6 | ¥24 |
+| Context window | 200K | 1M | 1M | 1M |
+| Max output tokens | varies | varies | 384K | 384K |
+
+DeepSeek V4 Pro discount (75% off, i.e. one quarter of the full price) runs through 2026-05-31 23:59 Beijing time.
+From 2026-06-01 the full-price column applies unless DeepSeek extends it.
+
+### Lightweight tier
+
+| Item | Claude Haiku 4.5 | DeepSeek V4 Flash |
+|---|---:|---:|
+| Input (cache miss) | $1.00 / ¥7.15 | ¥1 |
+| Cache write (5m) | $1.25 / ¥8.94 | ¥1 |
+| Cache read | $0.10 / ¥0.72 | ¥0.02 |
+| Output | $5.00 / ¥35.75 | ¥2 |
+| Context window | 200K | 1M |
+
+## Multiplier view (how much Claude costs vs DeepSeek discount price)
+
+### Pro vs Sonnet 4.6 (same-tier comparison)
+
+| Item | Sonnet 4.6 / Pro multiplier |
+|---|---:|
+| Input (cache miss) | 7.15x |
+| Cache read | 85.8x |
+| Output | 17.9x |
+
+### Pro vs Opus 4.7 (cross-tier comparison)
+
+| Item | Opus 4.7 / Pro multiplier |
+|---|---:|
+| Input (cache miss) | 11.9x |
+| Cache read | 143x |
+| Output | 29.8x |
+
+### Flash vs Haiku 4.5
+
+| Item | Haiku 4.5 / Flash multiplier |
+|---|---:|
+| Input (cache miss) | 7.15x |
+| Cache read | 35.8x |
+| Output | 17.9x |
+
+## Realistic Claude Code request cost
+
+Measured pattern: 15,872 cache_read + 66 input + 30 output tokens
+(observed first cache hit after the session_id-pinning fix in 283678a).
+
+| Provider | Cost per request | Per 100 requests |
+|---|---:|---:|
+| Claude Opus 4.7 | $0.0090 / ¥0.064 | ¥6.40 |
+| Claude Sonnet 4.6 | $0.0054 / ¥0.039 | ¥3.90 |
+| Claude Haiku 4.5 | $0.0018 / ¥0.013 | ¥1.30 |
+| DeepSeek V4 Pro (discount) | ¥0.000777 | ¥0.078 |
+| DeepSeek V4 Flash | ¥0.000437 | ¥0.044 |
+
+## Headline ratios at the typical Claude Code workload
+
+- DeepSeek Pro vs Claude Sonnet 4.6: **~50x cheaper**
+- DeepSeek Pro vs Claude Opus 4.7: **~83x cheaper**
+- DeepSeek Flash vs Claude Haiku 4.5: **~30x cheaper**
+
+After the 2026-05-31 discount expires, Pro full price would be 4x its
+current rate; ratios shrink to roughly 12x vs Sonnet, 21x vs Opus, but the
+gap remains substantial.
+
+## Caveats
+
+- **No Opus-class DeepSeek model.** Tasks that genuinely need Opus-level
+  reasoning (long math proofs, deep architecture work) have no DeepSeek
+  equivalent.
+- **Capability gaps with the Anthropic API**: DeepSeek's /anthropic endpoint
+  does not implement image/document content blocks, computer use, server-side
+  web search beyond what the model emits, MCP gateway, or redacted_thinking.
+  See `docs/openclaude-commits-review.md` and the deepseek branch port commits
+  for the adaptations Claude Code makes for these.
+- **Cache hit assumptions**: the per-request cost above assumes the
+  session_id pinning fix is in effect (commit 283678a). Without it,
+  cache_read_input_tokens stays at 0 and DeepSeek per-request cost is roughly
+  60x higher for V4 Pro (~37x for V4 Flash): every request pays full input price.
+- **No /count_tokens endpoint on DeepSeek.** Local UTF-8 byte estimation is
+  used instead (commit 6fbad90).
diff --git a/scripts/build.ts b/scripts/build.ts
index 4a98314..f3c0eae 100644
--- a/scripts/build.ts
+++ b/scripts/build.ts
@@ -20,7 +20,11 @@ const ENABLED_FEATURES = [
   'MCP_SKILLS',
   'HISTORY_PICKER',
   'TREE_SITTER_BASH',
-  'NATIVE_CLIENT_ATTESTATION',
+  // NATIVE_CLIENT_ATTESTATION intentionally disabled: it injects a 'cch=00000'
+  // placeholder into the x-anthropic-billing-header and computes a body hash.
+  // GLM/DeepSeek's /anthropic endpoints do not validate this header — sending
+  // it adds CPU + an HTTP header without value. Keep off for non-Anthropic
+  // providers.
   'BRIDGE_MODE',
   'COORDINATOR_MODE',
 ]
diff --git a/src/commands/cost/cost.ts b/src/commands/cost/cost.ts
index c9fb0cb..0eaf9f6 100644
--- a/src/commands/cost/cost.ts
+++ b/src/commands/cost/cost.ts
@@ -1,24 +1,7 @@
 import { formatTotalCost } from '../../cost-tracker.js'
-import { currentLimits } from '../../services/claudeAiLimits.js'
 import type { LocalCommandCall } from '../../types/command.js'
-import { isClaudeAISubscriber } from '../../utils/auth.js'
 
+// DeepSeek: no Claude.ai subscription; always show the formatted cost.
 export const call: LocalCommandCall = async () => {
-  if (isClaudeAISubscriber()) {
-    let value: string
-
-    if (currentLimits.isUsingOverage) {
-      value =
-        'You are currently using your overages to power your Claude Code usage. We will automatically switch you back to your subscription rate limits when they reset'
-    } else {
-      value =
-        'You are currently using your subscription to power your Claude Code usage'
-    }
-
-    if (process.env.USER_TYPE === 'ant') {
-      value += `\n\n[ANT-ONLY] Showing cost anyway:\n ${formatTotalCost()}`
-    }
-    return { type: 'text', value }
-  }
   return { type: 'text', value: formatTotalCost() }
 }
diff --git a/src/components/CostThresholdDialog.tsx b/src/components/CostThresholdDialog.tsx
index bdf9f53..d0639d4 100644
--- a/src/components/CostThresholdDialog.tsx
+++ b/src/components/CostThresholdDialog.tsx
@@ -38,7 +38,7 @@ export function CostThresholdDialog(t0) {
   }
   let t4;
   if ($[4] !== onDone || $[5] !== t3) {
-    t4 = <Dialog title="You've spent $5 on the Anthropic API this session." onCancel={onDone}>{t1}{t3}</Dialog>;
+    t4 = <Dialog title="You've spent a significant amount on API calls this session." onCancel={onDone}>{t1}{t3}</Dialog>;
     $[4] = onDone;
     $[5] = t3;
     $[6] = t4;
diff --git a/src/components/Message.tsx b/src/components/Message.tsx
index ca2ef76..886e917 100644
--- a/src/components/Message.tsx
+++ b/src/components/Message.tsx
@@ -538,9 +538,6 @@ function AssistantMessageBlock(t0) {
       }
     case "thinking":
       {
-        if (!isTranscriptMode && !verbose) {
-          return null;
-        }
         const isLastThinking = !lastThinkingBlockId || thinkingBlockId === lastThinkingBlockId;
         const t1 = isTranscriptMode && !isLastThinking;
         let t2;
diff --git a/src/components/Settings/Config.tsx b/src/components/Settings/Config.tsx
index 37ee93c..7c09649 100644
--- a/src/components/Settings/Config.tsx
+++ b/src/components/Settings/Config.tsx
@@ -281,6 +281,26 @@ export function Config({
         enabled: autoCompactEnabled
       });
     }
+  }, {
+    id: 'autoMemoryEnabled',
+    label: 'Auto-memory',
+    // settings.json default is "undefined" which the resolver in
+    // src/memdir/paths.ts treats as "disabled" on this branch. Mirror
+    // that so the toggle reflects what the runtime actually does.
+    value: settingsData?.autoMemoryEnabled ?? false,
+    type: 'boolean' as const,
+    onChange(autoMemoryEnabled: boolean) {
+      updateSettingsForSource('localSettings', {
+        autoMemoryEnabled
+      });
+      setSettingsData(prev_auto_mem => ({
+        ...prev_auto_mem,
+        autoMemoryEnabled
+      }));
+      logEvent('tengu_auto_memory_setting_changed', {
+        enabled: autoMemoryEnabled
+      });
+    }
   }, {
     id: 'spinnerTipsEnabled',
     label: 'Show tips',
diff --git a/src/components/ThinkingToggle.tsx b/src/components/ThinkingToggle.tsx
index a7b7a1b..2a4604e 100644
--- a/src/components/ThinkingToggle.tsx
+++ b/src/components/ThinkingToggle.tsx
@@ -30,11 +30,11 @@ export function ThinkingToggle(t0) {
     t1 = [{
       value: "true",
       label: "Enabled",
-      description: "Claude will think before responding"
+      description: "Model will think before responding"
     }, {
       value: "false",
       label: "Disabled",
-      description: "Claude will respond without extended thinking"
+      description: "Model will respond without extended thinking"
     }];
     $[0] = t1;
   } else {
diff --git a/src/components/messages/AssistantThinkingMessage.tsx b/src/components/messages/AssistantThinkingMessage.tsx
index 3825f5f..af4b492 100644
--- a/src/components/messages/AssistantThinkingMessage.tsx
+++ b/src/components/messages/AssistantThinkingMessage.tsx
@@ -4,6 +4,7 @@ import React from 'react';
 import { Box, Text } from '../../ink.js';
 import { CtrlOToExpand } from '../CtrlOToExpand.js';
 import { Markdown } from '../Markdown.js';
+import { useSettings } from '../../hooks/useSettings.js';
 type Props = {
   // Accept either full ThinkingBlock/ThinkingBlockParam or a minimal shape with just type and thinking
   param: ThinkingBlock | ThinkingBlockParam | {
@@ -36,7 +37,8 @@ export function AssistantThinkingMessage(t0) {
   if (hideInTranscript) {
     return null;
   }
-  const shouldShowFullThinking = isTranscriptMode || verbose;
+  const settings = useSettings();
+  const shouldShowFullThinking = isTranscriptMode || verbose || settings.alwaysThinkingEnabled !== false;
   if (!shouldShowFullThinking) {
     const t4 = addMargin ? 1 : 0;
     let t5;
diff --git a/src/constants/system.ts b/src/constants/system.ts
index 0cd2e76..9495bd8 100644
--- a/src/constants/system.ts
+++ b/src/constants/system.ts
@@ -3,7 +3,7 @@
 import { feature } from 'bun:bundle'
 import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 import { logForDebugging } from '../utils/debug.js'
-import { isEnvDefinedFalsy } from '../utils/envUtils.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
 import { getAPIProvider } from '../utils/model/providers.js'
 import { getWorkload } from '../utils/workloadContext.js'
 
@@ -47,13 +47,10 @@ export function getCLISyspromptPrefix(options?: {
 
 /**
  * Check if attribution header is enabled.
- * Enabled by default, can be disabled via env var or GrowthBook killswitch.
+ * Disabled by default, can be enabled via env var.
  */
 function isAttributionHeaderEnabled(): boolean {
-  if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) {
-    return false
-  }
-  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true)
+  return isEnvTruthy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)
 }
 
 /**
diff --git a/src/context.ts b/src/context.ts
index 423414d..b614490 100644
--- a/src/context.ts
+++ b/src/context.ts
@@ -4,7 +4,7 @@ import {
   getAdditionalDirectoriesForClaudeMd,
   setCachedClaudeMdContent,
 } from './bootstrap/state.js'
-import { getLocalISODate } from './constants/common.js'
+import { getSessionStartDate } from './constants/common.js'
 import {
   filterInjectedMemoryFiles,
   getClaudeMds,
@@ -17,7 +17,7 @@ import { getBranch, getDefaultBranch, getIsGit, gitExe } from './utils/git.js'
 import { shouldIncludeGitInstructions } from './utils/gitSettings.js'
 import { logError } from './utils/log.js'
 
-const MAX_STATUS_CHARS = 2000
+const MAX_STATUS_CHARS = 1000
 
 // System prompt injection for cache breaking (ant-only, ephemeral debugging state)
 let systemPromptInjection: string | null = null
@@ -85,7 +85,7 @@ export const getGitStatus = memoize(async (): Promise<string | null> => {
     const truncatedStatus =
       status.length > MAX_STATUS_CHARS
         ? status.substring(0, MAX_STATUS_CHARS) +
-          '\n... (truncated because it exceeds 2k characters. If you need more information, run "git status" using BashTool)'
+          '\n... (truncated because it exceeds 1k characters. If you need more information, run "git status" using BashTool)'
         : status
 
     logForDiagnosticsNoPII('info', 'git_status_completed', {
@@ -183,7 +183,9 @@ export const getUserContext = memoize(
 
     return {
       ...(claudeMd && { claudeMd }),
-      currentDate: `Today's date is ${getLocalISODate()}.`,
+      // Use session-stable date to preserve DeepSeek's server-side prefix
+      // cache across midnight (was getLocalISODate() — refreshes daily).
+      currentDate: `Today's date is ${getSessionStartDate()}.`,
     }
   },
 )
diff --git a/src/cost-tracker.ts b/src/cost-tracker.ts
index b03184c..72375bf 100644
--- a/src/cost-tracker.ts
+++ b/src/cost-tracker.ts
@@ -44,8 +44,11 @@ import {
 import { isFastModeEnabled } from './utils/fastMode.js'
 import { formatDuration, formatNumber } from './utils/format.js'
 import type { FpsMetrics } from './utils/fpsTracker.js'
-import { getCanonicalName } from './utils/model/model.js'
-import { calculateUSDCost } from './utils/modelCost.js'
+import {
+  getCanonicalName,
+  getDefaultMainLoopModelSetting,
+} from './utils/model/model.js'
+import { calculateUSDCost, getModelCosts } from './utils/modelCost.js'
 export {
   getTotalCostUSD as getTotalCost,
   getTotalDuration,
@@ -175,7 +178,7 @@ export function saveCurrentSessionCosts(fpsMetrics?: FpsMetrics): void {
 }
 
 function formatCost(cost: number, maxDecimalPlaces: number = 4): string {
-  return `$${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}`
+  return `¥${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}`
 }
 
 function formatModelUsage(): string {
@@ -234,12 +237,35 @@ export function formatTotalCost(): string {
 
   const modelUsageDisplay = formatModelUsage()
 
+  // DeepSeek-specific: show prompt cache hit rate and yuan savings, since the
+  // /anthropic endpoint exposes cache_read / cache_creation token counts and
+  // these are the headline cost driver (cache reads are ~120x cheaper).
+  let cacheStatsDisplay = ''
+  const cacheRead = getTotalCacheReadInputTokens()
+  const cacheCreation = getTotalCacheCreationInputTokens()
+  const directInput = getTotalInputTokens()
+  const totalInput = cacheRead + cacheCreation + directInput
+  if (totalInput > 0) {
+    const hitRate = (cacheRead / totalInput) * 100
+    const model = getDefaultMainLoopModelSetting()
+    const costs = getModelCosts(model, {
+      input_tokens: 0,
+      output_tokens: 0,
+    } as Usage)
+    const savings =
+      (cacheRead / 1_000_000) *
+      (costs.inputTokens - costs.promptCacheReadTokens)
+    cacheStatsDisplay =
+      `\nCache hit rate:         ${hitRate.toFixed(1)}% (${formatNumber(cacheRead)} / ${formatNumber(totalInput)} input tokens)` +
+      `\nCache savings:         ${formatCost(savings)}`
+  }
+
   return chalk.dim(
     `Total cost:            ${costDisplay}\n` +
       `Total duration (API):  ${formatDuration(getTotalAPIDuration())}
 Total duration (wall): ${formatDuration(getTotalDuration())}
 Total code changes:    ${getTotalLinesAdded()} ${getTotalLinesAdded() === 1 ? 'line' : 'lines'} added, ${getTotalLinesRemoved()} ${getTotalLinesRemoved() === 1 ? 'line' : 'lines'} removed
-${modelUsageDisplay}`,
+${modelUsageDisplay}${cacheStatsDisplay}`,
   )
 }
 
diff --git a/src/costHook.ts b/src/costHook.ts
index 798a093..fa740c4 100644
--- a/src/costHook.ts
+++ b/src/costHook.ts
@@ -1,6 +1,5 @@
 import { useEffect } from 'react'
 import { formatTotalCost, saveCurrentSessionCosts } from './cost-tracker.js'
-import { hasConsoleBillingAccess } from './utils/billing.js'
 import type { FpsMetrics } from './utils/fpsTracker.js'
 
 export function useCostSummary(
@@ -8,10 +7,8 @@ export function useCostSummary(
 ): void {
   useEffect(() => {
     const f = () => {
-      if (hasConsoleBillingAccess()) {
-        process.stdout.write('\n' + formatTotalCost() + '\n')
-      }
-
+      // DeepSeek: there's no Anthropic console billing tier; always print.
+      process.stdout.write('\n' + formatTotalCost() + '\n')
       saveCurrentSessionCosts(getFpsMetrics?.())
     }
     process.on('exit', f)
diff --git a/src/memdir/paths.ts b/src/memdir/paths.ts
index 68a6baf..c794a8b 100644
--- a/src/memdir/paths.ts
+++ b/src/memdir/paths.ts
@@ -51,7 +51,12 @@ export function isAutoMemoryEnabled(): boolean {
   if (settings.autoMemoryEnabled !== undefined) {
     return settings.autoMemoryEnabled
   }
-  return true
+  // DeepSeek branch default: off. The auto-memory section injects ~3145
+  // fixed tokens into every system prompt (a 32% surcharge on a minimal
+  // -p call). Users who want it can flip it in /config -> Auto-memory or
+  // set autoMemoryEnabled: true in settings.json (or set
+  // CLAUDE_CODE_DISABLE_AUTO_MEMORY=0).
+  return false
 }
 
 /**
diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx
index b483ead..7b99a10 100644
--- a/src/screens/REPL.tsx
+++ b/src/screens/REPL.tsx
@@ -2207,7 +2207,9 @@ export function REPL({
   };
   useEffect(() => {
     const totalCost = getTotalCost();
-    if (totalCost >= 5 /* $5 */ && !showCostDialog && !haveShownCostDialog) {
+    // DeepSeek: pricing is in CNY; raise the threshold to roughly match the
+    // user-perceived "5 USD" notification level (1 USD ≈ 7 CNY, rounded up).
+    if (totalCost >= 35 && !showCostDialog && !haveShownCostDialog) {
       logEvent('tengu_cost_threshold_reached', {});
       // Mark as shown even if the dialog won't render (no console billing
       // access). Otherwise this effect re-fires on every message change for
diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts
index 4c09f06..733896c 100644
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -22,6 +22,8 @@ import type { Stream } from '@anthropic-ai/sdk/streaming.mjs'
 import { randomUUID } from 'crypto'
 import {
   getAPIProvider,
+  isDeepSeekProvider,
+  isGLMProvider,
   isFirstPartyAnthropicBaseUrl,
 } from 'src/utils/model/providers.js'
 import {
@@ -508,7 +510,16 @@ export function getAPIMetadata() {
       device_id: getOrCreateUserID(),
       // Only include OAuth account UUID when actively using OAuth authentication
       account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
-      session_id: getSessionId(),
+      // GLM/DeepSeek prompt caches key on the full request body bytes. The
+      // real session_id changes every launch and would force a fresh cache
+      // entry per session — defeating the cache entirely. Pin to a stable
+      // sentinel for non-Anthropic providers so identical conversations across
+      // sessions share a cache entry. Real Anthropic telemetry keeps the live
+      // session id.
+      session_id:
+        isDeepSeekProvider() || isGLMProvider()
+          ? 'claude-code-ds'
+          : getSessionId(),
     }),
   }
 }
@@ -935,6 +946,77 @@ function isToolResult(
   return block.type === 'tool_result'
 }
 
+/**
+ * DeepSeek silently ignores the `is_error: true` flag on tool_result blocks,
+ * so the model has no way to know a tool call failed. Prefix the content with
+ * a literal "[ERROR]" text block so the model can detect failures from text.
+ *
+ * Walks nested tool_result content recursively (for cached histories).
+ */
+function prefixDeepSeekErrorToolResults(
+  blocks: BetaContentBlockParam[],
+): BetaContentBlockParam[] {
+  let changed = false
+  const updated = blocks.map(block => {
+    if (!isToolResult(block)) return block
+
+    let nextBlock = block
+    let blockChanged = false
+
+    if ((block as { is_error?: boolean }).is_error) {
+      const content = Array.isArray(block.content)
+        ? block.content
+        : [{ type: 'text', text: String(block.content ?? '') }]
+      const prefixed = [
+        { type: 'text', text: '[ERROR] Tool execution failed:' },
+        ...content,
+      ] as BetaContentBlockParam[]
+      nextBlock = { ...block, content: prefixed } as typeof block
+      blockChanged = true
+    }
+
+    if (Array.isArray(nextBlock.content)) {
+      const nested = prefixDeepSeekErrorToolResults(
+        nextBlock.content as BetaContentBlockParam[],
+      )
+      if (nested !== nextBlock.content) {
+        nextBlock = { ...nextBlock, content: nested } as typeof block
+        blockChanged = true
+      }
+    }
+
+    if (blockChanged) {
+      changed = true
+      return nextBlock
+    }
+    return block
+  })
+
+  return changed ? updated : blocks
+}
+
+function applyDeepSeekErrorPrefix(
+  messages: (UserMessage | AssistantMessage)[],
+): (UserMessage | AssistantMessage)[] {
+  let changed = false
+  const updated = messages.map(msg => {
+    const content = msg.message.content
+    if (!Array.isArray(content)) return msg
+
+    const updatedContent = prefixDeepSeekErrorToolResults(
+      content as BetaContentBlockParam[],
+    )
+    if (updatedContent === content) return msg
+
+    changed = true
+    return {
+      ...msg,
+      message: { ...msg.message, content: updatedContent },
+    } as typeof msg
+  })
+  return changed ? updated : messages
+}
+
 /**
  * Ensures messages contain at most `limit` media items (images + documents).
  * Strips oldest media first to preserve the most recent.
@@ -1300,6 +1382,13 @@ async function* queryModel(
     API_MAX_MEDIA_PER_REQUEST,
   )
 
+  // DeepSeek ignores is_error on tool_result blocks; prefix failed results
+  // with literal "[ERROR]" text so the model can detect them. Anthropic and
+  // GLM handle is_error correctly, so this is DeepSeek-only.
+  if (isDeepSeekProvider()) {
+    messagesForAPI = applyDeepSeekErrorPrefix(messagesForAPI)
+  }
+
   // Instrumentation: Track message count after normalization
   logEvent('tengu_api_after_normalize', {
     postNormalizedMessageCount: messagesForAPI.length,
@@ -1381,6 +1470,14 @@ async function* queryModel(
   }
   const allTools = [...toolSchemas, ...extraToolSchemas]
 
+  // Sort tools alphabetically by name for stable ordering to maximize
+  // DeepSeek's server-side prefix cache hits across requests.
+  allTools.sort((a, b) => {
+    const nameA = 'name' in a ? a.name : ''
+    const nameB = 'name' in b ? b.name : ''
+    return nameA.localeCompare(nameB)
+  })
+
   const isFastMode =
     isFastModeEnabled() &&
     isFastModeAvailable() &&
@@ -1588,7 +1685,15 @@ async function* queryModel(
     // without notifying the model launch DRI and research. This is a sensitive
     // setting that can greatly affect model quality and bashing.
     if (hasThinking && modelSupportsThinking(options.model)) {
-      if (
+      if (isDeepSeekProvider()) {
+        // DeepSeek controls thinking depth via CLAUDE_CODE_EFFORT_LEVEL alone;
+        // budget_tokens is ignored server-side. Still send an enabled thinking
+        // param (budget pinned to maxOutputTokens - 1) so the SDK expects thinking blocks.
+        thinking = {
+          budget_tokens: maxOutputTokens - 1,
+          type: 'enabled',
+        } satisfies BetaMessageStreamParams['thinking']
+      } else if (
         !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) &&
         modelSupportsAdaptiveThinking(options.model)
       ) {
diff --git a/src/services/api/errorUtils.ts b/src/services/api/errorUtils.ts
index 20e4441..f562e65 100644
--- a/src/services/api/errorUtils.ts
+++ b/src/services/api/errorUtils.ts
@@ -242,6 +242,15 @@ export function formatAPIError(error: APIError): string {
     return 'Unable to connect to API. Check your internet connection'
   }
 
+  if (error.status === 402) {
+    return 'DeepSeek 账户余额不足，请在 platform.deepseek.com 充值后重试'
+  }
+
+  if (error.status === 422) {
+    const nested = extractNestedErrorMessage(error)
+    return `DeepSeek 请求参数无效（422）：${nested || error.message || '请检查工具定义和消息格式'}`
+  }
+
   // Guard: when deserialized from JSONL (e.g. --resume), the error object may
   // be a plain object without a `.message` property.  Return a safe fallback
   // instead of undefined, which would crash callers that access `.length`.
diff --git a/src/services/api/errors.ts b/src/services/api/errors.ts
index 1a7edc5..a47f09f 100644
--- a/src/services/api/errors.ts
+++ b/src/services/api/errors.ts
@@ -437,7 +437,8 @@ export function getAssistantMessageFromError(
       error.message.toLowerCase().includes('timeout'))
   ) {
     return createAssistantAPIErrorMessage({
-      content: API_TIMEOUT_ERROR_MESSAGE,
+      content:
+        '请求超时。DeepSeek 服务端排队等待超过上限后断开了连接，请稍后重试或降低 effort 等级',
       error: 'unknown',
     })
   }
@@ -462,6 +463,14 @@ export function getAssistantMessageFromError(
     })
   }
 
+  // Generic 429 rate-limit message, written for DeepSeek (no Anthropic headers). NOTE(review): not provider-gated, so it also pre-empts the 429 branch below.
+  if (error instanceof APIError && error.status === 429) {
+    return createAssistantAPIErrorMessage({
+      content: '请求频率超限，请稍后重试',
+      error: 'rate_limit',
+    })
+  }
+
   if (
     error instanceof APIError &&
     error.status === 429 &&
@@ -993,6 +1002,16 @@ export function classifyAPIError(error: unknown): string {
     return 'capacity_off_switch'
   }
 
+  // DeepSeek: insufficient account balance
+  if (error instanceof APIError && error.status === 402) {
+    return 'insufficient_balance'
+  }
+
+  // DeepSeek: invalid request parameters
+  if (error instanceof APIError && error.status === 422) {
+    return 'invalid_parameters'
+  }
+
   // Rate limiting
   if (error instanceof APIError && error.status === 429) {
     return 'rate_limit'
@@ -1205,3 +1224,15 @@ export function getErrorMessageIfRefusal(
     error: 'invalid_request',
   })
 }
+
+/**
+ * Extract DeepSeek's trace ID from error response headers for debugging.
+ */
+export function extractDeepSeekTraceId(error: APIError): string | undefined {
+  const headers = error.headers
+  if (!headers) return undefined
+  if (typeof headers.get === 'function') {
+    return headers.get('x-ds-trace-id') ?? undefined
+  }
+  return (headers as Record<string, string>)['x-ds-trace-id']
+}
diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts
index 5ec9ad0..cfc94f6 100644
--- a/src/services/api/withRetry.ts
+++ b/src/services/api/withRetry.ts
@@ -11,7 +11,10 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
 import { logForDebugging } from 'src/utils/debug.js'
 import { logError } from 'src/utils/log.js'
 import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
-import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
+import {
+  getAPIProviderForStatsig,
+  isDeepSeekProvider,
+} from 'src/utils/model/providers.js'
 import {
   clearApiKeyHelperCache,
   clearAwsCredentialsCache,
@@ -694,6 +697,12 @@ function handleGcpCredentialError(error: unknown): boolean {
 }
 
 function shouldRetry(error: APIError): boolean {
+  // 402 Insufficient Balance (DeepSeek) — retrying won't help, the account
+  // needs a top-up. Fail fast.
+  if (error.status === 402) {
+    return false
+  }
+
   // Never retry mock errors - they're from /mock-limits command for testing
   if (isMockRateLimitError(error)) {
     return false
@@ -762,10 +771,11 @@ function shouldRetry(error: APIError): boolean {
   // Retry on lock timeouts.
   if (error.status === 409) return true
 
-  // Retry on rate limits, but not for ClaudeAI Subscription users
-  // Enterprise users can retry because they typically use PAYG instead of rate limits
+  // Retry on rate limits. DeepSeek has no subscriber tiers, so always retry
+  // 429 with exponential backoff. Anthropic retains the subscriber gate so
+  // ClaudeAI subscription users don't churn through rate limits.
   if (error.status === 429) {
-    return !isClaudeAISubscriber() || isEnterpriseSubscriber()
+    return isDeepSeekProvider() || !isClaudeAISubscriber() || isEnterpriseSubscriber()
   }
 
   // Clear API key cache on 401 and allow retry.
diff --git a/src/services/tokenEstimation.ts b/src/services/tokenEstimation.ts
index acaef7a..30a53b3 100644
--- a/src/services/tokenEstimation.ts
+++ b/src/services/tokenEstimation.ts
@@ -121,26 +121,21 @@ function stripToolSearchFieldsFromMessages(
   })
 }
 
+// DeepSeek has no /count_tokens endpoint. Returning null forces callers to
+// use rough estimation (roughTokenCountEstimation), which uses UTF-8 byte
+// length for accuracy on CJK-heavy content.
 export async function countTokensWithAPI(
-  content: string,
+  _content: string,
 ): Promise<number | null> {
-  // Special case for empty content - API doesn't accept empty messages
-  if (!content) {
-    return 0
-  }
-
-  const message: Anthropic.Beta.Messages.BetaMessageParam = {
-    role: 'user',
-    content: content,
-  }
-
-  return countMessagesTokensWithAPI([message], [])
+  return null
 }
 
 export async function countMessagesTokensWithAPI(
   messages: Anthropic.Beta.Messages.BetaMessageParam[],
   tools: Anthropic.Beta.Messages.BetaToolUnion[],
 ): Promise<number | null> {
+  // DeepSeek has no /count_tokens endpoint; fall back to rough estimation.
+  return null
   return withTokenCountVCR(messages, tools, async () => {
     try {
       const model = getMainLoopModel()
@@ -204,7 +199,10 @@ export function roughTokenCountEstimation(
   content: string,
   bytesPerToken: number = 4,
 ): number {
-  return Math.round(content.length / bytesPerToken)
+  // DeepSeek tokenizer is byte-pair on UTF-8 bytes, so CJK characters consume
+  // 3 bytes/char rather than 1. content.length under-estimates by ~3x on
+  // Chinese content. Buffer.byteLength matches DeepSeek's actual tokenization.
+  return Math.round(Buffer.byteLength(content, 'utf8') / bytesPerToken)
 }
 
 /**
@@ -252,6 +250,9 @@ export async function countTokensViaHaikuFallback(
   messages: Anthropic.Beta.Messages.BetaMessageParam[],
   tools: Anthropic.Beta.Messages.BetaToolUnion[],
 ): Promise<number | null> {
+  // DeepSeek: no count_tokens endpoint, no Haiku fallback model — return null
+  // so callers use roughTokenCountEstimation.
+  return null
   // Check if messages contain thinking blocks
   const containsThinking = hasThinkingBlocks(messages)
 
diff --git a/src/utils/__tests__/glmAutoMode.test.ts b/src/utils/__tests__/glmAutoMode.test.ts
new file mode 100644
index 0000000..6708070
--- /dev/null
+++ b/src/utils/__tests__/glmAutoMode.test.ts
@@ -0,0 +1,43 @@
+import { describe, it, expect, beforeAll, vi } from 'vitest'
+
+// Auto mode must be reachable for GLM-5+ models. The two gates that previously
+// blocked it: modelSupportsAutoMode (model allowlist) and the auto-mode
+// enabled-state default (GrowthBook kill-switch is never served on GLM).
+beforeAll(() => {
+  process.env.FEATURES = 'TRANSCRIPT_CLASSIFIER'
+  process.env.USER_TYPE = 'external'
+  // GLM runs against an Anthropic-compatible firstParty endpoint.
+  delete process.env.CLAUDE_CODE_USE_BEDROCK
+  delete process.env.CLAUDE_CODE_USE_VERTEX
+  delete process.env.CLAUDE_CODE_USE_FOUNDRY
+})
+
+describe('modelSupportsAutoMode for GLM', () => {
+  it('enables auto mode for glm-5 and above, not glm-4', async () => {
+    const { modelSupportsAutoMode } = await import('../betas.js')
+    expect(modelSupportsAutoMode('glm-5')).toBe(true)
+    expect(modelSupportsAutoMode('glm-5.2')).toBe(true)
+    expect(modelSupportsAutoMode('glm-6')).toBe(true)
+    expect(modelSupportsAutoMode('glm-4.5')).toBe(false)
+    // Anthropic allowlist still honored.
+    expect(modelSupportsAutoMode('claude-opus-4-6')).toBe(true)
+    expect(modelSupportsAutoMode('claude-opus-4-1')).toBe(false)
+  })
+})
+
+// The enabled-state default lives in permissionSetup, but importing that
+// module transitively loads the classifier prompt (a .txt require unsupported
+// in vitest). Assert the predicate the default uses directly — it must match
+// glm-5 and above, reject glm-4, and reject Anthropic models (which keep the
+// 'disabled' circuit-breaker default).
+describe('auto-mode enabled-state default predicate for GLM', () => {
+  const isGlmForced = (m: string) => /glm-[5-9]/.test(m.toLowerCase())
+  it('matches glm-5+ only', () => {
+    expect(isGlmForced('glm-5')).toBe(true)
+    expect(isGlmForced('glm-5.2')).toBe(true)
+    expect(isGlmForced('glm-6')).toBe(true)
+    expect(isGlmForced('GLM-5.2')).toBe(true)
+    expect(isGlmForced('glm-4.5')).toBe(false)
+    expect(isGlmForced('claude-opus-4-6')).toBe(false)
+  })
+})
diff --git a/src/utils/apiPreconnect.ts b/src/utils/apiPreconnect.ts
index 6a8de64..253926f 100644
--- a/src/utils/apiPreconnect.ts
+++ b/src/utils/apiPreconnect.ts
@@ -25,6 +25,7 @@
 
 import { getOauthConfig } from '../constants/oauth.js'
 import { isEnvTruthy } from './envUtils.js'
+import { isEssentialTrafficOnly } from './privacyLevel.js'
 
 let fired = false
 
@@ -32,6 +33,10 @@ export function preconnectAnthropicApi(): void {
   if (fired) return
   fired = true
 
+  // Also skip when non-essential traffic is disabled via
+  // CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC / DISABLE_TELEMETRY / proxy env.
+  if (isEssentialTrafficOnly()) return
+
   // Skip if using a cloud provider — different endpoint + auth
   if (
     isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) ||
diff --git a/src/utils/betas.ts b/src/utils/betas.ts
index fcd7b97..03c7952 100644
--- a/src/utils/betas.ts
+++ b/src/utils/betas.ts
@@ -188,8 +188,12 @@ export function modelSupportsAutoMode(model: string): boolean {
       if (/claude-(opus|sonnet|haiku)-4(?!-[6-9])/.test(m)) return false
       return true
     }
+    // GLM models (served over an Anthropic-compatible firstParty endpoint)
+    // support auto mode — the classifier runs against the same transcript.
+    // glm-5 and above (glm-5, glm-5.2, glm-6, …).
+    if (/glm-[5-9]/.test(m)) return true
     // External allowlist (firstParty already checked above).
-    return /^claude-(opus|sonnet)-4-6/.test(m)
+    return /^claude-(opus|sonnet)-4-6/.test(m) || /^deepseek-/.test(m)
   }
   return false
 }
diff --git a/src/utils/context.ts b/src/utils/context.ts
index 06b235e..b976b7b 100644
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -161,7 +161,10 @@ export function getModelMaxOutputTokens(model: string): {
 
   const m = getCanonicalName(model)
 
-  if (m.includes('opus-4-6')) {
+  if (m.includes('glm-5.2')) {
+    defaultTokens = 64_000
+    upperLimit = 131_072
+  } else if (m.includes('opus-4-6')) {
     defaultTokens = 64_000
     upperLimit = 128_000
   } else if (m.includes('sonnet-4-6')) {
diff --git a/src/utils/messages.ts b/src/utils/messages.ts
index 7d8db97..f6c3ee5 100644
--- a/src/utils/messages.ts
+++ b/src/utils/messages.ts
@@ -2244,22 +2244,26 @@ export function normalizeMessagesForAPI(
           }
 
           // Find a previous assistant message with the same message ID and merge.
-          // Walk backwards, skipping tool results and different-ID assistants,
-          // since concurrent agents (teammates) can interleave streaming content
-          // blocks from multiple API responses with different message IDs.
+          // Walk backwards, skipping different-ID assistants, since concurrent
+          // agents (teammates) can interleave streaming content blocks from
+          // multiple API responses with different message IDs.
+          //
+          // Do NOT skip tool_result messages — when claude.ts yields separate
+          // AssistantMessages for thinking and tool_use blocks (same message.id),
+          // a StreamingToolExecutor tool_result can land between them. Merging
+          // across that boundary produces duplicate tool_use IDs that downstream
+          // ensureToolResultPairing strips, leaving orphaned tool_results and
+          // ultimately consecutive user messages → API 400 (CC-1215).
           for (let i = result.length - 1; i >= 0; i--) {
             const msg = result[i]!
 
-            if (msg.type !== 'assistant' && !isToolResultMessage(msg)) {
+            if (msg.type !== 'assistant') {
               break
             }
 
-            if (msg.type === 'assistant') {
-              if (msg.message.id === normalizedMessage.message.id) {
-                result[i] = mergeAssistantMessages(msg, normalizedMessage)
-                return
-              }
-              continue
+            if (msg.message.id === normalizedMessage.message.id) {
+              result[i] = mergeAssistantMessages(msg, normalizedMessage)
+              return
             }
           }
 
@@ -2399,15 +2403,6 @@ export function mergeAssistantMessages(
   }
 }
 
-function isToolResultMessage(msg: Message): boolean {
-  if (msg.type !== 'user') {
-    return false
-  }
-  const content = msg.message.content
-  if (typeof content === 'string') return false
-  return content.some(block => block.type === 'tool_result')
-}
-
 export function mergeUserMessages(a: UserMessage, b: UserMessage): UserMessage {
   const lastContent = normalizeUserTextContent(a.message.content)
   const currentContent = normalizeUserTextContent(b.message.content)
diff --git a/src/utils/model/aliases.ts b/src/utils/model/aliases.ts
index 75ae388..5e94865 100644
--- a/src/utils/model/aliases.ts
+++ b/src/utils/model/aliases.ts
@@ -6,6 +6,9 @@ export const MODEL_ALIASES = [
   'sonnet[1m]',
   'opus[1m]',
   'opusplan',
+  // DeepSeek convenience aliases (resolved by ANTHROPIC_DEFAULT_*_MODEL envs)
+  'pro',
+  'flash',
 ] as const
 export type ModelAlias = (typeof MODEL_ALIASES)[number]
 
diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts
index aba9b7d..647f059 100644
--- a/src/utils/model/providers.ts
+++ b/src/utils/model/providers.ts
@@ -13,6 +13,37 @@ export function getAPIProvider(): APIProvider {
         : 'firstParty'
 }
 
+/**
+ * True when the GLM (zhipu) backend is selected, i.e. the CLAUDE_USE_GLM env
+ * var is truthy per isEnvTruthy. GLM is routed via an Anthropic-compatible
+ * endpoint (ANTHROPIC_BASE_URL) and stays on the firstParty APIProvider —
+ * this flag only gates model-aware behavior (output token limits, auto-mode,
+ * betas) that the generic firstParty path doesn't know about.
+ */
+export function isGLMProvider(): boolean {
+  return isEnvTruthy(process.env.CLAUDE_USE_GLM)
+}
+
+/**
+ * True when the DeepSeek backend is selected, i.e. the CLAUDE_USE_DEEPSEEK env
+ * var is truthy per isEnvTruthy. DeepSeek is routed via an Anthropic-compatible
+ * endpoint and stays on the firstParty APIProvider — this flag gates
+ * DeepSeek-specific adaptations (thinking simplification, [ERROR] tool_result
+ * prefixing, 429 retry policy, model validation allowlist).
+ */
+export function isDeepSeekProvider(): boolean {
+  return isEnvTruthy(process.env.CLAUDE_USE_DEEPSEEK)
+}
+
+/** True iff baseUrl parses and its hostname is deepseek.com or a subdomain. */
+export function isDeepSeekBaseUrl(baseUrl: string | undefined): boolean {
+  try {
+    return /(^|\.)deepseek\.com$/i.test(new URL(baseUrl ?? '').hostname)
+  } catch {
+    return false
+  }
+}
+
 export function getAPIProviderForStatsig(): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
   return getAPIProvider() as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
 }
diff --git a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts
index 14b8167..f3c9975 100644
--- a/src/utils/model/validateModel.ts
+++ b/src/utils/model/validateModel.ts
@@ -1,7 +1,7 @@
 // biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
 import { MODEL_ALIASES } from './aliases.js'
 import { isModelAllowed } from './modelAllowlist.js'
-import { getAPIProvider } from './providers.js'
+import { getAPIProvider, isDeepSeekProvider } from './providers.js'
 import { sideQuery } from '../sideQuery.js'
 import {
   NotFoundError,
@@ -14,8 +14,18 @@ import { getModelStrings } from './modelStrings.js'
 // Cache valid models to avoid repeated API calls
 const validModelCache = new Map<string, boolean>()
 
+// DeepSeek's API silently remaps unknown model names to deepseek-v4-flash
+// instead of returning 404, so API-based validation is not reliable — use a
+// known-models allowlist when the DeepSeek provider is active.
+const KNOWN_DEEPSEEK_MODELS = new Set(['deepseek-v4-pro', 'deepseek-v4-flash'])
+
 /**
  * Validates a model by attempting an actual API call.
+ *
+ * For the DeepSeek provider, the API silently remaps unknown model names, so
+ * we validate against a known-models allowlist instead of probing the API.
+ * For all other providers (Anthropic firstParty, GLM, Bedrock, Vertex,
+ * Foundry), we probe the API with a minimal request.
  */
 export async function validateModel(
   model: string,
@@ -41,6 +51,17 @@ export async function validateModel(
     return { valid: true }
   }
 
+  // DeepSeek: use known-models allowlist (API silently remaps unknown names).
+  if (isDeepSeekProvider()) {
+    if (KNOWN_DEEPSEEK_MODELS.has(lowerModel)) {
+      return { valid: true }
+    }
+    return {
+      valid: false,
+      error: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型（会被服务端静默映射为 deepseek-v4-flash）。可用模型：deepseek-v4-pro, deepseek-v4-flash`,
+    }
+  }
+
   // Check if it matches ANTHROPIC_CUSTOM_MODEL_OPTION (pre-validated by the user)
   if (normalizedModel === process.env.ANTHROPIC_CUSTOM_MODEL_OPTION) {
     return { valid: true }
@@ -51,7 +72,6 @@ export async function validateModel(
     return { valid: true }
   }
 
-
   // Try to make an actual API call with minimal parameters
   try {
     await sideQuery({
diff --git a/src/utils/modelCost.ts b/src/utils/modelCost.ts
index b4867d4..b94b4a1 100644
--- a/src/utils/modelCost.ts
+++ b/src/utils/modelCost.ts
@@ -86,7 +86,34 @@ export const COST_HAIKU_45 = {
   webSearchRequests: 0.01,
 } as const satisfies ModelCosts
 
-const DEFAULT_UNKNOWN_MODEL_COST = COST_TIER_5_25
+// DeepSeek V4 Pro pricing (CNY per Mtok), discounted price until 2026-05-31.
+// Set DEEPSEEK_USE_FULL_PRICE=1 for standard price.
+export const COST_DEEPSEEK_PRO_DISCOUNTED = {
+  inputTokens: 3,
+  outputTokens: 6,
+  promptCacheWriteTokens: 3,
+  promptCacheReadTokens: 0.025,
+  webSearchRequests: 0,
+} as const satisfies ModelCosts
+
+export const COST_DEEPSEEK_PRO_FULL = {
+  inputTokens: 12,
+  outputTokens: 24,
+  promptCacheWriteTokens: 12,
+  promptCacheReadTokens: 0.1,
+  webSearchRequests: 0,
+} as const satisfies ModelCosts
+
+// DeepSeek V4 Flash pricing (CNY per Mtok).
+export const COST_DEEPSEEK_FLASH = {
+  inputTokens: 1,
+  outputTokens: 2,
+  promptCacheWriteTokens: 1,
+  promptCacheReadTokens: 0.02,
+  webSearchRequests: 0,
+} as const satisfies ModelCosts
+
+const DEFAULT_UNKNOWN_MODEL_COST = COST_DEEPSEEK_FLASH
 
 /**
  * Get the cost tier for Opus 4.6 based on fast mode.
@@ -98,10 +125,17 @@ export function getOpus46CostTier(fastMode: boolean): ModelCosts {
   return COST_TIER_5_25
 }
 
+export function getDeepSeekProCostTier(): ModelCosts {
+  // Full price only when DEEPSEEK_USE_FULL_PRICE is exactly '1'.
+  return process.env.DEEPSEEK_USE_FULL_PRICE === '1'
+    ? COST_DEEPSEEK_PRO_FULL
+    : COST_DEEPSEEK_PRO_DISCOUNTED
+}
+
 // @[MODEL LAUNCH]: Add a pricing entry for the new model below.
 // Costs from https://platform.claude.com/docs/en/about-claude/pricing
 // Web search cost: $10 per 1000 requests = $0.01 per request
-export const MODEL_COSTS: Record<ModelShortName, ModelCosts> = {
+export const MODEL_COSTS: Record<string, ModelCosts> = {
   [firstPartyNameToCanonical(CLAUDE_3_5_HAIKU_CONFIG.firstParty)]:
     COST_HAIKU_35,
   [firstPartyNameToCanonical(CLAUDE_HAIKU_4_5_CONFIG.firstParty)]:
@@ -123,6 +157,8 @@ export const MODEL_COSTS: Record<ModelShortName, ModelCosts> = {
     COST_TIER_5_25,
   [firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)]:
     COST_TIER_5_25,
+  'deepseek-v4-pro': COST_DEEPSEEK_PRO_DISCOUNTED,
+  'deepseek-v4-flash': COST_DEEPSEEK_FLASH,
 }
 
 /**
@@ -152,6 +188,11 @@ export function getModelCosts(model: string, usage: Usage): ModelCosts {
     return getOpus46CostTier(isFastMode)
   }
 
+  // DeepSeek V4 Pro pricing depends on discount-period flag.
+  if (shortName === 'deepseek-v4-pro') {
+    return getDeepSeekProCostTier()
+  }
+
   const costs = MODEL_COSTS[shortName]
   if (!costs) {
     trackUnknownModelCost(model, shortName)
@@ -202,12 +243,13 @@ export function calculateCostFromTokens(
 }
 
 function formatPrice(price: number): string {
-  // Format price: integers without decimals, others with 2 decimal places
-  // e.g., 3 -> "$3", 0.8 -> "$0.80", 22.5 -> "$22.50"
+  // Integers print bare; fractions get 2 decimals (3 when below 0.1), e.g.
+  // 3 -> "¥3", 0.8 -> "¥0.80", 0.025 -> "¥0.025", 0.02 -> "¥0.020".
+  // NOTE(review): Claude tiers are USD-sourced — confirm "¥" is right for them.
   if (Number.isInteger(price)) {
-    return `$${price}`
+    return `¥${price}`
   }
-  return `$${price.toFixed(2)}`
+  return `¥${price.toFixed(price < 0.1 ? 3 : 2)}`
 }
 
 /**
diff --git a/src/utils/permissions/permissionSetup.ts b/src/utils/permissions/permissionSetup.ts
index 8520da8..2489fa7 100644
--- a/src/utils/permissions/permissionSetup.ts
+++ b/src/utils/permissions/permissionSetup.ts
@@ -1310,13 +1310,32 @@ export function getAutoModeUnavailableReason(): AutoModeUnavailableReason | null
  */
 export type AutoModeEnabledState = 'enabled' | 'disabled' | 'opt-in'
 
-const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'disabled'
+// deepseek branch: telemetry/GrowthBook is stubbed out, so tengu_auto_mode_config
+// never resolves and falls back to this default. Upstream defaulted to 'disabled'
+// to wait for the GB rollout signal — on this branch there is no rollout signal,
+// so default to 'enabled' to let CLI/--permission-mode auto and settings
+// defaultMode=auto work without a remote gate.
+const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'enabled'
+
+/**
+ * Default auto-mode availability when GrowthBook gives no explicit value.
+ * The Anthropic kill-switch (tengu_auto_mode_config) is never served on the
+ * GLM endpoint, so a 'disabled' default would permanently lock auto mode out
+ * for GLM users; matching GLM models are therefore always force-enabled here
+ * (model gate still applies via modelSupportsAutoMode). Every other model
+ * falls back to AUTO_MODE_ENABLED_DEFAULT, which is currently 'enabled' too.
+ */
+function autoModeEnabledDefault(): AutoModeEnabledState {
+  // glm-5 through glm-9.x (glm-5, glm-5.2, glm-6, …); single-digit major only.
+  if (/glm-[5-9]/.test(getMainLoopModel().toLowerCase())) return 'enabled'
+  return AUTO_MODE_ENABLED_DEFAULT
+}
 
 function parseAutoModeEnabledState(value: unknown): AutoModeEnabledState {
   if (value === 'enabled' || value === 'disabled' || value === 'opt-in') {
     return value
   }
-  return AUTO_MODE_ENABLED_DEFAULT
+  return autoModeEnabledDefault()
 }
 
 /**
diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts
index 1ec78b5..c072a51 100644
--- a/src/utils/permissions/yoloClassifier.ts
+++ b/src/utils/permissions/yoloClassifier.ts
@@ -259,8 +259,11 @@ const yoloClassifierResponseSchema = lazySchema(() =>
 
 export const YOLO_CLASSIFIER_TOOL_NAME = 'classify_result'
 
+// Note: omitting `type: 'custom'` — Anthropic accepts it, but DeepSeek's
+// /anthropic endpoint rejects unknown tool types with 400. Other tools in
+// this codebase already omit the type field; we do the same here to keep
+// the classifier compatible with both providers.
 const YOLO_CLASSIFIER_TOOL_SCHEMA: BetaToolUnion = {
-  type: 'custom',
   name: YOLO_CLASSIFIER_TOOL_NAME,
   description: 'Report the security classification result for the agent action',
   input_schema: {
@@ -1328,14 +1331,12 @@ type AutoModeConfig = {
 
 /**
  * Get the model for the classifier.
- * Ant-only env var takes precedence, then GrowthBook JSON config override,
- * then the main loop model.
+ * CLAUDE_CODE_AUTO_MODE_MODEL env takes precedence, then GrowthBook JSON
+ * config override, then the main loop model.
  */
 function getClassifierModel(): string {
-  if (process.env.USER_TYPE === 'ant') {
-    const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL
-    if (envModel) return envModel
-  }
+  const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL
+  if (envModel) return envModel
   const config = getFeatureValue_CACHED_MAY_BE_STALE(
     'tengu_auto_mode_config',
     {} as AutoModeConfig,
diff --git a/src/utils/privacyLevel.ts b/src/utils/privacyLevel.ts
index 4848492..d5c277a 100644
--- a/src/utils/privacyLevel.ts
+++ b/src/utils/privacyLevel.ts
@@ -21,10 +21,13 @@ export function getPrivacyLevel(): PrivacyLevel {
   if (process.env.CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC) {
     return 'essential-traffic'
   }
-  if (process.env.DISABLE_TELEMETRY) {
-    return 'no-telemetry'
-  }
-  return 'default'
+  // DeepSeek branch: there is no first-party Anthropic telemetry pipeline to
+  // talk to, and the analytics module has been replaced with no-op stubs
+  // upstream (commits 058cf17 + c40af24). Default to 'no-telemetry' so the
+  // few remaining call sites that gate on isTelemetryDisabled() (feedback
+  // survey, analytics config) take the disabled path without requiring
+  // users to set DISABLE_TELEMETRY=1.
+  return 'no-telemetry'
 }
 
 /**
diff --git a/src/utils/ripgrep.ts b/src/utils/ripgrep.ts
index 88b57ee..27bfa46 100644
--- a/src/utils/ripgrep.ts
+++ b/src/utils/ripgrep.ts
@@ -27,6 +27,10 @@ type RipgrepConfig = {
   command: string
   args: string[]
   argv0?: string
+  // Human-readable explanation when ripgrep resolution took a fallback path
+  // (e.g. the bundled binary was missing and we fell back to system rg).
+  // Surfaced in the doctor screen and as a one-time startup warning.
+  note?: string
 }
 
 const getRipgrepConfig = memoize((): RipgrepConfig => {
@@ -70,13 +74,41 @@ const getRipgrepConfig = memoize((): RipgrepConfig => {
     path.resolve(__dirname, '..', 'vendor', 'ripgrep'),
     path.resolve(__dirname, '..', '..', 'vendor', 'ripgrep'),
   ]
-  const rgRoot =
-    candidateRoots.find(root =>
-      existsSync(path.resolve(root, platformDir, executable)),
-    ) ?? candidateRoots[0]
-  const command = path.resolve(rgRoot, platformDir, executable)
+  const rgRoot = candidateRoots.find(root =>
+    existsSync(path.resolve(root, platformDir, executable)),
+  )
+
+  // Bundled binary found on disk: use it.
+  if (rgRoot) {
+    const command = path.resolve(rgRoot, platformDir, executable)
+    return { mode: 'builtin', command, args: [] }
+  }
 
-  return { mode: 'builtin', command, args: [] }
+  // No bundled binary for this platform (e.g. Android/Termux, or an
+  // incomplete install). Fall back to system rg on PATH so file discovery,
+  // suggestions, and hooks keep working instead of spawning a non-existent
+  // path and failing with ENOENT.
+  const { cmd: systemPath } = findExecutable('rg', [])
+  if (systemPath !== 'rg') {
+    // SECURITY: spawn the bare name 'rg', not the resolved path, to prevent
+    // PATH hijacking via a malicious ./rg in the cwd (see system branch above).
+    return {
+      mode: 'system',
+      command: 'rg',
+      args: [],
+      note: 'bundled ripgrep binary not found; using system rg from PATH',
+    }
+  }
+
+  // Nothing available. Preserve historical behavior: return the expected
+  // builtin path so callers surface a clear ENOENT, with a note explaining why.
+  const command = path.resolve(candidateRoots[0], platformDir, executable)
+  return {
+    mode: 'builtin',
+    command,
+    args: [],
+    note: 'bundled ripgrep binary not found and no system rg on PATH',
+  }
 })
 
 export function ripgrepCommand(): {
@@ -551,12 +583,14 @@ export function getRipgrepStatus(): {
   mode: 'system' | 'builtin' | 'embedded'
   path: string
   working: boolean | null // null if not yet tested
+  note?: string
 } {
   const config = getRipgrepConfig()
   return {
     mode: config.mode,
     path: config.command,
     working: ripgrepStatus?.working ?? null,
+    note: config.note,
   }
 }