quantumiodb · yjhjstz · Jun 2, 2026 · May 12, 2026 · Jun 6, 2026 · Jun 19, 2026
diff --git a/docs/deepseek-vs-claude-pricing.md b/docs/deepseek-vs-claude-pricing.md
@@ -0,0 +1,98 @@
+# DeepSeek vs Claude API Pricing
+
+Captured 2026-05-12. Sources:
+- DeepSeek: https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+- Anthropic: https://platform.claude.com/docs/en/about-claude/pricing
+- FX assumed: $1 ≈ ¥7.15
+
+## Per-model prices (per 1M tokens)
+
+### Flagship tier
+
+| Item | Claude Opus 4.7 | Claude Sonnet 4.6 | DeepSeek V4 Pro (discount) | DeepSeek V4 Pro (full) |
+|---|---:|---:|---:|---:|
+| Input (cache miss) | $5.00 / ¥35.75 | $3.00 / ¥21.45 | ¥3 | ¥12 |
+| Cache write (5m) | $6.25 / ¥44.69 | $3.75 / ¥26.81 | ¥3 | ¥12 |
+| Cache read | $0.50 / ¥3.58 | $0.30 / ¥2.15 | ¥0.025 | ¥0.10 |
+| Output | $25.00 / ¥178.75 | $15.00 / ¥107.25 | ¥6 | ¥24 |
+| Context window | 200K | 1M | 1M | 1M |
+| Max output tokens | varies | varies | 384K | 384K |
+
+DeepSeek V4 Pro discount (75% off; full price is 4x) runs through 2026-05-31 23:59 Beijing time.
+From 2026-06-01 the full-price column applies unless DeepSeek extends the discount.
+
+### Lightweight tier
+
+| Item | Claude Haiku 4.5 | DeepSeek V4 Flash |
+|---|---:|---:|
+| Input (cache miss) | $1.00 / ¥7.15 | ¥1 |
+| Cache write (5m) | $1.25 / ¥8.94 | ¥1 |
+| Cache read | $0.10 / ¥0.72 | ¥0.02 |
+| Output | $5.00 / ¥35.75 | ¥2 |
+| Context window | 200K | 1M |
+
+## Multiplier view (how much Claude costs vs DeepSeek discount price)
+
+### Pro vs Sonnet 4.6 (same-tier comparison)
+
+| Item | Sonnet 4.6 / Pro multiplier |
+|---|---:|
+| Input (cache miss) | 7.15x |
+| Cache read | 85.8x |
+| Output | 17.9x |
+
+### Pro vs Opus 4.7 (cross-tier comparison)
+
+| Item | Opus 4.7 / Pro multiplier |
+|---|---:|
+| Input (cache miss) | 11.9x |
+| Cache read | 143x |
+| Output | 29.8x |
+
+### Flash vs Haiku 4.5
+
+| Item | Haiku 4.5 / Flash multiplier |
+|---|---:|
+| Input (cache miss) | 7.15x |
+| Cache read | 35.8x |
+| Output | 17.9x |
+
+## Realistic Claude Code request cost
+
+Measured pattern: 15,872 cache_read + 66 input + 30 output tokens
+(observed first cache hit after the session_id-pinning fix in 283678a).
+
+| Provider | Cost per request | Per 100 requests |
+|---|---:|---:|
+| Claude Opus 4.7 | $0.0090 / ¥0.064 | ¥6.45 |
+| Claude Sonnet 4.6 | $0.0054 / ¥0.039 | ¥3.90 |
+| Claude Haiku 4.5 | $0.0018 / ¥0.013 | ¥1.30 |
+| DeepSeek V4 Pro (discount) | ¥0.000777 | ¥0.078 |
+| DeepSeek V4 Flash | ¥0.000437 | ¥0.044 |
+
+## Headline ratios at the typical Claude Code workload
+
+- DeepSeek Pro vs Claude Sonnet 4.6: **~50x cheaper**
+- DeepSeek Pro vs Claude Opus 4.7: **~83x cheaper**
+- DeepSeek Flash vs Claude Haiku 4.5: **~30x cheaper**
+
+After the 2026-05-31 discount expires, Pro full price would be 4x its
+current rate; ratios shrink to roughly 12x vs Sonnet, 21x vs Opus, but the
+gap remains substantial.
+
+## Caveats
+
+- **No Opus-class DeepSeek model.** Tasks that genuinely need Opus-level
+  reasoning (long math proofs, deep architecture work) have no DeepSeek
+  equivalent.
+- **Capability gaps with the Anthropic API**: DeepSeek's /anthropic endpoint
+  does not implement image/document content blocks, computer use, server-side
+  web search beyond what the model emits, MCP gateway, or redacted_thinking.
+  See `docs/openclaude-commits-review.md` and the deepseek branch port commits
+  for the adaptations Claude Code makes for these.
+- **Cache hit assumptions**: the per-request cost above assumes the
+  session_id pinning fix is in effect (commit 283678a). Without it,
+  cache_read_input_tokens stays at 0 and DeepSeek per-request cost is
+  roughly 60x higher (every input token is billed at the cache-miss price).
+- **No /count_tokens endpoint on DeepSeek.** Local UTF-8 byte estimation is
+  used instead (commit 6fbad90).
diff --git a/scripts/build.ts b/scripts/build.ts
@@ -20,7 +20,11 @@ const ENABLED_FEATURES = [
   'MCP_SKILLS',
   'HISTORY_PICKER',
   'TREE_SITTER_BASH',
-  'NATIVE_CLIENT_ATTESTATION',
+  // NATIVE_CLIENT_ATTESTATION intentionally disabled: it injects a 'cch=00000'
+  // placeholder into the x-anthropic-billing-header and computes a body hash.
+  // GLM/DeepSeek's /anthropic endpoints do not validate this header — sending
+  // it adds CPU + an HTTP header without value. Keep off for non-Anthropic
+  // providers.
   'BRIDGE_MODE',
   'COORDINATOR_MODE',
 ]

diff --git a/src/commands/cost/cost.ts b/src/commands/cost/cost.ts
@@ -1,24 +1,7 @@
 import { formatTotalCost } from '../../cost-tracker.js'
-import { currentLimits } from '../../services/claudeAiLimits.js'
 import type { LocalCommandCall } from '../../types/command.js'
-import { isClaudeAISubscriber } from '../../utils/auth.js'
 
+// DeepSeek: no Claude.ai subscription; always show the formatted cost.
 export const call: LocalCommandCall = async () => {
-  if (isClaudeAISubscriber()) {
-    let value: string
-
-    if (currentLimits.isUsingOverage) {
-      value =
-        'You are currently using your overages to power your Claude Code usage. We will automatically switch you back to your subscription rate limits when they reset'
-    } else {
-      value =
-        'You are currently using your subscription to power your Claude Code usage'
-    }
-
-    if (process.env.USER_TYPE === 'ant') {
-      value += `\n\n[ANT-ONLY] Showing cost anyway:\n ${formatTotalCost()}`
-    }
-    return { type: 'text', value }
-  }
   return { type: 'text', value: formatTotalCost() }
 }
diff --git a/src/components/CostThresholdDialog.tsx b/src/components/CostThresholdDialog.tsx
@@ -38,7 +38,7 @@ export function CostThresholdDialog(t0) {
   }
   let t4;
   if ($[4] !== onDone || $[5] !== t3) {
-    t4 = <Dialog title="You've spent $5 on the Anthropic API this session." onCancel={onDone}>{t1}{t3}</Dialog>;
+    t4 = <Dialog title="You've spent a significant amount on API calls this session." onCancel={onDone}>{t1}{t3}</Dialog>;
     $[4] = onDone;
     $[5] = t3;
     $[6] = t4;

diff --git a/src/components/Message.tsx b/src/components/Message.tsx
@@ -538,9 +538,6 @@ function AssistantMessageBlock(t0) {
       }
     case "thinking":
       {
-        if (!isTranscriptMode && !verbose) {
-          return null;
-        }
         const isLastThinking = !lastThinkingBlockId || thinkingBlockId === lastThinkingBlockId;
         const t1 = isTranscriptMode && !isLastThinking;
         let t2;

diff --git a/src/components/Settings/Config.tsx b/src/components/Settings/Config.tsx
@@ -281,6 +281,26 @@ export function Config({
         enabled: autoCompactEnabled
       });
     }
+  }, {
+    id: 'autoMemoryEnabled',
+    label: 'Auto-memory',
+    // settings.json default is "undefined" which the resolver in
+    // src/memdir/paths.ts treats as "disabled" on this branch. Mirror
+    // that so the toggle reflects what the runtime actually does.
+    value: settingsData?.autoMemoryEnabled ?? false,
+    type: 'boolean' as const,
+    onChange(autoMemoryEnabled: boolean) {
+      updateSettingsForSource('localSettings', {
+        autoMemoryEnabled
+      });
+      setSettingsData(prev_auto_mem => ({
+        ...prev_auto_mem,
+        autoMemoryEnabled
+      }));
+      logEvent('tengu_auto_memory_setting_changed', {
+        enabled: autoMemoryEnabled
+      });
+    }
   }, {
     id: 'spinnerTipsEnabled',
     label: 'Show tips',

diff --git a/src/components/ThinkingToggle.tsx b/src/components/ThinkingToggle.tsx
@@ -30,11 +30,11 @@ export function ThinkingToggle(t0) {
     t1 = [{
       value: "true",
       label: "Enabled",
-      description: "Claude will think before responding"
+      description: "Model will think before responding"
     }, {
       value: "false",
       label: "Disabled",
-      description: "Claude will respond without extended thinking"
+      description: "Model will respond without extended thinking"
     }];
     $[0] = t1;
   } else {

diff --git a/src/components/messages/AssistantThinkingMessage.tsx b/src/components/messages/AssistantThinkingMessage.tsx
@@ -4,6 +4,7 @@ import React from 'react';
 import { Box, Text } from '../../ink.js';
 import { CtrlOToExpand } from '../CtrlOToExpand.js';
 import { Markdown } from '../Markdown.js';
+import { useSettings } from '../../hooks/useSettings.js';
 type Props = {
   // Accept either full ThinkingBlock/ThinkingBlockParam or a minimal shape with just type and thinking
   param: ThinkingBlock | ThinkingBlockParam | {
@@ -36,7 +37,8 @@ export function AssistantThinkingMessage(t0) {
   if (hideInTranscript) {
     return null;
   }
-  const shouldShowFullThinking = isTranscriptMode || verbose;
+  const settings = useSettings();
+  const shouldShowFullThinking = isTranscriptMode || verbose || settings.alwaysThinkingEnabled !== false;
   if (!shouldShowFullThinking) {
     const t4 = addMargin ? 1 : 0;
     let t5;

diff --git a/src/constants/system.ts b/src/constants/system.ts
@@ -3,7 +3,7 @@
 import { feature } from 'bun:bundle'
 import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 import { logForDebugging } from '../utils/debug.js'
-import { isEnvDefinedFalsy } from '../utils/envUtils.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
 import { getAPIProvider } from '../utils/model/providers.js'
 import { getWorkload } from '../utils/workloadContext.js'
 
@@ -47,13 +47,10 @@ export function getCLISyspromptPrefix(options?: {
 
 /**
  * Check if attribution header is enabled.
- * Enabled by default, can be disabled via env var or GrowthBook killswitch.
+ * Disabled by default, can be enabled via env var.
  */
 function isAttributionHeaderEnabled(): boolean {
-  if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) {
-    return false
-  }
-  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true)
+  return isEnvTruthy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)
 }
 
 /**

diff --git a/src/context.ts b/src/context.ts
@@ -4,7 +4,7 @@ import {
   getAdditionalDirectoriesForClaudeMd,
   setCachedClaudeMdContent,
 } from './bootstrap/state.js'
-import { getLocalISODate } from './constants/common.js'
+import { getSessionStartDate } from './constants/common.js'
 import {
   filterInjectedMemoryFiles,
   getClaudeMds,
@@ -17,7 +17,7 @@ import { getBranch, getDefaultBranch, getIsGit, gitExe } from './utils/git.js'
 import { shouldIncludeGitInstructions } from './utils/gitSettings.js'
 import { logError } from './utils/log.js'
 
-const MAX_STATUS_CHARS = 2000
+const MAX_STATUS_CHARS = 1000
 
 // System prompt injection for cache breaking (ant-only, ephemeral debugging state)
 let systemPromptInjection: string | null = null
@@ -85,7 +85,7 @@ export const getGitStatus = memoize(async (): Promise<string | null> => {
     const truncatedStatus =
       status.length > MAX_STATUS_CHARS
         ? status.substring(0, MAX_STATUS_CHARS) +
-          '\n... (truncated because it exceeds 2k characters. If you need more information, run "git status" using BashTool)'
+          '\n... (truncated because it exceeds 1k characters. If you need more information, run "git status" using BashTool)'
         : status
 
     logForDiagnosticsNoPII('info', 'git_status_completed', {
@@ -183,7 +183,9 @@ export const getUserContext = memoize(
 
     return {
       ...(claudeMd && { claudeMd }),
-      currentDate: `Today's date is ${getLocalISODate()}.`,
+      // Use session-stable date to preserve DeepSeek's server-side prefix
+      // cache across midnight (was getLocalISODate() — refreshes daily).
+      currentDate: `Today's date is ${getSessionStartDate()}.`,
     }
   },
 )
diff --git a/src/cost-tracker.ts b/src/cost-tracker.ts
@@ -44,8 +44,11 @@ import {
 import { isFastModeEnabled } from './utils/fastMode.js'
 import { formatDuration, formatNumber } from './utils/format.js'
 import type { FpsMetrics } from './utils/fpsTracker.js'
-import { getCanonicalName } from './utils/model/model.js'
-import { calculateUSDCost } from './utils/modelCost.js'
+import {
+  getCanonicalName,
+  getDefaultMainLoopModelSetting,
+} from './utils/model/model.js'
+import { calculateUSDCost, getModelCosts } from './utils/modelCost.js'
 export {
   getTotalCostUSD as getTotalCost,
   getTotalDuration,
@@ -175,7 +178,7 @@ export function saveCurrentSessionCosts(fpsMetrics?: FpsMetrics): void {
 }
 
 function formatCost(cost: number, maxDecimalPlaces: number = 4): string {
-  return `$${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}`
+  return `¥${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}`
 }
 
 function formatModelUsage(): string {
@@ -234,12 +237,35 @@ export function formatTotalCost(): string {
 
   const modelUsageDisplay = formatModelUsage()
 
+  // DeepSeek-specific: show prompt cache hit rate and yuan savings, since the
+  // /anthropic endpoint exposes cache_read / cache_creation token counts and
+  // these are the headline cost driver (cache reads are ~120x cheaper).
+  let cacheStatsDisplay = ''
+  const cacheRead = getTotalCacheReadInputTokens()
+  const cacheCreation = getTotalCacheCreationInputTokens()
+  const directInput = getTotalInputTokens()
+  const totalInput = cacheRead + cacheCreation + directInput
+  if (totalInput > 0) {
+    const hitRate = (cacheRead / totalInput) * 100
+    const model = getDefaultMainLoopModelSetting()
+    const costs = getModelCosts(model, {
+      input_tokens: 0,
+      output_tokens: 0,
+    } as Usage)
+    const savings =
+      (cacheRead / 1_000_000) *
+      (costs.inputTokens - costs.promptCacheReadTokens)
+    cacheStatsDisplay =
+      `\nCache hit rate:         ${hitRate.toFixed(1)}% (${formatNumber(cacheRead)} / ${formatNumber(totalInput)} input tokens)` +
+      `\nCache savings:         ${formatCost(savings)}`
+  }
+
   return chalk.dim(
     `Total cost:            ${costDisplay}\n` +
       `Total duration (API):  ${formatDuration(getTotalAPIDuration())}
 Total duration (wall): ${formatDuration(getTotalDuration())}
 Total code changes:    ${getTotalLinesAdded()} ${getTotalLinesAdded() === 1 ? 'line' : 'lines'} added, ${getTotalLinesRemoved()} ${getTotalLinesRemoved() === 1 ? 'line' : 'lines'} removed
-${modelUsageDisplay}`,
+${modelUsageDisplay}${cacheStatsDisplay}`,
   )
 }
 

diff --git a/src/costHook.ts b/src/costHook.ts
@@ -1,17 +1,14 @@
 import { useEffect } from 'react'
 import { formatTotalCost, saveCurrentSessionCosts } from './cost-tracker.js'
-import { hasConsoleBillingAccess } from './utils/billing.js'
 import type { FpsMetrics } from './utils/fpsTracker.js'
 
 export function useCostSummary(
   getFpsMetrics?: () => FpsMetrics | undefined,
 ): void {
   useEffect(() => {
     const f = () => {
-      if (hasConsoleBillingAccess()) {
-        process.stdout.write('\n' + formatTotalCost() + '\n')
-      }
-
+      // DeepSeek: there's no Anthropic console billing tier; always print.
+      process.stdout.write('\n' + formatTotalCost() + '\n')
       saveCurrentSessionCosts(getFpsMetrics?.())
     }
     process.on('exit', f)

diff --git a/src/memdir/paths.ts b/src/memdir/paths.ts
@@ -51,7 +51,12 @@ export function isAutoMemoryEnabled(): boolean {
   if (settings.autoMemoryEnabled !== undefined) {
     return settings.autoMemoryEnabled
   }
-  return true
+  // DeepSeek branch default: off. The auto-memory section injects ~3145
+  // fixed tokens into every system prompt (a 32% surcharge on a minimal
+  // -p call). Users who want it can flip it in /config -> Auto-memory or
+  // set autoMemoryEnabled: true in settings.json (or set
+  // CLAUDE_CODE_DISABLE_AUTO_MEMORY=0).
+  return false
 }
 
 /**

diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx
@@ -2207,7 +2207,9 @@ export function REPL({
   };
   useEffect(() => {
     const totalCost = getTotalCost();
-    if (totalCost >= 5 /* $5 */ && !showCostDialog && !haveShownCostDialog) {
+    // DeepSeek: pricing is in CNY; raise the threshold to roughly match the
+    // user-perceived "5 USD" notification level (5 USD × ~7 CNY/USD = 35 CNY).
+    if (totalCost >= 35 && !showCostDialog && !haveShownCostDialog) {
       logEvent('tengu_cost_threshold_reached', {});
       // Mark as shown even if the dialog won't render (no console billing
       // access). Otherwise this effect re-fires on every message change for