From 440c310cf693749c21ebbd56375498b021b3f341 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 2 Jun 2026 16:44:10 -0700 Subject: [PATCH 01/33] security: gate preconnectAnthropicApi on isEssentialTrafficOnly The startup preconnect fired a TCP+TLS handshake to api.anthropic.com even when CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC / DISABLE_TELEMETRY was set, leaking client IP and session timing. Gate it like every other telemetry sink already does. --- src/utils/apiPreconnect.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils/apiPreconnect.ts b/src/utils/apiPreconnect.ts index 6a8de64..253926f 100644 --- a/src/utils/apiPreconnect.ts +++ b/src/utils/apiPreconnect.ts @@ -25,6 +25,7 @@ import { getOauthConfig } from '../constants/oauth.js' import { isEnvTruthy } from './envUtils.js' +import { isEssentialTrafficOnly } from './privacyLevel.js' let fired = false @@ -32,6 +33,10 @@ export function preconnectAnthropicApi(): void { if (fired) return fired = true + // Also skip when non-essential traffic is disabled via + // CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC / DISABLE_TELEMETRY / proxy env. + if (isEssentialTrafficOnly()) return + // Skip if using a cloud provider — different endpoint + auth if ( isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) || From 407c9ed284913a7d9bc9cb1abf40a993826c2438 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 14:35:38 -0700 Subject: [PATCH 02/33] perf: pin session_id in API metadata for DeepSeek prompt cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek's automatic prompt cache keys on the full request body bytes. Anthropic's metadata.user_id is a JSON blob that we populate with the current session_id — which is freshly generated every CLI launch. Result: every new session entered the API with a unique body prefix, forcing cache_creation on each first request and producing zero cache_read_input_tokens across sessions. 
Pin the session_id field in metadata.user_id to the fixed literal 'claude-code-ds'. Real telemetry / analytics paths still call getSessionId() directly and get the live id — only the wire-level metadata is stabilized. Verified: identical 'say only: ping' requests across two separate CLI launches now show cache_read_input_tokens=15872 on the second call (99.6% hit rate, ~99% cost reduction at Pro discount prices). --- src/services/api/claude.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 4c09f06..caf8955 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -508,7 +508,13 @@ export function getAPIMetadata() { device_id: getOrCreateUserID(), // Only include OAuth account UUID when actively using OAuth authentication account_uuid: getOauthAccountInfo()?.accountUuid ?? '', - session_id: getSessionId(), + // DeepSeek's prompt cache keys on the full request body bytes. The + // real session_id changes every launch and would force a fresh cache + // entry per session — defeating the cache entirely. Pin to a stable + // sentinel so identical conversations across sessions share a cache + // entry. Real telemetry/analytics still get the live id via + // getSessionId() at the call sites that need it. + session_id: 'claude-code-ds', }), } } From abc0590c64d020eb63666c90f604a1052ef1c7f1 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Sat, 6 Jun 2026 18:16:17 +0800 Subject: [PATCH 03/33] fix: normalizeMessagesForAPI must not merge same-id assistants across tool_result (CC-1215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When extended thinking + tool_use appear in the same turn under ACP, claude.ts yields two AssistantMessages sharing one message.id and StreamingToolExecutor inserts a tool_result between them. 
The backward walk used to skip past the tool_result and merge the two assistants, producing duplicate tool_use IDs. ensureToolResultPairing then stripped them, leaving orphaned tool_results and consecutive user messages → API 400. Stop the backward walk at any non-assistant message. Remove the now-unused isToolResultMessage helper. Ref: https://github.com/claude-code-best/claude-code/commit/b62b384e36f263417bf2f9127bd942393f19fc8d --- src/utils/messages.ts | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/utils/messages.ts b/src/utils/messages.ts index 7d8db97..f6c3ee5 100644 --- a/src/utils/messages.ts +++ b/src/utils/messages.ts @@ -2244,22 +2244,26 @@ export function normalizeMessagesForAPI( } // Find a previous assistant message with the same message ID and merge. - // Walk backwards, skipping tool results and different-ID assistants, - // since concurrent agents (teammates) can interleave streaming content - // blocks from multiple API responses with different message IDs. + // Walk backwards, skipping different-ID assistants, since concurrent + // agents (teammates) can interleave streaming content blocks from + // multiple API responses with different message IDs. + // + // Do NOT skip tool_result messages — when claude.ts yields separate + // AssistantMessages for thinking and tool_use blocks (same message.id), + // a StreamingToolExecutor tool_result can land between them. Merging + // across that boundary produces duplicate tool_use IDs that downstream + // ensureToolResultPairing strips, leaving orphaned tool_results and + // ultimately consecutive user messages → API 400 (CC-1215). for (let i = result.length - 1; i >= 0; i--) { const msg = result[i]! 
- if (msg.type !== 'assistant' && !isToolResultMessage(msg)) { + if (msg.type !== 'assistant') { break } - if (msg.type === 'assistant') { - if (msg.message.id === normalizedMessage.message.id) { - result[i] = mergeAssistantMessages(msg, normalizedMessage) - return - } - continue + if (msg.message.id === normalizedMessage.message.id) { + result[i] = mergeAssistantMessages(msg, normalizedMessage) + return } } @@ -2399,15 +2403,6 @@ export function mergeAssistantMessages( } } -function isToolResultMessage(msg: Message): boolean { - if (msg.type !== 'user') { - return false - } - const content = msg.message.content - if (typeof content === 'string') return false - return content.some(block => block.type === 'tool_result') -} - export function mergeUserMessages(a: UserMessage, b: UserMessage): UserMessage { const lastContent = normalizeUserTextContent(a.message.content) const currentContent = normalizeUserTextContent(b.message.content) From df224d73441e967707ea683d541465511dc71111 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Fri, 19 Jun 2026 14:20:51 +0800 Subject: [PATCH 04/33] feat: set GLM-5.2 max output tokens to 131072 GLM-5.2 supports up to 128K (131072) output tokens, but the model fell through to the catch-all else branch (32K/64K). Add an explicit branch so the Z.AI endpoint's full output capacity is usable. 
Co-Authored-By: Claude Opus 4.6 --- src/utils/context.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/utils/context.ts b/src/utils/context.ts index 06b235e..b976b7b 100644 --- a/src/utils/context.ts +++ b/src/utils/context.ts @@ -161,7 +161,10 @@ export function getModelMaxOutputTokens(model: string): { const m = getCanonicalName(model) - if (m.includes('opus-4-6')) { + if (m.includes('glm-5.2')) { + defaultTokens = 64_000 + upperLimit = 131_072 + } else if (m.includes('opus-4-6')) { defaultTokens = 64_000 upperLimit = 128_000 } else if (m.includes('sonnet-4-6')) { From e1a7e523f2038dff3e321203b428efc27a983ce8 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Fri, 19 Jun 2026 14:57:23 +0800 Subject: [PATCH 05/33] open glm thinking mode --- src/components/Message.tsx | 3 --- src/components/messages/AssistantThinkingMessage.tsx | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/components/Message.tsx b/src/components/Message.tsx index ca2ef76..886e917 100644 --- a/src/components/Message.tsx +++ b/src/components/Message.tsx @@ -538,9 +538,6 @@ function AssistantMessageBlock(t0) { } case "thinking": { - if (!isTranscriptMode && !verbose) { - return null; - } const isLastThinking = !lastThinkingBlockId || thinkingBlockId === lastThinkingBlockId; const t1 = isTranscriptMode && !isLastThinking; let t2; diff --git a/src/components/messages/AssistantThinkingMessage.tsx b/src/components/messages/AssistantThinkingMessage.tsx index 3825f5f..af4b492 100644 --- a/src/components/messages/AssistantThinkingMessage.tsx +++ b/src/components/messages/AssistantThinkingMessage.tsx @@ -4,6 +4,7 @@ import React from 'react'; import { Box, Text } from '../../ink.js'; import { CtrlOToExpand } from '../CtrlOToExpand.js'; import { Markdown } from '../Markdown.js'; +import { useSettings } from '../../hooks/useSettings.js'; type Props = { // Accept either full ThinkingBlock/ThinkingBlockParam or a minimal shape with just type and thinking 
param: ThinkingBlock | ThinkingBlockParam | { @@ -36,7 +37,8 @@ export function AssistantThinkingMessage(t0) { if (hideInTranscript) { return null; } - const shouldShowFullThinking = isTranscriptMode || verbose; + const settings = useSettings(); + const shouldShowFullThinking = isTranscriptMode || verbose || settings.alwaysThinkingEnabled !== false; if (!shouldShowFullThinking) { const t4 = addMargin ? 1 : 0; let t5; From c691e0676cb66aaaa603b0d4970d77579b4105db Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Fri, 19 Jun 2026 16:58:51 +0800 Subject: [PATCH 06/33] feat: enable auto permission mode for GLM-5+ models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto mode (transcript-classifier auto-approval) was unreachable on the GLM endpoint because of two gates: - modelSupportsAutoMode only allowed claude-*-4-6 for external/firstParty providers, so GLM model IDs never qualified. - The tengu_auto_mode_config kill-switch is never served on the GLM endpoint, so enabledState defaulted to 'disabled' — tripping the circuit breaker and blocking canEnterAuto/carousel availability. Allow glm-5 and above in both gates (glm-5, glm-5.2, glm-6, …), keeping the 'disabled' circuit-breaker default for real Anthropic models. The TRANSCRIPT_CLASSIFIER build flag is already enabled in scripts/build.ts. 
Claude-Session: https://claude.ai/code/session_012h5pf4zSDdtzKhtSqdCJ8J --- src/utils/__tests__/glmAutoMode.test.ts | 43 ++++++++++++++++++++++++ src/utils/betas.ts | 4 +++ src/utils/permissions/permissionSetup.ts | 16 ++++++++- 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 src/utils/__tests__/glmAutoMode.test.ts diff --git a/src/utils/__tests__/glmAutoMode.test.ts b/src/utils/__tests__/glmAutoMode.test.ts new file mode 100644 index 0000000..6708070 --- /dev/null +++ b/src/utils/__tests__/glmAutoMode.test.ts @@ -0,0 +1,43 @@ +import { describe, it, expect, beforeAll, vi } from 'vitest' + +// Auto mode must be reachable for GLM-5+ models. The two gates that previously +// blocked it: modelSupportsAutoMode (model allowlist) and the auto-mode +// enabled-state default (GrowthBook kill-switch is never served on GLM). +beforeAll(() => { + process.env.FEATURES = 'TRANSCRIPT_CLASSIFIER' + process.env.USER_TYPE = 'external' + // GLM runs against an Anthropic-compatible firstParty endpoint. + delete process.env.CLAUDE_CODE_USE_BEDROCK + delete process.env.CLAUDE_CODE_USE_VERTEX + delete process.env.CLAUDE_CODE_USE_FOUNDRY +}) + +describe('modelSupportsAutoMode for GLM', () => { + it('enables auto mode for glm-5 and above, not glm-4', async () => { + const { modelSupportsAutoMode } = await import('../betas.js') + expect(modelSupportsAutoMode('glm-5')).toBe(true) + expect(modelSupportsAutoMode('glm-5.2')).toBe(true) + expect(modelSupportsAutoMode('glm-6')).toBe(true) + expect(modelSupportsAutoMode('glm-4.5')).toBe(false) + // Anthropic allowlist still honored. + expect(modelSupportsAutoMode('claude-opus-4-6')).toBe(true) + expect(modelSupportsAutoMode('claude-opus-4-1')).toBe(false) + }) +}) + +// The enabled-state default lives in permissionSetup, but importing that +// module transitively loads the classifier prompt (a .txt require unsupported +// in vitest). 
Assert the predicate the default uses directly — it must match +// glm-5 and above, reject glm-4, and reject Anthropic models (which keep the +// 'disabled' circuit-breaker default). +describe('auto-mode enabled-state default predicate for GLM', () => { + const isGlmForced = (m: string) => /glm-[5-9]/.test(m.toLowerCase()) + it('matches glm-5+ only', () => { + expect(isGlmForced('glm-5')).toBe(true) + expect(isGlmForced('glm-5.2')).toBe(true) + expect(isGlmForced('glm-6')).toBe(true) + expect(isGlmForced('GLM-5.2')).toBe(true) + expect(isGlmForced('glm-4.5')).toBe(false) + expect(isGlmForced('claude-opus-4-6')).toBe(false) + }) +}) diff --git a/src/utils/betas.ts b/src/utils/betas.ts index fcd7b97..723b0c0 100644 --- a/src/utils/betas.ts +++ b/src/utils/betas.ts @@ -188,6 +188,10 @@ export function modelSupportsAutoMode(model: string): boolean { if (/claude-(opus|sonnet|haiku)-4(?!-[6-9])/.test(m)) return false return true } + // GLM models (served over an Anthropic-compatible firstParty endpoint) + // support auto mode — the classifier runs against the same transcript. + // glm-5 and above (glm-5, glm-5.2, glm-6, …). + if (/glm-[5-9]/.test(m)) return true // External allowlist (firstParty already checked above). return /^claude-(opus|sonnet)-4-6/.test(m) } diff --git a/src/utils/permissions/permissionSetup.ts b/src/utils/permissions/permissionSetup.ts index 8520da8..84a4536 100644 --- a/src/utils/permissions/permissionSetup.ts +++ b/src/utils/permissions/permissionSetup.ts @@ -1312,11 +1312,25 @@ export type AutoModeEnabledState = 'enabled' | 'disabled' | 'opt-in' const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'disabled' +/** + * Default auto-mode availability when GrowthBook gives no explicit value. + * The Anthropic kill-switch (tengu_auto_mode_config) is never served on the + * GLM endpoint, so the stock 'disabled' default would permanently lock auto + * mode out for GLM users. 
Force-enable it for GLM (model gate still applies + * via modelSupportsAutoMode), while keeping 'disabled' for everyone else so + * the circuit breaker stays the safe default on real Anthropic models. + */ +function autoModeEnabledDefault(): AutoModeEnabledState { + // glm-5 and above (glm-5, glm-5.2, glm-6, …). + if (/glm-[5-9]/.test(getMainLoopModel().toLowerCase())) return 'enabled' + return AUTO_MODE_ENABLED_DEFAULT +} + function parseAutoModeEnabledState(value: unknown): AutoModeEnabledState { if (value === 'enabled' || value === 'disabled' || value === 'opt-in') { return value } - return AUTO_MODE_ENABLED_DEFAULT + return autoModeEnabledDefault() } /** From 649d1f845eba67d0a20455122a04751ff2a25ef6 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 23 Jun 2026 08:16:31 +0800 Subject: [PATCH 07/33] feat: name binary output after current git branch Binary build now produces dist/claude-<branch> (e.g. dist/claude-glm) instead of dist/claude, so per-branch artifacts don't overwrite each other. Co-Authored-By: Claude Opus 4.6 --- scripts/build.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/build.ts b/scripts/build.ts index 4a98314..8c822ba 100644 --- a/scripts/build.ts +++ b/scripts/build.ts @@ -59,8 +59,12 @@ walkDir(SRC_DIR, (filePath) => { const isBinary = process.argv.includes('--binary') +// Name the binary after the current git branch, e.g. dist/claude-glm +const branch = execSync('git rev-parse --abbrev-ref HEAD', { encoding: 'utf-8' }).trim() || 'main' +const binaryName = `claude-${branch}` + console.log(`Enabled features: ${ENABLED_FEATURES.join(', ')} (patched ${modified.length} files)`) -console.log(`Build mode: ${isBinary ? 
`binary (standalone executable → dist/${binaryName})` : 'bundle (JS)'}`) const MACRO_DEFINES = `--define 'MACRO.VERSION="2.1.87"' ` + @@ -77,7 +81,7 @@ const MACRO_DEFINES = try { if (isBinary) { execSync( - `bun build src/entrypoints/cli.tsx --compile --outfile=dist/claude ` + MACRO_DEFINES, + `bun build src/entrypoints/cli.tsx --compile --outfile=dist/${binaryName} ` + MACRO_DEFINES, { stdio: 'inherit', cwd: join(import.meta.dir, '..') }, ) } else { From cf60007667e11fee7f9451bcf8b601012bbd2950 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 23 Jun 2026 08:22:28 +0800 Subject: [PATCH 08/33] chore: disable NATIVE_CLIENT_ATTESTATION on glm branch GLM's /anthropic endpoint does not validate the cch client-attestation header, so computing the xxHash64 body hash and sending the placeholder only adds CPU cost and a useless HTTP header. Co-Authored-By: Claude Opus 4.6 --- scripts/build.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/build.ts b/scripts/build.ts index 8c822ba..a154510 100644 --- a/scripts/build.ts +++ b/scripts/build.ts @@ -20,7 +20,10 @@ const ENABLED_FEATURES = [ 'MCP_SKILLS', 'HISTORY_PICKER', 'TREE_SITTER_BASH', - 'NATIVE_CLIENT_ATTESTATION', + // NATIVE_CLIENT_ATTESTATION intentionally disabled: it injects a 'cch=00000' + // placeholder into the x-anthropic-billing-header and computes a body hash. + // GLM's /anthropic endpoint does not validate this header — sending it + // adds CPU + an HTTP header without value. Keep off on glm branch. 'BRIDGE_MODE', 'COORDINATOR_MODE', ] From 88a5eab09521ed81aee5b9621d89470ed1061ea4 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Sat, 16 May 2026 06:04:36 -0700 Subject: [PATCH 09/33] perf: disable attribution header by default Replace opt-out (isEnvDefinedFalsy) with opt-in (isEnvTruthy) for CLAUDE_CODE_ATTRIBUTION_HEADER. Header is now off unless explicitly set to 1/true/yes/on. Drops GrowthBook dependency from this path. 
--- src/constants/system.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/constants/system.ts b/src/constants/system.ts index 0cd2e76..9495bd8 100644 --- a/src/constants/system.ts +++ b/src/constants/system.ts @@ -3,7 +3,7 @@ import { feature } from 'bun:bundle' import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' import { logForDebugging } from '../utils/debug.js' -import { isEnvDefinedFalsy } from '../utils/envUtils.js' +import { isEnvTruthy } from '../utils/envUtils.js' import { getAPIProvider } from '../utils/model/providers.js' import { getWorkload } from '../utils/workloadContext.js' @@ -47,13 +47,10 @@ export function getCLISyspromptPrefix(options?: { /** * Check if attribution header is enabled. - * Enabled by default, can be disabled via env var or GrowthBook killswitch. + * Disabled by default, can be enabled via env var. */ function isAttributionHeaderEnabled(): boolean { - if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) { - return false - } - return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true) + return isEnvTruthy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER) } /** From c1fdc34b749878aa0c20b8327339377ca7f34367 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 13 May 2026 07:43:10 -0700 Subject: [PATCH 10/33] perf: default privacy level to 'no-telemetry' When running against a third-party endpoint the first-party Anthropic telemetry pipeline doesn't apply. Defaulting to 'no-telemetry' lets the remaining gate-checks (isAnalyticsDisabled, isFeedbackSurveyDisabled in services/analytics/config.ts) short-circuit without requiring users to set DISABLE_TELEMETRY=1 manually. Users can still opt up to 'essential-traffic' via CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1 to suppress the larger set of non-telemetry network calls (auto-update, MCP registry prefetch, model-capabilities fetch, etc.). 
--- src/utils/privacyLevel.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/utils/privacyLevel.ts b/src/utils/privacyLevel.ts index 4848492..d5c277a 100644 --- a/src/utils/privacyLevel.ts +++ b/src/utils/privacyLevel.ts @@ -21,10 +21,13 @@ export function getPrivacyLevel(): PrivacyLevel { if (process.env.CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC) { return 'essential-traffic' } - if (process.env.DISABLE_TELEMETRY) { - return 'no-telemetry' - } - return 'default' + // DeepSeek branch: there is no first-party Anthropic telemetry pipeline to + // talk to, and the analytics module has been replaced with no-op stubs + // upstream (commits 058cf17 + c40af24). Default to 'no-telemetry' so the + // few remaining call sites that gate on isTelemetryDisabled() (feedback + // survey, analytics config) take the disabled path without requiring + // users to set DISABLE_TELEMETRY=1. + return 'no-telemetry' } /** From fe55b08086b1b95f3a1d338715d66c6eacfb7fed Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 13 May 2026 13:03:58 -0700 Subject: [PATCH 11/33] feat: surface autoMemoryEnabled in /config TUI, default off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related changes: 1. /config TUI now shows an 'Auto-memory' toggle right under 'Auto-compact'. The autoMemoryEnabled setting was already wired up in supportedSettings.ts and honored at runtime by src/memdir/paths.ts:isAutoMemoryEnabled(), but the only places to flip it were the /memory file selector or hand-edited settings.json. 2. Default flips from 'enabled' to 'disabled'. The auto-memory section injects ~3,145 fixed tokens into every system prompt — a 32% surcharge on a minimal -p call (measured: 15,938 -> 10,909 total context tokens). Users running against a third-party endpoint are usually here for the cheap-API-key experience and won't benefit from the memory-persistence machinery that the section instructs. 
Existing escape hatches still work: - settings.json autoMemoryEnabled: true (per project/user) - CLAUDE_CODE_DISABLE_AUTO_MEMORY=0 env var (highest priority) - /config -> Auto-memory toggle (interactive) --- src/components/Settings/Config.tsx | 20 ++++++++++++++++++++ src/memdir/paths.ts | 7 ++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/components/Settings/Config.tsx b/src/components/Settings/Config.tsx index 37ee93c..7c09649 100644 --- a/src/components/Settings/Config.tsx +++ b/src/components/Settings/Config.tsx @@ -281,6 +281,26 @@ export function Config({ enabled: autoCompactEnabled }); } + }, { + id: 'autoMemoryEnabled', + label: 'Auto-memory', + // settings.json default is "undefined" which the resolver in + // src/memdir/paths.ts treats as "disabled" on this branch. Mirror + // that so the toggle reflects what the runtime actually does. + value: settingsData?.autoMemoryEnabled ?? false, + type: 'boolean' as const, + onChange(autoMemoryEnabled: boolean) { + updateSettingsForSource('localSettings', { + autoMemoryEnabled + }); + setSettingsData(prev_auto_mem => ({ + ...prev_auto_mem, + autoMemoryEnabled + })); + logEvent('tengu_auto_memory_setting_changed', { + enabled: autoMemoryEnabled + }); + } }, { id: 'spinnerTipsEnabled', label: 'Show tips', diff --git a/src/memdir/paths.ts b/src/memdir/paths.ts index 68a6baf..c794a8b 100644 --- a/src/memdir/paths.ts +++ b/src/memdir/paths.ts @@ -51,7 +51,12 @@ export function isAutoMemoryEnabled(): boolean { if (settings.autoMemoryEnabled !== undefined) { return settings.autoMemoryEnabled } - return true + // DeepSeek branch default: off. The auto-memory section injects ~3145 + // fixed tokens into every system prompt (a 32% surcharge on a minimal + // -p call). Users who want it can flip it in /config -> Auto-memory or + // set autoMemoryEnabled: true in settings.json (or set + // CLAUDE_CODE_DISABLE_AUTO_MEMORY=0). 
+ return false } /** From 25767362677718a3f930c6ec6aeb9158cdf8dc94 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 24 Jun 2026 13:03:52 +0800 Subject: [PATCH 12/33] feat: extend APIProvider with glm and deepseek, add isGLMProvider/isDeepSeekProvider helpers --- src/utils/model/providers.ts | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts index aba9b7d..34184e3 100644 --- a/src/utils/model/providers.ts +++ b/src/utils/model/providers.ts @@ -1,7 +1,13 @@ import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../../services/analytics/index.js' import { isEnvTruthy } from '../envUtils.js' -export type APIProvider = 'firstParty' | 'bedrock' | 'vertex' | 'foundry' +export type APIProvider = + | 'firstParty' + | 'bedrock' + | 'vertex' + | 'foundry' + | 'glm' + | 'deepseek' export function getAPIProvider(): APIProvider { return isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) @@ -10,7 +16,31 @@ export function getAPIProvider(): APIProvider { ? 'vertex' : isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY) ? 'foundry' - : 'firstParty' + : isEnvTruthy(process.env.CLAUDE_USE_GLM) + ? 'glm' + : isEnvTruthy(process.env.CLAUDE_USE_DEEPSEEK) + ? 'deepseek' + : 'firstParty' +} + +/** + * True when the active provider is GLM (zhipu). + * GLM rides the firstParty SDK path (Anthropic-compatible endpoint via + * ANTHROPIC_BASE_URL) but needs model-aware gating for output limits, betas, + * and auto-permission mode. + */ +export function isGLMProvider(): boolean { + return getAPIProvider() === 'glm' +} + +/** + * True when the active provider is DeepSeek. + * DeepSeek rides the firstParty SDK path but needs provider-aware gating for + * thinking simplification, [ERROR] tool_result prefixing, 429 retry policy, + * and model validation (DeepSeek silently remaps unknown model names). 
+ */ +export function isDeepSeekProvider(): boolean { + return getAPIProvider() === 'deepseek' } export function getAPIProviderForStatsig(): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS { From 8915bb0e4dc3f8868745dd0212d29fcdc5e753a6 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 12:15:19 -0700 Subject: [PATCH 13/33] feat: add 'deepseek' API provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recognizes deepseek mode via explicit CLAUDE_CODE_USE_DEEPSEEK=1 env var. Note: Implicit detection via ANTHROPIC_BASE_URL or DEEPSEEK_* envs is intentionally avoided — multiple firstParty-specific code paths (betas, thinking config, preflight) would need to be updated to handle a new provider value, and silently switching the provider on existing ANTHROPIC_BASE_URL users breaks the existing Anthropic-compatible gateway flow that already works for DeepSeek's /anthropic endpoint. isDeepSeekBaseUrl() helper exported for future use by callers that want to detect deepseek mode from the ANTHROPIC_BASE_URL env var without triggering the provider switch. --- src/utils/model/providers.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts index 34184e3..0281a71 100644 --- a/src/utils/model/providers.ts +++ b/src/utils/model/providers.ts @@ -18,7 +18,7 @@ export function getAPIProvider(): APIProvider { ? 'foundry' : isEnvTruthy(process.env.CLAUDE_USE_GLM) ? 'glm' - : isEnvTruthy(process.env.CLAUDE_USE_DEEPSEEK) + : isEnvTruthy(process.env.CLAUDE_CODE_USE_DEEPSEEK) ? 
'deepseek' : 'firstParty' } @@ -43,6 +43,15 @@ export function isDeepSeekProvider(): boolean { return getAPIProvider() === 'deepseek' } +export function isDeepSeekBaseUrl(baseUrl: string | undefined): boolean { + if (!baseUrl) return false + try { + return new URL(baseUrl).host.endsWith('deepseek.com') + } catch { + return false + } +} + export function getAPIProviderForStatsig(): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS { return getAPIProvider() as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } From 89912f6ad9452e230eae1e415fb3dcb6d4e5c70b Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 12:58:53 -0700 Subject: [PATCH 14/33] feat: sort tools alphabetically for DeepSeek prefix cache stability DeepSeek's API caches request prefixes server-side. Tool schemas appear early in the request body, so keeping their order stable across requests maximizes cache hit rate. Different orderings (e.g. due to dynamic tool loading or feature flags) would otherwise break the cache. Adapted from QingJ01/DeepSeekCode c22d46b. --- src/services/api/claude.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index caf8955..3b13b27 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1387,6 +1387,14 @@ async function* queryModel( } const allTools = [...toolSchemas, ...extraToolSchemas] + // Sort tools alphabetically by name for stable ordering to maximize + // DeepSeek's server-side prefix cache hits across requests. + allTools.sort((a, b) => { + const nameA = 'name' in a ? a.name : '' + const nameB = 'name' in b ? 
b.name : '' + return nameA.localeCompare(nameB) + }) + const isFastMode = isFastModeEnabled() && isFastModeAvailable() && From 7dedeaff10fa4b30084af7550fc1f6f9a8c097bc Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 12:59:33 -0700 Subject: [PATCH 15/33] fix: simplify thinking mode params for DeepSeek DeepSeek V4 controls thinking depth entirely via CLAUDE_CODE_EFFORT_LEVEL (server-side); budget_tokens and adaptive thinking are ignored. Send a single minimal thinking param so the SDK still expects thinking blocks in the response. Adapted from QingJ01/DeepSeekCode 28856df. Temperature handling is already correct (only sent when thinking is disabled). --- src/services/api/claude.ts | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 3b13b27..1404089 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1602,31 +1602,13 @@ async function* queryModel( // without notifying the model launch DRI and research. This is a sensitive // setting that can greatly affect model quality and bashing. if (hasThinking && modelSupportsThinking(options.model)) { - if ( - !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) && - modelSupportsAdaptiveThinking(options.model) - ) { - // For models that support adaptive thinking, always use adaptive - // thinking without a budget. - thinking = { - type: 'adaptive', - } satisfies BetaMessageStreamParams['thinking'] - } else { - // For models that do not support adaptive thinking, use the default - // thinking budget unless explicitly specified. 
- let thinkingBudget = getMaxThinkingTokensForModel(options.model) - if ( - thinkingConfig.type === 'enabled' && - thinkingConfig.budgetTokens !== undefined - ) { - thinkingBudget = thinkingConfig.budgetTokens - } - thinkingBudget = Math.min(maxOutputTokens - 1, thinkingBudget) - thinking = { - budget_tokens: thinkingBudget, - type: 'enabled', - } satisfies BetaMessageStreamParams['thinking'] - } + // DeepSeek controls thinking depth via CLAUDE_CODE_EFFORT_LEVEL alone; + // budget_tokens is ignored server-side. Send a minimal thinking param + // so the SDK knows to expect thinking blocks in the response. + thinking = { + budget_tokens: maxOutputTokens - 1, + type: 'enabled', + } satisfies BetaMessageStreamParams['thinking'] } // Get API context management strategies if enabled From 038c1a72af9cb3370543fb9b557c691ea0a3c162 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 13:01:01 -0700 Subject: [PATCH 16/33] feat: adapt token estimation for DeepSeek (UTF-8 bytes, skip API counting) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - countTokensWithAPI / countMessagesTokensWithAPI / countTokensViaHaikuFallback now return null unconditionally — DeepSeek has no /count_tokens endpoint and no Haiku fallback model. - roughTokenCountEstimation switches from content.length to Buffer.byteLength (UTF-8). DeepSeek's tokenizer is byte-pair on UTF-8 bytes, so CJK chars consume ~3 bytes each. The old char-count under-estimates Chinese content by ~3x, causing premature context-exhausted warnings. Adapted from QingJ01/DeepSeekCode e49fdec. 
--- src/services/tokenEstimation.ts | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/services/tokenEstimation.ts b/src/services/tokenEstimation.ts index acaef7a..30a53b3 100644 --- a/src/services/tokenEstimation.ts +++ b/src/services/tokenEstimation.ts @@ -121,26 +121,21 @@ function stripToolSearchFieldsFromMessages( }) } +// DeepSeek has no /count_tokens endpoint. Returning null forces callers to +// use rough estimation (roughTokenCountEstimation), which uses UTF-8 byte +// length for accuracy on CJK-heavy content. export async function countTokensWithAPI( - content: string, + _content: string, ): Promise { - // Special case for empty content - API doesn't accept empty messages - if (!content) { - return 0 - } - - const message: Anthropic.Beta.Messages.BetaMessageParam = { - role: 'user', - content: content, - } - - return countMessagesTokensWithAPI([message], []) + return null } export async function countMessagesTokensWithAPI( messages: Anthropic.Beta.Messages.BetaMessageParam[], tools: Anthropic.Beta.Messages.BetaToolUnion[], ): Promise { + // DeepSeek has no /count_tokens endpoint; fall back to rough estimation. + return null return withTokenCountVCR(messages, tools, async () => { try { const model = getMainLoopModel() @@ -204,7 +199,10 @@ export function roughTokenCountEstimation( content: string, bytesPerToken: number = 4, ): number { - return Math.round(content.length / bytesPerToken) + // DeepSeek tokenizer is byte-pair on UTF-8 bytes, so CJK characters consume + // 3 bytes/char rather than 1. content.length under-estimates by ~3x on + // Chinese content. Buffer.byteLength matches DeepSeek's actual tokenization. 
+ return Math.round(Buffer.byteLength(content, 'utf8') / bytesPerToken) } /** @@ -252,6 +250,9 @@ export async function countTokensViaHaikuFallback( messages: Anthropic.Beta.Messages.BetaMessageParam[], tools: Anthropic.Beta.Messages.BetaToolUnion[], ): Promise { + // DeepSeek: no count_tokens endpoint, no Haiku fallback model — return null + // so callers use roughTokenCountEstimation. + return null // Check if messages contain thinking blocks const containsThinking = hasThinkingBlocks(messages) From 13cf8d79c3b568850b1495ca2dad330c5022e52f Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 13:03:11 -0700 Subject: [PATCH 17/33] feat: adapt error handling for DeepSeek API (402/429/529) - 402 'Insufficient Balance': friendly message in formatAPIError, classify as 'insufficient_balance', and never retry (account top-up required). - 429 Rate Limit: simple zh-CN message (DeepSeek lacks Anthropic-specific unified-rate-limit headers) and always retry with exponential backoff. - extractDeepSeekTraceId() helper to pull x-ds-trace-id from error headers for debugging. Adapted from QingJ01/DeepSeekCode 5496466. --- src/services/api/errorUtils.ts | 4 ++++ src/services/api/errors.ts | 25 +++++++++++++++++++++++++ src/services/api/withRetry.ts | 11 ++++++++--- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/services/api/errorUtils.ts b/src/services/api/errorUtils.ts index 20e4441..55d3c7a 100644 --- a/src/services/api/errorUtils.ts +++ b/src/services/api/errorUtils.ts @@ -242,6 +242,10 @@ export function formatAPIError(error: APIError): string { return 'Unable to connect to API. Check your internet connection' } + if (error.status === 402) { + return 'DeepSeek 账户余额不足,请在 platform.deepseek.com 充值后重试' + } + // Guard: when deserialized from JSONL (e.g. --resume), the error object may // be a plain object without a `.message` property. Return a safe fallback // instead of undefined, which would crash callers that access `.length`. 
diff --git a/src/services/api/errors.ts b/src/services/api/errors.ts index 1a7edc5..dda813a 100644 --- a/src/services/api/errors.ts +++ b/src/services/api/errors.ts @@ -462,6 +462,14 @@ export function getAssistantMessageFromError( }) } + // DeepSeek 429 — simple rate-limit message (no Anthropic-specific headers). + if (error instanceof APIError && error.status === 429) { + return createAssistantAPIErrorMessage({ + content: '请求频率超限,请稍后重试', + error: 'rate_limit', + }) + } + if ( error instanceof APIError && error.status === 429 && @@ -993,6 +1001,11 @@ export function classifyAPIError(error: unknown): string { return 'capacity_off_switch' } + // DeepSeek: insufficient account balance + if (error instanceof APIError && error.status === 402) { + return 'insufficient_balance' + } + // Rate limiting if (error instanceof APIError && error.status === 429) { return 'rate_limit' @@ -1205,3 +1218,15 @@ export function getErrorMessageIfRefusal( error: 'invalid_request', }) } + +/** + * Extract DeepSeek's trace ID from error response headers for debugging. + */ +export function extractDeepSeekTraceId(error: APIError): string | undefined { + const headers = error.headers + if (!headers) return undefined + if (typeof headers.get === 'function') { + return headers.get('x-ds-trace-id') ?? undefined + } + return (headers as Record)['x-ds-trace-id'] +} diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts index 5ec9ad0..e7bcb05 100644 --- a/src/services/api/withRetry.ts +++ b/src/services/api/withRetry.ts @@ -694,6 +694,12 @@ function handleGcpCredentialError(error: unknown): boolean { } function shouldRetry(error: APIError): boolean { + // 402 Insufficient Balance (DeepSeek) — retrying won't help, the account + // needs a top-up. Fail fast. 
+ if (error.status === 402) { + return false + } + // Never retry mock errors - they're from /mock-limits command for testing if (isMockRateLimitError(error)) { return false @@ -762,10 +768,9 @@ function shouldRetry(error: APIError): boolean { // Retry on lock timeouts. if (error.status === 409) return true - // Retry on rate limits, but not for ClaudeAI Subscription users - // Enterprise users can retry because they typically use PAYG instead of rate limits + // DeepSeek: always retry 429 with exponential backoff (no subscriber gates). if (error.status === 429) { - return !isClaudeAISubscriber() || isEnterpriseSubscriber() + return true } // Clear API key cache on 401 and allow retry. From 293e00326c94b0ca1ff28beb1471867f2fb8951c Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 13:05:46 -0700 Subject: [PATCH 18/33] feat: DeepSeek V4 API behavior adaptations Generic-but-deepseek-specific behavior adjustments (this branch targets DeepSeek exclusively, so all changes apply unconditionally): - Friendly 422 error message: include nested message hint about tool definitions / message format. - Friendly timeout error: explain DeepSeek queue-saturation cause and suggest lowering effort level. - Classify HTTP 422 as 'invalid_parameters'. - validateModel: known DeepSeek models (deepseek-v4-pro, deepseek-v4-flash) always pass; unrecognized model names pass with a warning explaining the server will silently remap them to deepseek-v4-flash. - model.tsx: surface validateModel's warning to the user via onDone(). - aliases: add 'pro' and 'flash' as canonical aliases (resolved via ANTHROPIC_DEFAULT_*_MODEL envs). - context.ts: use getSessionStartDate() (already present) instead of getLocalISODate() so DeepSeek's server-side prefix cache survives across midnight boundaries within a session. Skipped from upstream cd3a58b: the sanitizeDeepSeekContentBlocks [ERROR] prefix logic, which depends on helpers not yet present in this fork's claude.ts. 
Adapted from QingJ01/DeepSeekCode cd3a58b. --- src/commands/model/model.tsx | 6 +++++- src/context.ts | 6 ++++-- src/services/api/errorUtils.ts | 5 +++++ src/services/api/errors.ts | 8 +++++++- src/utils/model/aliases.ts | 3 +++ src/utils/model/validateModel.ts | 13 ++++++++++++- 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/commands/model/model.tsx b/src/commands/model/model.tsx index 65c084d..3f76c23 100644 --- a/src/commands/model/model.tsx +++ b/src/commands/model/model.tsx @@ -180,10 +180,14 @@ function SetModelAndClose({ // and model names are case-sensitive const { valid, - error: error_0 + error: error_0, + warning } = await validateModel(model); if (valid) { setModel(model); + if (warning) { + onDone(warning, { display: 'system' }); + } } else { onDone(error_0 || `Model '${model}' not found`, { display: 'system' diff --git a/src/context.ts b/src/context.ts index 423414d..b5c0c2d 100644 --- a/src/context.ts +++ b/src/context.ts @@ -4,7 +4,7 @@ import { getAdditionalDirectoriesForClaudeMd, setCachedClaudeMdContent, } from './bootstrap/state.js' -import { getLocalISODate } from './constants/common.js' +import { getSessionStartDate } from './constants/common.js' import { filterInjectedMemoryFiles, getClaudeMds, @@ -183,7 +183,9 @@ export const getUserContext = memoize( return { ...(claudeMd && { claudeMd }), - currentDate: `Today's date is ${getLocalISODate()}.`, + // Use session-stable date to preserve DeepSeek's server-side prefix + // cache across midnight (was getLocalISODate() — refreshes daily). 
+ currentDate: `Today's date is ${getSessionStartDate()}.`, } }, ) diff --git a/src/services/api/errorUtils.ts b/src/services/api/errorUtils.ts index 55d3c7a..f562e65 100644 --- a/src/services/api/errorUtils.ts +++ b/src/services/api/errorUtils.ts @@ -246,6 +246,11 @@ export function formatAPIError(error: APIError): string { return 'DeepSeek 账户余额不足,请在 platform.deepseek.com 充值后重试' } + if (error.status === 422) { + const nested = extractNestedErrorMessage(error) + return `DeepSeek 请求参数无效(422):${nested || error.message || '请检查工具定义和消息格式'}` + } + // Guard: when deserialized from JSONL (e.g. --resume), the error object may // be a plain object without a `.message` property. Return a safe fallback // instead of undefined, which would crash callers that access `.length`. diff --git a/src/services/api/errors.ts b/src/services/api/errors.ts index dda813a..a47f09f 100644 --- a/src/services/api/errors.ts +++ b/src/services/api/errors.ts @@ -437,7 +437,8 @@ export function getAssistantMessageFromError( error.message.toLowerCase().includes('timeout')) ) { return createAssistantAPIErrorMessage({ - content: API_TIMEOUT_ERROR_MESSAGE, + content: + '请求超时。DeepSeek 服务端排队等待超过上限后断开了连接,请稍后重试或降低 effort 等级', error: 'unknown', }) } @@ -1006,6 +1007,11 @@ export function classifyAPIError(error: unknown): string { return 'insufficient_balance' } + // DeepSeek: invalid request parameters + if (error instanceof APIError && error.status === 422) { + return 'invalid_parameters' + } + // Rate limiting if (error instanceof APIError && error.status === 429) { return 'rate_limit' diff --git a/src/utils/model/aliases.ts b/src/utils/model/aliases.ts index 75ae388..5e94865 100644 --- a/src/utils/model/aliases.ts +++ b/src/utils/model/aliases.ts @@ -6,6 +6,9 @@ export const MODEL_ALIASES = [ 'sonnet[1m]', 'opus[1m]', 'opusplan', + // DeepSeek convenience aliases (resolved by ANTHROPIC_DEFAULT_*_MODEL envs) + 'pro', + 'flash', ] as const export type ModelAlias = (typeof MODEL_ALIASES)[number] diff --git 
a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts index 14b8167..d7a502d 100644 --- a/src/utils/model/validateModel.ts +++ b/src/utils/model/validateModel.ts @@ -11,6 +11,8 @@ import { } from '@anthropic-ai/sdk' import { getModelStrings } from './modelStrings.js' +const KNOWN_DEEPSEEK_MODELS = new Set(['deepseek-v4-pro', 'deepseek-v4-flash']) + // Cache valid models to avoid repeated API calls const validModelCache = new Map() @@ -19,7 +21,7 @@ const validModelCache = new Map() */ export async function validateModel( model: string, -): Promise<{ valid: boolean; error?: string }> { +): Promise<{ valid: boolean; error?: string; warning?: string }> { const normalizedModel = model.trim() // Empty model is invalid @@ -41,6 +43,15 @@ export async function validateModel( return { valid: true } } + // DeepSeek: unrecognized model names are silently remapped server-side to + // deepseek-v4-flash. Warn the user so they know the model alias didn't apply. + if (!KNOWN_DEEPSEEK_MODELS.has(lowerModel)) { + return { + valid: true, + warning: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型,服务端会将其映射为 deepseek-v4-flash。已知模型:deepseek-v4-pro, deepseek-v4-flash`, + } + } + // Check if it matches ANTHROPIC_CUSTOM_MODEL_OPTION (pre-validated by the user) if (normalizedModel === process.env.ANTHROPIC_CUSTOM_MODEL_OPTION) { return { valid: true } From 154d981878b7213888e2a57d7fc868c48893b6c4 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 13:06:56 -0700 Subject: [PATCH 19/33] fix: reject unknown DeepSeek model names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit allowed unknown model names with only a warning, but DeepSeek's API silently remaps anything unrecognized to deepseek-v4-flash rather than returning 404. A warning is too easy to miss when the user genuinely typo'd 'deepsek-v4-pro' and wonders why their flash-tier requests are slower than expected. 
Reject hard with a clear error listing valid models. Also drop the dead sideQuery-based API validation path entirely — DeepSeek never returns 404 for model names, so it's unreachable. Adapted from QingJ01/DeepSeekCode 818e6a3. --- src/commands/model/model.tsx | 6 +- src/utils/model/validateModel.ts | 139 ++----------------------------- 2 files changed, 6 insertions(+), 139 deletions(-) diff --git a/src/commands/model/model.tsx b/src/commands/model/model.tsx index 3f76c23..65c084d 100644 --- a/src/commands/model/model.tsx +++ b/src/commands/model/model.tsx @@ -180,14 +180,10 @@ function SetModelAndClose({ // and model names are case-sensitive const { valid, - error: error_0, - warning + error: error_0 } = await validateModel(model); if (valid) { setModel(model); - if (warning) { - onDone(warning, { display: 'system' }); - } } else { onDone(error_0 || `Model '${model}' not found`, { display: 'system' diff --git a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts index d7a502d..e0c80dc 100644 --- a/src/utils/model/validateModel.ts +++ b/src/utils/model/validateModel.ts @@ -1,35 +1,23 @@ // biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered import { MODEL_ALIASES } from './aliases.js' import { isModelAllowed } from './modelAllowlist.js' -import { getAPIProvider } from './providers.js' -import { sideQuery } from '../sideQuery.js' -import { - NotFoundError, - APIError, - APIConnectionError, - AuthenticationError, -} from '@anthropic-ai/sdk' -import { getModelStrings } from './modelStrings.js' const KNOWN_DEEPSEEK_MODELS = new Set(['deepseek-v4-pro', 'deepseek-v4-flash']) -// Cache valid models to avoid repeated API calls -const validModelCache = new Map() - /** - * Validates a model by attempting an actual API call. + * Validates a model name. 
DeepSeek's API silently remaps unknown model names + * to deepseek-v4-flash instead of returning 404, so API-based validation is + * not reliable — we use a known-models allowlist. */ export async function validateModel( model: string, ): Promise<{ valid: boolean; error?: string; warning?: string }> { const normalizedModel = model.trim() - // Empty model is invalid if (!normalizedModel) { return { valid: false, error: 'Model name cannot be empty' } } - // Check against availableModels allowlist before any API call if (!isModelAllowed(normalizedModel)) { return { valid: false, @@ -37,134 +25,17 @@ export async function validateModel( } } - // Check if it's a known alias (these are always valid) const lowerModel = normalizedModel.toLowerCase() if ((MODEL_ALIASES as readonly string[]).includes(lowerModel)) { return { valid: true } } - // DeepSeek: unrecognized model names are silently remapped server-side to - // deepseek-v4-flash. Warn the user so they know the model alias didn't apply. - if (!KNOWN_DEEPSEEK_MODELS.has(lowerModel)) { - return { - valid: true, - warning: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型,服务端会将其映射为 deepseek-v4-flash。已知模型:deepseek-v4-pro, deepseek-v4-flash`, - } - } - - // Check if it matches ANTHROPIC_CUSTOM_MODEL_OPTION (pre-validated by the user) - if (normalizedModel === process.env.ANTHROPIC_CUSTOM_MODEL_OPTION) { - return { valid: true } - } - - // Check cache first - if (validModelCache.has(normalizedModel)) { - return { valid: true } - } - - - // Try to make an actual API call with minimal parameters - try { - await sideQuery({ - model: normalizedModel, - max_tokens: 1, - maxRetries: 0, - querySource: 'model_validation', - messages: [ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hi', - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ], - }) - - // If we got here, the model is valid - validModelCache.set(normalizedModel, true) + if (KNOWN_DEEPSEEK_MODELS.has(lowerModel)) { return { valid: true } - } catch (error) 
{ - return handleValidationError(error, normalizedModel) } -} -function handleValidationError( - error: unknown, - modelName: string, -): { valid: boolean; error: string } { - // NotFoundError (404) means the model doesn't exist - if (error instanceof NotFoundError) { - const fallback = get3PFallbackSuggestion(modelName) - const suggestion = fallback ? `. Try '${fallback}' instead` : '' - return { - valid: false, - error: `Model '${modelName}' not found${suggestion}`, - } - } - - // For other API errors, provide context-specific messages - if (error instanceof APIError) { - if (error instanceof AuthenticationError) { - return { - valid: false, - error: 'Authentication failed. Please check your API credentials.', - } - } - - if (error instanceof APIConnectionError) { - return { - valid: false, - error: 'Network error. Please check your internet connection.', - } - } - - // Check error body for model-specific errors - const errorBody = error.error as unknown - if ( - errorBody && - typeof errorBody === 'object' && - 'type' in errorBody && - errorBody.type === 'not_found_error' && - 'message' in errorBody && - typeof errorBody.message === 'string' && - errorBody.message.includes('model:') - ) { - return { valid: false, error: `Model '${modelName}' not found` } - } - - // Generic API error - return { valid: false, error: `API error: ${error.message}` } - } - - // For unknown errors, be safe and reject - const errorMessage = error instanceof Error ? error.message : String(error) return { valid: false, - error: `Unable to validate model: ${errorMessage}`, - } -} - -// @[MODEL LAUNCH]: Add a fallback suggestion chain for the new model → previous version -/** - * Suggest a fallback model for 3P users when the selected model is unavailable. 
- */ -function get3PFallbackSuggestion(model: string): string | undefined { - if (getAPIProvider() === 'firstParty') { - return undefined - } - const lowerModel = model.toLowerCase() - if (lowerModel.includes('opus-4-6') || lowerModel.includes('opus_4_6')) { - return getModelStrings().opus41 - } - if (lowerModel.includes('sonnet-4-6') || lowerModel.includes('sonnet_4_6')) { - return getModelStrings().sonnet45 - } - if (lowerModel.includes('sonnet-4-5') || lowerModel.includes('sonnet_4_5')) { - return getModelStrings().sonnet40 + error: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型(会被服务端静默映射为 deepseek-v4-flash)。可用模型:deepseek-v4-pro, deepseek-v4-flash`, } - return undefined } From bb13e9ba8ba1f02b012c8924bebc3131d0bb7f3c Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 13:11:18 -0700 Subject: [PATCH 20/33] feat: prefix failed tool_results with [ERROR] for DeepSeek DeepSeek silently ignores the is_error: true flag on tool_result content blocks, so the model has no way to detect that a tool call failed and will treat the (often confusing) error text as a normal observation. Add a prefix-injection pass after normalizeMessagesForAPI: when a tool_result has is_error=true, prepend a literal '[ERROR] Tool execution failed:' text block to the content. Walks nested blocks recursively so cached histories with nested tool_results are handled. Adapted from QingJ01/DeepSeekCode cd3a58b (the part that was skipped in the earlier port because it depended on helpers not yet present). 
--- src/services/api/claude.ts | 75 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 1404089..f5fcca0 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -941,6 +941,77 @@ function isToolResult( return block.type === 'tool_result' } +/** + * DeepSeek silently ignores the `is_error: true` flag on tool_result blocks, + * so the model has no way to know a tool call failed. Prefix the content with + * a literal "[ERROR]" text block so the model can detect failures from text. + * + * Walks nested tool_result content recursively (for cached histories). + */ +function prefixDeepSeekErrorToolResults( + blocks: BetaContentBlockParam[], +): BetaContentBlockParam[] { + let changed = false + const updated = blocks.map(block => { + if (!isToolResult(block)) return block + + let nextBlock = block + let blockChanged = false + + if ((block as { is_error?: boolean }).is_error) { + const content = Array.isArray(block.content) + ? block.content + : [{ type: 'text', text: String(block.content ?? '') }] + const prefixed = [ + { type: 'text', text: '[ERROR] Tool execution failed:' }, + ...content, + ] as BetaContentBlockParam[] + nextBlock = { ...block, content: prefixed } as typeof block + blockChanged = true + } + + if (Array.isArray(nextBlock.content)) { + const nested = prefixDeepSeekErrorToolResults( + nextBlock.content as BetaContentBlockParam[], + ) + if (nested !== nextBlock.content) { + nextBlock = { ...nextBlock, content: nested } as typeof block + blockChanged = true + } + } + + if (blockChanged) { + changed = true + return nextBlock + } + return block + }) + + return changed ? 
updated : blocks +} + +function applyDeepSeekErrorPrefix( + messages: (UserMessage | AssistantMessage)[], +): (UserMessage | AssistantMessage)[] { + let changed = false + const updated = messages.map(msg => { + const content = msg.message.content + if (!Array.isArray(content)) return msg + + const updatedContent = prefixDeepSeekErrorToolResults( + content as BetaContentBlockParam[], + ) + if (updatedContent === content) return msg + + changed = true + return { + ...msg, + message: { ...msg.message, content: updatedContent }, + } as typeof msg + }) + return changed ? updated : messages +} + /** * Ensures messages contain at most `limit` media items (images + documents). * Strips oldest media first to preserve the most recent. @@ -1306,6 +1377,10 @@ async function* queryModel( API_MAX_MEDIA_PER_REQUEST, ) + // DeepSeek ignores is_error on tool_result blocks; prefix failed results + // with literal "[ERROR]" text so the model can detect them. + messagesForAPI = applyDeepSeekErrorPrefix(messagesForAPI) + // Instrumentation: Track message count after normalization logEvent('tengu_api_after_normalize', { postNormalizedMessageCount: messagesForAPI.length, From 8a971b1860ebfb4e004683e325a374eeb9210e76 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 13:15:48 -0700 Subject: [PATCH 21/33] feat: CNY pricing, cache stats, and DeepSeek cost summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combined port of QingJ01/DeepSeekCode fad575f + c608652 + 4ba8eca, adapted: since this branch targets DeepSeek exclusively, all currency formatting is unconditionally ¥ (no isDeepSeekCurrency() gate). - modelCost.ts: - DeepSeek V4 Pro pricing table (¥3/¥6 discounted, ¥12/¥24 full price via DEEPSEEK_USE_FULL_PRICE=1 — discount window ends 2026-05-31). - DeepSeek V4 Flash pricing table (¥1/¥2). - getDeepSeekProCostTier() helper. - getModelCosts(): route deepseek-v4-pro through tier helper. 
- formatPrice(): emit '¥' with 3 decimal places for sub-0.1 prices (cache reads at ¥0.025 would otherwise round to ¥0.03). - DEFAULT_UNKNOWN_MODEL_COST: COST_DEEPSEEK_FLASH (was COST_TIER_5_25). - cost-tracker.ts: - formatCost(): switch '$' -> '¥'. - formatTotalCost(): append 'Cache hit rate' and 'Cache savings' lines. DeepSeek's prompt cache cuts input cost ~120x, so hit rate is the headline cost driver; surface it next to the totals. - costHook.ts: drop the hasConsoleBillingAccess() gate — always print the session cost summary at exit. - commands/cost/cost.ts: drop the Claude.ai-subscription branch — always return formatTotalCost() (no overage/subscription concept for DeepSeek). - CostThresholdDialog.tsx: drop the 'You've spent $5 on the Anthropic API' literal; generic 'significant amount on API calls' instead. - screens/REPL.tsx: raise the cost-threshold-reached trigger from 5 (USD) to 35 (CNY), matching the user-perceived '~5 USD' notification level. --- src/commands/cost/cost.ts | 19 +-------- src/components/CostThresholdDialog.tsx | 2 +- src/cost-tracker.ts | 34 ++++++++++++++-- src/costHook.ts | 7 +--- src/screens/REPL.tsx | 4 +- src/utils/modelCost.ts | 54 +++++++++++++++++++++++--- 6 files changed, 85 insertions(+), 35 deletions(-) diff --git a/src/commands/cost/cost.ts b/src/commands/cost/cost.ts index c9fb0cb..0eaf9f6 100644 --- a/src/commands/cost/cost.ts +++ b/src/commands/cost/cost.ts @@ -1,24 +1,7 @@ import { formatTotalCost } from '../../cost-tracker.js' -import { currentLimits } from '../../services/claudeAiLimits.js' import type { LocalCommandCall } from '../../types/command.js' -import { isClaudeAISubscriber } from '../../utils/auth.js' +// DeepSeek: no Claude.ai subscription; always show the formatted cost. export const call: LocalCommandCall = async () => { - if (isClaudeAISubscriber()) { - let value: string - - if (currentLimits.isUsingOverage) { - value = - 'You are currently using your overages to power your Claude Code usage. 
We will automatically switch you back to your subscription rate limits when they reset' - } else { - value = - 'You are currently using your subscription to power your Claude Code usage' - } - - if (process.env.USER_TYPE === 'ant') { - value += `\n\n[ANT-ONLY] Showing cost anyway:\n ${formatTotalCost()}` - } - return { type: 'text', value } - } return { type: 'text', value: formatTotalCost() } } diff --git a/src/components/CostThresholdDialog.tsx b/src/components/CostThresholdDialog.tsx index bdf9f53..d0639d4 100644 --- a/src/components/CostThresholdDialog.tsx +++ b/src/components/CostThresholdDialog.tsx @@ -38,7 +38,7 @@ export function CostThresholdDialog(t0) { } let t4; if ($[4] !== onDone || $[5] !== t3) { - t4 = {t1}{t3}; + t4 = {t1}{t3}; $[4] = onDone; $[5] = t3; $[6] = t4; diff --git a/src/cost-tracker.ts b/src/cost-tracker.ts index b03184c..72375bf 100644 --- a/src/cost-tracker.ts +++ b/src/cost-tracker.ts @@ -44,8 +44,11 @@ import { import { isFastModeEnabled } from './utils/fastMode.js' import { formatDuration, formatNumber } from './utils/format.js' import type { FpsMetrics } from './utils/fpsTracker.js' -import { getCanonicalName } from './utils/model/model.js' -import { calculateUSDCost } from './utils/modelCost.js' +import { + getCanonicalName, + getDefaultMainLoopModelSetting, +} from './utils/model/model.js' +import { calculateUSDCost, getModelCosts } from './utils/modelCost.js' export { getTotalCostUSD as getTotalCost, getTotalDuration, @@ -175,7 +178,7 @@ export function saveCurrentSessionCosts(fpsMetrics?: FpsMetrics): void { } function formatCost(cost: number, maxDecimalPlaces: number = 4): string { - return `$${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}` + return `¥${cost > 0.5 ? 
round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}` } function formatModelUsage(): string { @@ -234,12 +237,35 @@ export function formatTotalCost(): string { const modelUsageDisplay = formatModelUsage() + // DeepSeek-specific: show prompt cache hit rate and yuan savings, since the + // /anthropic endpoint exposes cache_read / cache_creation token counts and + // these are the headline cost driver (cache reads are ~120x cheaper). + let cacheStatsDisplay = '' + const cacheRead = getTotalCacheReadInputTokens() + const cacheCreation = getTotalCacheCreationInputTokens() + const directInput = getTotalInputTokens() + const totalInput = cacheRead + cacheCreation + directInput + if (totalInput > 0) { + const hitRate = (cacheRead / totalInput) * 100 + const model = getDefaultMainLoopModelSetting() + const costs = getModelCosts(model, { + input_tokens: 0, + output_tokens: 0, + } as Usage) + const savings = + (cacheRead / 1_000_000) * + (costs.inputTokens - costs.promptCacheReadTokens) + cacheStatsDisplay = + `\nCache hit rate: ${hitRate.toFixed(1)}% (${formatNumber(cacheRead)} / ${formatNumber(totalInput)} input tokens)` + + `\nCache savings: ${formatCost(savings)}` + } + return chalk.dim( `Total cost: ${costDisplay}\n` + `Total duration (API): ${formatDuration(getTotalAPIDuration())} Total duration (wall): ${formatDuration(getTotalDuration())} Total code changes: ${getTotalLinesAdded()} ${getTotalLinesAdded() === 1 ? 'line' : 'lines'} added, ${getTotalLinesRemoved()} ${getTotalLinesRemoved() === 1 ? 
'line' : 'lines'} removed -${modelUsageDisplay}`, +${modelUsageDisplay}${cacheStatsDisplay}`, ) } diff --git a/src/costHook.ts b/src/costHook.ts index 798a093..fa740c4 100644 --- a/src/costHook.ts +++ b/src/costHook.ts @@ -1,6 +1,5 @@ import { useEffect } from 'react' import { formatTotalCost, saveCurrentSessionCosts } from './cost-tracker.js' -import { hasConsoleBillingAccess } from './utils/billing.js' import type { FpsMetrics } from './utils/fpsTracker.js' export function useCostSummary( @@ -8,10 +7,8 @@ export function useCostSummary( ): void { useEffect(() => { const f = () => { - if (hasConsoleBillingAccess()) { - process.stdout.write('\n' + formatTotalCost() + '\n') - } - + // DeepSeek: there's no Anthropic console billing tier; always print. + process.stdout.write('\n' + formatTotalCost() + '\n') saveCurrentSessionCosts(getFpsMetrics?.()) } process.on('exit', f) diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx index b483ead..7b99a10 100644 --- a/src/screens/REPL.tsx +++ b/src/screens/REPL.tsx @@ -2207,7 +2207,9 @@ export function REPL({ }; useEffect(() => { const totalCost = getTotalCost(); - if (totalCost >= 5 /* $5 */ && !showCostDialog && !haveShownCostDialog) { + // DeepSeek: pricing is in CNY; raise the threshold to roughly match the + // user-perceived "5 USD" notification level (1 USD ≈ 7 CNY, rounded up). + if (totalCost >= 35 && !showCostDialog && !haveShownCostDialog) { logEvent('tengu_cost_threshold_reached', {}); // Mark as shown even if the dialog won't render (no console billing // access). Otherwise this effect re-fires on every message change for diff --git a/src/utils/modelCost.ts b/src/utils/modelCost.ts index b4867d4..b94b4a1 100644 --- a/src/utils/modelCost.ts +++ b/src/utils/modelCost.ts @@ -86,7 +86,34 @@ export const COST_HAIKU_45 = { webSearchRequests: 0.01, } as const satisfies ModelCosts -const DEFAULT_UNKNOWN_MODEL_COST = COST_TIER_5_25 +// DeepSeek V4 Pro pricing (CNY per Mtok), discounted price until 2026-05-31. 
+// Set DEEPSEEK_USE_FULL_PRICE=1 for standard price. +export const COST_DEEPSEEK_PRO_DISCOUNTED = { + inputTokens: 3, + outputTokens: 6, + promptCacheWriteTokens: 3, + promptCacheReadTokens: 0.025, + webSearchRequests: 0, +} as const satisfies ModelCosts + +export const COST_DEEPSEEK_PRO_FULL = { + inputTokens: 12, + outputTokens: 24, + promptCacheWriteTokens: 12, + promptCacheReadTokens: 0.1, + webSearchRequests: 0, +} as const satisfies ModelCosts + +// DeepSeek V4 Flash pricing (CNY per Mtok). +export const COST_DEEPSEEK_FLASH = { + inputTokens: 1, + outputTokens: 2, + promptCacheWriteTokens: 1, + promptCacheReadTokens: 0.02, + webSearchRequests: 0, +} as const satisfies ModelCosts + +const DEFAULT_UNKNOWN_MODEL_COST = COST_DEEPSEEK_FLASH /** * Get the cost tier for Opus 4.6 based on fast mode. @@ -98,10 +125,17 @@ export function getOpus46CostTier(fastMode: boolean): ModelCosts { return COST_TIER_5_25 } +export function getDeepSeekProCostTier(): ModelCosts { + if (process.env.DEEPSEEK_USE_FULL_PRICE === '1') { + return COST_DEEPSEEK_PRO_FULL + } + return COST_DEEPSEEK_PRO_DISCOUNTED +} + // @[MODEL LAUNCH]: Add a pricing entry for the new model below. // Costs from https://platform.claude.com/docs/en/about-claude/pricing // Web search cost: $10 per 1000 requests = $0.01 per request -export const MODEL_COSTS: Record = { +export const MODEL_COSTS: Record = { [firstPartyNameToCanonical(CLAUDE_3_5_HAIKU_CONFIG.firstParty)]: COST_HAIKU_35, [firstPartyNameToCanonical(CLAUDE_HAIKU_4_5_CONFIG.firstParty)]: @@ -123,6 +157,8 @@ export const MODEL_COSTS: Record = { COST_TIER_5_25, [firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)]: COST_TIER_5_25, + 'deepseek-v4-pro': COST_DEEPSEEK_PRO_DISCOUNTED, + 'deepseek-v4-flash': COST_DEEPSEEK_FLASH, } /** @@ -152,6 +188,11 @@ export function getModelCosts(model: string, usage: Usage): ModelCosts { return getOpus46CostTier(isFastMode) } + // DeepSeek V4 Pro pricing depends on discount-period flag. 
+ if (shortName === 'deepseek-v4-pro') { + return getDeepSeekProCostTier() + } + const costs = MODEL_COSTS[shortName] if (!costs) { trackUnknownModelCost(model, shortName) @@ -202,12 +243,13 @@ export function calculateCostFromTokens( } function formatPrice(price: number): string { - // Format price: integers without decimals, others with 2 decimal places - // e.g., 3 -> "$3", 0.8 -> "$0.80", 22.5 -> "$22.50" + // Format price: integers without decimals, fractions with 2 decimal places + // (3 decimals for very small values like cache-read tokens). + // e.g., 3 -> "¥3", 0.8 -> "¥0.80", 0.025 -> "¥0.025" if (Number.isInteger(price)) { - return `$${price}` + return `¥${price}` } - return `$${price.toFixed(2)}` + return `¥${price.toFixed(price < 0.1 ? 3 : 2)}` } /** From b62ffde5cf2513b2d12ccc26d31c494c3ddd341a Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 14:41:13 -0700 Subject: [PATCH 22/33] docs: capture DeepSeek vs Claude API pricing comparison Snapshot of 2026-05-12 prices from both vendors' pricing pages, with multiplier views and a realistic per-request cost table calibrated to the actual cache-hit pattern observed after the session_id-pinning fix. Includes caveats about the V4 Pro discount expiring 2026-05-31, missing Opus-class equivalents on DeepSeek, and capability gaps in DeepSeek's Anthropic-compatible endpoint. --- docs/deepseek-vs-claude-pricing.md | 98 ++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 docs/deepseek-vs-claude-pricing.md diff --git a/docs/deepseek-vs-claude-pricing.md b/docs/deepseek-vs-claude-pricing.md new file mode 100644 index 0000000..e4632ba --- /dev/null +++ b/docs/deepseek-vs-claude-pricing.md @@ -0,0 +1,98 @@ +# DeepSeek vs Claude API Pricing + +Captured 2026-05-12. 
Sources: +- DeepSeek: https://api-docs.deepseek.com/zh-cn/quick_start/pricing +- Anthropic: https://platform.claude.com/docs/en/about-claude/pricing +- FX assumed: $1 ≈ ¥7.15 + +## Per-model prices (per 1M tokens) + +### Flagship tier + +| Item | Claude Opus 4.7 | Claude Sonnet 4.6 | DeepSeek V4 Pro (discount) | DeepSeek V4 Pro (full) | +|---|---:|---:|---:|---:| +| Input (cache miss) | $5.00 / ¥35.75 | $3.00 / ¥21.45 | ¥3 | ¥12 | +| Cache write (5m) | $6.25 / ¥44.69 | $3.75 / ¥26.81 | ¥3 | ¥12 | +| Cache read | $0.50 / ¥3.58 | $0.30 / ¥2.15 | ¥0.025 | ¥0.10 | +| Output | $25.00 / ¥178.75 | $15.00 / ¥107.25 | ¥6 | ¥24 | +| Context window | 200K | 1M | 1M | 1M | +| Max output tokens | varies | varies | 384K | 384K | + +DeepSeek V4 Pro discount (75% off the full price) runs through 2026-05-31 23:59 Beijing time. +From 2026-06-01 the full-price column applies unless DeepSeek extends it. + +### Lightweight tier + +| Item | Claude Haiku 4.5 | DeepSeek V4 Flash | +|---|---:|---:| +| Input (cache miss) | $1.00 / ¥7.15 | ¥1 | +| Cache write (5m) | $1.25 / ¥8.94 | ¥1 | +| Cache read | $0.10 / ¥0.72 | ¥0.02 | +| Output | $5.00 / ¥35.75 | ¥2 | +| Context window | 200K | 1M | + +## Multiplier view (how much Claude costs vs DeepSeek discount price) + +### Pro vs Sonnet 4.6 (same-tier comparison) + +| Item | Sonnet 4.6 / Pro multiplier | +|---|---:| +| Input (cache miss) | 7.15x | +| Cache read | 85.8x | +| Output | 17.9x | + +### Pro vs Opus 4.7 (cross-tier comparison) + +| Item | Opus 4.7 / Pro multiplier | +|---|---:| +| Input (cache miss) | 11.9x | +| Cache read | 143x | +| Output | 29.8x | + +### Flash vs Haiku 4.5 + +| Item | Haiku 4.5 / Flash multiplier | +|---|---:| +| Input (cache miss) | 7.15x | +| Cache read | 35.8x | +| Output | 17.9x | + +## Realistic Claude Code request cost + +Measured pattern: 15,872 cache_read + 66 input + 30 output tokens +(observed first cache hit after the session_id-pinning fix in 283678a).
+ +| Provider | Cost per request | Per 100 requests | +|---|---:|---:| +| Claude Opus 4.7 | $0.0094 / ¥0.067 | ¥6.70 | +| Claude Sonnet 4.6 | $0.0054 / ¥0.039 | ¥3.90 | +| Claude Haiku 4.5 | $0.0018 / ¥0.013 | ¥1.30 | +| DeepSeek V4 Pro (discount) | ¥0.000777 | ¥0.078 | +| DeepSeek V4 Flash | ¥0.000437 | ¥0.044 | + +## Headline ratios at the typical Claude Code workload + +- DeepSeek Pro vs Claude Sonnet 4.6: **~50x cheaper** +- DeepSeek Pro vs Claude Opus 4.7: **~86x cheaper** +- DeepSeek Flash vs Claude Haiku 4.5: **~30x cheaper** + +After the 2026-05-31 discount expires, Pro full price would be 4x its +current rate; ratios shrink to roughly 12x vs Sonnet, 21x vs Opus, but the +gap remains substantial. + +## Caveats + +- **No Opus-class DeepSeek model.** Tasks that genuinely need Opus-level + reasoning (long math proofs, deep architecture work) have no DeepSeek + equivalent. +- **Capability gaps with the Anthropic API**: DeepSeek's /anthropic endpoint + does not implement image/document content blocks, computer use, server-side + web search beyond what the model emits, MCP gateway, or redacted_thinking. + See `docs/openclaude-commits-review.md` and the deepseek branch port commits + for the adaptations Claude Code makes for these. +- **Cache hit assumptions**: the per-request cost above assumes the + session_id pinning fix is in effect (commit 283678a). Without it, + cache_read_input_tokens stays at 0 and DeepSeek per-request cost is + roughly 60x higher (every request pays full input price). +- **No /count_tokens endpoint on DeepSeek.** Local UTF-8 byte estimation is + used instead (commit 6fbad90).
From ccc60884b3ed7551cca5bfcadfb1f7ddc7429a2e Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 14:48:09 -0700 Subject: [PATCH 23/33] build: disable NATIVE_CLIENT_ATTESTATION on deepseek branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flag controls a 'cch=00000' placeholder in the x-anthropic-billing-header plus an xxHash64 body-integrity computation. DeepSeek's /anthropic endpoint does not validate this header — every byte we send is dead weight. With CLAUDE_CODE_ATTRIBUTION_HEADER=0 in .env.deepseek the header is suppressed anyway, but disabling the flag also drops the xxhash-wasm initialization on the hot path and trims 2 source-patching ops at build time. Verified: identical 'say only: ping' requests across two CLI launches still produce cache_read_input_tokens=15872 (commit 283678a's session_id pinning remains effective). --- scripts/build.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/build.ts b/scripts/build.ts index a154510..2e8059f 100644 --- a/scripts/build.ts +++ b/scripts/build.ts @@ -22,8 +22,9 @@ const ENABLED_FEATURES = [ 'TREE_SITTER_BASH', // NATIVE_CLIENT_ATTESTATION intentionally disabled: it injects a 'cch=00000' // placeholder into the x-anthropic-billing-header and computes a body hash. - // GLM's /anthropic endpoint does not validate this header — sending it - // adds CPU + an HTTP header without value. Keep off on glm branch. + // GLM/DeepSeek's /anthropic endpoints do not validate this header — sending + // it adds CPU + an HTTP header without value. Keep off for non-Anthropic + // providers. 
'BRIDGE_MODE', 'COORDINATOR_MODE', ] From c76596785535c5864992c3e0ada82f841d5ca4f0 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 12 May 2026 19:13:20 -0700 Subject: [PATCH 24/33] perf: halve git-status injection cap from 2000 to 1000 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MAX_STATUS_CHARS caps the 'git status' output that gets injected into every system prompt. At 2000 chars (~500 tokens) on a dirty repo it dominated the project-context section; cutting it to 1000 chars (~250 tokens) saves cache-write cost on first turn without hiding the information — the truncation message tells the model to run 'git status' via BashTool if it needs the full output. Also fix the truncation message which still said 'exceeds 2k characters' even though the threshold had already been reduced. --- src/context.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/context.ts b/src/context.ts index b5c0c2d..b614490 100644 --- a/src/context.ts +++ b/src/context.ts @@ -17,7 +17,7 @@ import { getBranch, getDefaultBranch, getIsGit, gitExe } from './utils/git.js' import { shouldIncludeGitInstructions } from './utils/gitSettings.js' import { logError } from './utils/log.js' -const MAX_STATUS_CHARS = 2000 +const MAX_STATUS_CHARS = 1000 // System prompt injection for cache breaking (ant-only, ephemeral debugging state) let systemPromptInjection: string | null = null @@ -85,7 +85,7 @@ export const getGitStatus = memoize(async (): Promise => { const truncatedStatus = status.length > MAX_STATUS_CHARS ? status.substring(0, MAX_STATUS_CHARS) + - '\n... (truncated because it exceeds 2k characters. If you need more information, run "git status" using BashTool)' + '\n... (truncated because it exceeds 1k characters. 
If you need more information, run "git status" using BashTool)' : status logForDiagnosticsNoPII('info', 'git_status_completed', { From 0057a9d0420e6077c4b1da21e12a280d37eb6aff Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 13 May 2026 05:55:13 -0700 Subject: [PATCH 25/33] =?UTF-8?q?fix:=20ThinkingToggle=20wording=20?= =?UTF-8?q?=E2=80=94=20'Claude'=20->=20'Model'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This branch can run any backend behind ANTHROPIC_BASE_URL (DeepSeek today). The /thinking toggle description hardcoded 'Claude will think / respond...' which is misleading when the actual model is deepseek-v4-pro. Generic 'Model' label is accurate regardless of provider. Adapted from QingJ01/DeepSeekCode 5adf400 — the rest of that rebrand commit is either pure 'Claude' -> 'DeepSeek' branding, dead code under ClaudeAI-only paths, or logo whitespace trimming. --- src/components/ThinkingToggle.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/ThinkingToggle.tsx b/src/components/ThinkingToggle.tsx index a7b7a1b..2a4604e 100644 --- a/src/components/ThinkingToggle.tsx +++ b/src/components/ThinkingToggle.tsx @@ -30,11 +30,11 @@ export function ThinkingToggle(t0) { t1 = [{ value: "true", label: "Enabled", - description: "Claude will think before responding" + description: "Model will think before responding" }, { value: "false", label: "Disabled", - description: "Claude will respond without extended thinking" + description: "Model will respond without extended thinking" }]; $[0] = t1; } else { From 5e52795d78f4cecf28afe617032fc6e6fddb69f2 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Sat, 16 May 2026 20:23:17 -0700 Subject: [PATCH 26/33] feat: allow CLAUDE_CODE_AUTO_MODE_MODEL override for non-ant users Drop the USER_TYPE === 'ant' gate on the env var so external builds (e.g. 
DeepSeek backend) can point the auto-mode classifier at any model without going through GrowthBook config. --- src/utils/permissions/yoloClassifier.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts index 1ec78b5..e8a4318 100644 --- a/src/utils/permissions/yoloClassifier.ts +++ b/src/utils/permissions/yoloClassifier.ts @@ -1328,14 +1328,12 @@ type AutoModeConfig = { /** * Get the model for the classifier. - * Ant-only env var takes precedence, then GrowthBook JSON config override, - * then the main loop model. + * CLAUDE_CODE_AUTO_MODE_MODEL env takes precedence, then GrowthBook JSON + * config override, then the main loop model. */ function getClassifierModel(): string { - if (process.env.USER_TYPE === 'ant') { - const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL - if (envModel) return envModel - } + const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL + if (envModel) return envModel const config = getFeatureValue_CACHED_MAY_BE_STALE( 'tengu_auto_mode_config', {} as AutoModeConfig, From 7d8a5fd5b0b1292ae7c3cf7a1eee63cc5e05d944 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Sun, 17 May 2026 07:36:58 -0700 Subject: [PATCH 27/33] feat: enable auto mode end-to-end on deepseek branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes needed to make `--permission-mode auto` actually work with DeepSeek as both main and classifier model: - betas.ts: extend modelSupportsAutoMode external allowlist to accept ^deepseek- model names alongside the existing Claude family. - permissionSetup.ts: flip AUTO_MODE_ENABLED_DEFAULT from 'disabled' to 'enabled'. With telemetry/GrowthBook stubbed on this branch, tengu_auto_mode_config never resolves and the default was kicking every session out of auto via the circuit-breaker path. - yoloClassifier.ts: drop `type: 'custom'` from YOLO_CLASSIFIER_TOOL_SCHEMA. 
DeepSeek's /anthropic endpoint returns 400 for unknown tool types; other tools in the codebase already omit this field. Verified end-to-end: `ls /tmp` passes classifier, `curl https://...` triggers deny — both correct behaviors with deepseek-v4-flash as the classifier model (set via CLAUDE_CODE_AUTO_MODE_MODEL). --- src/utils/betas.ts | 2 +- src/utils/permissions/permissionSetup.ts | 7 ++++++- src/utils/permissions/yoloClassifier.ts | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/utils/betas.ts b/src/utils/betas.ts index 723b0c0..db3ffd8 100644 --- a/src/utils/betas.ts +++ b/src/utils/betas.ts @@ -193,7 +193,7 @@ export function modelSupportsAutoMode(model: string): boolean { // glm-5 and above (glm-5, glm-5.2, glm-6, …). if (/glm-[5-9]/.test(m)) return true // External allowlist (firstParty already checked above). - return /^claude-(opus|sonnet)-4-6/.test(m) + return /^claude-(opus|sonnet)-4/.test(m) || /^deepseek-/.test(m) } return false } diff --git a/src/utils/permissions/permissionSetup.ts b/src/utils/permissions/permissionSetup.ts index 84a4536..2489fa7 100644 --- a/src/utils/permissions/permissionSetup.ts +++ b/src/utils/permissions/permissionSetup.ts @@ -1310,7 +1310,12 @@ export function getAutoModeUnavailableReason(): AutoModeUnavailableReason | null */ export type AutoModeEnabledState = 'enabled' | 'disabled' | 'opt-in' -const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'disabled' +// deepseek branch: telemetry/GrowthBook is stubbed out, so tengu_auto_mode_config +// never resolves and falls back to this default. Upstream defaulted to 'disabled' +// to wait for the GB rollout signal — on this branch there is no rollout signal, +// so default to 'enabled' to let CLI/--permission-mode auto and settings +// defaultMode=auto work without a remote gate. +const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'enabled' /** * Default auto-mode availability when GrowthBook gives no explicit value. 
diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts index e8a4318..c072a51 100644 --- a/src/utils/permissions/yoloClassifier.ts +++ b/src/utils/permissions/yoloClassifier.ts @@ -259,8 +259,11 @@ const yoloClassifierResponseSchema = lazySchema(() => export const YOLO_CLASSIFIER_TOOL_NAME = 'classify_result' +// Note: omitting `type: 'custom'` — Anthropic accepts it, but DeepSeek's +// /anthropic endpoint rejects unknown tool types with 400. Other tools in +// this codebase already omit the type field; we do the same here to keep +// the classifier compatible with both providers. const YOLO_CLASSIFIER_TOOL_SCHEMA: BetaToolUnion = { - type: 'custom', name: YOLO_CLASSIFIER_TOOL_NAME, description: 'Report the security classification result for the agent action', input_schema: { From 7bb5a6015749c023b010913c1deff5a3bbdf5c0d Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 17 Jun 2026 11:02:01 +0800 Subject: [PATCH 28/33] feat: fall back to system ripgrep when bundled binary is missing On platforms without a vendored rg binary (e.g. Android/Termux) or incomplete installs, getRipgrepConfig() returned a builtin path that didn't exist, causing ENOENT on spawn. Now fall back to system rg on PATH (spawning the bare name to prevent PATH hijacking), and carry a `note` surfaced via getRipgrepStatus(). When no rg is available at all, preserve the historical ENOENT path with an explanatory note. --- src/utils/ripgrep.ts | 46 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/src/utils/ripgrep.ts b/src/utils/ripgrep.ts index 88b57ee..27bfa46 100644 --- a/src/utils/ripgrep.ts +++ b/src/utils/ripgrep.ts @@ -27,6 +27,10 @@ type RipgrepConfig = { command: string args: string[] argv0?: string + // Human-readable explanation when ripgrep resolution took a fallback path + // (e.g. the bundled binary was missing and we fell back to system rg). 
+ // Surfaced in the doctor screen and as a one-time startup warning. + note?: string } const getRipgrepConfig = memoize((): RipgrepConfig => { @@ -70,13 +74,41 @@ const getRipgrepConfig = memoize((): RipgrepConfig => { path.resolve(__dirname, '..', 'vendor', 'ripgrep'), path.resolve(__dirname, '..', '..', 'vendor', 'ripgrep'), ] - const rgRoot = - candidateRoots.find(root => - existsSync(path.resolve(root, platformDir, executable)), - ) ?? candidateRoots[0] - const command = path.resolve(rgRoot, platformDir, executable) + const rgRoot = candidateRoots.find(root => + existsSync(path.resolve(root, platformDir, executable)), + ) + + // Bundled binary found on disk: use it. + if (rgRoot) { + const command = path.resolve(rgRoot, platformDir, executable) + return { mode: 'builtin', command, args: [] } + } - return { mode: 'builtin', command, args: [] } + // No bundled binary for this platform (e.g. Android/Termux, or an + // incomplete install). Fall back to system rg on PATH so file discovery, + // suggestions, and hooks keep working instead of spawning a non-existent + // path and failing with ENOENT. + const { cmd: systemPath } = findExecutable('rg', []) + if (systemPath !== 'rg') { + // SECURITY: spawn the bare name 'rg', not the resolved path, to prevent + // PATH hijacking via a malicious ./rg in the cwd (see system branch above). + return { + mode: 'system', + command: 'rg', + args: [], + note: 'bundled ripgrep binary not found; using system rg from PATH', + } + } + + // Nothing available. Preserve historical behavior: return the expected + // builtin path so callers surface a clear ENOENT, with a note explaining why. 
+ const command = path.resolve(candidateRoots[0], platformDir, executable) + return { + mode: 'builtin', + command, + args: [], + note: 'bundled ripgrep binary not found and no system rg on PATH', + } }) export function ripgrepCommand(): { @@ -551,12 +583,14 @@ export function getRipgrepStatus(): { mode: 'system' | 'builtin' | 'embedded' path: string working: boolean | null // null if not yet tested + note?: string } { const config = getRipgrepConfig() return { mode: config.mode, path: config.command, working: ripgrepStatus?.working ?? null, + note: config.note, } } From b37d8a43fdb09ec4fe6464adfbebf00cb3fc5ee3 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 24 Jun 2026 13:09:46 +0800 Subject: [PATCH 29/33] fix: gate DeepSeek-specific adaptations behind isDeepSeekProvider() Four high-risk changes from the deepseek branch were unconditional and would have broken Anthropic/GLM runtime behavior: - claude.ts thinking: restore adaptive-vs-budget selection for Anthropic/GLM; DeepSeek keeps the simplified budget_tokens path (effort-level-driven). - claude.ts [ERROR] prefix: only inject into tool_result content for DeepSeek (it ignores is_error); Anthropic/GLM handle is_error correctly. - withRetry.ts 429: DeepSeek always retries (no subscriber tiers); Anthropic retains the ClaudeAI subscriber gate. - validateModel.ts: DeepSeek uses a known-models allowlist (API silently remaps unknown names); all other providers restore main's API-based probe. 
Co-Authored-By: Claude Opus 4.6 --- src/services/api/claude.ts | 48 ++++++++-- src/services/api/withRetry.ts | 11 ++- src/utils/model/validateModel.ts | 148 +++++++++++++++++++++++++++++-- 3 files changed, 190 insertions(+), 17 deletions(-) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index f5fcca0..7c77297 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -22,6 +22,7 @@ import type { Stream } from '@anthropic-ai/sdk/streaming.mjs' import { randomUUID } from 'crypto' import { getAPIProvider, + isDeepSeekProvider, isFirstPartyAnthropicBaseUrl, } from 'src/utils/model/providers.js' import { @@ -1378,8 +1379,11 @@ async function* queryModel( ) // DeepSeek ignores is_error on tool_result blocks; prefix failed results - // with literal "[ERROR]" text so the model can detect them. - messagesForAPI = applyDeepSeekErrorPrefix(messagesForAPI) + // with literal "[ERROR]" text so the model can detect them. Anthropic and + // GLM handle is_error correctly, so this is DeepSeek-only. + if (isDeepSeekProvider()) { + messagesForAPI = applyDeepSeekErrorPrefix(messagesForAPI) + } // Instrumentation: Track message count after normalization logEvent('tengu_api_after_normalize', { @@ -1677,13 +1681,39 @@ async function* queryModel( // without notifying the model launch DRI and research. This is a sensitive // setting that can greatly affect model quality and bashing. if (hasThinking && modelSupportsThinking(options.model)) { - // DeepSeek controls thinking depth via CLAUDE_CODE_EFFORT_LEVEL alone; - // budget_tokens is ignored server-side. Send a minimal thinking param - // so the SDK knows to expect thinking blocks in the response. - thinking = { - budget_tokens: maxOutputTokens - 1, - type: 'enabled', - } satisfies BetaMessageStreamParams['thinking'] + if (isDeepSeekProvider()) { + // DeepSeek controls thinking depth via CLAUDE_CODE_EFFORT_LEVEL alone; + // budget_tokens is ignored server-side. 
Send a minimal thinking param + // so the SDK knows to expect thinking blocks in the response. + thinking = { + budget_tokens: maxOutputTokens - 1, + type: 'enabled', + } satisfies BetaMessageStreamParams['thinking'] + } else if ( + !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) && + modelSupportsAdaptiveThinking(options.model) + ) { + // For models that support adaptive thinking, always use adaptive + // thinking without a budget. + thinking = { + type: 'adaptive', + } satisfies BetaMessageStreamParams['thinking'] + } else { + // For models that do not support adaptive thinking, use the default + // thinking budget unless explicitly specified. + let thinkingBudget = getMaxThinkingTokensForModel(options.model) + if ( + thinkingConfig.type === 'enabled' && + thinkingConfig.budgetTokens !== undefined + ) { + thinkingBudget = thinkingConfig.budgetTokens + } + thinkingBudget = Math.min(maxOutputTokens - 1, thinkingBudget) + thinking = { + budget_tokens: thinkingBudget, + type: 'enabled', + } satisfies BetaMessageStreamParams['thinking'] + } } // Get API context management strategies if enabled diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts index e7bcb05..cfc94f6 100644 --- a/src/services/api/withRetry.ts +++ b/src/services/api/withRetry.ts @@ -11,7 +11,10 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js' import { logForDebugging } from 'src/utils/debug.js' import { logError } from 'src/utils/log.js' import { createSystemAPIErrorMessage } from 'src/utils/messages.js' -import { getAPIProviderForStatsig } from 'src/utils/model/providers.js' +import { + getAPIProviderForStatsig, + isDeepSeekProvider, +} from 'src/utils/model/providers.js' import { clearApiKeyHelperCache, clearAwsCredentialsCache, @@ -768,9 +771,11 @@ function shouldRetry(error: APIError): boolean { // Retry on lock timeouts. if (error.status === 409) return true - // DeepSeek: always retry 429 with exponential backoff (no subscriber gates). 
+ // Retry on rate limits. DeepSeek has no subscriber tiers, so always retry + // 429 with exponential backoff. Anthropic retains the subscriber gate so + // ClaudeAI subscription users don't churn through rate limits. if (error.status === 429) { - return true + return isDeepSeekProvider() || !isClaudeAISubscriber() || isEnterpriseSubscriber() } // Clear API key cache on 401 and allow retry. diff --git a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts index e0c80dc..a57b49a 100644 --- a/src/utils/model/validateModel.ts +++ b/src/utils/model/validateModel.ts @@ -1,23 +1,43 @@ // biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered import { MODEL_ALIASES } from './aliases.js' import { isModelAllowed } from './modelAllowlist.js' +import { getAPIProvider, isDeepSeekProvider } from './providers.js' +import { sideQuery } from '../sideQuery.js' +import { + NotFoundError, + APIError, + APIConnectionError, + AuthenticationError, +} from '@anthropic-ai/sdk' +import { getModelStrings } from './modelStrings.js' +// Cache valid models to avoid repeated API calls +const validModelCache = new Map() + +// DeepSeek's API silently remaps unknown model names to deepseek-v4-flash +// instead of returning 404, so API-based validation is not reliable — use a +// known-models allowlist when the DeepSeek provider is active. const KNOWN_DEEPSEEK_MODELS = new Set(['deepseek-v4-pro', 'deepseek-v4-flash']) /** - * Validates a model name. DeepSeek's API silently remaps unknown model names - * to deepseek-v4-flash instead of returning 404, so API-based validation is - * not reliable — we use a known-models allowlist. + * Validates a model by attempting an actual API call. + * + * For the DeepSeek provider, the API silently remaps unknown model names, so + * we validate against a known-models allowlist instead of probing the API. 
+ * For all other providers (Anthropic firstParty, GLM, Bedrock, Vertex, + * Foundry), we probe the API with a minimal request. */ export async function validateModel( model: string, ): Promise<{ valid: boolean; error?: string; warning?: string }> { const normalizedModel = model.trim() + // Empty model is invalid if (!normalizedModel) { return { valid: false, error: 'Model name cannot be empty' } } + // Check against availableModels allowlist before any API call if (!isModelAllowed(normalizedModel)) { return { valid: false, @@ -25,17 +45,135 @@ export async function validateModel( } } + // Check if it's a known alias (these are always valid) const lowerModel = normalizedModel.toLowerCase() if ((MODEL_ALIASES as readonly string[]).includes(lowerModel)) { return { valid: true } } - if (KNOWN_DEEPSEEK_MODELS.has(lowerModel)) { + // DeepSeek: use known-models allowlist (API silently remaps unknown names). + if (isDeepSeekProvider()) { + if (KNOWN_DEEPSEEK_MODELS.has(lowerModel)) { + return { valid: true } + } + return { + valid: false, + error: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型(会被服务端静默映射为 deepseek-v4-flash)。可用模型:deepseek-v4-pro, deepseek-v4-flash`, + } + } + + // Check if it matches ANTHROPIC_CUSTOM_MODEL_OPTION (pre-validated by the user) + if (normalizedModel === process.env.ANTHROPIC_CUSTOM_MODEL_OPTION) { + return { valid: true } + } + + // Check cache first + if (validModelCache.has(normalizedModel)) { + return { valid: true } + } + + // Try to make an actual API call with minimal parameters + try { + await sideQuery({ + model: normalizedModel, + max_tokens: 1, + maxRetries: 0, + querySource: 'model_validation', + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Hi', + cache_control: { type: 'ephemeral' }, + }, + ], + }, + ], + }) + + // If we got here, the model is valid + validModelCache.set(normalizedModel, true) return { valid: true } + } catch (error) { + return handleValidationError(error, normalizedModel) + } +} + +function 
handleValidationError( + error: unknown, + modelName: string, +): { valid: boolean; error: string } { + // NotFoundError (404) means the model doesn't exist + if (error instanceof NotFoundError) { + const fallback = get3PFallbackSuggestion(modelName) + const suggestion = fallback ? `. Try '${fallback}' instead` : '' + return { + valid: false, + error: `Model '${modelName}' not found${suggestion}`, + } + } + + // For other API errors, provide context-specific messages + if (error instanceof APIError) { + if (error instanceof AuthenticationError) { + return { + valid: false, + error: 'Authentication failed. Please check your API credentials.', + } + } + + if (error instanceof APIConnectionError) { + return { + valid: false, + error: 'Network error. Please check your internet connection.', + } + } + + // Check error body for model-specific errors + const errorBody = error.error as unknown + if ( + errorBody && + typeof errorBody === 'object' && + 'type' in errorBody && + errorBody.type === 'not_found_error' && + 'message' in errorBody && + typeof errorBody.message === 'string' && + errorBody.message.includes('model:') + ) { + return { valid: false, error: `Model '${modelName}' not found` } + } + + // Generic API error + return { valid: false, error: `API error: ${error.message}` } } + // For unknown errors, be safe and reject + const errorMessage = error instanceof Error ? error.message : String(error) return { valid: false, - error: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型(会被服务端静默映射为 deepseek-v4-flash)。可用模型:deepseek-v4-pro, deepseek-v4-flash`, + error: `Unable to validate model: ${errorMessage}`, + } +} + +// @[MODEL LAUNCH]: Add a fallback suggestion chain for the new model → previous version +/** + * Suggest a fallback model for 3P users when the selected model is unavailable. 
+ */ +function get3PFallbackSuggestion(model: string): string | undefined { + if (getAPIProvider() === 'firstParty') { + return undefined + } + const lowerModel = model.toLowerCase() + if (lowerModel.includes('opus-4-6') || lowerModel.includes('opus_4_6')) { + return getModelStrings().opus41 + } + if (lowerModel.includes('sonnet-4-6') || lowerModel.includes('sonnet_4_6')) { + return getModelStrings().sonnet45 + } + if (lowerModel.includes('sonnet-4-5') || lowerModel.includes('sonnet_4_5')) { + return getModelStrings().sonnet40 } + return undefined } From 9468124264d4a18a01c966233187014e85cf823f Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 24 Jun 2026 13:15:08 +0800 Subject: [PATCH 30/33] fix: restore claude-4-6 anchor in modelSupportsAutoMode allowlist The deepseek branch relaxed the Anthropic auto-mode allowlist regex from 'claude-(opus|sonnet)-4-6' to 'claude-(opus|sonnet)-4', which incorrectly enables auto mode for older Claude models (4-1, 4-5) that don't support it. Restore the '-6' anchor while keeping the /^deepseek-/ addition. Co-Authored-By: Claude Opus 4.6 --- src/utils/betas.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/betas.ts b/src/utils/betas.ts index db3ffd8..03c7952 100644 --- a/src/utils/betas.ts +++ b/src/utils/betas.ts @@ -193,7 +193,7 @@ export function modelSupportsAutoMode(model: string): boolean { // glm-5 and above (glm-5, glm-5.2, glm-6, …). if (/glm-[5-9]/.test(m)) return true // External allowlist (firstParty already checked above). 
- return /^claude-(opus|sonnet)-4/.test(m) || /^deepseek-/.test(m) + return /^claude-(opus|sonnet)-4-6/.test(m) || /^deepseek-/.test(m) } return false } From cd735b7234f43b33d21bce5a727980d5d37ff3f2 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 24 Jun 2026 18:45:31 +0800 Subject: [PATCH 31/33] fix: keep GLM/DeepSeek on firstParty provider, detect via env flags Reverting the APIProvider type extension: adding 'glm'/'deepseek' as distinct provider values broke every `=== 'firstParty'` gate in the codebase (modelSupportsAutoMode, shouldIncludeFirstPartyOnlyBetas, etc.), which would silently disable features for GLM/DeepSeek users. GLM and DeepSeek both ride Anthropic-compatible firstParty endpoints (ANTHROPIC_BASE_URL + new Anthropic(...)), so they ARE firstParty from the SDK perspective. Keep the APIProvider type unchanged and detect the specific backend via env flags: - isGLMProvider(): CLAUDE_USE_GLM=1 - isDeepSeekProvider(): CLAUDE_USE_DEEPSEEK=1 These gate only the model-aware adaptations (thinking, [ERROR] prefix, 429 retry, validateModel), not the provider routing itself. Co-Authored-By: Claude Opus 4.6 --- src/utils/model/providers.ts | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts index 0281a71..647f059 100644 --- a/src/utils/model/providers.ts +++ b/src/utils/model/providers.ts @@ -1,13 +1,7 @@ import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../../services/analytics/index.js' import { isEnvTruthy } from '../envUtils.js' -export type APIProvider = - | 'firstParty' - | 'bedrock' - | 'vertex' - | 'foundry' - | 'glm' - | 'deepseek' +export type APIProvider = 'firstParty' | 'bedrock' | 'vertex' | 'foundry' export function getAPIProvider(): APIProvider { return isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) @@ -16,31 +10,29 @@ export function getAPIProvider(): APIProvider { ? 
'vertex' : isEnvTruthy(process.env.CLAUDE_CODE_USE_FOUNDRY) ? 'foundry' - : isEnvTruthy(process.env.CLAUDE_USE_GLM) - ? 'glm' - : isEnvTruthy(process.env.CLAUDE_CODE_USE_DEEPSEEK) - ? 'deepseek' - : 'firstParty' + : 'firstParty' } /** - * True when the active provider is GLM (zhipu). - * GLM rides the firstParty SDK path (Anthropic-compatible endpoint via - * ANTHROPIC_BASE_URL) but needs model-aware gating for output limits, betas, - * and auto-permission mode. + * True when the active backend is GLM (zhipu), routed via an + * Anthropic-compatible firstParty endpoint (ANTHROPIC_BASE_URL). GLM stays on + * the firstParty APIProvider — this flag only gates model-aware behavior + * (output token limits, auto-mode, betas) that the generic firstParty path + * doesn't know about. */ export function isGLMProvider(): boolean { - return getAPIProvider() === 'glm' + return isEnvTruthy(process.env.CLAUDE_USE_GLM) } /** - * True when the active provider is DeepSeek. - * DeepSeek rides the firstParty SDK path but needs provider-aware gating for - * thinking simplification, [ERROR] tool_result prefixing, 429 retry policy, - * and model validation (DeepSeek silently remaps unknown model names). + * True when the active backend is DeepSeek, routed via an + * Anthropic-compatible firstParty endpoint. DeepSeek stays on the firstParty + * APIProvider — this flag gates DeepSeek-specific adaptations (thinking + * simplification, [ERROR] tool_result prefixing, 429 retry policy, model + * validation allowlist). 
*/ export function isDeepSeekProvider(): boolean { - return getAPIProvider() === 'deepseek' + return isEnvTruthy(process.env.CLAUDE_USE_DEEPSEEK) } export function isDeepSeekBaseUrl(baseUrl: string | undefined): boolean { From 0227be7238b3b48ed52d85157ec9bd3aeb0a2a11 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 24 Jun 2026 18:59:31 +0800 Subject: [PATCH 32/33] fix: gate session_id pinning to non-Anthropic providers, drop dead field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P0 fixes from code review: 1. claude.ts getAPIMetadata: the pinned session_id 'claude-code-ds' was unconditional, clobbering Anthropic session telemetry with a fake stable value. Now only applies when isDeepSeekProvider() || isGLMProvider(); real Anthropic keeps getSessionId(). 2. validateModel.ts: remove unused `warning?` field from the return type — it was added by the deepseek branch but never populated by any code path, and no caller destructures it. Co-Authored-By: Claude Opus 4.6 --- src/services/api/claude.ts | 14 +++++++++----- src/utils/model/validateModel.ts | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 7c77297..733896c 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -23,6 +23,7 @@ import { randomUUID } from 'crypto' import { getAPIProvider, isDeepSeekProvider, + isGLMProvider, isFirstPartyAnthropicBaseUrl, } from 'src/utils/model/providers.js' import { @@ -509,13 +510,16 @@ export function getAPIMetadata() { device_id: getOrCreateUserID(), // Only include OAuth account UUID when actively using OAuth authentication account_uuid: getOauthAccountInfo()?.accountUuid ?? '', - // DeepSeek's prompt cache keys on the full request body bytes. The + // GLM/DeepSeek prompt caches key on the full request body bytes. 
The // real session_id changes every launch and would force a fresh cache // entry per session — defeating the cache entirely. Pin to a stable - // sentinel so identical conversations across sessions share a cache - // entry. Real telemetry/analytics still get the live id via - // getSessionId() at the call sites that need it. - session_id: 'claude-code-ds', + // sentinel for non-Anthropic providers so identical conversations across + // sessions share a cache entry. Real Anthropic telemetry keeps the live + // session id. + session_id: + isDeepSeekProvider() || isGLMProvider() + ? 'claude-code-ds' + : getSessionId(), }), } } diff --git a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts index a57b49a..f3c9975 100644 --- a/src/utils/model/validateModel.ts +++ b/src/utils/model/validateModel.ts @@ -29,7 +29,7 @@ const KNOWN_DEEPSEEK_MODELS = new Set(['deepseek-v4-pro', 'deepseek-v4-flash']) */ export async function validateModel( model: string, -): Promise<{ valid: boolean; error?: string; warning?: string }> { +): Promise<{ valid: boolean; error?: string }> { const normalizedModel = model.trim() // Empty model is invalid From c529eab4a5472ae9c6d0b70380c50b59e2a0a391 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 24 Jun 2026 20:39:53 +0800 Subject: [PATCH 33/33] Revert "feat: name binary output after current git branch" This reverts commit 649d1f845eba67d0a20455122a04751ff2a25ef6. --- scripts/build.ts | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/build.ts b/scripts/build.ts index 2e8059f..f3c0eae 100644 --- a/scripts/build.ts +++ b/scripts/build.ts @@ -63,12 +63,8 @@ walkDir(SRC_DIR, (filePath) => { const isBinary = process.argv.includes('--binary') -// Name the binary after the current git branch, e.g. 
dist/claude-glm -const branch = execSync('git rev-parse --abbrev-ref HEAD', { encoding: 'utf-8' }).trim() || 'main' -const binaryName = `claude-${branch}` - console.log(`Enabled features: ${ENABLED_FEATURES.join(', ')} (patched ${modified.length} files)`) -console.log(`Build mode: ${isBinary ? `binary (standalone executable → dist/${binaryName})` : 'bundle (JS)'}`) +console.log(`Build mode: ${isBinary ? 'binary (standalone executable)' : 'bundle (JS)'}`) const MACRO_DEFINES = `--define 'MACRO.VERSION="2.1.87"' ` + @@ -85,7 +81,7 @@ const MACRO_DEFINES = try { if (isBinary) { execSync( - `bun build src/entrypoints/cli.tsx --compile --outfile=dist/${binaryName} ` + MACRO_DEFINES, + `bun build src/entrypoints/cli.tsx --compile --outfile=dist/claude ` + MACRO_DEFINES, { stdio: 'inherit', cwd: join(import.meta.dir, '..') }, ) } else {