diff --git a/docs/deepseek-vs-claude-pricing.md b/docs/deepseek-vs-claude-pricing.md
new file mode 100644
index 0000000..e4632ba
--- /dev/null
+++ b/docs/deepseek-vs-claude-pricing.md
@@ -0,0 +1,98 @@
+# DeepSeek vs Claude API Pricing
+
+Captured 2026-05-12. Sources:
+- DeepSeek: https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+- Anthropic: https://platform.claude.com/docs/en/about-claude/pricing
+- FX assumed: $1 ≈ ¥7.15
+
+## Per-model prices (per 1M tokens)
+
+### Flagship tier
+
+| Item | Claude Opus 4.7 | Claude Sonnet 4.6 | DeepSeek V4 Pro (discount) | DeepSeek V4 Pro (full) |
+|---|---:|---:|---:|---:|
+| Input (cache miss) | $5.00 / ¥35.75 | $3.00 / ¥21.45 | ¥3 | ¥12 |
+| Cache write (5m) | $6.25 / ¥44.69 | $3.75 / ¥26.81 | ¥3 | ¥12 |
+| Cache read | $0.50 / ¥3.58 | $0.30 / ¥2.15 | ¥0.025 | ¥0.10 |
+| Output | $25.00 / ¥178.75 | $15.00 / ¥107.25 | ¥6 | ¥24 |
+| Context window | 200K | 1M | 1M | 1M |
+| Max output tokens | varies | varies | 384K | 384K |
+
+DeepSeek V4 Pro discount (75% off, i.e. 25% of the full price) runs through 2026-05-31 23:59 Beijing time.
+After 2026-06-01 the full-price column applies unless DeepSeek extends it.
+
+### Lightweight tier
+
+| Item | Claude Haiku 4.5 | DeepSeek V4 Flash |
+|---|---:|---:|
+| Input (cache miss) | $1.00 / ¥7.15 | ¥1 |
+| Cache write (5m) | $1.25 / ¥8.94 | ¥1 |
+| Cache read | $0.10 / ¥0.72 | ¥0.02 |
+| Output | $5.00 / ¥35.75 | ¥2 |
+| Context window | 200K | 1M |
+
+## Multiplier view (how much Claude costs vs DeepSeek discount price)
+
+### Pro vs Sonnet 4.6 (same-tier comparison)
+
+| Item | Sonnet 4.6 / Pro multiplier |
+|---|---:|
+| Input (cache miss) | 7.15x |
+| Cache read | 85.8x |
+| Output | 17.9x |
+
+### Pro vs Opus 4.7 (cross-tier comparison)
+
+| Item | Opus 4.7 / Pro multiplier |
+|---|---:|
+| Input (cache miss) | 11.9x |
+| Cache read | 143x |
+| Output | 29.8x |
+
+### Flash vs Haiku 4.5
+
+| Item | Haiku 4.5 / Flash multiplier |
+|---|---:|
+| Input (cache miss) | 7.15x |
+| Cache read | 35.8x |
+| Output | 17.9x |
+
+## Realistic Claude Code request cost
+
+Measured pattern: 15,872 cache_read + 66 input + 30 output tokens
+(observed first cache hit after the session_id-pinning fix in 283678a).
+
+| Provider | Cost per request | Per 100 requests |
+|---|---:|---:|
+| Claude Opus 4.7 | $0.0090 / ¥0.064 | ¥6.45 |
+| Claude Sonnet 4.6 | $0.0054 / ¥0.039 | ¥3.90 |
+| Claude Haiku 4.5 | $0.0018 / ¥0.013 | ¥1.30 |
+| DeepSeek V4 Pro (discount) | ¥0.000775 | ¥0.077 |
+| DeepSeek V4 Flash | ¥0.000443 | ¥0.044 |
+
+## Headline ratios at the typical Claude Code workload
+
+- DeepSeek Pro vs Claude Sonnet 4.6: **~50x cheaper**
+- DeepSeek Pro vs Claude Opus 4.7: **~83x cheaper**
+- DeepSeek Flash vs Claude Haiku 4.5: **~30x cheaper**
+
+After the 2026-05-31 discount expires, Pro full price would be 4x its
+current rate; ratios shrink to roughly 12x vs Sonnet, 21x vs Opus, but the
+gap remains substantial.
+
+## Caveats
+
+- **No Opus-class DeepSeek model.** Tasks that genuinely need Opus-level
+ reasoning (long math proofs, deep architecture work) have no DeepSeek
+ equivalent.
+- **Capability gaps with the Anthropic API**: DeepSeek's /anthropic endpoint
+ does not implement image/document content blocks, computer use, server-side
+ web search beyond what the model emits, MCP gateway, or redacted_thinking.
+ See `docs/openclaude-commits-review.md` and the deepseek branch port commits
+ for the adaptations Claude Code makes for these.
+- **Cache hit assumptions**: the per-request cost above assumes the
+ session_id pinning fix is in effect (commit 283678a). Without it,
+ cache_read_input_tokens stays at 0 and DeepSeek per-request cost is
+ roughly 60x higher (every input token is billed at the cache-miss price).
+- **No /count_tokens endpoint on DeepSeek.** Local UTF-8 byte estimation is
+ used instead (commit 6fbad90).
diff --git a/scripts/build.ts b/scripts/build.ts
index 4a98314..f3c0eae 100644
--- a/scripts/build.ts
+++ b/scripts/build.ts
@@ -20,7 +20,11 @@ const ENABLED_FEATURES = [
'MCP_SKILLS',
'HISTORY_PICKER',
'TREE_SITTER_BASH',
- 'NATIVE_CLIENT_ATTESTATION',
+ // NATIVE_CLIENT_ATTESTATION intentionally disabled: it injects a 'cch=00000'
+ // placeholder into the x-anthropic-billing-header and computes a body hash.
+ // GLM/DeepSeek's /anthropic endpoints do not validate this header — sending
+ // it adds CPU + an HTTP header without value. Keep off for non-Anthropic
+ // providers.
'BRIDGE_MODE',
'COORDINATOR_MODE',
]
diff --git a/src/commands/cost/cost.ts b/src/commands/cost/cost.ts
index c9fb0cb..0eaf9f6 100644
--- a/src/commands/cost/cost.ts
+++ b/src/commands/cost/cost.ts
@@ -1,24 +1,7 @@
import { formatTotalCost } from '../../cost-tracker.js'
-import { currentLimits } from '../../services/claudeAiLimits.js'
import type { LocalCommandCall } from '../../types/command.js'
-import { isClaudeAISubscriber } from '../../utils/auth.js'
+// DeepSeek: no Claude.ai subscription; always show the formatted cost.
export const call: LocalCommandCall = async () => {
- if (isClaudeAISubscriber()) {
- let value: string
-
- if (currentLimits.isUsingOverage) {
- value =
- 'You are currently using your overages to power your Claude Code usage. We will automatically switch you back to your subscription rate limits when they reset'
- } else {
- value =
- 'You are currently using your subscription to power your Claude Code usage'
- }
-
- if (process.env.USER_TYPE === 'ant') {
- value += `\n\n[ANT-ONLY] Showing cost anyway:\n ${formatTotalCost()}`
- }
- return { type: 'text', value }
- }
return { type: 'text', value: formatTotalCost() }
}
diff --git a/src/components/CostThresholdDialog.tsx b/src/components/CostThresholdDialog.tsx
index bdf9f53..d0639d4 100644
--- a/src/components/CostThresholdDialog.tsx
+++ b/src/components/CostThresholdDialog.tsx
@@ -38,7 +38,7 @@ export function CostThresholdDialog(t0) {
}
let t4;
if ($[4] !== onDone || $[5] !== t3) {
- t4 = ;
+ t4 = ;
$[4] = onDone;
$[5] = t3;
$[6] = t4;
diff --git a/src/components/Message.tsx b/src/components/Message.tsx
index ca2ef76..886e917 100644
--- a/src/components/Message.tsx
+++ b/src/components/Message.tsx
@@ -538,9 +538,6 @@ function AssistantMessageBlock(t0) {
}
case "thinking":
{
- if (!isTranscriptMode && !verbose) {
- return null;
- }
const isLastThinking = !lastThinkingBlockId || thinkingBlockId === lastThinkingBlockId;
const t1 = isTranscriptMode && !isLastThinking;
let t2;
diff --git a/src/components/Settings/Config.tsx b/src/components/Settings/Config.tsx
index 37ee93c..7c09649 100644
--- a/src/components/Settings/Config.tsx
+++ b/src/components/Settings/Config.tsx
@@ -281,6 +281,26 @@ export function Config({
enabled: autoCompactEnabled
});
}
+ }, {
+ id: 'autoMemoryEnabled',
+ label: 'Auto-memory',
+ // settings.json default is "undefined" which the resolver in
+ // src/memdir/paths.ts treats as "disabled" on this branch. Mirror
+ // that so the toggle reflects what the runtime actually does.
+ value: settingsData?.autoMemoryEnabled ?? false,
+ type: 'boolean' as const,
+ onChange(autoMemoryEnabled: boolean) {
+ updateSettingsForSource('localSettings', {
+ autoMemoryEnabled
+ });
+ setSettingsData(prev_auto_mem => ({
+ ...prev_auto_mem,
+ autoMemoryEnabled
+ }));
+ logEvent('tengu_auto_memory_setting_changed', {
+ enabled: autoMemoryEnabled
+ });
+ }
}, {
id: 'spinnerTipsEnabled',
label: 'Show tips',
diff --git a/src/components/ThinkingToggle.tsx b/src/components/ThinkingToggle.tsx
index a7b7a1b..2a4604e 100644
--- a/src/components/ThinkingToggle.tsx
+++ b/src/components/ThinkingToggle.tsx
@@ -30,11 +30,11 @@ export function ThinkingToggle(t0) {
t1 = [{
value: "true",
label: "Enabled",
- description: "Claude will think before responding"
+ description: "Model will think before responding"
}, {
value: "false",
label: "Disabled",
- description: "Claude will respond without extended thinking"
+ description: "Model will respond without extended thinking"
}];
$[0] = t1;
} else {
diff --git a/src/components/messages/AssistantThinkingMessage.tsx b/src/components/messages/AssistantThinkingMessage.tsx
index 3825f5f..af4b492 100644
--- a/src/components/messages/AssistantThinkingMessage.tsx
+++ b/src/components/messages/AssistantThinkingMessage.tsx
@@ -4,6 +4,7 @@ import React from 'react';
import { Box, Text } from '../../ink.js';
import { CtrlOToExpand } from '../CtrlOToExpand.js';
import { Markdown } from '../Markdown.js';
+import { useSettings } from '../../hooks/useSettings.js';
type Props = {
// Accept either full ThinkingBlock/ThinkingBlockParam or a minimal shape with just type and thinking
param: ThinkingBlock | ThinkingBlockParam | {
@@ -36,7 +37,8 @@ export function AssistantThinkingMessage(t0) {
if (hideInTranscript) {
return null;
}
- const shouldShowFullThinking = isTranscriptMode || verbose;
+ const settings = useSettings();
+ const shouldShowFullThinking = isTranscriptMode || verbose || settings.alwaysThinkingEnabled !== false;
if (!shouldShowFullThinking) {
const t4 = addMargin ? 1 : 0;
let t5;
diff --git a/src/constants/system.ts b/src/constants/system.ts
index 0cd2e76..9495bd8 100644
--- a/src/constants/system.ts
+++ b/src/constants/system.ts
@@ -3,7 +3,7 @@
import { feature } from 'bun:bundle'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
import { logForDebugging } from '../utils/debug.js'
-import { isEnvDefinedFalsy } from '../utils/envUtils.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
import { getAPIProvider } from '../utils/model/providers.js'
import { getWorkload } from '../utils/workloadContext.js'
@@ -47,13 +47,10 @@ export function getCLISyspromptPrefix(options?: {
/**
* Check if attribution header is enabled.
- * Enabled by default, can be disabled via env var or GrowthBook killswitch.
+ * Disabled by default, can be enabled via env var.
*/
function isAttributionHeaderEnabled(): boolean {
- if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) {
- return false
- }
- return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true)
+ return isEnvTruthy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)
}
/**
diff --git a/src/context.ts b/src/context.ts
index 423414d..b614490 100644
--- a/src/context.ts
+++ b/src/context.ts
@@ -4,7 +4,7 @@ import {
getAdditionalDirectoriesForClaudeMd,
setCachedClaudeMdContent,
} from './bootstrap/state.js'
-import { getLocalISODate } from './constants/common.js'
+import { getSessionStartDate } from './constants/common.js'
import {
filterInjectedMemoryFiles,
getClaudeMds,
@@ -17,7 +17,7 @@ import { getBranch, getDefaultBranch, getIsGit, gitExe } from './utils/git.js'
import { shouldIncludeGitInstructions } from './utils/gitSettings.js'
import { logError } from './utils/log.js'
-const MAX_STATUS_CHARS = 2000
+const MAX_STATUS_CHARS = 1000
// System prompt injection for cache breaking (ant-only, ephemeral debugging state)
let systemPromptInjection: string | null = null
@@ -85,7 +85,7 @@ export const getGitStatus = memoize(async (): Promise => {
const truncatedStatus =
status.length > MAX_STATUS_CHARS
? status.substring(0, MAX_STATUS_CHARS) +
- '\n... (truncated because it exceeds 2k characters. If you need more information, run "git status" using BashTool)'
+ '\n... (truncated because it exceeds 1k characters. If you need more information, run "git status" using BashTool)'
: status
logForDiagnosticsNoPII('info', 'git_status_completed', {
@@ -183,7 +183,9 @@ export const getUserContext = memoize(
return {
...(claudeMd && { claudeMd }),
- currentDate: `Today's date is ${getLocalISODate()}.`,
+ // Use session-stable date to preserve DeepSeek's server-side prefix
+ // cache across midnight (was getLocalISODate() — refreshes daily).
+ currentDate: `Today's date is ${getSessionStartDate()}.`,
}
},
)
diff --git a/src/cost-tracker.ts b/src/cost-tracker.ts
index b03184c..72375bf 100644
--- a/src/cost-tracker.ts
+++ b/src/cost-tracker.ts
@@ -44,8 +44,11 @@ import {
import { isFastModeEnabled } from './utils/fastMode.js'
import { formatDuration, formatNumber } from './utils/format.js'
import type { FpsMetrics } from './utils/fpsTracker.js'
-import { getCanonicalName } from './utils/model/model.js'
-import { calculateUSDCost } from './utils/modelCost.js'
+import {
+ getCanonicalName,
+ getDefaultMainLoopModelSetting,
+} from './utils/model/model.js'
+import { calculateUSDCost, getModelCosts } from './utils/modelCost.js'
export {
getTotalCostUSD as getTotalCost,
getTotalDuration,
@@ -175,7 +178,7 @@ export function saveCurrentSessionCosts(fpsMetrics?: FpsMetrics): void {
}
function formatCost(cost: number, maxDecimalPlaces: number = 4): string {
- return `$${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}`
+ return `¥${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}`
}
function formatModelUsage(): string {
@@ -234,12 +237,35 @@ export function formatTotalCost(): string {
const modelUsageDisplay = formatModelUsage()
+ // DeepSeek-specific: show prompt cache hit rate and yuan savings, since the
+ // /anthropic endpoint exposes cache_read / cache_creation token counts and
+ // these are the headline cost driver (cache reads are ~120x cheaper).
+ let cacheStatsDisplay = ''
+ const cacheRead = getTotalCacheReadInputTokens()
+ const cacheCreation = getTotalCacheCreationInputTokens()
+ const directInput = getTotalInputTokens()
+ const totalInput = cacheRead + cacheCreation + directInput
+ if (totalInput > 0) {
+ const hitRate = (cacheRead / totalInput) * 100
+ const model = getDefaultMainLoopModelSetting()
+ const costs = getModelCosts(model, {
+ input_tokens: 0,
+ output_tokens: 0,
+ } as Usage)
+ const savings =
+ (cacheRead / 1_000_000) *
+ (costs.inputTokens - costs.promptCacheReadTokens)
+ cacheStatsDisplay =
+ `\nCache hit rate: ${hitRate.toFixed(1)}% (${formatNumber(cacheRead)} / ${formatNumber(totalInput)} input tokens)` +
+ `\nCache savings: ${formatCost(savings)}`
+ }
+
return chalk.dim(
`Total cost: ${costDisplay}\n` +
`Total duration (API): ${formatDuration(getTotalAPIDuration())}
Total duration (wall): ${formatDuration(getTotalDuration())}
Total code changes: ${getTotalLinesAdded()} ${getTotalLinesAdded() === 1 ? 'line' : 'lines'} added, ${getTotalLinesRemoved()} ${getTotalLinesRemoved() === 1 ? 'line' : 'lines'} removed
-${modelUsageDisplay}`,
+${modelUsageDisplay}${cacheStatsDisplay}`,
)
}
diff --git a/src/costHook.ts b/src/costHook.ts
index 798a093..fa740c4 100644
--- a/src/costHook.ts
+++ b/src/costHook.ts
@@ -1,6 +1,5 @@
import { useEffect } from 'react'
import { formatTotalCost, saveCurrentSessionCosts } from './cost-tracker.js'
-import { hasConsoleBillingAccess } from './utils/billing.js'
import type { FpsMetrics } from './utils/fpsTracker.js'
export function useCostSummary(
@@ -8,10 +7,8 @@ export function useCostSummary(
): void {
useEffect(() => {
const f = () => {
- if (hasConsoleBillingAccess()) {
- process.stdout.write('\n' + formatTotalCost() + '\n')
- }
-
+ // DeepSeek: there's no Anthropic console billing tier; always print.
+ process.stdout.write('\n' + formatTotalCost() + '\n')
saveCurrentSessionCosts(getFpsMetrics?.())
}
process.on('exit', f)
diff --git a/src/memdir/paths.ts b/src/memdir/paths.ts
index 68a6baf..c794a8b 100644
--- a/src/memdir/paths.ts
+++ b/src/memdir/paths.ts
@@ -51,7 +51,12 @@ export function isAutoMemoryEnabled(): boolean {
if (settings.autoMemoryEnabled !== undefined) {
return settings.autoMemoryEnabled
}
- return true
+ // DeepSeek branch default: off. The auto-memory section injects ~3145
+ // fixed tokens into every system prompt (a 32% surcharge on a minimal
+ // -p call). Users who want it can flip it in /config -> Auto-memory or
+ // set autoMemoryEnabled: true in settings.json (or unset
+ // CLAUDE_CODE_DISABLE_AUTO_MEMORY=0).
+ return false
}
/**
diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx
index b483ead..7b99a10 100644
--- a/src/screens/REPL.tsx
+++ b/src/screens/REPL.tsx
@@ -2207,7 +2207,9 @@ export function REPL({
};
useEffect(() => {
const totalCost = getTotalCost();
- if (totalCost >= 5 /* $5 */ && !showCostDialog && !haveShownCostDialog) {
+ // DeepSeek: pricing is in CNY; raise the threshold to roughly match the
+ // user-perceived "5 USD" notification level (1 USD ≈ 7 CNY, rounded up).
+ if (totalCost >= 35 && !showCostDialog && !haveShownCostDialog) {
logEvent('tengu_cost_threshold_reached', {});
// Mark as shown even if the dialog won't render (no console billing
// access). Otherwise this effect re-fires on every message change for
diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts
index 4c09f06..733896c 100644
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -22,6 +22,8 @@ import type { Stream } from '@anthropic-ai/sdk/streaming.mjs'
import { randomUUID } from 'crypto'
import {
getAPIProvider,
+ isDeepSeekProvider,
+ isGLMProvider,
isFirstPartyAnthropicBaseUrl,
} from 'src/utils/model/providers.js'
import {
@@ -508,7 +510,16 @@ export function getAPIMetadata() {
device_id: getOrCreateUserID(),
// Only include OAuth account UUID when actively using OAuth authentication
account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
- session_id: getSessionId(),
+ // GLM/DeepSeek prompt caches key on the full request body bytes. The
+ // real session_id changes every launch and would force a fresh cache
+ // entry per session — defeating the cache entirely. Pin to a stable
+ // sentinel for non-Anthropic providers so identical conversations across
+ // sessions share a cache entry. Real Anthropic telemetry keeps the live
+ // session id.
+ session_id:
+ isDeepSeekProvider() || isGLMProvider()
+ ? 'claude-code-ds'
+ : getSessionId(),
}),
}
}
@@ -935,6 +946,77 @@ function isToolResult(
return block.type === 'tool_result'
}
+/**
+ * DeepSeek silently ignores the `is_error: true` flag on tool_result blocks,
+ * so failures are surfaced as a leading "[ERROR]" text block instead. String
+ * content becomes a text block; empty or missing content adds no block,
+ * since the Anthropic Messages API rejects empty text blocks. Nested content
+ * arrays are walked too. Returns the input array itself if nothing changed.
+ */
+function prefixDeepSeekErrorToolResults(
+  blocks: BetaContentBlockParam[],
+): BetaContentBlockParam[] {
+  let changed = false
+  const updated = blocks.map(block => {
+    if (!isToolResult(block)) return block
+
+    let nextBlock = block
+    let blockChanged = false
+
+    if ((block as { is_error?: boolean }).is_error) {
+      const content = Array.isArray(block.content)
+        ? block.content
+        : block.content ? [{ type: 'text', text: block.content }] : []
+      const prefixed = [
+        { type: 'text', text: '[ERROR] Tool execution failed:' },
+        ...content,
+      ] as BetaContentBlockParam[]
+      nextBlock = { ...block, content: prefixed } as typeof block
+      blockChanged = true
+    }
+
+    if (Array.isArray(nextBlock.content)) {
+      const nested = prefixDeepSeekErrorToolResults(
+        nextBlock.content as BetaContentBlockParam[],
+      )
+      if (nested !== nextBlock.content) {
+        nextBlock = { ...nextBlock, content: nested } as typeof block
+        blockChanged = true
+      }
+    }
+
+    if (blockChanged) {
+      changed = true
+      return nextBlock
+    }
+    return block
+  })
+
+  return changed ? updated : blocks
+}
+
+/**
+ * Runs prefixDeepSeekErrorToolResults over each message's array content;
+ * unchanged messages (and an unchanged list) are returned by reference.
+ */
+function applyDeepSeekErrorPrefix(
+  messages: (UserMessage | AssistantMessage)[],
+): (UserMessage | AssistantMessage)[] {
+  let anyRewritten = false
+  const rewritten = messages.map(original => {
+    const blocks = original.message.content
+    if (!Array.isArray(blocks)) return original
+    const patched = prefixDeepSeekErrorToolResults(
+      blocks as BetaContentBlockParam[],
+    )
+    if (patched === blocks) return original
+    anyRewritten = true
+    const message = { ...original.message, content: patched }
+    return { ...original, message } as typeof original
+  })
+  return anyRewritten ? rewritten : messages
+}
+
/**
* Ensures messages contain at most `limit` media items (images + documents).
* Strips oldest media first to preserve the most recent.
@@ -1300,6 +1382,13 @@ async function* queryModel(
API_MAX_MEDIA_PER_REQUEST,
)
+ // DeepSeek ignores is_error on tool_result blocks; prefix failed results
+ // with literal "[ERROR]" text so the model can detect them. Anthropic and
+ // GLM handle is_error correctly, so this is DeepSeek-only.
+ if (isDeepSeekProvider()) {
+ messagesForAPI = applyDeepSeekErrorPrefix(messagesForAPI)
+ }
+
// Instrumentation: Track message count after normalization
logEvent('tengu_api_after_normalize', {
postNormalizedMessageCount: messagesForAPI.length,
@@ -1381,6 +1470,14 @@ async function* queryModel(
}
const allTools = [...toolSchemas, ...extraToolSchemas]
+ // Sort tools alphabetically by name for stable ordering to maximize
+ // DeepSeek's server-side prefix cache hits across requests.
+ allTools.sort((a, b) => {
+ const nameA = 'name' in a ? a.name : ''
+ const nameB = 'name' in b ? b.name : ''
+ return nameA.localeCompare(nameB)
+ })
+
const isFastMode =
isFastModeEnabled() &&
isFastModeAvailable() &&
@@ -1588,7 +1685,15 @@ async function* queryModel(
// without notifying the model launch DRI and research. This is a sensitive
// setting that can greatly affect model quality and bashing.
if (hasThinking && modelSupportsThinking(options.model)) {
- if (
+ if (isDeepSeekProvider()) {
+ // DeepSeek controls thinking depth via CLAUDE_CODE_EFFORT_LEVEL alone;
+ // budget_tokens is ignored server-side. Send a minimal thinking param
+ // so the SDK knows to expect thinking blocks in the response.
+ thinking = {
+ budget_tokens: maxOutputTokens - 1,
+ type: 'enabled',
+ } satisfies BetaMessageStreamParams['thinking']
+ } else if (
!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) &&
modelSupportsAdaptiveThinking(options.model)
) {
diff --git a/src/services/api/errorUtils.ts b/src/services/api/errorUtils.ts
index 20e4441..f562e65 100644
--- a/src/services/api/errorUtils.ts
+++ b/src/services/api/errorUtils.ts
@@ -242,6 +242,15 @@ export function formatAPIError(error: APIError): string {
return 'Unable to connect to API. Check your internet connection'
}
+ if (error.status === 402) {
+ return 'DeepSeek 账户余额不足,请在 platform.deepseek.com 充值后重试'
+ }
+
+ if (error.status === 422) {
+ const nested = extractNestedErrorMessage(error)
+ return `DeepSeek 请求参数无效(422):${nested || error.message || '请检查工具定义和消息格式'}`
+ }
+
// Guard: when deserialized from JSONL (e.g. --resume), the error object may
// be a plain object without a `.message` property. Return a safe fallback
// instead of undefined, which would crash callers that access `.length`.
diff --git a/src/services/api/errors.ts b/src/services/api/errors.ts
index 1a7edc5..a47f09f 100644
--- a/src/services/api/errors.ts
+++ b/src/services/api/errors.ts
@@ -437,7 +437,8 @@ export function getAssistantMessageFromError(
error.message.toLowerCase().includes('timeout'))
) {
return createAssistantAPIErrorMessage({
- content: API_TIMEOUT_ERROR_MESSAGE,
+ content:
+ '请求超时。DeepSeek 服务端排队等待超过上限后断开了连接,请稍后重试或降低 effort 等级',
error: 'unknown',
})
}
@@ -462,6 +463,14 @@ export function getAssistantMessageFromError(
})
}
+ // DeepSeek 429 — simple rate-limit message (no Anthropic-specific headers).
+ if (error instanceof APIError && error.status === 429) {
+ return createAssistantAPIErrorMessage({
+ content: '请求频率超限,请稍后重试',
+ error: 'rate_limit',
+ })
+ }
+
if (
error instanceof APIError &&
error.status === 429 &&
@@ -993,6 +1002,16 @@ export function classifyAPIError(error: unknown): string {
return 'capacity_off_switch'
}
+ // DeepSeek: insufficient account balance
+ if (error instanceof APIError && error.status === 402) {
+ return 'insufficient_balance'
+ }
+
+ // DeepSeek: invalid request parameters
+ if (error instanceof APIError && error.status === 422) {
+ return 'invalid_parameters'
+ }
+
// Rate limiting
if (error instanceof APIError && error.status === 429) {
return 'rate_limit'
@@ -1205,3 +1224,15 @@ export function getErrorMessageIfRefusal(
error: 'invalid_request',
})
}
+
+/**
+ * Extract DeepSeek's trace ID (`x-ds-trace-id` response header) from an API error, for debugging.
+ */
+export function extractDeepSeekTraceId(error: APIError): string | undefined {
+  const headers = error.headers
+  if (!headers) return undefined
+  if (typeof headers.get === 'function') {
+    return headers.get('x-ds-trace-id') ?? undefined
+  }
+  return (headers as Record<string, string | undefined>)['x-ds-trace-id'] // plain-object headers
+}
diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts
index 5ec9ad0..cfc94f6 100644
--- a/src/services/api/withRetry.ts
+++ b/src/services/api/withRetry.ts
@@ -11,7 +11,10 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
-import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
+import {
+ getAPIProviderForStatsig,
+ isDeepSeekProvider,
+} from 'src/utils/model/providers.js'
import {
clearApiKeyHelperCache,
clearAwsCredentialsCache,
@@ -694,6 +697,12 @@ function handleGcpCredentialError(error: unknown): boolean {
}
function shouldRetry(error: APIError): boolean {
+ // 402 Insufficient Balance (DeepSeek) — retrying won't help, the account
+ // needs a top-up. Fail fast.
+ if (error.status === 402) {
+ return false
+ }
+
// Never retry mock errors - they're from /mock-limits command for testing
if (isMockRateLimitError(error)) {
return false
@@ -762,10 +771,11 @@ function shouldRetry(error: APIError): boolean {
// Retry on lock timeouts.
if (error.status === 409) return true
- // Retry on rate limits, but not for ClaudeAI Subscription users
- // Enterprise users can retry because they typically use PAYG instead of rate limits
+ // Retry on rate limits. DeepSeek has no subscriber tiers, so always retry
+ // 429 with exponential backoff. Anthropic retains the subscriber gate so
+ // ClaudeAI subscription users don't churn through rate limits.
if (error.status === 429) {
- return !isClaudeAISubscriber() || isEnterpriseSubscriber()
+ return isDeepSeekProvider() || !isClaudeAISubscriber() || isEnterpriseSubscriber()
}
// Clear API key cache on 401 and allow retry.
diff --git a/src/services/tokenEstimation.ts b/src/services/tokenEstimation.ts
index acaef7a..30a53b3 100644
--- a/src/services/tokenEstimation.ts
+++ b/src/services/tokenEstimation.ts
@@ -121,26 +121,21 @@ function stripToolSearchFieldsFromMessages(
})
}
+// DeepSeek has no /count_tokens endpoint. Returning null forces callers to
+// use rough estimation (roughTokenCountEstimation), which uses UTF-8 byte
+// length for accuracy on CJK-heavy content.
export async function countTokensWithAPI(
- content: string,
+ _content: string,
): Promise {
- // Special case for empty content - API doesn't accept empty messages
- if (!content) {
- return 0
- }
-
- const message: Anthropic.Beta.Messages.BetaMessageParam = {
- role: 'user',
- content: content,
- }
-
- return countMessagesTokensWithAPI([message], [])
+ return null
}
export async function countMessagesTokensWithAPI(
messages: Anthropic.Beta.Messages.BetaMessageParam[],
tools: Anthropic.Beta.Messages.BetaToolUnion[],
): Promise {
+ // DeepSeek has no /count_tokens endpoint; fall back to rough estimation.
+ return null
return withTokenCountVCR(messages, tools, async () => {
try {
const model = getMainLoopModel()
@@ -204,7 +199,10 @@ export function roughTokenCountEstimation(
content: string,
bytesPerToken: number = 4,
): number {
- return Math.round(content.length / bytesPerToken)
+ // DeepSeek tokenizer is byte-pair on UTF-8 bytes, so CJK characters consume
+ // 3 bytes/char rather than 1. content.length under-estimates by ~3x on
+ // Chinese content. Buffer.byteLength matches DeepSeek's actual tokenization.
+ return Math.round(Buffer.byteLength(content, 'utf8') / bytesPerToken)
}
/**
@@ -252,6 +250,9 @@ export async function countTokensViaHaikuFallback(
messages: Anthropic.Beta.Messages.BetaMessageParam[],
tools: Anthropic.Beta.Messages.BetaToolUnion[],
): Promise {
+ // DeepSeek: no count_tokens endpoint, no Haiku fallback model — return null
+ // so callers use roughTokenCountEstimation.
+ return null
// Check if messages contain thinking blocks
const containsThinking = hasThinkingBlocks(messages)
diff --git a/src/utils/__tests__/glmAutoMode.test.ts b/src/utils/__tests__/glmAutoMode.test.ts
new file mode 100644
index 0000000..6708070
--- /dev/null
+++ b/src/utils/__tests__/glmAutoMode.test.ts
@@ -0,0 +1,43 @@
+import { describe, it, expect, beforeAll, vi } from 'vitest'
+
+// Auto mode must be reachable for GLM-5+ models. The two gates that previously
+// blocked it: modelSupportsAutoMode (model allowlist) and the auto-mode
+// enabled-state default (GrowthBook kill-switch is never served on GLM).
+beforeAll(() => {
+ process.env.FEATURES = 'TRANSCRIPT_CLASSIFIER'
+ process.env.USER_TYPE = 'external'
+ // GLM runs against an Anthropic-compatible firstParty endpoint.
+ delete process.env.CLAUDE_CODE_USE_BEDROCK
+ delete process.env.CLAUDE_CODE_USE_VERTEX
+ delete process.env.CLAUDE_CODE_USE_FOUNDRY
+})
+
+describe('modelSupportsAutoMode for GLM', () => {
+ it('enables auto mode for glm-5 and above, not glm-4', async () => {
+ const { modelSupportsAutoMode } = await import('../betas.js')
+ expect(modelSupportsAutoMode('glm-5')).toBe(true)
+ expect(modelSupportsAutoMode('glm-5.2')).toBe(true)
+ expect(modelSupportsAutoMode('glm-6')).toBe(true)
+ expect(modelSupportsAutoMode('glm-4.5')).toBe(false)
+ // Anthropic allowlist still honored.
+ expect(modelSupportsAutoMode('claude-opus-4-6')).toBe(true)
+ expect(modelSupportsAutoMode('claude-opus-4-1')).toBe(false)
+ })
+})
+
+// The enabled-state default lives in permissionSetup, but importing that
+// module transitively loads the classifier prompt (a .txt require unsupported
+// in vitest). Assert the predicate the default uses directly — it must match
+// glm-5 and above, reject glm-4, and reject Anthropic models (which keep the
+// 'disabled' circuit-breaker default).
+describe('auto-mode enabled-state default predicate for GLM', () => {
+ const isGlmForced = (m: string) => /glm-[5-9]/.test(m.toLowerCase())
+ it('matches glm-5+ only', () => {
+ expect(isGlmForced('glm-5')).toBe(true)
+ expect(isGlmForced('glm-5.2')).toBe(true)
+ expect(isGlmForced('glm-6')).toBe(true)
+ expect(isGlmForced('GLM-5.2')).toBe(true)
+ expect(isGlmForced('glm-4.5')).toBe(false)
+ expect(isGlmForced('claude-opus-4-6')).toBe(false)
+ })
+})
diff --git a/src/utils/apiPreconnect.ts b/src/utils/apiPreconnect.ts
index 6a8de64..253926f 100644
--- a/src/utils/apiPreconnect.ts
+++ b/src/utils/apiPreconnect.ts
@@ -25,6 +25,7 @@
import { getOauthConfig } from '../constants/oauth.js'
import { isEnvTruthy } from './envUtils.js'
+import { isEssentialTrafficOnly } from './privacyLevel.js'
let fired = false
@@ -32,6 +33,10 @@ export function preconnectAnthropicApi(): void {
if (fired) return
fired = true
+ // Also skip when non-essential traffic is disabled via
+ // CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC / DISABLE_TELEMETRY / proxy env.
+ if (isEssentialTrafficOnly()) return
+
// Skip if using a cloud provider — different endpoint + auth
if (
isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) ||
diff --git a/src/utils/betas.ts b/src/utils/betas.ts
index fcd7b97..03c7952 100644
--- a/src/utils/betas.ts
+++ b/src/utils/betas.ts
@@ -188,8 +188,12 @@ export function modelSupportsAutoMode(model: string): boolean {
if (/claude-(opus|sonnet|haiku)-4(?!-[6-9])/.test(m)) return false
return true
}
+ // GLM models (served over an Anthropic-compatible firstParty endpoint)
+ // support auto mode — the classifier runs against the same transcript.
+ // glm-5 and above (glm-5, glm-5.2, glm-6, …).
+ if (/glm-[5-9]/.test(m)) return true
// External allowlist (firstParty already checked above).
- return /^claude-(opus|sonnet)-4-6/.test(m)
+ return /^claude-(opus|sonnet)-4-6/.test(m) || /^deepseek-/.test(m)
}
return false
}
diff --git a/src/utils/context.ts b/src/utils/context.ts
index 06b235e..b976b7b 100644
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -161,7 +161,10 @@ export function getModelMaxOutputTokens(model: string): {
const m = getCanonicalName(model)
- if (m.includes('opus-4-6')) {
+ if (m.includes('glm-5.2')) {
+ defaultTokens = 64_000
+ upperLimit = 131_072
+ } else if (m.includes('opus-4-6')) {
defaultTokens = 64_000
upperLimit = 128_000
} else if (m.includes('sonnet-4-6')) {
diff --git a/src/utils/messages.ts b/src/utils/messages.ts
index 7d8db97..f6c3ee5 100644
--- a/src/utils/messages.ts
+++ b/src/utils/messages.ts
@@ -2244,22 +2244,26 @@ export function normalizeMessagesForAPI(
}
// Find a previous assistant message with the same message ID and merge.
- // Walk backwards, skipping tool results and different-ID assistants,
- // since concurrent agents (teammates) can interleave streaming content
- // blocks from multiple API responses with different message IDs.
+ // Walk backwards, skipping different-ID assistants, since concurrent
+ // agents (teammates) can interleave streaming content blocks from
+ // multiple API responses with different message IDs.
+ //
+ // Do NOT skip tool_result messages — when claude.ts yields separate
+ // AssistantMessages for thinking and tool_use blocks (same message.id),
+ // a StreamingToolExecutor tool_result can land between them. Merging
+ // across that boundary produces duplicate tool_use IDs that downstream
+ // ensureToolResultPairing strips, leaving orphaned tool_results and
+ // ultimately consecutive user messages → API 400 (CC-1215).
for (let i = result.length - 1; i >= 0; i--) {
const msg = result[i]!
- if (msg.type !== 'assistant' && !isToolResultMessage(msg)) {
+ if (msg.type !== 'assistant') {
break
}
- if (msg.type === 'assistant') {
- if (msg.message.id === normalizedMessage.message.id) {
- result[i] = mergeAssistantMessages(msg, normalizedMessage)
- return
- }
- continue
+ if (msg.message.id === normalizedMessage.message.id) {
+ result[i] = mergeAssistantMessages(msg, normalizedMessage)
+ return
}
}
@@ -2399,15 +2403,6 @@ export function mergeAssistantMessages(
}
}
-function isToolResultMessage(msg: Message): boolean {
- if (msg.type !== 'user') {
- return false
- }
- const content = msg.message.content
- if (typeof content === 'string') return false
- return content.some(block => block.type === 'tool_result')
-}
-
export function mergeUserMessages(a: UserMessage, b: UserMessage): UserMessage {
const lastContent = normalizeUserTextContent(a.message.content)
const currentContent = normalizeUserTextContent(b.message.content)
diff --git a/src/utils/model/aliases.ts b/src/utils/model/aliases.ts
index 75ae388..5e94865 100644
--- a/src/utils/model/aliases.ts
+++ b/src/utils/model/aliases.ts
@@ -6,6 +6,9 @@ export const MODEL_ALIASES = [
'sonnet[1m]',
'opus[1m]',
'opusplan',
+ // DeepSeek convenience aliases (resolved by ANTHROPIC_DEFAULT_*_MODEL envs)
+ 'pro',
+ 'flash',
] as const
export type ModelAlias = (typeof MODEL_ALIASES)[number]
diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts
index aba9b7d..647f059 100644
--- a/src/utils/model/providers.ts
+++ b/src/utils/model/providers.ts
@@ -13,6 +13,37 @@ export function getAPIProvider(): APIProvider {
: 'firstParty'
}
+/**
+ * Reports whether the GLM (Zhipu) backend has been selected through the
+ * CLAUDE_USE_GLM environment variable.
+ *
+ * GLM is reached through an Anthropic-compatible firstParty endpoint
+ * (ANTHROPIC_BASE_URL) and stays on the firstParty APIProvider. This
+ * predicate only switches on the model-aware behavior that the generic
+ * firstParty path has no knowledge of: output token limits, auto-mode, and
+ * betas.
+ *
+ * @returns true when CLAUDE_USE_GLM holds a value isEnvTruthy accepts.
+ */
+export function isGLMProvider(): boolean {
+  const glmFlag = process.env.CLAUDE_USE_GLM
+  return isEnvTruthy(glmFlag)
+}
+
+/**
+ * Reports whether the DeepSeek backend has been selected through the
+ * CLAUDE_USE_DEEPSEEK environment variable.
+ *
+ * DeepSeek is reached through an Anthropic-compatible firstParty endpoint and
+ * stays on the firstParty APIProvider. This predicate gates the
+ * DeepSeek-specific adaptations: thinking simplification, [ERROR] tool_result
+ * prefixing, the 429 retry policy, and the model validation allowlist.
+ *
+ * @returns true when CLAUDE_USE_DEEPSEEK holds a value isEnvTruthy accepts.
+ */
+export function isDeepSeekProvider(): boolean {
+  const deepSeekFlag = process.env.CLAUDE_USE_DEEPSEEK
+  return isEnvTruthy(deepSeekFlag)
+}
+
+/**
+ * Checks whether a base URL points at DeepSeek, i.e. its hostname is
+ * `deepseek.com` or a subdomain of it.
+ *
+ * The comparison uses `URL.hostname` (which excludes the port) and requires a
+ * label boundary, so `https://api.deepseek.com:8443/anthropic` matches while
+ * look-alike hosts such as `evildeepseek.com` or `deepseek.com.example.org`
+ * do not.
+ *
+ * @param baseUrl - Candidate endpoint URL; may be undefined or empty.
+ * @returns true for a parseable URL on the deepseek.com domain; false for
+ *   undefined, empty, unparseable, or foreign-host input.
+ */
+export function isDeepSeekBaseUrl(baseUrl: string | undefined): boolean {
+  if (!baseUrl) return false
+  try {
+    const hostname = new URL(baseUrl).hostname.toLowerCase()
+    return hostname === 'deepseek.com' || hostname.endsWith('.deepseek.com')
+  } catch {
+    // new URL() throws on malformed input; treat that as "not DeepSeek".
+    return false
+  }
+}
+
export function getAPIProviderForStatsig(): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
return getAPIProvider() as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}
diff --git a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts
index 14b8167..f3c9975 100644
--- a/src/utils/model/validateModel.ts
+++ b/src/utils/model/validateModel.ts
@@ -1,7 +1,7 @@
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
import { MODEL_ALIASES } from './aliases.js'
import { isModelAllowed } from './modelAllowlist.js'
-import { getAPIProvider } from './providers.js'
+import { getAPIProvider, isDeepSeekProvider } from './providers.js'
import { sideQuery } from '../sideQuery.js'
import {
NotFoundError,
@@ -14,8 +14,18 @@ import { getModelStrings } from './modelStrings.js'
// Cache valid models to avoid repeated API calls
const validModelCache = new Map()
+// Allowlist of model names accepted while the DeepSeek provider is active.
+// DeepSeek's API does not answer an unknown model name with 404; it silently
+// remaps the request to deepseek-v4-flash, so probing the API cannot tell a
+// valid name from a typo.
+const KNOWN_DEEPSEEK_MODELS = new Set<string>([
+  'deepseek-v4-pro',
+  'deepseek-v4-flash',
+])
+
/**
* Validates a model by attempting an actual API call.
+ *
+ * For the DeepSeek provider, the API silently remaps unknown model names, so
+ * we validate against a known-models allowlist instead of probing the API.
+ * For all other providers (Anthropic firstParty, GLM, Bedrock, Vertex,
+ * Foundry), we probe the API with a minimal request.
*/
export async function validateModel(
model: string,
@@ -41,6 +51,17 @@ export async function validateModel(
return { valid: true }
}
+ // DeepSeek: use known-models allowlist (API silently remaps unknown names).
+ if (isDeepSeekProvider()) {
+ if (KNOWN_DEEPSEEK_MODELS.has(lowerModel)) {
+ return { valid: true }
+ }
+ return {
+ valid: false,
+ error: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型(会被服务端静默映射为 deepseek-v4-flash)。可用模型:deepseek-v4-pro, deepseek-v4-flash`,
+ }
+ }
+
// Check if it matches ANTHROPIC_CUSTOM_MODEL_OPTION (pre-validated by the user)
if (normalizedModel === process.env.ANTHROPIC_CUSTOM_MODEL_OPTION) {
return { valid: true }
@@ -51,7 +72,6 @@ export async function validateModel(
return { valid: true }
}
-
// Try to make an actual API call with minimal parameters
try {
await sideQuery({
diff --git a/src/utils/modelCost.ts b/src/utils/modelCost.ts
index b4867d4..b94b4a1 100644
--- a/src/utils/modelCost.ts
+++ b/src/utils/modelCost.ts
@@ -86,7 +86,34 @@ export const COST_HAIKU_45 = {
webSearchRequests: 0.01,
} as const satisfies ModelCosts
-const DEFAULT_UNKNOWN_MODEL_COST = COST_TIER_5_25
+// DeepSeek V4 Pro pricing (CNY per Mtok), discounted price until 2026-05-31.
+// Set DEEPSEEK_USE_FULL_PRICE=1 for standard price.
+// NOTE(review): these values are in CNY, while the Claude entries stored in the
+// same MODEL_COSTS table are sourced from USD pricing — confirm that no
+// consumer sums or displays the two without accounting for the currency.
+export const COST_DEEPSEEK_PRO_DISCOUNTED = {
+ inputTokens: 3,
+ outputTokens: 6,
+ promptCacheWriteTokens: 3,
+ promptCacheReadTokens: 0.025,
+ webSearchRequests: 0,
+} as const satisfies ModelCosts
+
+// DeepSeek V4 Pro standard (non-promotional) pricing, CNY per Mtok. Every
+// token rate is exactly four times its discounted counterpart.
+export const COST_DEEPSEEK_PRO_FULL = {
+ inputTokens: 12,
+ outputTokens: 24,
+ promptCacheWriteTokens: 12,
+ promptCacheReadTokens: 0.1,
+ webSearchRequests: 0,
+} as const satisfies ModelCosts
+
+// DeepSeek V4 Flash pricing (CNY per Mtok).
+export const COST_DEEPSEEK_FLASH = {
+ inputTokens: 1,
+ outputTokens: 2,
+ promptCacheWriteTokens: 1,
+ promptCacheReadTokens: 0.02,
+ webSearchRequests: 0,
+} as const satisfies ModelCosts
+
+// Presumably the pricing applied to models that have no MODEL_COSTS entry.
+// NOTE(review): Flash is the cheapest DeepSeek tier defined here, so any
+// unrecognized model is costed at the lowest rate — confirm that
+// under-reporting is acceptable for unknown non-DeepSeek models.
+const DEFAULT_UNKNOWN_MODEL_COST = COST_DEEPSEEK_FLASH
/**
* Get the cost tier for Opus 4.6 based on fast mode.
@@ -98,10 +125,17 @@ export function getOpus46CostTier(fastMode: boolean): ModelCosts {
return COST_TIER_5_25
}
+// First instant at which the V4 Pro promotional price no longer applies:
+// the discount runs through 2026-05-31 Beijing time (UTC+8).
+const DEEPSEEK_PRO_DISCOUNT_ENDS_AT_MS = Date.parse('2026-06-01T00:00:00+08:00')
+
+/**
+ * Selects the DeepSeek V4 Pro price table.
+ *
+ * Resolution order:
+ * 1. DEEPSEEK_USE_FULL_PRICE=1 forces the standard price.
+ * 2. DEEPSEEK_USE_FULL_PRICE=0 forces the promotional price (escape hatch in
+ *    case DeepSeek extends the promotion past its announced end date).
+ * 3. Otherwise the promotional price is used until the announced end of the
+ *    promotion and the standard price afterwards, so reported costs do not
+ *    silently stay at a quarter of the real rate once the discount expires.
+ *
+ * @param now - Epoch milliseconds to evaluate the promotion window against;
+ *   defaults to the current time. Exposed for deterministic testing.
+ * @returns COST_DEEPSEEK_PRO_FULL or COST_DEEPSEEK_PRO_DISCOUNTED.
+ */
+export function getDeepSeekProCostTier(now: number = Date.now()): ModelCosts {
+  const override = process.env.DEEPSEEK_USE_FULL_PRICE
+  if (override === '1') {
+    return COST_DEEPSEEK_PRO_FULL
+  }
+  if (override === '0') {
+    return COST_DEEPSEEK_PRO_DISCOUNTED
+  }
+  return now >= DEEPSEEK_PRO_DISCOUNT_ENDS_AT_MS
+    ? COST_DEEPSEEK_PRO_FULL
+    : COST_DEEPSEEK_PRO_DISCOUNTED
+}
+
// @[MODEL LAUNCH]: Add a pricing entry for the new model below.
// Costs from https://platform.claude.com/docs/en/about-claude/pricing
// Web search cost: $10 per 1000 requests = $0.01 per request
-export const MODEL_COSTS: Record = {
+export const MODEL_COSTS: Record = {
[firstPartyNameToCanonical(CLAUDE_3_5_HAIKU_CONFIG.firstParty)]:
COST_HAIKU_35,
[firstPartyNameToCanonical(CLAUDE_HAIKU_4_5_CONFIG.firstParty)]:
@@ -123,6 +157,8 @@ export const MODEL_COSTS: Record = {
COST_TIER_5_25,
[firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)]:
COST_TIER_5_25,
+ 'deepseek-v4-pro': COST_DEEPSEEK_PRO_DISCOUNTED,
+ 'deepseek-v4-flash': COST_DEEPSEEK_FLASH,
}
/**
@@ -152,6 +188,11 @@ export function getModelCosts(model: string, usage: Usage): ModelCosts {
return getOpus46CostTier(isFastMode)
}
+ // DeepSeek V4 Pro pricing depends on discount-period flag.
+ if (shortName === 'deepseek-v4-pro') {
+ return getDeepSeekProCostTier()
+ }
+
const costs = MODEL_COSTS[shortName]
if (!costs) {
trackUnknownModelCost(model, shortName)
@@ -202,12 +243,13 @@ export function calculateCostFromTokens(
}
function formatPrice(price: number): string {
- // Format price: integers without decimals, others with 2 decimal places
- // e.g., 3 -> "$3", 0.8 -> "$0.80", 22.5 -> "$22.50"
+ // Format price: integers without decimals, fractions with 2 decimal places
+ // (3 decimals for very small values like cache-read tokens).
+ // e.g., 3 -> "¥3", 0.8 -> "¥0.80", 0.025 -> "¥0.025"
if (Number.isInteger(price)) {
- return `$${price}`
+ return `¥${price}`
}
- return `$${price.toFixed(2)}`
+ return `¥${price.toFixed(price < 0.1 ? 3 : 2)}`
}
/**
diff --git a/src/utils/permissions/permissionSetup.ts b/src/utils/permissions/permissionSetup.ts
index 8520da8..2489fa7 100644
--- a/src/utils/permissions/permissionSetup.ts
+++ b/src/utils/permissions/permissionSetup.ts
@@ -1310,13 +1310,32 @@ export function getAutoModeUnavailableReason(): AutoModeUnavailableReason | null
*/
export type AutoModeEnabledState = 'enabled' | 'disabled' | 'opt-in'
-const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'disabled'
+// deepseek branch: telemetry/GrowthBook is stubbed out, so tengu_auto_mode_config
+// never resolves and falls back to this default. Upstream defaulted to 'disabled'
+// to wait for the GB rollout signal — on this branch there is no rollout signal,
+// so default to 'enabled' to let CLI/--permission-mode auto and settings
+// defaultMode=auto work without a remote gate.
+const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'enabled'
+
+/**
+ * Fallback auto-mode state, used by parseAutoModeEnabledState when the
+ * configured value is not one of 'enabled' | 'disabled' | 'opt-in'.
+ *
+ * The Anthropic kill-switch (tengu_auto_mode_config) is never served on the
+ * GLM endpoint, so auto mode is force-enabled when the main-loop model name
+ * looks like glm-5 or above (the model gate still applies via
+ * modelSupportsAutoMode). Every other model receives
+ * AUTO_MODE_ENABLED_DEFAULT.
+ *
+ * NOTE(review): AUTO_MODE_ENABLED_DEFAULT is 'enabled' in this file, so both
+ * branches currently return 'enabled'. Non-GLM models are therefore NOT kept
+ * on 'disabled', and the GLM check is redundant unless that constant is
+ * changed back — confirm which default is intended.
+ *
+ * @returns 'enabled' for a GLM 5+ main-loop model, otherwise
+ *   AUTO_MODE_ENABLED_DEFAULT.
+ */
+function autoModeEnabledDefault(): AutoModeEnabledState {
+ // Unanchored match on "glm-" followed by one digit 5-9 (glm-5, glm-5.2, glm-6, …).
+ // NOTE(review): a two-digit major version such as glm-10 would not match — confirm intent.
+ if (/glm-[5-9]/.test(getMainLoopModel().toLowerCase())) return 'enabled'
+ return AUTO_MODE_ENABLED_DEFAULT
+}
function parseAutoModeEnabledState(value: unknown): AutoModeEnabledState {
if (value === 'enabled' || value === 'disabled' || value === 'opt-in') {
return value
}
- return AUTO_MODE_ENABLED_DEFAULT
+ return autoModeEnabledDefault()
}
/**
diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts
index 1ec78b5..c072a51 100644
--- a/src/utils/permissions/yoloClassifier.ts
+++ b/src/utils/permissions/yoloClassifier.ts
@@ -259,8 +259,11 @@ const yoloClassifierResponseSchema = lazySchema(() =>
export const YOLO_CLASSIFIER_TOOL_NAME = 'classify_result'
+// Note: omitting `type: 'custom'` — Anthropic accepts it, but DeepSeek's
+// /anthropic endpoint rejects unknown tool types with 400. Other tools in
+// this codebase already omit the type field; we do the same here to keep
+// the classifier compatible with both providers.
const YOLO_CLASSIFIER_TOOL_SCHEMA: BetaToolUnion = {
- type: 'custom',
name: YOLO_CLASSIFIER_TOOL_NAME,
description: 'Report the security classification result for the agent action',
input_schema: {
@@ -1328,14 +1331,12 @@ type AutoModeConfig = {
/**
* Get the model for the classifier.
- * Ant-only env var takes precedence, then GrowthBook JSON config override,
- * then the main loop model.
+ * CLAUDE_CODE_AUTO_MODE_MODEL env takes precedence, then GrowthBook JSON
+ * config override, then the main loop model.
*/
function getClassifierModel(): string {
- if (process.env.USER_TYPE === 'ant') {
- const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL
- if (envModel) return envModel
- }
+ const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL
+ if (envModel) return envModel
const config = getFeatureValue_CACHED_MAY_BE_STALE(
'tengu_auto_mode_config',
{} as AutoModeConfig,
diff --git a/src/utils/privacyLevel.ts b/src/utils/privacyLevel.ts
index 4848492..d5c277a 100644
--- a/src/utils/privacyLevel.ts
+++ b/src/utils/privacyLevel.ts
@@ -21,10 +21,13 @@ export function getPrivacyLevel(): PrivacyLevel {
if (process.env.CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC) {
return 'essential-traffic'
}
- if (process.env.DISABLE_TELEMETRY) {
- return 'no-telemetry'
- }
- return 'default'
+ // DeepSeek branch: there is no first-party Anthropic telemetry pipeline to
+ // talk to, and the analytics module has been replaced with no-op stubs
+ // upstream (commits 058cf17 + c40af24). Default to 'no-telemetry' so the
+ // few remaining call sites that gate on isTelemetryDisabled() (feedback
+ // survey, analytics config) take the disabled path without requiring
+ // users to set DISABLE_TELEMETRY=1.
+ return 'no-telemetry'
}
/**
diff --git a/src/utils/ripgrep.ts b/src/utils/ripgrep.ts
index 88b57ee..27bfa46 100644
--- a/src/utils/ripgrep.ts
+++ b/src/utils/ripgrep.ts
@@ -27,6 +27,10 @@ type RipgrepConfig = {
command: string
args: string[]
argv0?: string
+ // Human-readable explanation when ripgrep resolution took a fallback path
+ // (e.g. the bundled binary was missing and we fell back to system rg).
+ // Surfaced in the doctor screen and as a one-time startup warning.
+ note?: string
}
const getRipgrepConfig = memoize((): RipgrepConfig => {
@@ -70,13 +74,41 @@ const getRipgrepConfig = memoize((): RipgrepConfig => {
path.resolve(__dirname, '..', 'vendor', 'ripgrep'),
path.resolve(__dirname, '..', '..', 'vendor', 'ripgrep'),
]
- const rgRoot =
- candidateRoots.find(root =>
- existsSync(path.resolve(root, platformDir, executable)),
- ) ?? candidateRoots[0]
- const command = path.resolve(rgRoot, platformDir, executable)
+ const rgRoot = candidateRoots.find(root =>
+ existsSync(path.resolve(root, platformDir, executable)),
+ )
+
+ // Bundled binary found on disk: use it.
+ if (rgRoot) {
+ const command = path.resolve(rgRoot, platformDir, executable)
+ return { mode: 'builtin', command, args: [] }
+ }
- return { mode: 'builtin', command, args: [] }
+ // No bundled binary for this platform (e.g. Android/Termux, or an
+ // incomplete install). Fall back to system rg on PATH so file discovery,
+ // suggestions, and hooks keep working instead of spawning a non-existent
+ // path and failing with ENOENT.
+ const { cmd: systemPath } = findExecutable('rg', [])
+ if (systemPath !== 'rg') {
+ // SECURITY: spawn the bare name 'rg', not the resolved path, to prevent
+ // PATH hijacking via a malicious ./rg in the cwd (see system branch above).
+ return {
+ mode: 'system',
+ command: 'rg',
+ args: [],
+ note: 'bundled ripgrep binary not found; using system rg from PATH',
+ }
+ }
+
+ // Nothing available. Preserve historical behavior: return the expected
+ // builtin path so callers surface a clear ENOENT, with a note explaining why.
+ const command = path.resolve(candidateRoots[0], platformDir, executable)
+ return {
+ mode: 'builtin',
+ command,
+ args: [],
+ note: 'bundled ripgrep binary not found and no system rg on PATH',
+ }
})
export function ripgrepCommand(): {
@@ -551,12 +583,14 @@ export function getRipgrepStatus(): {
mode: 'system' | 'builtin' | 'embedded'
path: string
working: boolean | null // null if not yet tested
+ note?: string
} {
const config = getRipgrepConfig()
return {
mode: config.mode,
path: config.command,
working: ripgrepStatus?.working ?? null,
+ note: config.note,
}
}