diff --git a/docs/deepseek-vs-claude-pricing.md b/docs/deepseek-vs-claude-pricing.md new file mode 100644 index 0000000..e4632ba --- /dev/null +++ b/docs/deepseek-vs-claude-pricing.md @@ -0,0 +1,98 @@ +# DeepSeek vs Claude API Pricing + +Captured 2026-05-12. Sources: +- DeepSeek: https://api-docs.deepseek.com/zh-cn/quick_start/pricing +- Anthropic: https://platform.claude.com/docs/en/about-claude/pricing +- FX assumed: $1 ≈ ¥7.15 + +## Per-model prices (per 1M tokens) + +### Flagship tier + +| Item | Claude Opus 4.7 | Claude Sonnet 4.6 | DeepSeek V4 Pro (discount) | DeepSeek V4 Pro (full) | +|---|---:|---:|---:|---:| +| Input (cache miss) | $5.00 / ¥35.75 | $3.00 / ¥21.45 | ¥3 | ¥12 | +| Cache write (5m) | $6.25 / ¥44.69 | $3.75 / ¥26.81 | ¥3 | ¥12 | +| Cache read | $0.50 / ¥3.58 | $0.30 / ¥2.15 | ¥0.025 | ¥0.10 | +| Output | $25.00 / ¥178.75 | $15.00 / ¥107.25 | ¥6 | ¥24 | +| Context window | 200K | 1M | 1M | 1M | +| Max output tokens | varies | varies | 384K | 384K | + +DeepSeek V4 Pro discount (75% off, i.e. 25% of full price) runs through 2026-05-31 23:59 Beijing time. +From 2026-06-01 the full-price column applies unless DeepSeek extends it. 
+ +### Lightweight tier + +| Item | Claude Haiku 4.5 | DeepSeek V4 Flash | +|---|---:|---:| +| Input (cache miss) | $1.00 / ¥7.15 | ¥1 | +| Cache write (5m) | $1.25 / ¥8.94 | ¥1 | +| Cache read | $0.10 / ¥0.72 | ¥0.02 | +| Output | $5.00 / ¥35.75 | ¥2 | +| Context window | 200K | 1M | + +## Multiplier view (how much Claude costs vs DeepSeek discount price) + +### Pro vs Sonnet 4.6 (same-tier comparison) + +| Item | Sonnet 4.6 / Pro multiplier | +|---|---:| +| Input (cache miss) | 7.15x | +| Cache read | 85.8x | +| Output | 17.9x | + +### Pro vs Opus 4.7 (cross-tier comparison) + +| Item | Opus 4.7 / Pro multiplier | +|---|---:| +| Input (cache miss) | 11.9x | +| Cache read | 143x | +| Output | 29.8x | + +### Flash vs Haiku 4.5 + +| Item | Haiku 4.5 / Flash multiplier | +|---|---:| +| Input (cache miss) | 7.15x | +| Cache read | 35.8x | +| Output | 17.9x | + +## Realistic Claude Code request cost + +Measured pattern: 15,872 cache_read + 66 input + 30 output tokens +(observed first cache hit after the session_id-pinning fix in 283678a). + +| Provider | Cost per request | Per 100 requests | +|---|---:|---:| +| Claude Opus 4.7 | $0.0090 / ¥0.064 | ¥6.40 | +| Claude Sonnet 4.6 | $0.0054 / ¥0.039 | ¥3.90 | +| Claude Haiku 4.5 | $0.0018 / ¥0.013 | ¥1.30 | +| DeepSeek V4 Pro (discount) | ¥0.000777 | ¥0.078 | +| DeepSeek V4 Flash | ¥0.000437 | ¥0.044 | + +## Headline ratios at the typical Claude Code workload + +- DeepSeek Pro vs Claude Sonnet 4.6: **~50x cheaper** +- DeepSeek Pro vs Claude Opus 4.7: **~83x cheaper** +- DeepSeek Flash vs Claude Haiku 4.5: **~30x cheaper** + +After the 2026-05-31 discount expires, Pro full price would be 4x its +current rate; ratios shrink to roughly 12x vs Sonnet, 21x vs Opus, but the +gap remains substantial. + +## Caveats + +- **No Opus-class DeepSeek model.** Tasks that genuinely need Opus-level + reasoning (long math proofs, deep architecture work) have no DeepSeek + equivalent. 
+- **Capability gaps with the Anthropic API**: DeepSeek's /anthropic endpoint + does not implement image/document content blocks, computer use, server-side + web search beyond what the model emits, MCP gateway, or redacted_thinking. + See `docs/openclaude-commits-review.md` and the deepseek branch port commits + for the adaptations Claude Code makes for these. +- **Cache hit assumptions**: the per-request cost above assumes the + session_id pinning fix is in effect (commit 283678a). Without it, + cache_read_input_tokens stays at 0 and DeepSeek per-request cost is + roughly 100x higher (every request pays full input price). +- **No /count_tokens endpoint on DeepSeek.** Local UTF-8 byte estimation is + used instead (commit 6fbad90). diff --git a/scripts/build.ts b/scripts/build.ts index 4a98314..f3c0eae 100644 --- a/scripts/build.ts +++ b/scripts/build.ts @@ -20,7 +20,11 @@ const ENABLED_FEATURES = [ 'MCP_SKILLS', 'HISTORY_PICKER', 'TREE_SITTER_BASH', - 'NATIVE_CLIENT_ATTESTATION', + // NATIVE_CLIENT_ATTESTATION intentionally disabled: it injects a 'cch=00000' + // placeholder into the x-anthropic-billing-header and computes a body hash. + // GLM/DeepSeek's /anthropic endpoints do not validate this header — sending + // it adds CPU + an HTTP header without value. Keep off for non-Anthropic + // providers. 'BRIDGE_MODE', 'COORDINATOR_MODE', ] diff --git a/src/commands/cost/cost.ts b/src/commands/cost/cost.ts index c9fb0cb..0eaf9f6 100644 --- a/src/commands/cost/cost.ts +++ b/src/commands/cost/cost.ts @@ -1,24 +1,7 @@ import { formatTotalCost } from '../../cost-tracker.js' -import { currentLimits } from '../../services/claudeAiLimits.js' import type { LocalCommandCall } from '../../types/command.js' -import { isClaudeAISubscriber } from '../../utils/auth.js' +// DeepSeek: no Claude.ai subscription; always show the formatted cost. 
export const call: LocalCommandCall = async () => { - if (isClaudeAISubscriber()) { - let value: string - - if (currentLimits.isUsingOverage) { - value = - 'You are currently using your overages to power your Claude Code usage. We will automatically switch you back to your subscription rate limits when they reset' - } else { - value = - 'You are currently using your subscription to power your Claude Code usage' - } - - if (process.env.USER_TYPE === 'ant') { - value += `\n\n[ANT-ONLY] Showing cost anyway:\n ${formatTotalCost()}` - } - return { type: 'text', value } - } return { type: 'text', value: formatTotalCost() } } diff --git a/src/components/CostThresholdDialog.tsx b/src/components/CostThresholdDialog.tsx index bdf9f53..d0639d4 100644 --- a/src/components/CostThresholdDialog.tsx +++ b/src/components/CostThresholdDialog.tsx @@ -38,7 +38,7 @@ export function CostThresholdDialog(t0) { } let t4; if ($[4] !== onDone || $[5] !== t3) { - t4 = {t1}{t3}; + t4 = {t1}{t3}; $[4] = onDone; $[5] = t3; $[6] = t4; diff --git a/src/components/Message.tsx b/src/components/Message.tsx index ca2ef76..886e917 100644 --- a/src/components/Message.tsx +++ b/src/components/Message.tsx @@ -538,9 +538,6 @@ function AssistantMessageBlock(t0) { } case "thinking": { - if (!isTranscriptMode && !verbose) { - return null; - } const isLastThinking = !lastThinkingBlockId || thinkingBlockId === lastThinkingBlockId; const t1 = isTranscriptMode && !isLastThinking; let t2; diff --git a/src/components/Settings/Config.tsx b/src/components/Settings/Config.tsx index 37ee93c..7c09649 100644 --- a/src/components/Settings/Config.tsx +++ b/src/components/Settings/Config.tsx @@ -281,6 +281,26 @@ export function Config({ enabled: autoCompactEnabled }); } + }, { + id: 'autoMemoryEnabled', + label: 'Auto-memory', + // settings.json default is "undefined" which the resolver in + // src/memdir/paths.ts treats as "disabled" on this branch. Mirror + // that so the toggle reflects what the runtime actually does. 
+ value: settingsData?.autoMemoryEnabled ?? false, + type: 'boolean' as const, + onChange(autoMemoryEnabled: boolean) { + updateSettingsForSource('localSettings', { + autoMemoryEnabled + }); + setSettingsData(prev_auto_mem => ({ + ...prev_auto_mem, + autoMemoryEnabled + })); + logEvent('tengu_auto_memory_setting_changed', { + enabled: autoMemoryEnabled + }); + } }, { id: 'spinnerTipsEnabled', label: 'Show tips', diff --git a/src/components/ThinkingToggle.tsx b/src/components/ThinkingToggle.tsx index a7b7a1b..2a4604e 100644 --- a/src/components/ThinkingToggle.tsx +++ b/src/components/ThinkingToggle.tsx @@ -30,11 +30,11 @@ export function ThinkingToggle(t0) { t1 = [{ value: "true", label: "Enabled", - description: "Claude will think before responding" + description: "Model will think before responding" }, { value: "false", label: "Disabled", - description: "Claude will respond without extended thinking" + description: "Model will respond without extended thinking" }]; $[0] = t1; } else { diff --git a/src/components/messages/AssistantThinkingMessage.tsx b/src/components/messages/AssistantThinkingMessage.tsx index 3825f5f..af4b492 100644 --- a/src/components/messages/AssistantThinkingMessage.tsx +++ b/src/components/messages/AssistantThinkingMessage.tsx @@ -4,6 +4,7 @@ import React from 'react'; import { Box, Text } from '../../ink.js'; import { CtrlOToExpand } from '../CtrlOToExpand.js'; import { Markdown } from '../Markdown.js'; +import { useSettings } from '../../hooks/useSettings.js'; type Props = { // Accept either full ThinkingBlock/ThinkingBlockParam or a minimal shape with just type and thinking param: ThinkingBlock | ThinkingBlockParam | { @@ -36,7 +37,8 @@ export function AssistantThinkingMessage(t0) { if (hideInTranscript) { return null; } - const shouldShowFullThinking = isTranscriptMode || verbose; + const settings = useSettings(); + const shouldShowFullThinking = isTranscriptMode || verbose || settings.alwaysThinkingEnabled !== false; if 
(!shouldShowFullThinking) { const t4 = addMargin ? 1 : 0; let t5; diff --git a/src/constants/system.ts b/src/constants/system.ts index 0cd2e76..9495bd8 100644 --- a/src/constants/system.ts +++ b/src/constants/system.ts @@ -3,7 +3,7 @@ import { feature } from 'bun:bundle' import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js' import { logForDebugging } from '../utils/debug.js' -import { isEnvDefinedFalsy } from '../utils/envUtils.js' +import { isEnvTruthy } from '../utils/envUtils.js' import { getAPIProvider } from '../utils/model/providers.js' import { getWorkload } from '../utils/workloadContext.js' @@ -47,13 +47,10 @@ export function getCLISyspromptPrefix(options?: { /** * Check if attribution header is enabled. - * Enabled by default, can be disabled via env var or GrowthBook killswitch. + * Disabled by default, can be enabled via env var. */ function isAttributionHeaderEnabled(): boolean { - if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) { - return false - } - return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true) + return isEnvTruthy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER) } /** diff --git a/src/context.ts b/src/context.ts index 423414d..b614490 100644 --- a/src/context.ts +++ b/src/context.ts @@ -4,7 +4,7 @@ import { getAdditionalDirectoriesForClaudeMd, setCachedClaudeMdContent, } from './bootstrap/state.js' -import { getLocalISODate } from './constants/common.js' +import { getSessionStartDate } from './constants/common.js' import { filterInjectedMemoryFiles, getClaudeMds, @@ -17,7 +17,7 @@ import { getBranch, getDefaultBranch, getIsGit, gitExe } from './utils/git.js' import { shouldIncludeGitInstructions } from './utils/gitSettings.js' import { logError } from './utils/log.js' -const MAX_STATUS_CHARS = 2000 +const MAX_STATUS_CHARS = 1000 // System prompt injection for cache breaking (ant-only, ephemeral debugging state) let systemPromptInjection: string | null = null @@ -85,7 
+85,7 @@ export const getGitStatus = memoize(async (): Promise => { const truncatedStatus = status.length > MAX_STATUS_CHARS ? status.substring(0, MAX_STATUS_CHARS) + - '\n... (truncated because it exceeds 2k characters. If you need more information, run "git status" using BashTool)' + '\n... (truncated because it exceeds 1k characters. If you need more information, run "git status" using BashTool)' : status logForDiagnosticsNoPII('info', 'git_status_completed', { @@ -183,7 +183,9 @@ export const getUserContext = memoize( return { ...(claudeMd && { claudeMd }), - currentDate: `Today's date is ${getLocalISODate()}.`, + // Use session-stable date to preserve DeepSeek's server-side prefix + // cache across midnight (was getLocalISODate() — refreshes daily). + currentDate: `Today's date is ${getSessionStartDate()}.`, } }, ) diff --git a/src/cost-tracker.ts b/src/cost-tracker.ts index b03184c..72375bf 100644 --- a/src/cost-tracker.ts +++ b/src/cost-tracker.ts @@ -44,8 +44,11 @@ import { import { isFastModeEnabled } from './utils/fastMode.js' import { formatDuration, formatNumber } from './utils/format.js' import type { FpsMetrics } from './utils/fpsTracker.js' -import { getCanonicalName } from './utils/model/model.js' -import { calculateUSDCost } from './utils/modelCost.js' +import { + getCanonicalName, + getDefaultMainLoopModelSetting, +} from './utils/model/model.js' +import { calculateUSDCost, getModelCosts } from './utils/modelCost.js' export { getTotalCostUSD as getTotalCost, getTotalDuration, @@ -175,7 +178,7 @@ export function saveCurrentSessionCosts(fpsMetrics?: FpsMetrics): void { } function formatCost(cost: number, maxDecimalPlaces: number = 4): string { - return `$${cost > 0.5 ? round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}` + return `¥${cost > 0.5 ? 
round(cost, 100).toFixed(2) : cost.toFixed(maxDecimalPlaces)}` } function formatModelUsage(): string { @@ -234,12 +237,35 @@ export function formatTotalCost(): string { const modelUsageDisplay = formatModelUsage() + // DeepSeek-specific: show prompt cache hit rate and yuan savings, since the + // /anthropic endpoint exposes cache_read / cache_creation token counts and + // these are the headline cost driver (cache reads are ~120x cheaper). + let cacheStatsDisplay = '' + const cacheRead = getTotalCacheReadInputTokens() + const cacheCreation = getTotalCacheCreationInputTokens() + const directInput = getTotalInputTokens() + const totalInput = cacheRead + cacheCreation + directInput + if (totalInput > 0) { + const hitRate = (cacheRead / totalInput) * 100 + const model = getDefaultMainLoopModelSetting() + const costs = getModelCosts(model, { + input_tokens: 0, + output_tokens: 0, + } as Usage) + const savings = + (cacheRead / 1_000_000) * + (costs.inputTokens - costs.promptCacheReadTokens) + cacheStatsDisplay = + `\nCache hit rate: ${hitRate.toFixed(1)}% (${formatNumber(cacheRead)} / ${formatNumber(totalInput)} input tokens)` + + `\nCache savings: ${formatCost(savings)}` + } + return chalk.dim( `Total cost: ${costDisplay}\n` + `Total duration (API): ${formatDuration(getTotalAPIDuration())} Total duration (wall): ${formatDuration(getTotalDuration())} Total code changes: ${getTotalLinesAdded()} ${getTotalLinesAdded() === 1 ? 'line' : 'lines'} added, ${getTotalLinesRemoved()} ${getTotalLinesRemoved() === 1 ? 
'line' : 'lines'} removed -${modelUsageDisplay}`, +${modelUsageDisplay}${cacheStatsDisplay}`, ) } diff --git a/src/costHook.ts b/src/costHook.ts index 798a093..fa740c4 100644 --- a/src/costHook.ts +++ b/src/costHook.ts @@ -1,6 +1,5 @@ import { useEffect } from 'react' import { formatTotalCost, saveCurrentSessionCosts } from './cost-tracker.js' -import { hasConsoleBillingAccess } from './utils/billing.js' import type { FpsMetrics } from './utils/fpsTracker.js' export function useCostSummary( @@ -8,10 +7,8 @@ export function useCostSummary( ): void { useEffect(() => { const f = () => { - if (hasConsoleBillingAccess()) { - process.stdout.write('\n' + formatTotalCost() + '\n') - } - + // DeepSeek: there's no Anthropic console billing tier; always print. + process.stdout.write('\n' + formatTotalCost() + '\n') saveCurrentSessionCosts(getFpsMetrics?.()) } process.on('exit', f) diff --git a/src/memdir/paths.ts b/src/memdir/paths.ts index 68a6baf..c794a8b 100644 --- a/src/memdir/paths.ts +++ b/src/memdir/paths.ts @@ -51,7 +51,12 @@ export function isAutoMemoryEnabled(): boolean { if (settings.autoMemoryEnabled !== undefined) { return settings.autoMemoryEnabled } - return true + // DeepSeek branch default: off. The auto-memory section injects ~3145 + // fixed tokens into every system prompt (a 32% surcharge on a minimal + // -p call). Users who want it can flip it in /config -> Auto-memory or + // set autoMemoryEnabled: true in settings.json (or unset + // CLAUDE_CODE_DISABLE_AUTO_MEMORY=0). + return false } /** diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx index b483ead..7b99a10 100644 --- a/src/screens/REPL.tsx +++ b/src/screens/REPL.tsx @@ -2207,7 +2207,9 @@ export function REPL({ }; useEffect(() => { const totalCost = getTotalCost(); - if (totalCost >= 5 /* $5 */ && !showCostDialog && !haveShownCostDialog) { + // DeepSeek: pricing is in CNY; raise the threshold to roughly match the + // user-perceived "5 USD" notification level (1 USD ≈ 7 CNY, rounded up). 
+ if (totalCost >= 35 && !showCostDialog && !haveShownCostDialog) { logEvent('tengu_cost_threshold_reached', {}); // Mark as shown even if the dialog won't render (no console billing // access). Otherwise this effect re-fires on every message change for diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 4c09f06..733896c 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -22,6 +22,8 @@ import type { Stream } from '@anthropic-ai/sdk/streaming.mjs' import { randomUUID } from 'crypto' import { getAPIProvider, + isDeepSeekProvider, + isGLMProvider, isFirstPartyAnthropicBaseUrl, } from 'src/utils/model/providers.js' import { @@ -508,7 +510,16 @@ export function getAPIMetadata() { device_id: getOrCreateUserID(), // Only include OAuth account UUID when actively using OAuth authentication account_uuid: getOauthAccountInfo()?.accountUuid ?? '', - session_id: getSessionId(), + // GLM/DeepSeek prompt caches key on the full request body bytes. The + // real session_id changes every launch and would force a fresh cache + // entry per session — defeating the cache entirely. Pin to a stable + // sentinel for non-Anthropic providers so identical conversations across + // sessions share a cache entry. Real Anthropic telemetry keeps the live + // session id. + session_id: + isDeepSeekProvider() || isGLMProvider() + ? 'claude-code-ds' + : getSessionId(), }), } } @@ -935,6 +946,77 @@ function isToolResult( return block.type === 'tool_result' } +/** + * DeepSeek silently ignores the `is_error: true` flag on tool_result blocks, + * so the model has no way to know a tool call failed. Prefix the content with + * a literal "[ERROR]" text block so the model can detect failures from text. + * + * Walks nested tool_result content recursively (for cached histories). 
+ */ +function prefixDeepSeekErrorToolResults( + blocks: BetaContentBlockParam[], +): BetaContentBlockParam[] { + let changed = false + const updated = blocks.map(block => { + if (!isToolResult(block)) return block + + let nextBlock = block + let blockChanged = false + + if ((block as { is_error?: boolean }).is_error) { + const content = Array.isArray(block.content) + ? block.content + : [{ type: 'text', text: String(block.content ?? '') }] + const prefixed = [ + { type: 'text', text: '[ERROR] Tool execution failed:' }, + ...content, + ] as BetaContentBlockParam[] + nextBlock = { ...block, content: prefixed } as typeof block + blockChanged = true + } + + if (Array.isArray(nextBlock.content)) { + const nested = prefixDeepSeekErrorToolResults( + nextBlock.content as BetaContentBlockParam[], + ) + if (nested !== nextBlock.content) { + nextBlock = { ...nextBlock, content: nested } as typeof block + blockChanged = true + } + } + + if (blockChanged) { + changed = true + return nextBlock + } + return block + }) + + return changed ? updated : blocks +} + +function applyDeepSeekErrorPrefix( + messages: (UserMessage | AssistantMessage)[], +): (UserMessage | AssistantMessage)[] { + let changed = false + const updated = messages.map(msg => { + const content = msg.message.content + if (!Array.isArray(content)) return msg + + const updatedContent = prefixDeepSeekErrorToolResults( + content as BetaContentBlockParam[], + ) + if (updatedContent === content) return msg + + changed = true + return { + ...msg, + message: { ...msg.message, content: updatedContent }, + } as typeof msg + }) + return changed ? updated : messages +} + /** * Ensures messages contain at most `limit` media items (images + documents). * Strips oldest media first to preserve the most recent. @@ -1300,6 +1382,13 @@ async function* queryModel( API_MAX_MEDIA_PER_REQUEST, ) + // DeepSeek ignores is_error on tool_result blocks; prefix failed results + // with literal "[ERROR]" text so the model can detect them. 
Anthropic and + // GLM handle is_error correctly, so this is DeepSeek-only. + if (isDeepSeekProvider()) { + messagesForAPI = applyDeepSeekErrorPrefix(messagesForAPI) + } + // Instrumentation: Track message count after normalization logEvent('tengu_api_after_normalize', { postNormalizedMessageCount: messagesForAPI.length, @@ -1381,6 +1470,14 @@ async function* queryModel( } const allTools = [...toolSchemas, ...extraToolSchemas] + // Sort tools alphabetically by name for stable ordering to maximize + // DeepSeek's server-side prefix cache hits across requests. + allTools.sort((a, b) => { + const nameA = 'name' in a ? a.name : '' + const nameB = 'name' in b ? b.name : '' + return nameA.localeCompare(nameB) + }) + const isFastMode = isFastModeEnabled() && isFastModeAvailable() && @@ -1588,7 +1685,15 @@ async function* queryModel( // without notifying the model launch DRI and research. This is a sensitive // setting that can greatly affect model quality and bashing. if (hasThinking && modelSupportsThinking(options.model)) { - if ( + if (isDeepSeekProvider()) { + // DeepSeek controls thinking depth via CLAUDE_CODE_EFFORT_LEVEL alone; + // budget_tokens is ignored server-side. Send a minimal thinking param + // so the SDK knows to expect thinking blocks in the response. + thinking = { + budget_tokens: maxOutputTokens - 1, + type: 'enabled', + } satisfies BetaMessageStreamParams['thinking'] + } else if ( !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) && modelSupportsAdaptiveThinking(options.model) ) { diff --git a/src/services/api/errorUtils.ts b/src/services/api/errorUtils.ts index 20e4441..f562e65 100644 --- a/src/services/api/errorUtils.ts +++ b/src/services/api/errorUtils.ts @@ -242,6 +242,15 @@ export function formatAPIError(error: APIError): string { return 'Unable to connect to API. 
Check your internet connection' } + if (error.status === 402) { + return 'DeepSeek 账户余额不足,请在 platform.deepseek.com 充值后重试' + } + + if (error.status === 422) { + const nested = extractNestedErrorMessage(error) + return `DeepSeek 请求参数无效(422):${nested || error.message || '请检查工具定义和消息格式'}` + } + // Guard: when deserialized from JSONL (e.g. --resume), the error object may // be a plain object without a `.message` property. Return a safe fallback // instead of undefined, which would crash callers that access `.length`. diff --git a/src/services/api/errors.ts b/src/services/api/errors.ts index 1a7edc5..a47f09f 100644 --- a/src/services/api/errors.ts +++ b/src/services/api/errors.ts @@ -437,7 +437,8 @@ export function getAssistantMessageFromError( error.message.toLowerCase().includes('timeout')) ) { return createAssistantAPIErrorMessage({ - content: API_TIMEOUT_ERROR_MESSAGE, + content: + '请求超时。DeepSeek 服务端排队等待超过上限后断开了连接,请稍后重试或降低 effort 等级', error: 'unknown', }) } @@ -462,6 +463,14 @@ export function getAssistantMessageFromError( }) } + // DeepSeek 429 — simple rate-limit message (no Anthropic-specific headers). 
+ if (error instanceof APIError && error.status === 429) { + return createAssistantAPIErrorMessage({ + content: '请求频率超限,请稍后重试', + error: 'rate_limit', + }) + } + if ( error instanceof APIError && error.status === 429 && @@ -993,6 +1002,16 @@ export function classifyAPIError(error: unknown): string { return 'capacity_off_switch' } + // DeepSeek: insufficient account balance + if (error instanceof APIError && error.status === 402) { + return 'insufficient_balance' + } + + // DeepSeek: invalid request parameters + if (error instanceof APIError && error.status === 422) { + return 'invalid_parameters' + } + // Rate limiting if (error instanceof APIError && error.status === 429) { return 'rate_limit' @@ -1205,3 +1224,15 @@ export function getErrorMessageIfRefusal( error: 'invalid_request', }) } + +/** + * Extract DeepSeek's trace ID from error response headers for debugging. + */ +export function extractDeepSeekTraceId(error: APIError): string | undefined { + const headers = error.headers + if (!headers) return undefined + if (typeof headers.get === 'function') { + return headers.get('x-ds-trace-id') ?? 
undefined + } + return (headers as Record)['x-ds-trace-id'] +} diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts index 5ec9ad0..cfc94f6 100644 --- a/src/services/api/withRetry.ts +++ b/src/services/api/withRetry.ts @@ -11,7 +11,10 @@ import { isAwsCredentialsProviderError } from 'src/utils/aws.js' import { logForDebugging } from 'src/utils/debug.js' import { logError } from 'src/utils/log.js' import { createSystemAPIErrorMessage } from 'src/utils/messages.js' -import { getAPIProviderForStatsig } from 'src/utils/model/providers.js' +import { + getAPIProviderForStatsig, + isDeepSeekProvider, +} from 'src/utils/model/providers.js' import { clearApiKeyHelperCache, clearAwsCredentialsCache, @@ -694,6 +697,12 @@ function handleGcpCredentialError(error: unknown): boolean { } function shouldRetry(error: APIError): boolean { + // 402 Insufficient Balance (DeepSeek) — retrying won't help, the account + // needs a top-up. Fail fast. + if (error.status === 402) { + return false + } + // Never retry mock errors - they're from /mock-limits command for testing if (isMockRateLimitError(error)) { return false @@ -762,10 +771,11 @@ function shouldRetry(error: APIError): boolean { // Retry on lock timeouts. if (error.status === 409) return true - // Retry on rate limits, but not for ClaudeAI Subscription users - // Enterprise users can retry because they typically use PAYG instead of rate limits + // Retry on rate limits. DeepSeek has no subscriber tiers, so always retry + // 429 with exponential backoff. Anthropic retains the subscriber gate so + // ClaudeAI subscription users don't churn through rate limits. if (error.status === 429) { - return !isClaudeAISubscriber() || isEnterpriseSubscriber() + return isDeepSeekProvider() || !isClaudeAISubscriber() || isEnterpriseSubscriber() } // Clear API key cache on 401 and allow retry. 
diff --git a/src/services/tokenEstimation.ts b/src/services/tokenEstimation.ts index acaef7a..30a53b3 100644 --- a/src/services/tokenEstimation.ts +++ b/src/services/tokenEstimation.ts @@ -121,26 +121,21 @@ function stripToolSearchFieldsFromMessages( }) } +// DeepSeek has no /count_tokens endpoint. Returning null forces callers to +// use rough estimation (roughTokenCountEstimation), which uses UTF-8 byte +// length for accuracy on CJK-heavy content. export async function countTokensWithAPI( - content: string, + _content: string, ): Promise { - // Special case for empty content - API doesn't accept empty messages - if (!content) { - return 0 - } - - const message: Anthropic.Beta.Messages.BetaMessageParam = { - role: 'user', - content: content, - } - - return countMessagesTokensWithAPI([message], []) + return null } export async function countMessagesTokensWithAPI( messages: Anthropic.Beta.Messages.BetaMessageParam[], tools: Anthropic.Beta.Messages.BetaToolUnion[], ): Promise { + // DeepSeek has no /count_tokens endpoint; fall back to rough estimation. + return null return withTokenCountVCR(messages, tools, async () => { try { const model = getMainLoopModel() @@ -204,7 +199,10 @@ export function roughTokenCountEstimation( content: string, bytesPerToken: number = 4, ): number { - return Math.round(content.length / bytesPerToken) + // DeepSeek tokenizer is byte-pair on UTF-8 bytes, so CJK characters consume + // 3 bytes/char rather than 1. content.length under-estimates by ~3x on + // Chinese content. Buffer.byteLength matches DeepSeek's actual tokenization. + return Math.round(Buffer.byteLength(content, 'utf8') / bytesPerToken) } /** @@ -252,6 +250,9 @@ export async function countTokensViaHaikuFallback( messages: Anthropic.Beta.Messages.BetaMessageParam[], tools: Anthropic.Beta.Messages.BetaToolUnion[], ): Promise { + // DeepSeek: no count_tokens endpoint, no Haiku fallback model — return null + // so callers use roughTokenCountEstimation. 
+ return null // Check if messages contain thinking blocks const containsThinking = hasThinkingBlocks(messages) diff --git a/src/utils/__tests__/glmAutoMode.test.ts b/src/utils/__tests__/glmAutoMode.test.ts new file mode 100644 index 0000000..6708070 --- /dev/null +++ b/src/utils/__tests__/glmAutoMode.test.ts @@ -0,0 +1,43 @@ +import { describe, it, expect, beforeAll, vi } from 'vitest' + +// Auto mode must be reachable for GLM-5+ models. The two gates that previously +// blocked it: modelSupportsAutoMode (model allowlist) and the auto-mode +// enabled-state default (GrowthBook kill-switch is never served on GLM). +beforeAll(() => { + process.env.FEATURES = 'TRANSCRIPT_CLASSIFIER' + process.env.USER_TYPE = 'external' + // GLM runs against an Anthropic-compatible firstParty endpoint. + delete process.env.CLAUDE_CODE_USE_BEDROCK + delete process.env.CLAUDE_CODE_USE_VERTEX + delete process.env.CLAUDE_CODE_USE_FOUNDRY +}) + +describe('modelSupportsAutoMode for GLM', () => { + it('enables auto mode for glm-5 and above, not glm-4', async () => { + const { modelSupportsAutoMode } = await import('../betas.js') + expect(modelSupportsAutoMode('glm-5')).toBe(true) + expect(modelSupportsAutoMode('glm-5.2')).toBe(true) + expect(modelSupportsAutoMode('glm-6')).toBe(true) + expect(modelSupportsAutoMode('glm-4.5')).toBe(false) + // Anthropic allowlist still honored. + expect(modelSupportsAutoMode('claude-opus-4-6')).toBe(true) + expect(modelSupportsAutoMode('claude-opus-4-1')).toBe(false) + }) +}) + +// The enabled-state default lives in permissionSetup, but importing that +// module transitively loads the classifier prompt (a .txt require unsupported +// in vitest). Assert the predicate the default uses directly — it must match +// glm-5 and above, reject glm-4, and reject Anthropic models (which keep the +// 'disabled' circuit-breaker default). 
+describe('auto-mode enabled-state default predicate for GLM', () => { + const isGlmForced = (m: string) => /glm-[5-9]/.test(m.toLowerCase()) + it('matches glm-5+ only', () => { + expect(isGlmForced('glm-5')).toBe(true) + expect(isGlmForced('glm-5.2')).toBe(true) + expect(isGlmForced('glm-6')).toBe(true) + expect(isGlmForced('GLM-5.2')).toBe(true) + expect(isGlmForced('glm-4.5')).toBe(false) + expect(isGlmForced('claude-opus-4-6')).toBe(false) + }) +}) diff --git a/src/utils/apiPreconnect.ts b/src/utils/apiPreconnect.ts index 6a8de64..253926f 100644 --- a/src/utils/apiPreconnect.ts +++ b/src/utils/apiPreconnect.ts @@ -25,6 +25,7 @@ import { getOauthConfig } from '../constants/oauth.js' import { isEnvTruthy } from './envUtils.js' +import { isEssentialTrafficOnly } from './privacyLevel.js' let fired = false @@ -32,6 +33,10 @@ export function preconnectAnthropicApi(): void { if (fired) return fired = true + // Also skip when non-essential traffic is disabled via + // CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC / DISABLE_TELEMETRY / proxy env. + if (isEssentialTrafficOnly()) return + // Skip if using a cloud provider — different endpoint + auth if ( isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) || diff --git a/src/utils/betas.ts b/src/utils/betas.ts index fcd7b97..03c7952 100644 --- a/src/utils/betas.ts +++ b/src/utils/betas.ts @@ -188,8 +188,12 @@ export function modelSupportsAutoMode(model: string): boolean { if (/claude-(opus|sonnet|haiku)-4(?!-[6-9])/.test(m)) return false return true } + // GLM models (served over an Anthropic-compatible firstParty endpoint) + // support auto mode — the classifier runs against the same transcript. + // glm-5 and above (glm-5, glm-5.2, glm-6, …). + if (/glm-[5-9]/.test(m)) return true // External allowlist (firstParty already checked above). 
- return /^claude-(opus|sonnet)-4-6/.test(m) + return /^claude-(opus|sonnet)-4-6/.test(m) || /^deepseek-/.test(m) } return false } diff --git a/src/utils/context.ts b/src/utils/context.ts index 06b235e..b976b7b 100644 --- a/src/utils/context.ts +++ b/src/utils/context.ts @@ -161,7 +161,10 @@ export function getModelMaxOutputTokens(model: string): { const m = getCanonicalName(model) - if (m.includes('opus-4-6')) { + if (m.includes('glm-5.2')) { + defaultTokens = 64_000 + upperLimit = 131_072 + } else if (m.includes('opus-4-6')) { defaultTokens = 64_000 upperLimit = 128_000 } else if (m.includes('sonnet-4-6')) { diff --git a/src/utils/messages.ts b/src/utils/messages.ts index 7d8db97..f6c3ee5 100644 --- a/src/utils/messages.ts +++ b/src/utils/messages.ts @@ -2244,22 +2244,26 @@ export function normalizeMessagesForAPI( } // Find a previous assistant message with the same message ID and merge. - // Walk backwards, skipping tool results and different-ID assistants, - // since concurrent agents (teammates) can interleave streaming content - // blocks from multiple API responses with different message IDs. + // Walk backwards, skipping different-ID assistants, since concurrent + // agents (teammates) can interleave streaming content blocks from + // multiple API responses with different message IDs. + // + // Do NOT skip tool_result messages — when claude.ts yields separate + // AssistantMessages for thinking and tool_use blocks (same message.id), + // a StreamingToolExecutor tool_result can land between them. Merging + // across that boundary produces duplicate tool_use IDs that downstream + // ensureToolResultPairing strips, leaving orphaned tool_results and + // ultimately consecutive user messages → API 400 (CC-1215). for (let i = result.length - 1; i >= 0; i--) { const msg = result[i]! 
- if (msg.type !== 'assistant' && !isToolResultMessage(msg)) { + if (msg.type !== 'assistant') { break } - if (msg.type === 'assistant') { - if (msg.message.id === normalizedMessage.message.id) { - result[i] = mergeAssistantMessages(msg, normalizedMessage) - return - } - continue + if (msg.message.id === normalizedMessage.message.id) { + result[i] = mergeAssistantMessages(msg, normalizedMessage) + return } } @@ -2399,15 +2403,6 @@ export function mergeAssistantMessages( } } -function isToolResultMessage(msg: Message): boolean { - if (msg.type !== 'user') { - return false - } - const content = msg.message.content - if (typeof content === 'string') return false - return content.some(block => block.type === 'tool_result') -} - export function mergeUserMessages(a: UserMessage, b: UserMessage): UserMessage { const lastContent = normalizeUserTextContent(a.message.content) const currentContent = normalizeUserTextContent(b.message.content) diff --git a/src/utils/model/aliases.ts b/src/utils/model/aliases.ts index 75ae388..5e94865 100644 --- a/src/utils/model/aliases.ts +++ b/src/utils/model/aliases.ts @@ -6,6 +6,9 @@ export const MODEL_ALIASES = [ 'sonnet[1m]', 'opus[1m]', 'opusplan', + // DeepSeek convenience aliases (resolved by ANTHROPIC_DEFAULT_*_MODEL envs) + 'pro', + 'flash', ] as const export type ModelAlias = (typeof MODEL_ALIASES)[number] diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts index aba9b7d..647f059 100644 --- a/src/utils/model/providers.ts +++ b/src/utils/model/providers.ts @@ -13,6 +13,37 @@ export function getAPIProvider(): APIProvider { : 'firstParty' } +/** + * True when the active backend is GLM (zhipu), routed via an + * Anthropic-compatible firstParty endpoint (ANTHROPIC_BASE_URL). GLM stays on + * the firstParty APIProvider — this flag only gates model-aware behavior + * (output token limits, auto-mode, betas) that the generic firstParty path + * doesn't know about. 
+ */ +export function isGLMProvider(): boolean { + return isEnvTruthy(process.env.CLAUDE_USE_GLM) +} + +/** + * True when the active backend is DeepSeek, routed via an + * Anthropic-compatible firstParty endpoint. DeepSeek stays on the firstParty + * APIProvider — this flag gates DeepSeek-specific adaptations (thinking + * simplification, [ERROR] tool_result prefixing, 429 retry policy, model + * validation allowlist). + */ +export function isDeepSeekProvider(): boolean { + return isEnvTruthy(process.env.CLAUDE_USE_DEEPSEEK) +} + +export function isDeepSeekBaseUrl(baseUrl: string | undefined): boolean { + if (!baseUrl) return false + try { + return /(^|\.)deepseek\.com$/.test(new URL(baseUrl).hostname) /* hostname excludes the port; only deepseek.com or a true subdomain matches, not lookalikes such as evildeepseek.com */ + } catch { + return false + } +} + export function getAPIProviderForStatsig(): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS { return getAPIProvider() as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } diff --git a/src/utils/model/validateModel.ts b/src/utils/model/validateModel.ts index 14b8167..f3c9975 100644 --- a/src/utils/model/validateModel.ts +++ b/src/utils/model/validateModel.ts @@ -1,7 +1,7 @@ // biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered import { MODEL_ALIASES } from './aliases.js' import { isModelAllowed } from './modelAllowlist.js' -import { getAPIProvider } from './providers.js' +import { getAPIProvider, isDeepSeekProvider } from './providers.js' import { sideQuery } from '../sideQuery.js' import { NotFoundError, @@ -14,8 +14,18 @@ import { getModelStrings } from './modelStrings.js' // Cache valid models to avoid repeated API calls const validModelCache = new Map() +// DeepSeek's API silently remaps unknown model names to deepseek-v4-flash +// instead of returning 404, so API-based validation is not reliable — use a +// known-models allowlist when the DeepSeek provider is active. 
+const KNOWN_DEEPSEEK_MODELS = new Set(['deepseek-v4-pro', 'deepseek-v4-flash']) + /** * Validates a model by attempting an actual API call. + * + * For the DeepSeek provider, the API silently remaps unknown model names, so + * we validate against a known-models allowlist instead of probing the API. + * For all other providers (Anthropic firstParty, GLM, Bedrock, Vertex, + * Foundry), we probe the API with a minimal request. */ export async function validateModel( model: string, @@ -41,6 +51,17 @@ export async function validateModel( return { valid: true } } + // DeepSeek: use known-models allowlist (API silently remaps unknown names). + if (isDeepSeekProvider()) { + if (KNOWN_DEEPSEEK_MODELS.has(lowerModel)) { + return { valid: true } + } + return { + valid: false, + error: `模型 '${normalizedModel}' 不是已知的 DeepSeek 模型(会被服务端静默映射为 deepseek-v4-flash)。可用模型:deepseek-v4-pro, deepseek-v4-flash`, + } + } + // Check if it matches ANTHROPIC_CUSTOM_MODEL_OPTION (pre-validated by the user) if (normalizedModel === process.env.ANTHROPIC_CUSTOM_MODEL_OPTION) { return { valid: true } @@ -51,7 +72,6 @@ export async function validateModel( return { valid: true } } - // Try to make an actual API call with minimal parameters try { await sideQuery({ diff --git a/src/utils/modelCost.ts b/src/utils/modelCost.ts index b4867d4..b94b4a1 100644 --- a/src/utils/modelCost.ts +++ b/src/utils/modelCost.ts @@ -86,7 +86,34 @@ export const COST_HAIKU_45 = { webSearchRequests: 0.01, } as const satisfies ModelCosts -const DEFAULT_UNKNOWN_MODEL_COST = COST_TIER_5_25 +// DeepSeek V4 Pro pricing (CNY per Mtok), discounted price until 2026-05-31. +// Set DEEPSEEK_USE_FULL_PRICE=1 for standard price. 
+export const COST_DEEPSEEK_PRO_DISCOUNTED = { + inputTokens: 3, + outputTokens: 6, + promptCacheWriteTokens: 3, + promptCacheReadTokens: 0.025, + webSearchRequests: 0, +} as const satisfies ModelCosts + +export const COST_DEEPSEEK_PRO_FULL = { + inputTokens: 12, + outputTokens: 24, + promptCacheWriteTokens: 12, + promptCacheReadTokens: 0.1, + webSearchRequests: 0, +} as const satisfies ModelCosts + +// DeepSeek V4 Flash pricing (CNY per Mtok). +export const COST_DEEPSEEK_FLASH = { + inputTokens: 1, + outputTokens: 2, + promptCacheWriteTokens: 1, + promptCacheReadTokens: 0.02, + webSearchRequests: 0, +} as const satisfies ModelCosts + +const DEFAULT_UNKNOWN_MODEL_COST = COST_DEEPSEEK_FLASH /** * Get the cost tier for Opus 4.6 based on fast mode. @@ -98,10 +125,17 @@ export function getOpus46CostTier(fastMode: boolean): ModelCosts { return COST_TIER_5_25 } +export function getDeepSeekProCostTier(): ModelCosts { + if (process.env.DEEPSEEK_USE_FULL_PRICE === '1') { + return COST_DEEPSEEK_PRO_FULL + } + return COST_DEEPSEEK_PRO_DISCOUNTED +} + // @[MODEL LAUNCH]: Add a pricing entry for the new model below. // Costs from https://platform.claude.com/docs/en/about-claude/pricing // Web search cost: $10 per 1000 requests = $0.01 per request -export const MODEL_COSTS: Record = { +export const MODEL_COSTS: Record = { [firstPartyNameToCanonical(CLAUDE_3_5_HAIKU_CONFIG.firstParty)]: COST_HAIKU_35, [firstPartyNameToCanonical(CLAUDE_HAIKU_4_5_CONFIG.firstParty)]: @@ -123,6 +157,8 @@ export const MODEL_COSTS: Record = { COST_TIER_5_25, [firstPartyNameToCanonical(CLAUDE_OPUS_4_6_CONFIG.firstParty)]: COST_TIER_5_25, + 'deepseek-v4-pro': COST_DEEPSEEK_PRO_DISCOUNTED, + 'deepseek-v4-flash': COST_DEEPSEEK_FLASH, } /** @@ -152,6 +188,11 @@ export function getModelCosts(model: string, usage: Usage): ModelCosts { return getOpus46CostTier(isFastMode) } + // DeepSeek V4 Pro pricing depends on discount-period flag. 
+ if (shortName === 'deepseek-v4-pro') { + return getDeepSeekProCostTier() + } + const costs = MODEL_COSTS[shortName] if (!costs) { trackUnknownModelCost(model, shortName) @@ -202,12 +243,13 @@ export function calculateCostFromTokens( } function formatPrice(price: number): string { - // Format price: integers without decimals, others with 2 decimal places - // e.g., 3 -> "$3", 0.8 -> "$0.80", 22.5 -> "$22.50" + // Format price: integers without decimals, fractions with 2 decimal places + // (3 decimals for very small values like cache-read tokens). + // e.g., 3 -> "¥3", 0.8 -> "¥0.80", 0.025 -> "¥0.025" if (Number.isInteger(price)) { - return `$${price}` + return `¥${price}` } - return `$${price.toFixed(2)}` + return `¥${price.toFixed(price < 0.1 ? 3 : 2)}` } /** diff --git a/src/utils/permissions/permissionSetup.ts b/src/utils/permissions/permissionSetup.ts index 8520da8..2489fa7 100644 --- a/src/utils/permissions/permissionSetup.ts +++ b/src/utils/permissions/permissionSetup.ts @@ -1310,13 +1310,32 @@ export function getAutoModeUnavailableReason(): AutoModeUnavailableReason | null */ export type AutoModeEnabledState = 'enabled' | 'disabled' | 'opt-in' -const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'disabled' +// deepseek branch: telemetry/GrowthBook is stubbed out, so tengu_auto_mode_config +// never resolves and falls back to this default. Upstream defaulted to 'disabled' +// to wait for the GB rollout signal — on this branch there is no rollout signal, +// so default to 'enabled' to let CLI/--permission-mode auto and settings +// defaultMode=auto work without a remote gate. +const AUTO_MODE_ENABLED_DEFAULT: AutoModeEnabledState = 'enabled' + +/** + * Default auto-mode availability when GrowthBook gives no explicit value. + * The Anthropic kill-switch (tengu_auto_mode_config) is never served on the + * GLM endpoint, so the stock 'disabled' default would permanently lock auto + * mode out for GLM users. 
Force-enable it for GLM (model gate still applies + * via modelSupportsAutoMode). Note that AUTO_MODE_ENABLED_DEFAULT is itself + * 'enabled' on this branch, so the GLM check only matters if that constant is reverted to 'disabled'. + */ +function autoModeEnabledDefault(): AutoModeEnabledState { + // glm-5 and above (glm-5, glm-5.2, glm-6, …). + if (/glm-[5-9]/.test(getMainLoopModel().toLowerCase())) return 'enabled' + return AUTO_MODE_ENABLED_DEFAULT +} function parseAutoModeEnabledState(value: unknown): AutoModeEnabledState { if (value === 'enabled' || value === 'disabled' || value === 'opt-in') { return value } - return AUTO_MODE_ENABLED_DEFAULT + return autoModeEnabledDefault() } /** diff --git a/src/utils/permissions/yoloClassifier.ts b/src/utils/permissions/yoloClassifier.ts index 1ec78b5..c072a51 100644 --- a/src/utils/permissions/yoloClassifier.ts +++ b/src/utils/permissions/yoloClassifier.ts @@ -259,8 +259,11 @@ const yoloClassifierResponseSchema = lazySchema(() => export const YOLO_CLASSIFIER_TOOL_NAME = 'classify_result' +// Note: omitting `type: 'custom'` — Anthropic accepts it, but DeepSeek's +// /anthropic endpoint rejects unknown tool types with 400. Other tools in +// this codebase already omit the type field; we do the same here to keep +// the classifier compatible with both providers. const YOLO_CLASSIFIER_TOOL_SCHEMA: BetaToolUnion = { - type: 'custom', name: YOLO_CLASSIFIER_TOOL_NAME, description: 'Report the security classification result for the agent action', input_schema: { @@ -1328,14 +1331,12 @@ type AutoModeConfig = { /** * Get the model for the classifier. - * Ant-only env var takes precedence, then GrowthBook JSON config override, - * then the main loop model. + * CLAUDE_CODE_AUTO_MODE_MODEL env takes precedence, then GrowthBook JSON + * config override, then the main loop model. 
*/ function getClassifierModel(): string { - if (process.env.USER_TYPE === 'ant') { - const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL - if (envModel) return envModel - } + const envModel = process.env.CLAUDE_CODE_AUTO_MODE_MODEL + if (envModel) return envModel const config = getFeatureValue_CACHED_MAY_BE_STALE( 'tengu_auto_mode_config', {} as AutoModeConfig, diff --git a/src/utils/privacyLevel.ts b/src/utils/privacyLevel.ts index 4848492..d5c277a 100644 --- a/src/utils/privacyLevel.ts +++ b/src/utils/privacyLevel.ts @@ -21,10 +21,13 @@ export function getPrivacyLevel(): PrivacyLevel { if (process.env.CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC) { return 'essential-traffic' } - if (process.env.DISABLE_TELEMETRY) { - return 'no-telemetry' - } - return 'default' + // DeepSeek branch: there is no first-party Anthropic telemetry pipeline to + // talk to, and the analytics module has been replaced with no-op stubs + // upstream (commits 058cf17 + c40af24). Default to 'no-telemetry' so the + // few remaining call sites that gate on isTelemetryDisabled() (feedback + // survey, analytics config) take the disabled path without requiring + // users to set DISABLE_TELEMETRY=1. + return 'no-telemetry' } /** diff --git a/src/utils/ripgrep.ts b/src/utils/ripgrep.ts index 88b57ee..27bfa46 100644 --- a/src/utils/ripgrep.ts +++ b/src/utils/ripgrep.ts @@ -27,6 +27,10 @@ type RipgrepConfig = { command: string args: string[] argv0?: string + // Human-readable explanation when ripgrep resolution took a fallback path + // (e.g. the bundled binary was missing and we fell back to system rg). + // Surfaced in the doctor screen and as a one-time startup warning. 
+ note?: string } const getRipgrepConfig = memoize((): RipgrepConfig => { @@ -70,13 +74,41 @@ const getRipgrepConfig = memoize((): RipgrepConfig => { path.resolve(__dirname, '..', 'vendor', 'ripgrep'), path.resolve(__dirname, '..', '..', 'vendor', 'ripgrep'), ] - const rgRoot = - candidateRoots.find(root => - existsSync(path.resolve(root, platformDir, executable)), - ) ?? candidateRoots[0] - const command = path.resolve(rgRoot, platformDir, executable) + const rgRoot = candidateRoots.find(root => + existsSync(path.resolve(root, platformDir, executable)), + ) + + // Bundled binary found on disk: use it. + if (rgRoot) { + const command = path.resolve(rgRoot, platformDir, executable) + return { mode: 'builtin', command, args: [] } + } - return { mode: 'builtin', command, args: [] } + // No bundled binary for this platform (e.g. Android/Termux, or an + // incomplete install). Fall back to system rg on PATH so file discovery, + // suggestions, and hooks keep working instead of spawning a non-existent + // path and failing with ENOENT. + const { cmd: systemPath } = findExecutable('rg', []) + if (systemPath !== 'rg') { + // SECURITY: spawn the bare name 'rg', not the resolved path, to prevent + // PATH hijacking via a malicious ./rg in the cwd (see system branch above). + return { + mode: 'system', + command: 'rg', + args: [], + note: 'bundled ripgrep binary not found; using system rg from PATH', + } + } + + // Nothing available. Preserve historical behavior: return the expected + // builtin path so callers surface a clear ENOENT, with a note explaining why. 
+ const command = path.resolve(candidateRoots[0], platformDir, executable) + return { + mode: 'builtin', + command, + args: [], + note: 'bundled ripgrep binary not found and no system rg on PATH', + } }) export function ripgrepCommand(): { @@ -551,12 +583,14 @@ export function getRipgrepStatus(): { mode: 'system' | 'builtin' | 'embedded' path: string working: boolean | null // null if not yet tested + note?: string } { const config = getRipgrepConfig() return { mode: config.mode, path: config.command, working: ripgrepStatus?.working ?? null, + note: config.note, } }