From d04e760d1dd048ee0ed7655f46d05514426a8021 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 15:17:58 -0800 Subject: [PATCH 01/10] fix: auto collapse thinking --- cli/src/hooks/use-send-message.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts index 632ece5c4..4a5371908 100644 --- a/cli/src/hooks/use-send-message.ts +++ b/cli/src/hooks/use-send-message.ts @@ -681,6 +681,16 @@ export const useSendMessage = ({ return } + // Auto-collapse thinking blocks on first reasoning content + if (delta.type === 'reasoning') { + autoCollapseThinkingBlock( + aiMessageId, + undefined, + autoCollapsedThinkingIdsRef, + setCollapsedAgents, + ) + } + queueMessageUpdate((prev) => prev.map((msg) => { if (msg.id !== aiMessageId) { @@ -854,16 +864,6 @@ export const useSendMessage = ({ (rootStreamBufferRef.current ?? '') + eventObj.text } - // Auto-collapse thinking blocks by default (only once per thinking block) - if (eventObj.type === 'reasoning') { - autoCollapseThinkingBlock( - aiMessageId, - undefined, - autoCollapsedThinkingIdsRef, - setCollapsedAgents, - ) - } - rootStreamSeenRef.current = true appendRootChunk(eventObj) } else if (event.type === 'subagent_chunk') { From 4a365d404335aaa8560d5e1a7335d42dc61fc734 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 15:22:05 -0800 Subject: [PATCH 02/10] Initial impl --- .env.example | 1 + packages/internal/src/env-schema.ts | 2 + web/src/app/api/v1/chat/completions/_post.ts | 30 +- web/src/llm-api/openai.ts | 316 +++++++++++++++++++ 4 files changed, 341 insertions(+), 8 deletions(-) create mode 100644 web/src/llm-api/openai.ts diff --git a/.env.example b/.env.example index d0e1fcd29..3f9808ce9 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,7 @@ # AI API Keys CLAUDE_CODE_KEY=dummy_claude_code_key OPEN_ROUTER_API_KEY=dummy_openrouter_key +OPENAI_API_KEY=dummy_openai_key # Database & Server DATABASE_URL=postgresql://manicode_user_local:secretpassword_local@localhost:5432/manicode_db_local diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index cdddabbaa..c90d6885c 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -5,6 +5,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ // Backend variables CODEBUFF_API_KEY: z.string().optional(), OPEN_ROUTER_API_KEY: z.string().min(1), + OPENAI_API_KEY: z.string().min(1), RELACE_API_KEY: z.string().min(1), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), @@ -44,6 +45,7 @@ export const serverProcessEnv: ServerInput = { // Backend variables CODEBUFF_API_KEY: process.env.CODEBUFF_API_KEY, OPEN_ROUTER_API_KEY: process.env.OPEN_ROUTER_API_KEY, + OPENAI_API_KEY: process.env.OPENAI_API_KEY, RELACE_API_KEY: process.env.RELACE_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index e4f1d3b64..38409e27b 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -20,6 +20,7 @@ import { handleOpenRouterNonStream, handleOpenRouterStream, } from '@/llm-api/openrouter' +import { handleOpenAIStream } from '@/llm-api/openai' import { extractApiKeyFromHeader } from '@/util/auth' export async function postChatCompletions(params: { @@ -204,14 +205,27 @@ export async 
function postChatCompletions(params: { try { if (bodyStream) { // Streaming request - const stream = await handleOpenRouterStream({ - body, - userId, - agentId, - fetch, - logger, - insertMessageBigquery, - }) + const model = (body as any)?.model + const isOpenAIDirectModel = + typeof model === 'string' && + (!model.includes('/') || model.startsWith('openai/')) + const stream = await (isOpenAIDirectModel + ? handleOpenAIStream({ + body, + userId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : handleOpenRouterStream({ + body, + userId, + agentId, + fetch, + logger, + insertMessageBigquery, + })) trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_STREAM_STARTED, diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts new file mode 100644 index 000000000..fbdefd2bf --- /dev/null +++ b/web/src/llm-api/openai.ts @@ -0,0 +1,316 @@ +import { setupBigQuery } from '@codebuff/bigquery' +import { consumeCreditsAndAddAgentStep } from '@codebuff/billing' +import { PROFIT_MARGIN } from '@codebuff/common/old-constants' +import { getErrorObject } from '@codebuff/common/util/error' +import { env } from '@codebuff/internal/env' + +import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' +import type { Logger } from '@codebuff/common/types/contracts/logger' + +type StreamState = { responseText: string; reasoningText: string } + +function extractRequestMetadata(params: { body: unknown; logger: Logger }) { + const { body, logger } = params + const rawClientId = (body as any)?.codebuff_metadata?.client_id + const clientId = typeof rawClientId === 'string' ? rawClientId : null + if (!clientId) { + logger.warn({ body }, 'Received request without client_id') + } + const rawRunId = (body as any)?.codebuff_metadata?.run_id + const clientRequestId: string | null = typeof rawRunId === 'string' ? rawRunId : null + if (!clientRequestId) { + logger.warn({ body }, 'Received request without run_id') + } + return { clientId, clientRequestId } +} + +function normalizeOpenAIModel(model: unknown): string | undefined { + if (typeof model !== 'string') return undefined + return model.startsWith('openai/') ? model.slice('openai/'.length) : model +} + +type OpenAIUsage = { + prompt_tokens?: number + prompt_tokens_details?: { cached_tokens?: number } | null + completion_tokens?: number + completion_tokens_details?: { reasoning_tokens?: number } | null + total_tokens?: number + // We will inject cost fields below + cost?: number + cost_details?: { upstream_inference_cost?: number | null } | null +} + +function getOpenAIRatesPerMTokens(model: string): { inUsd: number; outUsd: number } { + const m = model.toLowerCase() + if (m.includes('gpt-4o-mini') || m.includes('4o-mini') || m.includes('o4-mini')) { + return { inUsd: 0.15, outUsd: 0.6 } + } + if (m.includes('gpt-4o')) { + return { inUsd: 2.5, outUsd: 10 } + } + if (m.includes('gpt-4.1')) { + return { inUsd: 5, outUsd: 15 } + } + if (m.startsWith('o3-pro')) { + return { inUsd: 5, outUsd: 15 } + } + if (m.startsWith('o3')) { + return { inUsd: 5, outUsd: 15 } + } + if (m.startsWith('gpt-5')) { + return { inUsd: 5, outUsd: 15 } + } + return { inUsd: 2.5, outUsd: 10 } +} + +function computeCostDollars(usage: OpenAIUsage, model: string): number { + const { inUsd, outUsd } = getOpenAIRatesPerMTokens(model) + const inTokens = usage.prompt_tokens ?? 0 + const outTokens = usage.completion_tokens ?? 
0 + return (inTokens / 1_000_000) * inUsd + (outTokens / 1_000_000) * outUsd +} + +export async function handleOpenAIStream({ + body, + userId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: any + userId: string + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const startTime = new Date() + const { clientId, clientRequestId } = extractRequestMetadata({ body, logger }) + + const model = normalizeOpenAIModel((body as any)?.model) + + // Build OpenAI-compatible body + const openaiBody: Record = { ...body, model, stream: true } + // Ensure usage in final chunk + const streamOptions = (openaiBody.stream_options as any) ?? {} + streamOptions.include_usage = true + openaiBody.stream_options = streamOptions + + // Remove fields that OpenAI might not accept + delete (openaiBody as any).usage + delete (openaiBody as any).provider + delete (openaiBody as any).transforms + delete (openaiBody as any).codebuff_metadata + + const response = await fetch('https://api.openai.com/v1/chat/completions', { + method: 'POST', + headers: { + Authorization: `Bearer ${env.OPENAI_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(openaiBody), + }) + + if (!response.ok) { + throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Failed to get response reader') + } + + let heartbeatInterval: NodeJS.Timeout + let state: StreamState = { responseText: '', reasoningText: '' } + let clientDisconnected = false + + const stream = new ReadableStream({ + async start(controller) { + const decoder = new TextDecoder() + let buffer = '' + + controller.enqueue(new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`)) + + heartbeatInterval = setInterval(() => { + if (!clientDisconnected) { + try { + controller.enqueue(new TextEncoder().encode(`: heartbeat ${new Date().toISOString()}\n\n`)) + } catch {} + } + }, 30000) + + try { + while (true) { + const { done, value } = await reader.read() + if (done) break + + buffer += decoder.decode(value, { stream: true }) + let lineEnd = buffer.indexOf('\n') + + while (lineEnd !== -1) { + let line = buffer.slice(0, lineEnd + 1) + buffer = buffer.slice(lineEnd + 1) + + const handled = await handleOpenAILine({ + userId, + agentId, + clientId, + clientRequestId, + startTime, + request: openaiBody, + line, + state, + logger, + insertMessage: insertMessageBigquery, + }) + state = handled.state + line = handled.outgoingLine + + if (!clientDisconnected) { + try { + controller.enqueue(new TextEncoder().encode(line)) + } catch (error) { + logger.warn('Client disconnected during stream, continuing for billing') + clientDisconnected = true + } + } + + lineEnd = buffer.indexOf('\n') + } + } + + if (!clientDisconnected) { + controller.close() + } + } catch (error) { + if (!clientDisconnected) { + controller.error(error) + } else { + logger.warn(getErrorObject(error), 'Error after client disconnect in OpenAI stream') + } + } finally { + clearInterval(heartbeatInterval) + } + }, + cancel() { + clearInterval(heartbeatInterval) + clientDisconnected = true + logger.warn({ clientDisconnected, state }, 'Client cancelled stream, continuing OpenAI consumption for billing') + }, + }) + + return stream +} + +async function handleOpenAILine({ + userId, + agentId, + clientId, + clientRequestId, + startTime, + request, + line, + state, + logger, + insertMessage, +}: { + userId: 
string + agentId: string + clientId: string | null + clientRequestId: string | null + startTime: Date + request: unknown + line: string + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<{ state: StreamState; outgoingLine: string }> { + if (!line.startsWith('data: ')) { + return { state, outgoingLine: line } + } + const raw = line.slice('data: '.length) + if (raw === '[DONE]\n') { + return { state, outgoingLine: line } + } + + let obj: any + try { + obj = JSON.parse(raw) + } catch (error) { + logger.warn(`Received non-JSON OpenAI response: ${JSON.stringify(getErrorObject(error), null, 2)}`) + return { state, outgoingLine: line } + } + + // Accumulate text + try { + const choice = Array.isArray(obj.choices) && obj.choices.length ? obj.choices[0] : undefined + const delta = choice?.delta + if (delta) { + if (typeof delta.content === 'string') state.responseText += delta.content + // OpenAI may not provide reasoning delta in standard chat completions; keep parity + if (typeof delta.reasoning === 'string') state.reasoningText += delta.reasoning + } + } catch {} + + // If usage present, it's the final chunk. Compute cost, log, and consume credits. + if (obj && obj.usage) { + const usage: OpenAIUsage = obj.usage + const model: string = typeof obj.model === 'string' ? obj.model : (typeof (request as any)?.model === 'string' ? (request as any).model : '') + + const cost = computeCostDollars(usage, model) + obj.usage.cost = cost + obj.usage.cost_details = { upstream_inference_cost: null } + + // BigQuery insert (do not await) + setupBigQuery({ logger }).then(async () => { + const success = await insertMessage({ + row: { + id: obj.id, + user_id: userId, + finished_at: new Date(), + created_at: startTime, + request, + reasoning_text: state.reasoningText, + response: state.responseText, + output_tokens: usage.completion_tokens ?? 0, + reasoning_tokens: usage.completion_tokens_details?.reasoning_tokens, + cost: cost, + upstream_inference_cost: null, + input_tokens: usage.prompt_tokens ?? 0, + cache_read_input_tokens: usage.prompt_tokens_details?.cached_tokens, + }, + logger, + }) + if (!success) { + logger.error({ request }, 'Failed to insert message into BigQuery (OpenAI)') + } + }) + + await consumeCreditsAndAddAgentStep({ + messageId: obj.id, + userId, + agentId, + clientId, + clientRequestId, + startTime, + model: obj.model, + reasoningText: state.reasoningText, + response: state.responseText, + cost, + credits: Math.round(cost * 100 * (1 + PROFIT_MARGIN)), + inputTokens: obj.usage.prompt_tokens ?? 0, + cacheCreationInputTokens: null, + cacheReadInputTokens: obj.usage.prompt_tokens_details?.cached_tokens ?? 0, + reasoningTokens: obj.usage.completion_tokens_details?.reasoning_tokens ?? null, + outputTokens: obj.usage.completion_tokens ?? 
0, + logger, + }) + + // Reconstruct outgoing line with injected cost + const newLine = `data: ${JSON.stringify(obj)}\n` + return { state, outgoingLine: newLine } + } + + return { state, outgoingLine: line } +} From d2b84eeaaba0cf45c9b117a4123b15618008860e Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 16:08:04 -0800 Subject: [PATCH 03/10] Add @ai-sdk/provider-utils dependency to internal package --- bun.lock | 11 ++++++++++- packages/internal/package.json | 1 + .../openai-compatible/openai-compatible-provider.ts | 1 - 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bun.lock b/bun.lock index 25e80648f..68e4e0b75 100644 --- a/bun.lock +++ b/bun.lock @@ -260,6 +260,7 @@ "name": "@codebuff/internal", "version": "1.0.0", "dependencies": { + "@ai-sdk/provider-utils": "3.0.17", "@codebuff/common": "workspace:*", "drizzle-orm": "*", "loops": "^5.0.1", @@ -436,7 +437,7 @@ "@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.17", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-TR3Gs4I3Tym4Ll+EPdzRdvo/rc8Js6c4nVhFLuvGLX/Y4V9ZcQMa/HTiYsHEgmYrf1zVi6Q145UEZUfleOwOjw=="], "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], @@ -4078,8 +4079,16 @@ "zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="], + "@ai-sdk/anthropic/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.0", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-BoQZtGcBxkeSH1zK+SRYNDtJPIPpacTeiMZqnG4Rv6xXjEwM0FH4MGs9c+PlhyEWmQCzjRM2HAotEydFhD4dYw=="], + "@ai-sdk/google/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + + "@ai-sdk/google-vertex/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": 
"^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + + "@ai-sdk/openai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.2", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.3", "zod-to-json-schema": "^3.24.1" }, "peerDependencies": { "zod": "^3.25.76 || ^4" } }, "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w=="], + "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.15", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-kOc6Pxb7CsRlNt+sLZKL7/VGQUd7ccl3/tIK+Bqf5/QhHR0Qm3qRBMz1IwU1RmjJEZA73x+KB5cUckbDl2WF7Q=="], "@auth/core/jose": ["jose@6.1.0", "", {}, "sha512-TTQJyoEoKcC1lscpVDCSsVgYzUDg/0Bt3WE//WiTPK6uOCQC2KZS4MpugbMWt/zyjkopgZoXhZuCi00gLudfUA=="], diff --git a/packages/internal/package.json b/packages/internal/package.json index 9a827cbfe..d3edf8197 100644 --- a/packages/internal/package.json +++ b/packages/internal/package.json @@ -48,6 +48,7 @@ "bun": "^1.3.0" }, "dependencies": { + "@ai-sdk/provider-utils": "3.0.17", "@codebuff/common": "workspace:*", "drizzle-orm": "*", "loops": "^5.0.1" diff --git a/packages/internal/src/openai-compatible/openai-compatible-provider.ts b/packages/internal/src/openai-compatible/openai-compatible-provider.ts index 58a1ba71d..6dae5540f 100644 --- a/packages/internal/src/openai-compatible/openai-compatible-provider.ts +++ b/packages/internal/src/openai-compatible/openai-compatible-provider.ts @@ -8,7 +8,6 @@ import { FetchFunction, withoutTrailingSlash, withUserAgentSuffix, - getRuntimeEnvironmentUserAgent, } from '@ai-sdk/provider-utils'; import { OpenAICompatibleChatConfig, From aa8b1840861012a5d25079dfbc89eb213b0593b3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 16:20:43 -0800 Subject: [PATCH 04/10] Update max_tokens to max_completion_tokens. Remove stop field. 
--- web/src/llm-api/openai.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index fbdefd2bf..d6cf2c4e2 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -97,7 +97,12 @@ export async function handleOpenAIStream({ streamOptions.include_usage = true openaiBody.stream_options = streamOptions - // Remove fields that OpenAI might not accept + // Transform max_tokens to max_completion_tokens + openaiBody.max_completion_tokens = openaiBody.max_tokens + delete (openaiBody as any).max_tokens + + // Remove fields that OpenAI doesn't support + delete (openaiBody as any).stop delete (openaiBody as any).usage delete (openaiBody as any).provider delete (openaiBody as any).transforms @@ -113,10 +118,10 @@ export async function handleOpenAIStream({ }) if (!response.ok) { - throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`) + throw new Error(`OpenAI API error: ${response.status} ${response.statusText} ${await response.text()}`) } - const reader = response.body?.getReader() + const reader = response.body?.getReader?.() if (!reader) { throw new Error('Failed to get response reader') } From 5783876dc58a16563693b7a2311e4b2432883a18 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 16:29:29 -0800 Subject: [PATCH 05/10] tweak to handle either max_completion_tokens or max_tokens --- web/src/llm-api/openai.ts | 72 ++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index d6cf2c4e2..47bb05467 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -17,7 +17,8 @@ function extractRequestMetadata(params: { body: unknown; logger: Logger }) { logger.warn({ body }, 'Received request without client_id') } const rawRunId = (body as any)?.codebuff_metadata?.run_id - const clientRequestId: string | null = typeof rawRunId === 'string' ? rawRunId : null + const clientRequestId: string | null = + typeof rawRunId === 'string' ? rawRunId : null if (!clientRequestId) { logger.warn({ body }, 'Received request without run_id') } @@ -40,9 +41,16 @@ type OpenAIUsage = { cost_details?: { upstream_inference_cost?: number | null } | null } -function getOpenAIRatesPerMTokens(model: string): { inUsd: number; outUsd: number } { +function getOpenAIRatesPerMTokens(model: string): { + inUsd: number + outUsd: number +} { const m = model.toLowerCase() - if (m.includes('gpt-4o-mini') || m.includes('4o-mini') || m.includes('o4-mini')) { + if ( + m.includes('gpt-4o-mini') || + m.includes('4o-mini') || + m.includes('o4-mini') + ) { return { inUsd: 0.15, outUsd: 0.6 } } if (m.includes('gpt-4o')) { @@ -98,7 +106,8 @@ export async function handleOpenAIStream({ openaiBody.stream_options = streamOptions // Transform max_tokens to max_completion_tokens - openaiBody.max_completion_tokens = openaiBody.max_tokens + openaiBody.max_completion_tokens = + openaiBody.max_completion_tokens ?? 
openaiBody.max_tokens delete (openaiBody as any).max_tokens // Remove fields that OpenAI doesn't support @@ -118,7 +127,9 @@ export async function handleOpenAIStream({ }) if (!response.ok) { - throw new Error(`OpenAI API error: ${response.status} ${response.statusText} ${await response.text()}`) + throw new Error( + `OpenAI API error: ${response.status} ${response.statusText} ${await response.text()}`, + ) } const reader = response.body?.getReader?.() @@ -135,12 +146,18 @@ export async function handleOpenAIStream({ const decoder = new TextDecoder() let buffer = '' - controller.enqueue(new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`)) + controller.enqueue( + new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`), + ) heartbeatInterval = setInterval(() => { if (!clientDisconnected) { try { - controller.enqueue(new TextEncoder().encode(`: heartbeat ${new Date().toISOString()}\n\n`)) + controller.enqueue( + new TextEncoder().encode( + `: heartbeat ${new Date().toISOString()}\n\n`, + ), + ) } catch {} } }, 30000) @@ -176,7 +193,9 @@ export async function handleOpenAIStream({ try { controller.enqueue(new TextEncoder().encode(line)) } catch (error) { - logger.warn('Client disconnected during stream, continuing for billing') + logger.warn( + 'Client disconnected during stream, continuing for billing', + ) clientDisconnected = true } } @@ -192,7 +211,10 @@ export async function handleOpenAIStream({ if (!clientDisconnected) { controller.error(error) } else { - logger.warn(getErrorObject(error), 'Error after client disconnect in OpenAI stream') + logger.warn( + getErrorObject(error), + 'Error after client disconnect in OpenAI stream', + ) } } finally { clearInterval(heartbeatInterval) @@ -201,7 +223,10 @@ export async function handleOpenAIStream({ cancel() { clearInterval(heartbeatInterval) clientDisconnected = true - logger.warn({ clientDisconnected, state }, 'Client cancelled stream, continuing OpenAI consumption for billing') + logger.warn( + { clientDisconnected, state }, + 'Client cancelled stream, continuing OpenAI consumption for billing', + ) }, }) @@ -243,25 +268,36 @@ async function handleOpenAILine({ try { obj = JSON.parse(raw) } catch (error) { - logger.warn(`Received non-JSON OpenAI response: ${JSON.stringify(getErrorObject(error), null, 2)}`) + logger.warn( + `Received non-JSON OpenAI response: ${JSON.stringify(getErrorObject(error), null, 2)}`, + ) return { state, outgoingLine: line } } // Accumulate text try { - const choice = Array.isArray(obj.choices) && obj.choices.length ? obj.choices[0] : undefined + const choice = + Array.isArray(obj.choices) && obj.choices.length + ? obj.choices[0] + : undefined const delta = choice?.delta if (delta) { if (typeof delta.content === 'string') state.responseText += delta.content // OpenAI may not provide reasoning delta in standard chat completions; keep parity - if (typeof delta.reasoning === 'string') state.reasoningText += delta.reasoning + if (typeof delta.reasoning === 'string') + state.reasoningText += delta.reasoning } } catch {} // If usage present, it's the final chunk. Compute cost, log, and consume credits. if (obj && obj.usage) { const usage: OpenAIUsage = obj.usage - const model: string = typeof obj.model === 'string' ? obj.model : (typeof (request as any)?.model === 'string' ? (request as any).model : '') + const model: string = + typeof obj.model === 'string' + ? obj.model + : typeof (request as any)?.model === 'string' + ? 
(request as any).model + : '' const cost = computeCostDollars(usage, model) obj.usage.cost = cost @@ -288,7 +324,10 @@ async function handleOpenAILine({ logger, }) if (!success) { - logger.error({ request }, 'Failed to insert message into BigQuery (OpenAI)') + logger.error( + { request }, + 'Failed to insert message into BigQuery (OpenAI)', + ) } }) @@ -307,7 +346,8 @@ async function handleOpenAILine({ inputTokens: obj.usage.prompt_tokens ?? 0, cacheCreationInputTokens: null, cacheReadInputTokens: obj.usage.prompt_tokens_details?.cached_tokens ?? 0, - reasoningTokens: obj.usage.completion_tokens_details?.reasoning_tokens ?? null, + reasoningTokens: + obj.usage.completion_tokens_details?.reasoning_tokens ?? null, outputTokens: obj.usage.completion_tokens ?? 0, logger, }) From ea730ba4d3ead1cc3566a0311d248e8b7b36cbed Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 16:29:49 -0800 Subject: [PATCH 06/10] fix base2 to use reasoningOptions high --- .agents/base2/base2.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts index d1be7d36f..794908b84 100644 --- a/.agents/base2/base2.ts +++ b/.agents/base2/base2.ts @@ -29,7 +29,7 @@ export function createBase2( ? 'z-ai/glm-4.6:nitro' : 'anthropic/claude-sonnet-4.5', ...(isGpt5 && { - reasoningModel: { + reasoningOptions: { effort: 'high', }, }), From d0174523c786616ba4752dbcfc13b94e79d8b81e Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 16:34:10 -0800 Subject: [PATCH 07/10] fix: only route to openai if prefixed with openai/ --- web/src/app/api/v1/chat/completions/_post.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 38409e27b..56c9f2e9d 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -207,8 +207,7 @@ export async function postChatCompletions(params: { // Streaming request const model = (body as any)?.model const isOpenAIDirectModel = - typeof model === 'string' && - (!model.includes('/') || model.startsWith('openai/')) + typeof model === 'string' && model.startsWith('openai/') const stream = await (isOpenAIDirectModel ? handleOpenAIStream({ body, From 714cef613b09261b63e87b7575966e64764af3f4 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 17:01:08 -0800 Subject: [PATCH 08/10] Support only gpt-5. Compute cost --- web/src/app/api/v1/chat/completions/_post.ts | 8 +- web/src/llm-api/openai.ts | 90 ++++++++++---------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 56c9f2e9d..3b7e30a25 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -20,7 +20,7 @@ import { handleOpenRouterNonStream, handleOpenRouterStream, } from '@/llm-api/openrouter' -import { handleOpenAIStream } from '@/llm-api/openai' +import { handleOpenAIStream, OPENAI_SUPPORTED_MODELS } from '@/llm-api/openai' import { extractApiKeyFromHeader } from '@/util/auth' export async function postChatCompletions(params: { @@ -206,8 +206,12 @@ export async function postChatCompletions(params: { if (bodyStream) { // Streaming request const model = (body as any)?.model + const shortModelName = + typeof model === 'string' ? 
model.split('/')[1] : undefined const isOpenAIDirectModel = - typeof model === 'string' && model.startsWith('openai/') + typeof model === 'string' && + model.startsWith('openai/') && + OPENAI_SUPPORTED_MODELS.includes(shortModelName as any) const stream = await (isOpenAIDirectModel ? handleOpenAIStream({ body, diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index 47bb05467..a9d50c60a 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -7,6 +7,19 @@ import { env } from '@codebuff/internal/env' import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' import type { Logger } from '@codebuff/common/types/contracts/logger' +export const OPENAI_SUPPORTED_MODELS = ['gpt-5'] as const +export type OpenAIModel = (typeof OPENAI_SUPPORTED_MODELS)[number] + +const INPUT_TOKEN_COSTS: Record = { + 'gpt-5': 1.25, +} as const +const CACHED_INPUT_TOKEN_COSTS: Record = { + 'gpt-5': 0.125, +} as const +const OUTPUT_TOKEN_COSTS: Record = { + 'gpt-5': 10, +} as const + type StreamState = { responseText: string; reasoningText: string } function extractRequestMetadata(params: { body: unknown; logger: Logger }) { @@ -25,11 +38,6 @@ function extractRequestMetadata(params: { body: unknown; logger: Logger }) { return { clientId, clientRequestId } } -function normalizeOpenAIModel(model: unknown): string | undefined { - if (typeof model !== 'string') return undefined - return model.startsWith('openai/') ? model.slice('openai/'.length) : model -} - type OpenAIUsage = { prompt_tokens?: number prompt_tokens_details?: { cached_tokens?: number } | null @@ -41,41 +49,19 @@ type OpenAIUsage = { cost_details?: { upstream_inference_cost?: number | null } | null } -function getOpenAIRatesPerMTokens(model: string): { - inUsd: number - outUsd: number -} { - const m = model.toLowerCase() - if ( - m.includes('gpt-4o-mini') || - m.includes('4o-mini') || - m.includes('o4-mini') - ) { - return { inUsd: 0.15, outUsd: 0.6 } - } - if (m.includes('gpt-4o')) { - return { inUsd: 2.5, outUsd: 10 } - } - if (m.includes('gpt-4.1')) { - return { inUsd: 5, outUsd: 15 } - } - if (m.startsWith('o3-pro')) { - return { inUsd: 5, outUsd: 15 } - } - if (m.startsWith('o3')) { - return { inUsd: 5, outUsd: 15 } - } - if (m.startsWith('gpt-5')) { - return { inUsd: 5, outUsd: 15 } - } - return { inUsd: 2.5, outUsd: 10 } -} +function computeCostDollars(usage: OpenAIUsage, model: OpenAIModel): number { + const inputTokenCost = INPUT_TOKEN_COSTS[model] + const cachedInputTokenCost = CACHED_INPUT_TOKEN_COSTS[model] + const outputTokenCost = OUTPUT_TOKEN_COSTS[model] -function computeCostDollars(usage: OpenAIUsage, model: string): number { - const { inUsd, outUsd } = getOpenAIRatesPerMTokens(model) const inTokens = usage.prompt_tokens ?? 0 + const cachedInTokens = usage.prompt_tokens_details?.cached_tokens ?? 0 const outTokens = usage.completion_tokens ?? 0 - return (inTokens / 1_000_000) * inUsd + (outTokens / 1_000_000) * outUsd + return ( + (inTokens / 1_000_000) * inputTokenCost + + (cachedInTokens / 1_000_000) * cachedInputTokenCost + + (outTokens / 1_000_000) * outputTokenCost + ) } export async function handleOpenAIStream({ @@ -96,10 +82,24 @@ export async function handleOpenAIStream({ const startTime = new Date() const { clientId, clientRequestId } = extractRequestMetadata({ body, logger }) - const model = normalizeOpenAIModel((body as any)?.model) + const { model } = body + const modelShortName = + typeof model === 'string' ? 
model.split('/')[1] : undefined + if ( + !modelShortName || + !OPENAI_SUPPORTED_MODELS.includes(modelShortName as OpenAIModel) + ) { + throw new Error( + `Unsupported OpenAI model: ${model} (supported models include only: ${OPENAI_SUPPORTED_MODELS.map((m) => `'${m}'`).join(', ')})`, + ) + } // Build OpenAI-compatible body - const openaiBody: Record = { ...body, model, stream: true } + const openaiBody: Record = { + ...body, + model: modelShortName, + stream: true, + } // Ensure usage in final chunk const streamOptions = (openaiBody.stream_options as any) ?? {} streamOptions.include_usage = true @@ -182,6 +182,7 @@ export async function handleOpenAIStream({ startTime, request: openaiBody, line, + modelShortName: modelShortName as OpenAIModel, state, logger, insertMessage: insertMessageBigquery, @@ -239,6 +240,7 @@ async function handleOpenAILine({ clientId, clientRequestId, startTime, + modelShortName, request, line, state, @@ -250,6 +252,7 @@ async function handleOpenAILine({ clientId: string | null clientRequestId: string | null startTime: Date + modelShortName: OpenAIModel request: unknown line: string state: StreamState @@ -292,14 +295,7 @@ async function handleOpenAILine({ // If usage present, it's the final chunk. Compute cost, log, and consume credits. if (obj && obj.usage) { const usage: OpenAIUsage = obj.usage - const model: string = - typeof obj.model === 'string' - ? obj.model - : typeof (request as any)?.model === 'string' - ? (request as any).model - : '' - - const cost = computeCostDollars(usage, model) + const cost = computeCostDollars(usage, modelShortName) obj.usage.cost = cost obj.usage.cost_details = { upstream_inference_cost: null } From 2c16e3ffd277d09989162eef041c8244d9a1eb85 Mon Sep 17 00:00:00 2001 From: Charles Lien Date: Tue, 11 Nov 2025 16:38:57 -0800 Subject: [PATCH 09/10] include previous providerOptions --- sdk/src/impl/llm.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 4be708918..66e81ed13 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -63,15 +63,18 @@ function getProviderOptions(params: { model: string runId: string clientSessionId: string + providerOptions?: Record }): { codebuff: JSONObject } { - const { model, runId, clientSessionId } = params + const { model, runId, clientSessionId, providerOptions } = params // Set allow_fallbacks based on whether model is explicitly defined const isExplicitlyDefined = isExplicitlyDefinedModel(model) return { + ...providerOptions, // Could either be "codebuff" or "openaiCompatible" codebuff: { + ...providerOptions?.codebuff, // All values here get appended to the request body codebuff_metadata: { run_id: runId, From 7daafe39bbc8d5156a93e386eb637eb7d1bf043d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 11 Nov 2025 17:18:25 -0800 Subject: [PATCH 10/10] Update reasoning field to be reasoning_effort --- web/src/llm-api/openai.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index a9d50c60a..7d5423e77 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -110,6 +110,20 @@ export async function handleOpenAIStream({ openaiBody.max_completion_tokens ?? 
openaiBody.max_tokens delete (openaiBody as any).max_tokens + // Transform reasoning to reasoning_effort + if (openaiBody.reasoning && typeof openaiBody.reasoning === 'object') { + const reasoning = openaiBody.reasoning as { + enabled?: boolean + effort?: 'high' | 'medium' | 'low' + } + const enabled = reasoning.enabled ?? true + + if (enabled) { + openaiBody.reasoning_effort = reasoning.effort ?? 'medium' + } + } + delete (openaiBody as any).reasoning + // Remove fields that OpenAI doesn't support delete (openaiBody as any).stop delete (openaiBody as any).usage
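Taken together, patches 02, 04, 05, and 10 converge on a single
body-normalization step ahead of the call to api.openai.com. Below is a
minimal standalone sketch of that combined transform, for reference;
the helper name normalizeOpenAIRequestBody is hypothetical, and the
field handling mirrors the diffs above rather than any official OpenAI
SDK API:

    type ReasoningOptions = {
      enabled?: boolean
      effort?: 'high' | 'medium' | 'low'
    }

    // Collapses the body rewrites from patches 02, 04, 05, and 10 into one helper.
    function normalizeOpenAIRequestBody(
      body: Record<string, unknown>,
      modelShortName: string, // e.g. 'gpt-5', already stripped of its 'openai/' prefix
    ): Record<string, unknown> {
      const out: Record<string, unknown> = { ...body, model: modelShortName, stream: true }

      // Ask for usage stats in the final SSE chunk (patch 02).
      const streamOptions = (out.stream_options as Record<string, unknown>) ?? {}
      streamOptions.include_usage = true
      out.stream_options = streamOptions

      // Prefer an explicit max_completion_tokens, else fall back to max_tokens (patches 04-05).
      out.max_completion_tokens = out.max_completion_tokens ?? out.max_tokens
      delete out.max_tokens

      // Map the reasoning object onto OpenAI's reasoning_effort parameter (patch 10).
      const reasoning = out.reasoning as ReasoningOptions | undefined
      if (reasoning && typeof reasoning === 'object' && (reasoning.enabled ?? true)) {
        out.reasoning_effort = reasoning.effort ?? 'medium'
      }
      delete out.reasoning

      // Drop fields the OpenAI endpoint does not accept (patches 02 and 04).
      for (const key of ['stop', 'usage', 'provider', 'transforms', 'codebuff_metadata']) {
        delete out[key]
      }
      return out
    }

handleOpenAIStream would then call this once, right after validating the
model against OPENAI_SUPPORTED_MODELS.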