diff --git a/apps/web/src/app/api/dev/consume-credits/route.ts b/apps/web/src/app/api/dev/consume-credits/route.ts index 167b0dcb51..3aa24054ce 100644 --- a/apps/web/src/app/api/dev/consume-credits/route.ts +++ b/apps/web/src/app/api/dev/consume-credits/route.ts @@ -89,6 +89,7 @@ export async function POST(request: NextRequest): Promise { editor_name: null, machine_id: null, user_byok: false, + is_free: false, has_tools: false, feature: null, session_id: null, diff --git a/apps/web/src/app/api/fim/completions/route.ts b/apps/web/src/app/api/fim/completions/route.ts index dfb1181756..584278f64f 100644 --- a/apps/web/src/app/api/fim/completions/route.ts +++ b/apps/web/src/app/api/fim/completions/route.ts @@ -158,6 +158,7 @@ export async function POST(request: NextRequest) { const userByok = organizationId ? await getBYOKforOrganization(readDb, organizationId, [byokProviderKey]) : await getBYOKforUser(readDb, user.id, [byokProviderKey]); + const isFreeRequest = await isFreeModel(requestBody.model); const usageContext: MicrodollarUsageContext = { api_kind: 'fim_completions', @@ -177,6 +178,7 @@ export async function POST(request: NextRequest) { editor_name: extractHeaderAndLimitLength(request, 'x-kilocode-editorname'), machine_id: extractHeaderAndLimitLength(request, 'x-kilocode-machineid'), user_byok: !!userByok, + is_free: isFreeRequest, has_tools: false, feature: validateFeatureHeader(request.headers.get(FEATURE_HEADER)), session_id: taskId ?? null, @@ -190,7 +192,7 @@ export async function POST(request: NextRequest) { // slight replication lag, and provides lower latency for US users const { balance, settings, plan } = await getBalanceAndOrgSettings(organizationId, user, readDb); - if (balance <= 0 && !(await isFreeModel(requestBody.model)) && !userByok) { + if (balance <= 0 && !isFreeRequest && !userByok) { return NextResponse.json( { error: { message: 'Insufficient credits' }, diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts index ee49a96162..a73ee58392 100644 --- a/apps/web/src/app/api/openrouter/[...path]/route.ts +++ b/apps/web/src/app/api/openrouter/[...path]/route.ts @@ -104,6 +104,19 @@ const MAX_TOKENS_LIMIT = 99999999999; // GPT4.1 default is ~32k const PAID_MODEL_AUTH_REQUIRED = 'PAID_MODEL_AUTH_REQUIRED'; const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED'; +function paidModelAuthRequiredResponse() { + return NextResponse.json( + { + error: { + code: PAID_MODEL_AUTH_REQUIRED, + message: 'You need to sign in to use this model.', + }, + error_type: ProxyErrorType.paid_model_auth_required, + }, + { status: 401 } + ); +} + function validatePath( url: URL ): @@ -274,6 +287,11 @@ export async function POST(request: NextRequest): Promise 0, botId, tokenSource, @@ -499,7 +517,7 @@ export async function POST(request: NextRequest): Promise { return ( @@ -18,7 +13,6 @@ export async function isFreeModel(model: string): Promise { model === KILO_AUTO_FREE_MODEL.id || (model ?? '').endsWith(':free') || model === 'openrouter/free' || - isOpenRouterStealthModel(model ?? '') || - (await isPublicIdExperimented(model ?? '')) + isOpenRouterStealthModel(model ?? '') ); } diff --git a/apps/web/src/lib/ai-gateway/models.test.ts b/apps/web/src/lib/ai-gateway/models.test.ts index f527879b9b..ed13733cfd 100644 --- a/apps/web/src/lib/ai-gateway/models.test.ts +++ b/apps/web/src/lib/ai-gateway/models.test.ts @@ -114,6 +114,7 @@ describe('isFreeModel', () => { expect(await isFreeModel('claude-3.7-sonnet')).toBe(false); expect(await isFreeModel('anthropic/claude-sonnet-4')).toBe(false); expect(await isFreeModel('google/gemini-2.5-pro')).toBe(false); + expect(await isFreeModel('preview/provider-funded-model')).toBe(false); }); test('should return false for models with "free" in the middle', async () => { diff --git a/apps/web/src/lib/ai-gateway/processUsage.test.ts b/apps/web/src/lib/ai-gateway/processUsage.test.ts index 96215edfd8..4abadb5afb 100644 --- a/apps/web/src/lib/ai-gateway/processUsage.test.ts +++ b/apps/web/src/lib/ai-gateway/processUsage.test.ts @@ -8,6 +8,7 @@ import { mapToUsageStats, logMicrodollarUsage, processOpenRouterUsage, + processTokenData, stripNulBytesInPlace, toInsertableDbUsageRecord, } from './processUsage'; @@ -383,6 +384,7 @@ describe('logMicrodollarUsage', () => { editor_name: null, machine_id: null, user_byok: false, + is_free: false, has_tools: false, feature: 'vscode-extension', session_id: null, @@ -512,6 +514,79 @@ describe('logMicrodollarUsage', () => { expect(metadataRecord?.has_middle_out_transform).toBe(false); }); + test('zeroes selected provider-funded traffic using the persisted route decision', async () => { + const user = await insertTestUser({ + id: 'test-provider-funded-user', + microdollars_used: 2000, + google_user_email: 'provider-funded@example.com', + }); + const usageStats: MicrodollarUsageStats = { + ...BASE_USAGE_STATS, + messageId: 'test-provider-funded-msg', + cost_mUsd: 500, + cacheDiscount_mUsd: 25, + is_byok: false, + }; + const usageContext: MicrodollarUsageContext = { + ...createBaseUsageContext(user), + requested_model: 'preview/provider-funded-model', + is_free: true, + }; + + await processTokenData(usageStats, usageContext); + + const metadataRecord = await db.query.microdollar_usage_metadata.findFirst({ + where: eq(microdollar_usage_metadata.message_id, 'test-provider-funded-msg'), + }); + const usageRecord = metadataRecord + ? await db.query.microdollar_usage.findFirst({ + where: eq(microdollar_usage.id, metadataRecord.id), + }) + : undefined; + const updatedUser = await findUserById(user.id); + + expect(usageRecord?.cost).toBe(0); + expect(usageRecord?.cache_discount).toBe(0); + expect(metadataRecord?.market_cost).toBe(500); + expect(metadataRecord?.is_free).toBe(true); + expect(updatedUser?.microdollars_used).toBe(2000); + }); + + test('bills ordinary traffic without a persisted provider-funded decision', async () => { + const user = await insertTestUser({ + id: 'test-non-funded-preview-user', + microdollars_used: 2000, + google_user_email: 'non-funded-preview@example.com', + }); + const usageStats: MicrodollarUsageStats = { + ...BASE_USAGE_STATS, + messageId: 'test-non-funded-preview-msg', + cost_mUsd: 500, + is_byok: false, + }; + const usageContext: MicrodollarUsageContext = { + ...createBaseUsageContext(user), + requested_model: 'preview/provider-funded-model', + is_free: false, + }; + + await processTokenData(usageStats, usageContext); + + const metadataRecord = await db.query.microdollar_usage_metadata.findFirst({ + where: eq(microdollar_usage_metadata.message_id, 'test-non-funded-preview-msg'), + }); + const usageRecord = metadataRecord + ? await db.query.microdollar_usage.findFirst({ + where: eq(microdollar_usage.id, metadataRecord.id), + }) + : undefined; + const updatedUser = await findUserById(user.id); + + expect(usageRecord?.cost).toBe(500); + expect(metadataRecord?.is_free).toBe(false); + expect(updatedUser?.microdollars_used).toBe(2500); + }); + test('stores 3 usage records with overlapping data and tests metadata deduplication', async () => { const user = await insertTestUser({ id: 'test-dedup-user', @@ -943,6 +1018,7 @@ describe('toInsertableDbUsageRecord NUL-byte sanitization', () => { editor_name: 'vscode', machine_id: 'machine', user_byok: false, + is_free: false, has_tools: false, feature: null, session_id: 'session', diff --git a/apps/web/src/lib/ai-gateway/processUsage.ts b/apps/web/src/lib/ai-gateway/processUsage.ts index 54a46a2c21..4dd4bb4ee6 100644 --- a/apps/web/src/lib/ai-gateway/processUsage.ts +++ b/apps/web/src/lib/ai-gateway/processUsage.ts @@ -22,7 +22,6 @@ import { sentryRootSpan } from '../getRootSpan'; import { ingestOrganizationTokenUsage } from '@/lib/organizations/organization-usage'; import type { ProviderId } from '@/lib/ai-gateway/providers/types'; import { findKiloExclusiveModel, isKiloStealthModel } from '@/lib/ai-gateway/models'; -import { isFreeModel } from '@/lib/ai-gateway/is-free-model'; import { sentryLogger } from '@/lib/utils.server'; import { maybeIssueKiloPassBonusFromUsageThreshold } from '@/lib/kilo-pass/usage-triggered-bonus'; import { getEffectiveKiloPassThreshold } from '@/lib/kilo-pass/threshold'; @@ -132,6 +131,7 @@ export function extractUsageContextInfo(usageContext: MicrodollarUsageContext) { api_kind: usageContext.api_kind, machine_id: usageContext.machine_id, is_user_byok: usageContext.user_byok, + is_free: usageContext.is_free, has_tools: usageContext.has_tools, feature: usageContext.feature, session_id: usageContext.session_id, @@ -178,8 +178,15 @@ export async function toInsertableDbUsageRecord( const id = randomUUID(); const created_at = new Date().toISOString(); - const { kilo_user_id, organization_id, project_id, provider, ttfb_ms, ...metadataFromContext } = - usageContextInfo; + const { + kilo_user_id, + organization_id, + project_id, + provider, + ttfb_ms, + is_free, + ...metadataFromContext + } = usageContextInfo; const core: MicrodollarUsage = { id, @@ -215,7 +222,7 @@ export async function toInsertableDbUsageRecord( streamed: usageStats.streamed, cancelled: usageStats.cancelled, market_cost: usageStats.market_cost ?? null, - is_free: await isFreeModel(usageContextInfo.requested_model), + is_free, }; // Legacy heuristic classification removed - abuse_classification is now handled @@ -1018,16 +1025,16 @@ export async function processTokenData( usageStats.cost_mUsd = calculateKiloExclusiveCost_mUsd(kiloExclusiveModel, usageStats); } - // Report upstream cost to abuse service BEFORE zeroing for free/BYOK + // Report upstream cost to abuse service BEFORE zeroing for free/BYOK traffic // (abuse service needs actual spend for heuristics like free_tier_exhausted) reportAbuseCost(usageContext, usageStats).catch(error => { console.error('[Abuse] Failed to report cost:', error); }); - // Preserve the real cost before zeroing for free/BYOK + // Preserve the real cost before zeroing for free/BYOK traffic. usageStats.market_cost = usageStats.cost_mUsd; - if ((await isFreeModel(usageContext.requested_model)) || usageContext.user_byok) { + if (usageContext.is_free || usageContext.user_byok) { usageStats.cost_mUsd = 0; usageStats.cacheDiscount_mUsd = 0; } @@ -1048,9 +1055,7 @@ async function useGenerationLookup( const isSuccessStatusCode = (usageStats?.status_code ?? 200) < 400; const hasOutputTokens = (usageStats?.outputTokens ?? 0) > 0; const hasCostWhenPaid = - (await isFreeModel(usageContext.requested_model)) || - usageContext.user_byok || - (usageStats?.cost_mUsd ?? 0) > 0; + usageContext.is_free || usageContext.user_byok || (usageStats?.cost_mUsd ?? 0) > 0; return isGatewayProvider && isSuccessStatusCode && (!hasOutputTokens || !hasCostWhenPaid); } diff --git a/apps/web/src/lib/ai-gateway/processUsage.types.ts b/apps/web/src/lib/ai-gateway/processUsage.types.ts index 7ac89d06e2..1daf20bea7 100644 --- a/apps/web/src/lib/ai-gateway/processUsage.types.ts +++ b/apps/web/src/lib/ai-gateway/processUsage.types.ts @@ -129,6 +129,8 @@ export type MicrodollarUsageContext = { machine_id: string | null; /** True if user/org is using their own API key - cost should be zeroed out */ user_byok: boolean; + /** Effective free/billable decision captured during routing and access checks. */ + is_free: boolean; has_tools: boolean; botId?: string; tokenSource?: string; diff --git a/apps/web/src/tests/helpers/microdollar-usage.helper.ts b/apps/web/src/tests/helpers/microdollar-usage.helper.ts index e9bf14e6e4..f64a3c83ec 100644 --- a/apps/web/src/tests/helpers/microdollar-usage.helper.ts +++ b/apps/web/src/tests/helpers/microdollar-usage.helper.ts @@ -61,6 +61,7 @@ function defineDefaultContextInfo(): UsageContextInfo { api_kind: 'chat_completions', machine_id: null, is_user_byok: false, + is_free: false, has_tools: false, feature: null, session_id: null, @@ -116,6 +117,7 @@ export function createMockUsageContext( editor_name: null, machine_id: null, user_byok: false, + is_free: false, has_tools: false, feature: 'vscode-extension', session_id: null,