Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/web/src/app/api/dev/consume-credits/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ export async function POST(request: NextRequest): Promise<NextResponse> {
editor_name: null,
machine_id: null,
user_byok: false,
is_free: false,
has_tools: false,
feature: null,
session_id: null,
Expand Down
4 changes: 3 additions & 1 deletion apps/web/src/app/api/fim/completions/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ export async function POST(request: NextRequest) {
const userByok = organizationId
? await getBYOKforOrganization(readDb, organizationId, [byokProviderKey])
: await getBYOKforUser(readDb, user.id, [byokProviderKey]);
const isFreeRequest = await isFreeModel(requestBody.model);

const usageContext: MicrodollarUsageContext = {
api_kind: 'fim_completions',
Expand All @@ -177,6 +178,7 @@ export async function POST(request: NextRequest) {
editor_name: extractHeaderAndLimitLength(request, 'x-kilocode-editorname'),
machine_id: extractHeaderAndLimitLength(request, 'x-kilocode-machineid'),
user_byok: !!userByok,
is_free: isFreeRequest,
has_tools: false,
feature: validateFeatureHeader(request.headers.get(FEATURE_HEADER)),
session_id: taskId ?? null,
Expand All @@ -190,7 +192,7 @@ export async function POST(request: NextRequest) {
// slight replication lag, and provides lower latency for US users
const { balance, settings, plan } = await getBalanceAndOrgSettings(organizationId, user, readDb);

if (balance <= 0 && !(await isFreeModel(requestBody.model)) && !userByok) {
if (balance <= 0 && !isFreeRequest && !userByok) {
return NextResponse.json(
{
error: { message: 'Insufficient credits' },
Expand Down
70 changes: 43 additions & 27 deletions apps/web/src/app/api/openrouter/[...path]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,19 @@ const MAX_TOKENS_LIMIT = 99999999999; // GPT4.1 default is ~32k
const PAID_MODEL_AUTH_REQUIRED = 'PAID_MODEL_AUTH_REQUIRED';
const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED';

/**
 * Builds the 401 JSON response sent when a request for a model that is not
 * free arrives without valid authentication.
 *
 * The payload carries the `PAID_MODEL_AUTH_REQUIRED` error code, a
 * human-readable message, and the matching `ProxyErrorType` value.
 *
 * @returns A `NextResponse` with HTTP status 401.
 */
function paidModelAuthRequiredResponse() {
  const payload = {
    error: {
      code: PAID_MODEL_AUTH_REQUIRED,
      message: 'You need to sign in to use this model.',
    },
    error_type: ProxyErrorType.paid_model_auth_required,
  };

  return NextResponse.json(payload, { status: 401 });
}

function validatePath(
url: URL
):
Expand Down Expand Up @@ -274,14 +287,19 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
);
}

const [isExperimentCandidate, isIntrinsicallyFreeModel] = await Promise.all([
isPublicIdExperimented(originalModelIdLowerCased),
isFreeModel(originalModelIdLowerCased),
]);

// For FREE models: check rate limit, log at start.
// Server-side products (cloud-agent, code-review, app-builder) rate-limit
// per user when the request comes from Cloudflare IPs (Kilo infrastructure).
// All other products rate-limit per IP (fast pre-auth path).
const isRateLimitedFreeModelRequest =
isKiloExclusiveFreeModel(originalModelIdLowerCased) ||
autoModel === KILO_AUTO_FREE_MODEL.id ||
(await isPublicIdExperimented(originalModelIdLowerCased));
isExperimentCandidate;
if (isRateLimitedFreeModelRequest) {
const rateLimit = await resolveRateLimit(feature, ipAddress, authPromise);
if (rateLimit instanceof NextResponse) return rateLimit;
Expand Down Expand Up @@ -319,19 +337,10 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
let tokenSource: string | undefined = authTokenSource;

if (authFailedResponse) {
// No valid auth
if (!(await isFreeModel(originalModelIdLowerCased))) {
// Paid model requires authentication
return NextResponse.json(
{
error: {
code: PAID_MODEL_AUTH_REQUIRED,
message: 'You need to sign in to use this model.',
},
error_type: ProxyErrorType.paid_model_auth_required,
},
{ status: 401 }
);
// A potential experiment request must reach provider selection before we
// know whether this specific request is provider-funded.
if (!isIntrinsicallyFreeModel && !isExperimentCandidate) {
return paidModelAuthRequiredResponse();
}

const promotionLimit = await checkPromotionLimit(ipAddress);
Expand All @@ -358,7 +367,8 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
);
}

// Anonymous access for free model (already rate-limited above)
// Anonymous access for a possibly free request; provider selection below
// rejects stale experiment membership before any paid fallback is sent.
user = createAnonymousContext(ipAddress);
organizationId = undefined;
botId = undefined;
Expand All @@ -374,15 +384,6 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
return storeAndPreviousResponseIdIsNotSupported();
}

// Log to free_model_usage for rate limiting (at request start, before processing)
if (isRateLimitedFreeModelRequest) {
await logFreeModelRequest(
ipAddress,
originalModelIdLowerCased,
isAnonymousContext(user) ? undefined : user.id
);
}

// Use new shared helper for fraud & project headers
const { fraudHeaders, projectId } = extractFraudAndProjectHeaders(request);
const providerResult = await getProvider({
Expand Down Expand Up @@ -410,6 +411,22 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
skipKiloExclusiveModelSettings,
experiment,
} = providerResult;
const providerFunded = experiment !== undefined;
const isFreeRequest = isIntrinsicallyFreeModel || providerFunded;

// A stale experiment-membership hit can allow an anonymous request as far as
// provider selection. It does not make ordinary fallback routing free.
if (isAnonymousContext(user) && !isFreeRequest) {
return paidModelAuthRequiredResponse();
}

if (isRateLimitedFreeModelRequest) {
await logFreeModelRequest(
ipAddress,
originalModelIdLowerCased,
isAnonymousContext(user) ? undefined : user.id
);
}

// Request-level data-collection opt-out: a caller can set
// `provider.data_collection: 'deny'` or `provider.zdr: true` on any
Expand Down Expand Up @@ -482,6 +499,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
editor_name: extractHeaderAndLimitLength(request, 'x-kilocode-editorname'),
machine_id: machineIdHeader,
user_byok: !!userByok,
is_free: isFreeRequest,
has_tools: (requestBodyParsed.body.tools?.length ?? 0) > 0,
botId,
tokenSource,
Expand All @@ -499,7 +517,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
if (!isAnonymousContext(user) && !bypassAccessCheck) {
const { balance, settings, plan } = await balanceAndSettingsPromise;

if (balance <= 0 && !(await isFreeModel(originalModelIdLowerCased)) && !userByok) {
if (balance <= 0 && !isFreeRequest && !userByok) {
return await usageLimitExceededResponse(user, balance);
}

Expand Down Expand Up @@ -540,8 +558,6 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
if (experiment) {
usageContext.modelExperimentVariantVersionId = experiment.variantVersionId;
usageContext.modelExperimentAllocationSubject = experiment.allocationSubject;
// Cost zeroing for experiment traffic is handled by `isFreeModel`, which
// returns true for experimented public ids.
}

sentryRootSpan()?.setAttribute(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
editor_name: extractHeaderAndLimitLength(request, 'x-kilocode-editorname'),
machine_id: extractHeaderAndLimitLength(request, 'x-kilocode-machineid'),
user_byok: !!userByok,
is_free: false,
has_tools: false,
botId,
tokenSource,
Expand Down
6 changes: 4 additions & 2 deletions apps/web/src/app/api/openrouter/embeddings/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno

const requestedModel = requestBodyParsed.model.trim();
const requestedModelLowerCased = requestedModel.toLowerCase();
const isFreeRequest = await isFreeModel(requestedModelLowerCased);

// Extract IP for all requests (needed for free model rate limiting)
const ipAddress = request.headers.get('x-forwarded-for')?.split(',')[0]?.trim();
Expand Down Expand Up @@ -132,7 +133,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
const tokenSource: string | undefined = authTokenSource;

if (authFailedResponse) {
if (!(await isFreeModel(requestedModelLowerCased))) {
if (!isFreeRequest) {
return NextResponse.json(
{
error: {
Expand Down Expand Up @@ -183,6 +184,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
editor_name: extractHeaderAndLimitLength(request, 'x-kilocode-editorname'),
machine_id: extractHeaderAndLimitLength(request, 'x-kilocode-machineid'),
user_byok: !!userByok,
is_free: isFreeRequest,
has_tools: false,
botId,
tokenSource,
Expand All @@ -199,7 +201,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
if (!isAnonymousContext(user)) {
const { balance, settings, plan } = await getBalanceAndOrgSettings(organizationId, user);

if (balance <= 0 && !(await isFreeModel(requestedModelLowerCased)) && !userByok) {
if (balance <= 0 && !isFreeRequest && !userByok) {
return await usageLimitExceededResponse(user, balance);
}

Expand Down
16 changes: 5 additions & 11 deletions apps/web/src/lib/ai-gateway/is-free-model.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
import { KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/auto-model';
import { isKiloExclusiveFreeModel, isOpenRouterStealthModel } from '@/lib/ai-gateway/models';
import { isPublicIdExperimented } from '@/lib/ai-gateway/experiments/membership';

/**
 * Returns true when `model` is intrinsically free. Request-specific funding,
 * such as a selected provider-funded experiment variant, is recorded on the
 * usage context after routing and must not be inferred from mutable model
 * membership.
 *
 * A model counts as intrinsically free when any of the following holds:
 * - it is a Kilo-exclusive free model,
 * - it is the Kilo auto-free model id,
 * - its id ends with `:free`,
 * - it is exactly `openrouter/free`,
 * - it is an OpenRouter stealth model.
 *
 * The function stays `async` so existing `await isFreeModel(...)` call sites
 * keep working, even though nothing is awaited here.
 *
 * @param model - The model id to classify.
 * @returns A promise resolving to `true` when the model is intrinsically free.
 */
export async function isFreeModel(model: string): Promise<boolean> {
  return (
    isKiloExclusiveFreeModel(model) ||
    model === KILO_AUTO_FREE_MODEL.id ||
    (model ?? '').endsWith(':free') ||
    model === 'openrouter/free' ||
    isOpenRouterStealthModel(model ?? '')
  );
}
1 change: 1 addition & 0 deletions apps/web/src/lib/ai-gateway/models.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ describe('isFreeModel', () => {
expect(await isFreeModel('claude-3.7-sonnet')).toBe(false);
expect(await isFreeModel('anthropic/claude-sonnet-4')).toBe(false);
expect(await isFreeModel('google/gemini-2.5-pro')).toBe(false);
expect(await isFreeModel('preview/provider-funded-model')).toBe(false);
});

test('should return false for models with "free" in the middle', async () => {
Expand Down
76 changes: 76 additions & 0 deletions apps/web/src/lib/ai-gateway/processUsage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
mapToUsageStats,
logMicrodollarUsage,
processOpenRouterUsage,
processTokenData,
stripNulBytesInPlace,
toInsertableDbUsageRecord,
} from './processUsage';
Expand Down Expand Up @@ -383,6 +384,7 @@ describe('logMicrodollarUsage', () => {
editor_name: null,
machine_id: null,
user_byok: false,
is_free: false,
has_tools: false,
feature: 'vscode-extension',
session_id: null,
Expand Down Expand Up @@ -512,6 +514,79 @@ describe('logMicrodollarUsage', () => {
expect(metadataRecord?.has_middle_out_transform).toBe(false);
});

// Verifies that usage processed with `is_free: true` on the usage context is
// stored with zero cost and zero cache discount, keeps the reported cost as
// `market_cost`, and leaves the user's `microdollars_used` unchanged.
test('zeroes selected provider-funded traffic using the persisted route decision', async () => {
// Start the user at a known usage total so any charge would be visible.
const user = await insertTestUser({
id: 'test-provider-funded-user',
microdollars_used: 2000,
google_user_email: 'provider-funded@example.com',
});
// Non-zero cost and cache discount: both are expected to be stored as 0.
const usageStats: MicrodollarUsageStats = {
...BASE_USAGE_STATS,
messageId: 'test-provider-funded-msg',
cost_mUsd: 500,
cacheDiscount_mUsd: 25,
is_byok: false,
};
// The free decision is supplied explicitly through `is_free` on the context.
const usageContext: MicrodollarUsageContext = {
...createBaseUsageContext(user),
requested_model: 'preview/provider-funded-model',
is_free: true,
};

await processTokenData(usageStats, usageContext);

// Locate the metadata row by message id, then the usage row by the same id.
const metadataRecord = await db.query.microdollar_usage_metadata.findFirst({
where: eq(microdollar_usage_metadata.message_id, 'test-provider-funded-msg'),
});
const usageRecord = metadataRecord
? await db.query.microdollar_usage.findFirst({
where: eq(microdollar_usage.id, metadataRecord.id),
})
: undefined;
const updatedUser = await findUserById(user.id);

// Billed amounts are zeroed; the original 500 is retained as market cost.
expect(usageRecord?.cost).toBe(0);
expect(usageRecord?.cache_discount).toBe(0);
expect(metadataRecord?.market_cost).toBe(500);
expect(metadataRecord?.is_free).toBe(true);
// The user's running total is unchanged from the initial 2000.
expect(updatedUser?.microdollars_used).toBe(2000);
});

// Counterpart to the provider-funded case: with `is_free: false` on the usage
// context, the same requested model is stored at full cost and the user's
// `microdollars_used` grows by that cost.
test('bills ordinary traffic without a persisted provider-funded decision', async () => {
// Start the user at a known usage total so the charge can be measured.
const user = await insertTestUser({
id: 'test-non-funded-preview-user',
microdollars_used: 2000,
google_user_email: 'non-funded-preview@example.com',
});
const usageStats: MicrodollarUsageStats = {
...BASE_USAGE_STATS,
messageId: 'test-non-funded-preview-msg',
cost_mUsd: 500,
is_byok: false,
};
// Same requested model as the funded test, but explicitly not marked free.
const usageContext: MicrodollarUsageContext = {
...createBaseUsageContext(user),
requested_model: 'preview/provider-funded-model',
is_free: false,
};

await processTokenData(usageStats, usageContext);

// Locate the metadata row by message id, then the usage row by the same id.
const metadataRecord = await db.query.microdollar_usage_metadata.findFirst({
where: eq(microdollar_usage_metadata.message_id, 'test-non-funded-preview-msg'),
});
const usageRecord = metadataRecord
? await db.query.microdollar_usage.findFirst({
where: eq(microdollar_usage.id, metadataRecord.id),
})
: undefined;
const updatedUser = await findUserById(user.id);

// Full cost is recorded and added to the user's total (2000 + 500).
expect(usageRecord?.cost).toBe(500);
expect(metadataRecord?.is_free).toBe(false);
expect(updatedUser?.microdollars_used).toBe(2500);
});

test('stores 3 usage records with overlapping data and tests metadata deduplication', async () => {
const user = await insertTestUser({
id: 'test-dedup-user',
Expand Down Expand Up @@ -943,6 +1018,7 @@ describe('toInsertableDbUsageRecord NUL-byte sanitization', () => {
editor_name: 'vscode',
machine_id: 'machine',
user_byok: false,
is_free: false,
has_tools: false,
feature: null,
session_id: 'session',
Expand Down
Loading