Skip to content

Commit b28479c

Browse files
committed
Switch to baseten provider for minimax
1 parent 45a7ec1 commit b28479c

File tree

6 files changed

+654
-16
lines changed

6 files changed

+654
-16
lines changed

agents/base2/base2.ts

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,6 @@ export function createBase2(
3030
publisher,
3131
model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6',
3232
providerOptions: isFree ? {
33-
only: ['inceptron/fp8'],
34-
order: ['inceptron/fp8'],
35-
allow_fallbacks: false,
3633
data_collection: 'deny',
3734
} : {
3835
only: ['amazon-bedrock'],

agents/tmux-cli.ts

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,6 @@ const definition: AgentDefinition = {
7575
// Provider options are tightly coupled to the model choice above.
7676
// If you change the model, update these accordingly.
7777
providerOptions: {
78-
only: ['inceptron/fp8'],
79-
order: ['inceptron/fp8'],
80-
allow_fallbacks: false,
8178
data_collection: 'deny',
8279
},
8380

packages/internal/src/env-schema.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ export const serverEnvSchema = clientEnvSchema.extend({
77
OPENAI_API_KEY: z.string().min(1),
88
ANTHROPIC_API_KEY: z.string().min(1),
99
FIREWORKS_API_KEY: z.string().min(1),
10+
BASETEN_API_KEY: z.string().min(1).optional(),
1011
LINKUP_API_KEY: z.string().min(1),
1112
CONTEXT7_API_KEY: z.string().optional(),
1213
GRAVITY_API_KEY: z.string().min(1),
@@ -50,6 +51,7 @@ export const serverProcessEnv: ServerInput = {
5051
OPENAI_API_KEY: process.env.OPENAI_API_KEY,
5152
ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
5253
FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
54+
BASETEN_API_KEY: process.env.BASETEN_API_KEY,
5355
LINKUP_API_KEY: process.env.LINKUP_API_KEY,
5456
CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY,
5557
GRAVITY_API_KEY: process.env.GRAVITY_API_KEY,

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 44 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,12 @@ import type { NextRequest } from 'next/server'
3535

3636
import type { ChatCompletionRequestBody } from '@/llm-api/types'
3737

38+
import {
39+
BasetenError,
40+
handleBasetenNonStream,
41+
handleBasetenStream,
42+
isBasetenModel,
43+
} from '@/llm-api/baseten'
3844
import {
3945
FireworksError,
4046
handleFireworksNonStream,
@@ -354,9 +360,20 @@ export async function postChatCompletions(params: {
354360
// Handle streaming vs non-streaming
355361
try {
356362
if (bodyStream) {
357-
// Streaming request — route to Fireworks for supported models
358-
const useFireworks = isFireworksModel(typedBody.model)
359-
const stream = useFireworks
363+
// Streaming request — route to Baseten/Fireworks for supported models
364+
const useBaseten = isBasetenModel(typedBody.model)
365+
const useFireworks = !useBaseten && isFireworksModel(typedBody.model)
366+
const stream = useBaseten
367+
? await handleBasetenStream({
368+
body: typedBody,
369+
userId,
370+
stripeCustomerId,
371+
agentId,
372+
fetch,
373+
logger,
374+
insertMessageBigquery,
375+
})
376+
: useFireworks
360377
? await handleFireworksStream({
361378
body: typedBody,
362379
userId,
@@ -396,9 +413,10 @@ export async function postChatCompletions(params: {
396413
},
397414
})
398415
} else {
399-
// Non-streaming request — route to Fireworks for supported models
416+
// Non-streaming request — route to Baseten/Fireworks for supported models
400417
const model = typedBody.model
401-
const useFireworks = isFireworksModel(model)
418+
const useBaseten = isBasetenModel(model)
419+
const useFireworks = !useBaseten && isFireworksModel(model)
402420
const modelParts = model.split('/')
403421
const shortModelName = modelParts.length > 1 ? modelParts[1] : model
404422
const isOpenAIDirectModel =
@@ -409,7 +427,17 @@ export async function postChatCompletions(params: {
409427
const shouldUseOpenAIEndpoint =
410428
isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined
411429

412-
const nonStreamRequest = useFireworks
430+
const nonStreamRequest = useBaseten
431+
? handleBasetenNonStream({
432+
body: typedBody,
433+
userId,
434+
stripeCustomerId,
435+
agentId,
436+
fetch,
437+
logger,
438+
insertMessageBigquery,
439+
})
440+
: useFireworks
413441
? handleFireworksNonStream({
414442
body: typedBody,
415443
userId,
@@ -463,10 +491,14 @@ export async function postChatCompletions(params: {
463491
if (error instanceof FireworksError) {
464492
fireworksError = error
465493
}
494+
let basetenError: BasetenError | undefined
495+
if (error instanceof BasetenError) {
496+
basetenError = error
497+
}
466498

467499
// Log detailed error information for debugging
468500
const errorDetails = openrouterError?.toJSON()
469-
const providerLabel = fireworksError ? 'Fireworks' : 'OpenRouter'
501+
const providerLabel = basetenError ? 'Baseten' : fireworksError ? 'Fireworks' : 'OpenRouter'
470502
logger.error(
471503
{
472504
error: getErrorObject(error),
@@ -480,8 +512,8 @@ export async function postChatCompletions(params: {
480512
? typedBody.messages.length
481513
: 0,
482514
messages: typedBody.messages,
483-
providerStatusCode: (openrouterError ?? fireworksError)?.statusCode,
484-
providerStatusText: (openrouterError ?? fireworksError)?.statusText,
515+
providerStatusCode: (openrouterError ?? fireworksError ?? basetenError)?.statusCode,
516+
providerStatusText: (openrouterError ?? fireworksError ?? basetenError)?.statusText,
485517
openrouterErrorCode: errorDetails?.error?.code,
486518
openrouterErrorType: errorDetails?.error?.type,
487519
openrouterErrorMessage: errorDetails?.error?.message,
@@ -509,6 +541,9 @@ export async function postChatCompletions(params: {
509541
if (error instanceof FireworksError) {
510542
return NextResponse.json(error.toJSON(), { status: error.statusCode })
511543
}
544+
if (error instanceof BasetenError) {
545+
return NextResponse.json(error.toJSON(), { status: error.statusCode })
546+
}
512547

513548
return NextResponse.json(
514549
{ error: 'Failed to process request' },

0 commit comments

Comments
 (0)