diff --git a/.env.example b/.env.example index a1b46a0b8..f21e7a9e4 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,7 @@ ANTHROPIC_API_KEY=dummy_anthropic_key FIREWORKS_API_KEY=dummy_fireworks_key CANOPYWAVE_API_KEY=dummy_canopywave_key SILICONFLOW_API_KEY=dummy_siliconflow_key +MINIMAX_API_KEY=dummy_minimax_key # Database & Server DATABASE_URL=postgresql://manicode_user_local:secretpassword_local@localhost:5432/manicode_db_local diff --git a/common/src/constants/model-config.ts b/common/src/constants/model-config.ts index c75bda26e..75a6d768e 100644 --- a/common/src/constants/model-config.ts +++ b/common/src/constants/model-config.ts @@ -6,6 +6,7 @@ export const ALLOWED_MODEL_PREFIXES = [ 'openai', 'google', 'x-ai', + 'minimax', ] as const export const costModes = [ @@ -47,6 +48,8 @@ export const openrouterModels = { openrouter_gemini2_5_flash_thinking: 'google/gemini-2.5-flash-preview:thinking', openrouter_grok_4: 'x-ai/grok-4-07-09', + openrouter_minimax_m2_5: 'minimax/minimax-m2.5', + openrouter_minimax_m2_5_highspeed: 'minimax/minimax-m2.5-highspeed', } as const export type openrouterModel = (typeof openrouterModels)[keyof typeof openrouterModels] @@ -171,6 +174,7 @@ export const providerDomains = { openai: 'chatgpt.com', deepseek: 'deepseek.com', xai: 'x.ai', + minimax: 'minimax.io', } as const export function getLogoForModel(modelName: string): string | undefined { @@ -182,6 +186,7 @@ export function getLogoForModel(modelName: string): string | undefined { domain = providerDomains.deepseek else if (modelName.includes('claude')) domain = providerDomains.anthropic else if (modelName.includes('grok')) domain = providerDomains.xai + else if (modelName.includes('minimax')) domain = providerDomains.minimax return domain ? 
`https://www.google.com/s2/favicons?domain=${domain}&sz=256` diff --git a/packages/internal/src/env-schema.ts b/packages/internal/src/env-schema.ts index c4bfa7423..5b0e4ddd9 100644 --- a/packages/internal/src/env-schema.ts +++ b/packages/internal/src/env-schema.ts @@ -9,6 +9,7 @@ export const serverEnvSchema = clientEnvSchema.extend({ FIREWORKS_API_KEY: z.string().min(1), CANOPYWAVE_API_KEY: z.string().min(1).optional(), SILICONFLOW_API_KEY: z.string().min(1).optional(), + MINIMAX_API_KEY: z.string().min(1).optional(), LINKUP_API_KEY: z.string().min(1), CONTEXT7_API_KEY: z.string().optional(), GRAVITY_API_KEY: z.string().min(1), @@ -54,6 +55,7 @@ export const serverProcessEnv: ServerInput = { FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, CANOPYWAVE_API_KEY: process.env.CANOPYWAVE_API_KEY, SILICONFLOW_API_KEY: process.env.SILICONFLOW_API_KEY, + MINIMAX_API_KEY: process.env.MINIMAX_API_KEY, LINKUP_API_KEY: process.env.LINKUP_API_KEY, CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY, GRAVITY_API_KEY: process.env.GRAVITY_API_KEY, diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 77a2ab901..dbb953118 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -53,6 +53,12 @@ import { handleSiliconFlowStream, isSiliconFlowModel, } from '@/llm-api/siliconflow' +import { + MiniMaxError, + handleMiniMaxNonStream, + handleMiniMaxStream, + isMiniMaxModel, +} from '@/llm-api/minimax' import { handleOpenAINonStream, OPENAI_SUPPORTED_MODELS, @@ -366,11 +372,22 @@ export async function postChatCompletions(params: { // Handle streaming vs non-streaming try { if (bodyStream) { - // Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models + // Streaming request — route to MiniMax/SiliconFlow/CanopyWave/Fireworks for supported models + const useMiniMax = isMiniMaxModel(typedBody.model) const useSiliconFlow = false // 
isSiliconFlowModel(typedBody.model) const useCanopyWave = false // isCanopyWaveModel(typedBody.model) - const useFireworks = isFireworksModel(typedBody.model) - const stream = useSiliconFlow + const useFireworks = !useMiniMax && isFireworksModel(typedBody.model) + const stream = useMiniMax + ? await handleMiniMaxStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useSiliconFlow ? await handleSiliconFlowStream({ body: typedBody, userId, @@ -430,12 +447,13 @@ export async function postChatCompletions(params: { }, }) } else { - // Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models + // Non-streaming request — route to MiniMax/SiliconFlow/CanopyWave/Fireworks for supported models // TEMPORARILY DISABLED: route through OpenRouter const model = typedBody.model + const useMiniMaxDirect = isMiniMaxModel(model) const useSiliconFlow = false // isSiliconFlowModel(model) const useCanopyWave = false // isCanopyWaveModel(model) - const useFireworks = isFireworksModel(model) + const useFireworks = !useMiniMaxDirect && isFireworksModel(model) const modelParts = model.split('/') const shortModelName = modelParts.length > 1 ? modelParts[1] : model const isOpenAIDirectModel = @@ -446,7 +464,17 @@ export async function postChatCompletions(params: { const shouldUseOpenAIEndpoint = isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined - const nonStreamRequest = useSiliconFlow + const nonStreamRequest = useMiniMaxDirect + ? handleMiniMaxNonStream({ + body: typedBody, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, + }) + : useSiliconFlow ? 
handleSiliconFlowNonStream({ body: typedBody, userId, @@ -528,10 +556,14 @@ export async function postChatCompletions(params: { if (error instanceof SiliconFlowError) { siliconflowError = error } + let minimaxError: MiniMaxError | undefined + if (error instanceof MiniMaxError) { + minimaxError = error + } // Log detailed error information for debugging const errorDetails = openrouterError?.toJSON() - const providerLabel = siliconflowError ? 'SiliconFlow' : canopywaveError ? 'CanopyWave' : fireworksError ? 'Fireworks' : 'OpenRouter' + const providerLabel = minimaxError ? 'MiniMax' : siliconflowError ? 'SiliconFlow' : canopywaveError ? 'CanopyWave' : fireworksError ? 'Fireworks' : 'OpenRouter' logger.error( { error: getErrorObject(error), @@ -545,8 +577,8 @@ export async function postChatCompletions(params: { ? typedBody.messages.length : 0, messages: typedBody.messages, - providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError ?? siliconflowError)?.statusCode, - providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError ?? siliconflowError)?.statusText, + providerStatusCode: (openrouterError ?? fireworksError ?? canopywaveError ?? siliconflowError ?? minimaxError)?.statusCode, + providerStatusText: (openrouterError ?? fireworksError ?? canopywaveError ?? siliconflowError ?? 
minimaxError)?.statusText, openrouterErrorCode: errorDetails?.error?.code, openrouterErrorType: errorDetails?.error?.type, openrouterErrorMessage: errorDetails?.error?.message, @@ -580,6 +612,9 @@ export async function postChatCompletions(params: { if (error instanceof SiliconFlowError) { return NextResponse.json(error.toJSON(), { status: error.statusCode }) } + if (error instanceof MiniMaxError) { + return NextResponse.json(error.toJSON(), { status: error.statusCode }) + } return NextResponse.json( { error: 'Failed to process request' }, diff --git a/web/src/llm-api/minimax.ts b/web/src/llm-api/minimax.ts new file mode 100644 index 000000000..0bd13c7bb --- /dev/null +++ b/web/src/llm-api/minimax.ts @@ -0,0 +1,686 @@ +import { Agent } from 'undici' + +import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' +import { getErrorObject } from '@codebuff/common/util/error' +import { env } from '@codebuff/internal/env' + +import { + consumeCreditsForMessage, + extractRequestMetadata, + insertMessageToBigQuery, +} from './helpers' + +import type { UsageData } from './helpers' +import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ChatCompletionRequestBody } from './types' + +const MINIMAX_BASE_URL = 'https://api.minimax.io/v1' + +// Extended timeout for models that can take a long time to start streaming. 
+const MINIMAX_HEADERS_TIMEOUT_MS = 10 * 60 * 1000 + +const minimaxAgent = new Agent({ + headersTimeout: MINIMAX_HEADERS_TIMEOUT_MS, + bodyTimeout: 0, +}) + +/** Map from OpenRouter-style model IDs to MiniMax model IDs */ +const MINIMAX_MODEL_MAP: Record<string, string> = { + 'minimax/minimax-m2.5': 'MiniMax-M2.5', + 'minimax/minimax-m2.5-highspeed': 'MiniMax-M2.5-highspeed', +} + +export function isMiniMaxModel(model: string): boolean { + return model in MINIMAX_MODEL_MAP +} + +function getMiniMaxModelId(openrouterModel: string): string { + return MINIMAX_MODEL_MAP[openrouterModel] ?? openrouterModel +} + +type StreamState = { responseText: string; reasoningText: string } + +type LineResult = { + state: StreamState + billedCredits?: number + patchedLine: string +} + +function createMiniMaxRequest(params: { + body: ChatCompletionRequestBody + originalModel: string + fetch: typeof globalThis.fetch +}) { + const { body, originalModel, fetch } = params + const minimaxBody: Record<string, unknown> = { + ...body, + model: getMiniMaxModelId(originalModel), + } + + // Strip OpenRouter-specific / internal fields + delete minimaxBody.provider + delete minimaxBody.transforms + delete minimaxBody.codebuff_metadata + delete minimaxBody.usage + + // For streaming, request usage in the final chunk + if (minimaxBody.stream) { + minimaxBody.stream_options = { include_usage: true } + } + + // MiniMax temperature constraint: (0.0, 1.0], default to 1.0 + if ( + minimaxBody.temperature === undefined || + minimaxBody.temperature === null || + minimaxBody.temperature === 0 + ) { + minimaxBody.temperature = 1.0 + } + + if (!env.MINIMAX_API_KEY) { + throw new Error('MINIMAX_API_KEY is not configured') + } + + return fetch(`${MINIMAX_BASE_URL}/chat/completions`, { + method: 'POST', + headers: { + Authorization: `Bearer ${env.MINIMAX_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(minimaxBody), + // @ts-expect-error - dispatcher is a valid undici option not in fetch types + dispatcher: 
minimaxAgent, + }) +} + +// MiniMax per-token pricing (dollars per token) for MiniMax-M2.5 +const MINIMAX_INPUT_COST_PER_TOKEN = 0.3 / 1_000_000 +const MINIMAX_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 +const MINIMAX_OUTPUT_COST_PER_TOKEN = 1.2 / 1_000_000 + +function extractUsageAndCost( + usage: Record<string, unknown> | undefined | null, +): UsageData { + if (!usage) + return { + inputTokens: 0, + outputTokens: 0, + cacheReadInputTokens: 0, + reasoningTokens: 0, + cost: 0, + } + const promptDetails = usage.prompt_tokens_details as + | Record<string, unknown> + | undefined + | null + const completionDetails = usage.completion_tokens_details as + | Record<string, unknown> + | undefined + | null + + const inputTokens = + typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0 + const outputTokens = + typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0 + const cacheReadInputTokens = + typeof promptDetails?.cached_tokens === 'number' + ? promptDetails.cached_tokens + : 0 + const reasoningTokens = + typeof completionDetails?.reasoning_tokens === 'number' + ? 
completionDetails.reasoning_tokens + : 0 + + const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) + const cost = + nonCachedInputTokens * MINIMAX_INPUT_COST_PER_TOKEN + + cacheReadInputTokens * MINIMAX_CACHED_INPUT_COST_PER_TOKEN + + outputTokens * MINIMAX_OUTPUT_COST_PER_TOKEN + + return { + inputTokens, + outputTokens, + cacheReadInputTokens, + reasoningTokens, + cost, + } +} + +export async function handleMiniMaxNonStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ + body, + logger, + }) + + const response = await createMiniMaxRequest({ + body, + originalModel, + fetch, + }) + + if (!response.ok) { + throw await parseMiniMaxError(response) + } + + const data = await response.json() + const content = data.choices?.[0]?.message?.content ?? '' + const reasoningText = + data.choices?.[0]?.message?.reasoning_content ?? + data.choices?.[0]?.message?.reasoning ?? 
+ '' + const usageData = extractUsageAndCost(data.usage) + + insertMessageToBigQuery({ + messageId: data.id, + userId, + startTime, + request: body, + reasoningText, + responseText: content, + usageData, + logger, + insertMessageBigquery, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId: data.id, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText, + responseText: content, + usageData, + byok: false, + logger, + costMode, + }) + + // Overwrite cost so SDK calculates exact credits we charged + if (data.usage) { + data.usage.cost = creditsToFakeCost(billedCredits) + data.usage.cost_details = { upstream_inference_cost: 0 } + } + + // Normalise model name back to OpenRouter format for client compatibility + data.model = originalModel + if (!data.provider) data.provider = 'MiniMax' + + return data +} + +export async function handleMiniMaxStream({ + body, + userId, + stripeCustomerId, + agentId, + fetch, + logger, + insertMessageBigquery, +}: { + body: ChatCompletionRequestBody + userId: string + stripeCustomerId?: string | null + agentId: string + fetch: typeof globalThis.fetch + logger: Logger + insertMessageBigquery: InsertMessageBigqueryFn +}) { + const originalModel = body.model + const startTime = new Date() + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ + body, + logger, + }) + + const response = await createMiniMaxRequest({ + body, + originalModel, + fetch, + }) + + if (!response.ok) { + throw await parseMiniMaxError(response) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Failed to get response reader') + } + + let heartbeatInterval: NodeJS.Timeout + let state: StreamState = { responseText: '', reasoningText: '' } + let clientDisconnected = false + + const stream = new ReadableStream({ + async 
start(controller) { + const decoder = new TextDecoder() + let buffer = '' + + controller.enqueue( + new TextEncoder().encode(`: connected ${new Date().toISOString()}\n`), + ) + + heartbeatInterval = setInterval(() => { + if (!clientDisconnected) { + try { + controller.enqueue( + new TextEncoder().encode( + `: heartbeat ${new Date().toISOString()}\n\n`, + ), + ) + } catch { + // client disconnected + } + } + }, 30000) + + try { + let done = false + while (!done) { + const result = await reader.read() + done = result.done + const value = result.value + + if (done) break + + buffer += decoder.decode(value, { stream: true }) + let lineEnd = buffer.indexOf('\n') + + while (lineEnd !== -1) { + const line = buffer.slice(0, lineEnd + 1) + buffer = buffer.slice(lineEnd + 1) + + const lineResult = await handleLine({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request: body, + originalModel, + line, + state, + logger, + insertMessage: insertMessageBigquery, + }) + state = lineResult.state + + if (!clientDisconnected) { + try { + controller.enqueue( + new TextEncoder().encode(lineResult.patchedLine), + ) + } catch { + logger.warn( + 'Client disconnected during stream, continuing for billing', + ) + clientDisconnected = true + } + } + + lineEnd = buffer.indexOf('\n') + } + } + + if (!clientDisconnected) { + controller.close() + } + } catch (error) { + if (!clientDisconnected) { + controller.error(error) + } else { + logger.warn( + getErrorObject(error), + 'Error after client disconnect in MiniMax stream', + ) + } + } finally { + clearInterval(heartbeatInterval) + } + }, + cancel() { + clearInterval(heartbeatInterval) + clientDisconnected = true + logger.warn( + { + clientDisconnected, + responseTextLength: state.responseText.length, + reasoningTextLength: state.reasoningText.length, + }, + 'Client cancelled stream, continuing MiniMax consumption for billing', + ) + }, + }) + + return stream +} + +async function handleLine({ 
+ userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + line, + state, + logger, + insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + line: string + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<LineResult> { + if (!line.startsWith('data: ')) { + return { state, patchedLine: line } + } + + const raw = line.slice('data: '.length) + if (raw === '[DONE]\n' || raw === '[DONE]') { + return { state, patchedLine: line } + } + + let obj: Record<string, unknown> + try { + obj = JSON.parse(raw) + } catch (error) { + logger.warn( + { error: getErrorObject(error, { includeRawError: true }) }, + 'Received non-JSON MiniMax response', + ) + return { state, patchedLine: line } + } + + // Patch model and provider for SDK compatibility + if (obj.model) obj.model = originalModel + if (!obj.provider) obj.provider = 'MiniMax' + + // Process the chunk for billing / state tracking + const result = await handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data: obj, + state, + logger, + insertMessage, + }) + + // If this is the final chunk with billing, overwrite cost in the patched object + if (result.billedCredits !== undefined && obj.usage) { + const usage = obj.usage as Record<string, unknown> + usage.cost = creditsToFakeCost(result.billedCredits) + usage.cost_details = { upstream_inference_cost: 0 } + } + + const patchedLine = `data: ${JSON.stringify(obj)}\n` + return { + state: result.state, + billedCredits: result.billedCredits, + patchedLine, + } +} + +async function handleResponse({ + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + costMode, + startTime, + request, + originalModel, + data, + state, + logger, 
+ insertMessage, +}: { + userId: string + stripeCustomerId?: string | null + agentId: string + clientId: string | null + clientRequestId: string | null + costMode: string | undefined + startTime: Date + request: unknown + originalModel: string + data: Record<string, unknown> + state: StreamState + logger: Logger + insertMessage: InsertMessageBigqueryFn +}): Promise<{ state: StreamState; billedCredits?: number }> { + state = handleStreamChunk({ + data, + state, + logger, + userId, + agentId, + model: originalModel, + }) + + if ('error' in data || !data.usage) { + return { state } + } + + const usageData = extractUsageAndCost( + data.usage as Record<string, unknown>, + ) + const messageId = typeof data.id === 'string' ? data.id : 'unknown' + + insertMessageToBigQuery({ + messageId, + userId, + startTime, + request, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + logger, + insertMessageBigquery: insertMessage, + }).catch((error) => { + logger.error({ error }, 'Failed to insert message into BigQuery') + }) + + const billedCredits = await consumeCreditsForMessage({ + messageId, + userId, + stripeCustomerId, + agentId, + clientId, + clientRequestId, + startTime, + model: originalModel, + reasoningText: state.reasoningText, + responseText: state.responseText, + usageData, + byok: false, + logger, + costMode, + }) + + return { state, billedCredits } +} + +function handleStreamChunk({ + data, + state, + logger, + userId, + agentId, + model, +}: { + data: Record<string, unknown> + state: StreamState + logger: Logger + userId: string + agentId: string + model: string +}): StreamState { + const MAX_BUFFER_SIZE = 1 * 1024 * 1024 + + if ('error' in data) { + const errorData = data.error as Record<string, unknown> + logger.error( + { + userId, + agentId, + model, + errorCode: errorData?.code, + errorType: errorData?.type, + errorMessage: errorData?.message, + }, + 'Received error chunk in MiniMax stream', + ) + return state + } + + const choices = data.choices as Array<Record<string, unknown>> | undefined + if (!choices?.length) 
{ + return state + } + const choice = choices[0] + const delta = choice.delta as Record<string, unknown> | undefined + + const contentDelta = typeof delta?.content === 'string' ? delta.content : '' + if (state.responseText.length < MAX_BUFFER_SIZE) { + state.responseText += contentDelta + if (state.responseText.length >= MAX_BUFFER_SIZE) { + state.responseText = + state.responseText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn( + { userId, agentId, model }, + 'Response text buffer truncated at 1MB', + ) + } + } + + const reasoningDelta = + typeof delta?.reasoning_content === 'string' + ? delta.reasoning_content + : typeof delta?.reasoning === 'string' + ? delta.reasoning + : '' + if (state.reasoningText.length < MAX_BUFFER_SIZE) { + state.reasoningText += reasoningDelta + if (state.reasoningText.length >= MAX_BUFFER_SIZE) { + state.reasoningText = + state.reasoningText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---' + logger.warn( + { userId, agentId, model }, + 'Reasoning text buffer truncated at 1MB', + ) + } + } + + return state +} + +export class MiniMaxError extends Error { + constructor( + public readonly statusCode: number, + public readonly statusText: string, + public readonly errorBody: { + error: { + message: string + code: string | number | null + type?: string | null + } + }, + ) { + super(errorBody.error.message) + this.name = 'MiniMaxError' + } + + toJSON() { + return { + error: { + message: this.errorBody.error.message, + code: this.errorBody.error.code, + type: this.errorBody.error.type, + }, + } + } +} + +async function parseMiniMaxError(response: Response): Promise<MiniMaxError> { + const errorText = await response.text() + let errorBody: MiniMaxError['errorBody'] + try { + const parsed = JSON.parse(errorText) + if (parsed?.error?.message) { + errorBody = { + error: { + message: parsed.error.message, + code: parsed.error.code ?? null, + type: parsed.error.type ?? 
null, + }, + } + } else { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + } catch { + errorBody = { + error: { + message: errorText || response.statusText, + code: response.status, + }, + } + } + return new MiniMaxError(response.status, response.statusText, errorBody) +} + +function creditsToFakeCost(credits: number): number { + return credits / ((1 + PROFIT_MARGIN) * 100) +}