diff --git a/.env.example b/.env.example index f7a3ad005..446d199bb 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,7 @@ # Required for integration tests when TEST_INTEGRATION=1 ANTHROPIC_API_KEY=sk-ant-... OPENAI_API_KEY=sk-proj-... +OPENROUTER_API_KEY=sk-or-v1-... # Optional: Set to 1 to run integration tests # Integration tests require API keys to be set diff --git a/README.md b/README.md index 99401f545..652b7b268 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,8 @@ Here are some specific use cases we enable: - **Local**: git worktrees on your local machine ([docs](https://cmux.io/local.html)) - **SSH**: regular git clones on a remote server ([docs](https://cmux.io/ssh.html)) - Multi-model (`sonnet-4-*`, `gpt-5-*`, `opus-4-*`) - - Ollama supported for local LLMs ([docs](https://cmux.io/models.html)) + - Ollama supported for local LLMs ([docs](https://cmux.io/models.html#ollama-local)) + - OpenRouter supported for the long tail of LLMs ([docs](https://cmux.io/models.html#openrouter-cloud)) - Supporting UI and keybinds for efficiently managing a suite of agents - Rich markdown outputs (mermaid diagrams, LaTeX, etc.) 
diff --git a/bun.lock b/bun.lock index 9167d62f2..29f3f6229 100644 --- a/bun.lock +++ b/bun.lock @@ -6,6 +6,7 @@ "dependencies": { "@ai-sdk/anthropic": "^2.0.29", "@ai-sdk/openai": "^2.0.52", + "@openrouter/ai-sdk-provider": "^1.2.1", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-scroll-area": "^1.2.10", @@ -405,6 +406,8 @@ "@nodelib/fs.walk": ["@nodelib/fs.walk@1.2.8", "", { "dependencies": { "@nodelib/fs.scandir": "2.1.5", "fastq": "^1.6.0" } }, "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg=="], + "@openrouter/ai-sdk-provider": ["@openrouter/ai-sdk-provider@1.2.1", "", { "peerDependencies": { "ai": "^5.0.0", "zod": "^3.24.1 || ^v4" } }, "sha512-sDc+/tlEM9VTsYlZ3YMwD9AHinSNusdLFGQhtb50eo5r68U/yBixEHRsKEevqSspiX3V6J06hU7C25t4KE9iag=="], + "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="], "@pkgjs/parseargs": ["@pkgjs/parseargs@0.11.0", "", {}, "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg=="], diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 813b5aedd..25fa973a0 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -224,6 +224,11 @@ This project uses **Make** as the primary build orchestrator. See `Makefile` for - Always run `make typecheck` after making changes to verify types (checks both main and renderer) - **⚠️ CRITICAL: Unit tests MUST be colocated with the code they test** - Place `*.test.ts` files in the same directory as the implementation file (e.g., `src/utils/foo.test.ts` next to `src/utils/foo.ts`). Tests in `./tests/` are ONLY for integration/E2E tests that require complex setup. - **Don't test simple mapping operations** - If the test just verifies the code does what it obviously does from reading it, skip the test. 
+ - ❌ **Bad**: `expect(REGISTRY.foo).toBe("bar")` - This just duplicates the implementation + - ✅ **Good**: `expect(Object.keys(REGISTRY).length).toBeGreaterThan(0)` - Tests an invariant + - ❌ **Bad**: `expect(isValid("foo")).toBe(true)` for every valid value - Duplicates implementation + - ✅ **Good**: `expect(isValid("invalid")).toBe(false)` - Tests boundary/error cases + - **Rule of thumb**: If changing the implementation requires changing the test in the same way, the test is probably useless - Strive to decompose complex logic away from the components and into `.src/utils/` - utils should be either pure functions or easily isolated (e.g. if they operate on the FS they accept a path). Testing them should not require complex mocks or setup. diff --git a/docs/models.md b/docs/models.md index a6a53a8b9..496c6c223 100644 --- a/docs/models.md +++ b/docs/models.md @@ -27,6 +27,79 @@ GPT-5 family of models: TODO: add issue link here. +#### OpenRouter (Cloud) + +Access 300+ models from multiple providers through a single API: + +- `openrouter:z-ai/glm-4.6` +- `openrouter:anthropic/claude-3.5-sonnet` +- `openrouter:google/gemini-2.0-flash-thinking-exp` +- `openrouter:deepseek/deepseek-chat` +- `openrouter:openai/gpt-4o` +- Any model from [OpenRouter Models](https://openrouter.ai/models) + +**Setup:** + +1. Get your API key from [openrouter.ai](https://openrouter.ai/) +2. Add to `~/.cmux/providers.jsonc`: + +```jsonc +{ + "openrouter": { + "apiKey": "sk-or-v1-...", + }, +} +``` + +**Provider Routing (Advanced):** + +OpenRouter can route requests to specific infrastructure providers (Cerebras, Fireworks, Together, etc.). 
Configure provider preferences in `~/.cmux/providers.jsonc`: + +```jsonc +{ + "openrouter": { + "apiKey": "sk-or-v1-...", + // Use Cerebras for ultra-fast inference + "order": ["Cerebras", "Fireworks"], // Try in order + "allow_fallbacks": true, // Allow other providers if unavailable + }, +} +``` + +Or require a specific provider (no fallbacks): + +```jsonc +{ + "openrouter": { + "apiKey": "sk-or-v1-...", + "order": ["Cerebras"], // Only try Cerebras + "allow_fallbacks": false, // Fail if Cerebras unavailable + }, +} +``` + +**Provider Routing Options:** + +- `order`: Array of provider names to try in priority order (e.g., `["Cerebras", "Fireworks"]`) +- `allow_fallbacks`: Boolean - whether to fall back to other providers (default: `true`) +- `only`: Array - restrict to only these providers +- `ignore`: Array - exclude specific providers +- `require_parameters`: Boolean - only use providers supporting all your request parameters +- `data_collection`: `"allow"` or `"deny"` - control whether providers can store/train on your data + +See [OpenRouter Provider Routing docs](https://openrouter.ai/docs/features/provider-routing) for details. + +**Reasoning Models:** + +OpenRouter supports reasoning models like Claude Sonnet Thinking. Use the thinking slider to control reasoning effort: + +- **Off**: No extended reasoning +- **Low**: Quick reasoning for straightforward tasks +- **Medium**: Standard reasoning for moderate complexity (default) +- **High**: Deep reasoning for complex problems + +The thinking level is passed to OpenRouter as `reasoning.effort` and works with any reasoning-capable model. See [OpenRouter Reasoning docs](https://openrouter.ai/docs/use-cases/reasoning-tokens) for details. + #### Ollama (Local) Run models locally with Ollama. No API key required: @@ -68,6 +141,10 @@ All providers are configured in `~/.cmux/providers.jsonc`. 
Example configuration "openai": { "apiKey": "sk-...", }, + // Required for OpenRouter models + "openrouter": { + "apiKey": "sk-or-v1-...", + }, // Optional for Ollama (only needed for custom URL) "ollama": { "baseUrl": "http://your-server:11434/api", diff --git a/package.json b/package.json index 1092a1ff7..d64845575 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "dependencies": { "@ai-sdk/anthropic": "^2.0.29", "@ai-sdk/openai": "^2.0.52", + "@openrouter/ai-sdk-provider": "^1.2.1", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-scroll-area": "^1.2.10", diff --git a/src/constants/providers.test.ts b/src/constants/providers.test.ts new file mode 100644 index 000000000..16b417307 --- /dev/null +++ b/src/constants/providers.test.ts @@ -0,0 +1,31 @@ +/** + * Test that provider registry structure is correct + */ + +import { describe, test, expect } from "bun:test"; +import { PROVIDER_REGISTRY, SUPPORTED_PROVIDERS, isValidProvider } from "./providers"; + +describe("Provider Registry", () => { + test("registry is not empty", () => { + expect(Object.keys(PROVIDER_REGISTRY).length).toBeGreaterThan(0); + }); + + test("all registry values are import functions", () => { + // Registry should map provider names to async import functions + for (const importFn of Object.values(PROVIDER_REGISTRY)) { + expect(typeof importFn).toBe("function"); + expect(importFn.constructor.name).toBe("AsyncFunction"); + } + }); + + test("SUPPORTED_PROVIDERS array stays in sync with registry keys", () => { + // If these don't match, derived array is out of sync + expect(SUPPORTED_PROVIDERS.length).toBe(Object.keys(PROVIDER_REGISTRY).length); + }); + + test("isValidProvider rejects invalid providers", () => { + expect(isValidProvider("invalid")).toBe(false); + expect(isValidProvider("")).toBe(false); + expect(isValidProvider("gpt-4")).toBe(false); + }); +}); diff --git a/src/constants/providers.ts b/src/constants/providers.ts new file 
mode 100644 index 000000000..938770657 --- /dev/null +++ b/src/constants/providers.ts @@ -0,0 +1,72 @@ +/** + * Typed import helpers for provider packages + * + * These functions provide type-safe dynamic imports for provider packages. + * TypeScript can infer the correct module type from literal string imports, + * giving consuming code full type safety for provider constructors. + */ + +/** + * Dynamically import the Anthropic provider package + */ +export async function importAnthropic() { + return await import("@ai-sdk/anthropic"); +} + +/** + * Dynamically import the OpenAI provider package + */ +export async function importOpenAI() { + return await import("@ai-sdk/openai"); +} + +/** + * Dynamically import the Ollama provider package + */ +export async function importOllama() { + return await import("ollama-ai-provider-v2"); +} + +/** + * Dynamically import the OpenRouter provider package + */ +export async function importOpenRouter() { + return await import("@openrouter/ai-sdk-provider"); +} + +/** + * Centralized provider registry mapping provider names to their import functions + * + * This is the single source of truth for supported providers. By mapping to import + * functions rather than package strings, we eliminate duplication while maintaining + * perfect type safety. + * + * When adding a new provider: + * 1. Create an importXxx() function above + * 2. Add entry mapping provider name to the import function + * 3. Implement provider handling in aiService.ts createModel() + * 4. Runtime check will fail if provider in registry but no handler + */ +export const PROVIDER_REGISTRY = { + anthropic: importAnthropic, + openai: importOpenAI, + ollama: importOllama, + openrouter: importOpenRouter, +} as const; + +/** + * Union type of all supported provider names + */ +export type ProviderName = keyof typeof PROVIDER_REGISTRY; + +/** + * Array of all supported provider names (for UI lists, iteration, etc.) 
+ */ +export const SUPPORTED_PROVIDERS = Object.keys(PROVIDER_REGISTRY) as ProviderName[]; + +/** + * Type guard: true only for PROVIDER_REGISTRY's own keys (inherited names like "toString" are rejected) + */ +export function isValidProvider(provider: string): provider is ProviderName { + return Object.prototype.hasOwnProperty.call(PROVIDER_REGISTRY, provider); +} diff --git a/src/services/aiService.ts b/src/services/aiService.ts index ae7c58203..53102fbba 100644 --- a/src/services/aiService.ts +++ b/src/services/aiService.ts @@ -7,10 +7,11 @@ import { sanitizeToolInputs } from "@/utils/messages/sanitizeToolInput"; import type { Result } from "@/types/result"; import { Ok, Err } from "@/types/result"; import type { WorkspaceMetadata } from "@/types/workspace"; +import { PROVIDER_REGISTRY } from "@/constants/providers"; import type { CmuxMessage, CmuxTextPart } from "@/types/message"; import { createCmuxMessage } from "@/types/message"; -import type { Config } from "@/config"; +import type { Config, ProviderConfig } from "@/config"; import { StreamManager } from "./streamManager"; import type { InitStateManager } from "./initStateManager"; import type { SendMessageError } from "@/types/errors"; @@ -91,6 +92,15 @@ if (typeof globalFetchWithExtras.certificate === "function") { globalFetchWithExtras.certificate.bind(globalFetchWithExtras); } +/** + * Get fetch function for provider - use custom if provided, otherwise unlimited timeout default + */ +function getProviderFetch(providerConfig: ProviderConfig): typeof fetch { + return typeof providerConfig.fetch === "function" + ? (providerConfig.fetch as typeof fetch) + : defaultFetchWithUnlimitedTimeout; +} + /** * Preload AI SDK provider modules to avoid race conditions in concurrent test environments. 
* This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly @@ -101,11 +111,7 @@ if (typeof globalFetchWithExtras.certificate === "function") { */ export async function preloadAISDKProviders(): Promise<void> { // Preload providers to ensure they're in the module cache before concurrent tests run - await Promise.all([ - import("@ai-sdk/anthropic"), - import("@ai-sdk/openai"), - import("ollama-ai-provider-v2"), - ]); + await Promise.all(Object.values(PROVIDER_REGISTRY).map((importFn) => importFn())); } /** @@ -260,6 +266,15 @@ export class AIService extends EventEmitter { }); } + // Reject provider names that are not own keys of PROVIDER_REGISTRY up front. A registered + // provider with no handler below still reaches the final provider_not_supported error. + if (!Object.prototype.hasOwnProperty.call(PROVIDER_REGISTRY, providerName)) { + return Err({ + type: "provider_not_supported", + provider: providerName, + }); + } + // Load providers configuration - the ONLY source of truth const providersConfig = this.config.loadProvidersConfig(); let providerConfig = providersConfig?.[providerName] ?? {}; @@ -291,7 +306,7 @@ export class AIService extends EventEmitter { : existingHeaders; // Lazy-load Anthropic provider to reduce startup time - const { createAnthropic } = await import("@ai-sdk/anthropic"); + const { createAnthropic } = await PROVIDER_REGISTRY.anthropic(); const provider = createAnthropic({ ...providerConfig, headers }); return Ok(provider(modelId)); } @@ -304,11 +319,7 @@ export class AIService extends EventEmitter { provider: providerName, }); } - // Use custom fetch if provided, otherwise default with unlimited timeout - const baseFetch = - typeof providerConfig.fetch === "function" - ? (providerConfig.fetch as typeof fetch) - : defaultFetchWithUnlimitedTimeout; + const baseFetch = getProviderFetch(providerConfig); // Wrap fetch to force truncation: "auto" for OpenAI Responses API calls. 
// This is a temporary override until @ai-sdk/openai supports passing @@ -383,11 +394,12 @@ export class AIService extends EventEmitter { ); // Lazy-load OpenAI provider to reduce startup time - const { createOpenAI } = await import("@ai-sdk/openai"); + const { createOpenAI } = await PROVIDER_REGISTRY.openai(); const provider = createOpenAI({ ...providerConfig, - // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment - fetch: fetchWithOpenAITruncation as any, + // Cast is safe: our fetch implementation is compatible with the SDK's fetch type. + // The preconnect method is optional in our implementation but required by the SDK type. + fetch: fetchWithOpenAITruncation as typeof fetch, }); // Use Responses API for persistence and built-in tools // OpenAI manages reasoning state via previousResponseId - no middleware needed @@ -398,24 +410,77 @@ export class AIService extends EventEmitter { // Handle Ollama provider if (providerName === "ollama") { // Ollama doesn't require API key - it's a local service - // Use custom fetch if provided, otherwise default with unlimited timeout - const baseFetch = - typeof providerConfig.fetch === "function" - ? 
(providerConfig.fetch as typeof fetch) - : defaultFetchWithUnlimitedTimeout; + const baseFetch = getProviderFetch(providerConfig); // Lazy-load Ollama provider to reduce startup time - const { createOllama } = await import("ollama-ai-provider-v2"); + const { createOllama } = await PROVIDER_REGISTRY.ollama(); const provider = createOllama({ ...providerConfig, - // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment - fetch: baseFetch as any, + fetch: baseFetch, // Use strict mode for better compatibility with Ollama API compatibility: "strict", }); return Ok(provider(modelId)); } + // Handle OpenRouter provider + if (providerName === "openrouter") { + if (!providerConfig.apiKey) { + return Err({ + type: "api_key_not_found", + provider: providerName, + }); + } + const baseFetch = getProviderFetch(providerConfig); + + // Extract standard provider settings (apiKey, baseUrl, headers, fetch) + const { apiKey, baseUrl, headers, fetch: _fetch, ...extraOptions } = providerConfig; + + // OpenRouter routing options that need to be nested under "provider" in API request + // See: https://openrouter.ai/docs/features/provider-routing + const OPENROUTER_ROUTING_OPTIONS = [ + "order", + "allow_fallbacks", + "only", + "ignore", + "require_parameters", + "data_collection", + "sort", + "quantizations", + ]; + + // Build extraBody: routing options go under "provider", others stay at root + const routingOptions: Record<string, unknown> = {}; + const otherOptions: Record<string, unknown> = {}; + + for (const [key, value] of Object.entries(extraOptions)) { + if (OPENROUTER_ROUTING_OPTIONS.includes(key)) { + routingOptions[key] = value; + } else { + otherOptions[key] = value; + } + } + + // Build extraBody with provider nesting if routing options exist + let extraBody: Record<string, unknown> | undefined; + if (Object.keys(routingOptions).length > 0) { + extraBody = { provider: routingOptions, ...otherOptions }; + } else if (Object.keys(otherOptions).length > 0) { + extraBody = 
otherOptions; + } + + // Lazy-load OpenRouter provider to reduce startup time + const { createOpenRouter } = await PROVIDER_REGISTRY.openrouter(); + const provider = createOpenRouter({ + apiKey, + baseURL: baseUrl, + headers: headers as Record<string, string> | undefined, + fetch: baseFetch, + extraBody, + }); + return Ok(provider(modelId)); + } + return Err({ type: "provider_not_supported", provider: providerName, diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts index 4c27fbf80..eaf06ed6e 100644 --- a/src/services/ipcMain.ts +++ b/src/services/ipcMain.ts @@ -14,6 +14,7 @@ import { log } from "@/services/log"; import { countTokens, countTokensBatch } from "@/utils/main/tokenizer"; import { calculateTokenStats } from "@/utils/tokens/tokenStatsCalculator"; import { IPC_CHANNELS, getChatChannel } from "@/constants/ipc-constants"; +import { SUPPORTED_PROVIDERS } from "@/constants/providers"; import type { SendMessageError } from "@/types/errors"; import type { SendMessageOptions, DeleteMessage } from "@/types/ipc"; import { Ok, Err } from "@/types/result"; @@ -1120,9 +1121,9 @@ export class IpcMain { ipcMain.handle(IPC_CHANNELS.PROVIDERS_LIST, () => { try { - // Return all supported providers, not just configured ones - // This matches the providers defined in the registry - return ["anthropic", "openai"]; + // Return all supported providers from centralized registry + // This automatically stays in sync as new providers are added + return [...SUPPORTED_PROVIDERS]; } catch (error) { log.error("Failed to list providers:", error); return []; diff --git a/src/types/providerOptions.ts b/src/types/providerOptions.ts index a8ad0fcc4..6a132df9d 100644 --- a/src/types/providerOptions.ts +++ b/src/types/providerOptions.ts @@ -37,6 +37,14 @@ export interface OpenAIProviderOptions { // eslint-disable-next-line @typescript-eslint/no-empty-object-type export interface OllamaProviderOptions {} +/** + * OpenRouter-specific options + * Transparently passes through options to the 
OpenRouter provider + * @see https://openrouter.ai/docs + */ +// eslint-disable-next-line @typescript-eslint/no-empty-object-type +export interface OpenRouterProviderOptions {} + /** * Cmux provider options - used by both frontend and backend */ @@ -45,4 +53,5 @@ export interface CmuxProviderOptions { anthropic?: AnthropicProviderOptions; openai?: OpenAIProviderOptions; ollama?: OllamaProviderOptions; + openrouter?: OpenRouterProviderOptions; } diff --git a/src/types/thinking.ts b/src/types/thinking.ts index a5d19a873..b8e1c4163 100644 --- a/src/types/thinking.ts +++ b/src/types/thinking.ts @@ -47,3 +47,19 @@ export const OPENAI_REASONING_EFFORT: Record medium: "medium", high: "high", }; + +/** + * OpenRouter reasoning effort mapping + * + * Maps our unified levels to OpenRouter's reasoning.effort parameter + * (used by Claude Sonnet Thinking and other reasoning models via OpenRouter) + */ +export const OPENROUTER_REASONING_EFFORT: Record< + ThinkingLevel, + "low" | "medium" | "high" | undefined +> = { + off: undefined, + low: "low", + medium: "medium", + high: "high", +}; diff --git a/src/utils/ai/providerOptions.ts b/src/utils/ai/providerOptions.ts index ed2d95a86..d6e7198dd 100644 --- a/src/utils/ai/providerOptions.ts +++ b/src/utils/ai/providerOptions.ts @@ -7,7 +7,11 @@ import type { AnthropicProviderOptions } from "@ai-sdk/anthropic"; import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai"; import type { ThinkingLevel } from "@/types/thinking"; -import { ANTHROPIC_THINKING_BUDGETS, OPENAI_REASONING_EFFORT } from "@/types/thinking"; +import { + ANTHROPIC_THINKING_BUDGETS, + OPENAI_REASONING_EFFORT, + OPENROUTER_REASONING_EFFORT, +} from "@/types/thinking"; import { log } from "@/services/log"; import type { CmuxMessage } from "@/types/message"; import { enforceThinkingPolicy } from "@/utils/thinking/policy"; @@ -29,12 +33,25 @@ type ExtendedOpenAIResponsesProviderOptions = OpenAIResponsesProviderOptions & { truncation?: "auto" | "disabled"; }; 
+/** + * OpenRouter reasoning options + * @see https://openrouter.ai/docs/use-cases/reasoning-tokens + */ +interface OpenRouterReasoningOptions { + reasoning?: { + enabled?: boolean; + exclude?: boolean; + effort?: "low" | "medium" | "high"; + }; +} + /** * Provider-specific options structure for AI SDK */ type ProviderOptions = | { anthropic: AnthropicProviderOptions } | { openai: ExtendedOpenAIResponsesProviderOptions } + | { openrouter: OpenRouterReasoningOptions } | Record<string, never>; // Empty object for unsupported providers /** @@ -150,6 +167,36 @@ export function buildProviderOptions( return options; } + // Build OpenRouter-specific options + if (provider === "openrouter") { + const reasoningEffort = OPENROUTER_REASONING_EFFORT[effectiveThinking]; + + log.debug("buildProviderOptions: OpenRouter config", { + reasoningEffort, + thinkingLevel: effectiveThinking, + }); + + // Only add reasoning config if thinking is enabled + if (reasoningEffort) { + const options: ProviderOptions = { + openrouter: { + reasoning: { + enabled: true, + effort: reasoningEffort, + // Don't exclude reasoning content - we want to display it in the UI + exclude: false, + }, + }, + }; + log.debug("buildProviderOptions: Returning OpenRouter options", options); + return options; + } + + // No reasoning config needed when thinking is off + log.debug("buildProviderOptions: OpenRouter (thinking off, no provider options)"); + return {}; + } + // No provider-specific options for unsupported providers log.debug("buildProviderOptions: Unsupported provider", provider); return {}; diff --git a/src/utils/providers/ensureProvidersConfig.ts b/src/utils/providers/ensureProvidersConfig.ts index 915de62c2..0ec771057 100644 --- a/src/utils/providers/ensureProvidersConfig.ts +++ b/src/utils/providers/ensureProvidersConfig.ts @@ -49,6 +49,11 @@ const buildProvidersFromEnv = (env: NodeJS.ProcessEnv): ProvidersConfig => { providers.openai = entry; } + const openRouterKey = trim(env.OPENROUTER_API_KEY); + if 
(openRouterKey.length > 0) { + providers.openrouter = { apiKey: openRouterKey }; + } + if (!providers.openai) { const azureKey = trim(env.AZURE_OPENAI_API_KEY); const azureEndpoint = trim(env.AZURE_OPENAI_ENDPOINT); @@ -97,7 +102,7 @@ export const ensureProvidersConfig = ( const providersFromEnv = buildProvidersFromEnv(env); if (!hasAnyConfiguredProvider(providersFromEnv)) { throw new Error( - "No provider credentials found. Configure providers.jsonc or set ANTHROPIC_API_KEY / OPENAI_API_KEY." + "No provider credentials found. Configure providers.jsonc or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OPENROUTER_API_KEY." ); } diff --git a/src/utils/tokens/models-extra.ts b/src/utils/tokens/models-extra.ts index cfa643181..949c56b10 100644 --- a/src/utils/tokens/models-extra.ts +++ b/src/utils/tokens/models-extra.ts @@ -55,4 +55,19 @@ export const modelsExtra: Record = { supports_vision: true, supports_response_schema: true, }, + + // Z.AI GLM 4.6 via OpenRouter + // $0.40/M input, $1.75/M output (OpenRouter pricing) + // 200K context window, supports tool use and reasoning + "openrouter/z-ai/glm-4.6": { + max_input_tokens: 202752, + max_output_tokens: 202752, + input_cost_per_token: 0.0000004, // $0.40 per million input tokens + output_cost_per_token: 0.00000175, // $1.75 per million output tokens + litellm_provider: "openrouter", + mode: "chat", + supports_function_calling: true, + supports_reasoning: true, + supports_response_schema: true, + }, }; diff --git a/src/utils/tokens/models.json b/src/utils/tokens/models.json index ae6f03b52..6b21fc735 100644 --- a/src/utils/tokens/models.json +++ b/src/utils/tokens/models.json @@ -1,4 +1,38 @@ { + "sample_spec": { + "code_interpreter_cost_per_session": 0, + "computer_use_input_cost_per_1k_tokens": 0, + "computer_use_output_cost_per_1k_tokens": 0, + "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD", + "file_search_cost_per_1k_calls": 0, + "file_search_cost_per_gb_per_day": 0, + 
"input_cost_per_audio_token": 0, + "input_cost_per_token": 0, + "litellm_provider": "one of https://docs.litellm.ai/docs/providers", + "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", + "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", + "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", + "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank, search", + "output_cost_per_reasoning_token": 0, + "output_cost_per_token": 0, + "search_context_cost_per_query": { + "search_context_size_high": 0, + "search_context_size_low": 0, + "search_context_size_medium": 0 + }, + "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "vector_store_cost_per_gb_per_day": 0 + }, "1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": { "litellm_provider": "bedrock", "max_input_tokens": 2600, @@ -276,6 +310,24 @@ "output_cost_per_token": 0, "output_vector_size": 1024 }, + "amazon.titan-image-generator-v1": { + "input_cost_per_image": 0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_512_and_512_pixels": 0.01, + "output_cost_per_image_above_512_and_512_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "amazon.titan-image-generator-v2": { + "input_cost_per_image": 0, + "output_cost_per_image": 0.008, + 
"output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, "twelvelabs.marengo-embed-2-7-v1:0": { "input_cost_per_token": 0.00007, "litellm_provider": "bedrock", @@ -380,6 +432,44 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.000005, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "anthropic.claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.000005, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", @@ -413,6 +503,26 @@ "supports_tool_choice": true, "supports_vision": 
true }, + "anthropic.claude-3-7-sonnet-20240620-v1:0": { + "cache_creation_input_token_cost": 0.0000045, + "cache_read_input_token_cost": 3.6e-7, + "input_cost_per_token": 0.0000036, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.000018, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "anthropic.claude-3-7-sonnet-20250219-v1:0": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, @@ -790,6 +900,25 @@ "supports_tool_choice": true, "supports_vision": true }, + "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.000001375, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000055, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", @@ -857,7 +986,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { @@ -936,7 +1065,13 @@ "supports_tool_choice": true, "supports_vision": 
true }, + "azure/container": { + "code_interpreter_cost_per_session": 0.03, + "litellm_provider": "azure", + "mode": "chat" + }, "azure/eu/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", "cache_read_input_token_cost": 0.000001375, "input_cost_per_token": 0.00000275, "litellm_provider": "azure", @@ -953,6 +1088,7 @@ "supports_vision": true }, "azure/eu/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", "cache_creation_input_token_cost": 0.00000138, "input_cost_per_token": 0.00000275, "litellm_provider": "azure", @@ -1105,7 +1241,7 @@ }, "azure/global-standard/gpt-4o-2024-08-06": { "cache_read_input_token_cost": 0.00000125, - "deprecation_date": "2025-08-20", + "deprecation_date": "2026-02-27", "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, @@ -1122,7 +1258,7 @@ }, "azure/global-standard/gpt-4o-2024-11-20": { "cache_read_input_token_cost": 0.00000125, - "deprecation_date": "2025-12-20", + "deprecation_date": "2026-03-01", "input_cost_per_token": 0.0000025, "litellm_provider": "azure", "max_input_tokens": 128000, @@ -1151,6 +1287,7 @@ "supports_vision": true }, "azure/global/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.0000025, "litellm_provider": "azure", @@ -1167,6 +1304,7 @@ "supports_vision": true }, "azure/global/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.0000025, "litellm_provider": "azure", @@ -1441,6 +1579,7 @@ "supports_web_search": false }, "azure/gpt-4.1-2025-04-14": { + "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 5e-7, "input_cost_per_token": 0.000002, "input_cost_per_token_batches": 0.000001, @@ -1489,6 +1628,7 @@ "supports_web_search": false }, "azure/gpt-4.1-mini-2025-04-14": { + "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 4e-7, 
"input_cost_per_token_batches": 2e-7, @@ -1536,6 +1676,7 @@ "supports_vision": true }, "azure/gpt-4.1-nano-2025-04-14": { + "deprecation_date": "2026-11-04", "cache_read_input_token_cost": 2.5e-8, "input_cost_per_token": 1e-7, "input_cost_per_token_batches": 5e-8, @@ -1608,6 +1749,7 @@ "supports_vision": true }, "azure/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.0000025, "litellm_provider": "azure", @@ -1624,6 +1766,7 @@ "supports_vision": true }, "azure/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.00000275, "litellm_provider": "azure", @@ -2014,6 +2157,28 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure/gpt-5-pro": { + "input_cost_per_token": 0.000015, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 400000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", @@ -2102,6 +2267,76 @@ "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, + "azure/gpt-image-1-mini": { + "input_cost_per_pixel": 8.0566406e-9, + "litellm_provider": "azure", + "mode": 
"image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0751953125e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0751953125e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0345052083e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 8.056640625e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 8.056640625e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 7.9752604167e-9, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 3.173828125e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, + "azure/high/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 3.173828125e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": 
["/v1/images/generations"] + }, + "azure/high/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 3.1575520833e-8, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0, + "supported_endpoints": ["/v1/images/generations"] + }, "azure/mistral-large-2402": { "input_cost_per_token": 0.000008, "litellm_provider": "azure", @@ -2234,6 +2469,7 @@ "supports_vision": true }, "azure/o3-2025-04-16": { + "deprecation_date": "2026-04-16", "cache_read_input_token_cost": 0.0000025, "input_cost_per_token": 0.00001, "litellm_provider": "azure", @@ -2417,6 +2653,7 @@ "output_cost_per_token": 0 }, "azure/text-embedding-3-small": { + "deprecation_date": "2026-04-30", "input_cost_per_token": 2e-8, "litellm_provider": "azure", "max_input_tokens": 8191, @@ -2432,6 +2669,18 @@ "mode": "embedding", "output_cost_per_token": 0 }, + "azure/speech/azure-tts": { + "input_cost_per_character": 0.000015, + "litellm_provider": "azure", + "mode": "audio_speech", + "source": "https://azure.microsoft.com/en-us/pricing/calculator/" + }, + "azure/speech/azure-tts-hd": { + "input_cost_per_character": 0.00003, + "litellm_provider": "azure", + "mode": "audio_speech", + "source": "https://azure.microsoft.com/en-us/pricing/calculator/" + }, "azure/tts-1": { "input_cost_per_character": 0.000015, "litellm_provider": "azure", @@ -2443,6 +2692,7 @@ "mode": "audio_speech" }, "azure/us/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", "cache_read_input_token_cost": 0.000001375, "input_cost_per_token": 0.00000275, "litellm_provider": "azure", @@ -2459,6 +2709,7 @@ "supports_vision": true }, "azure/us/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", "cache_creation_input_token_cost": 0.00000138, "input_cost_per_token": 0.00000275, "litellm_provider": "azure", @@ -2928,6 +3179,34 @@ "supports_tool_choice": true, "supports_reasoning": true }, + "azure_ai/mistral-document-ai-2505": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.003, + "mode": 
"ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" + }, + "azure_ai/doc-intelligence/prebuilt-read": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.0015, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/doc-intelligence/prebuilt-layout": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.01, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/doc-intelligence/prebuilt-document": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.01, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, "azure_ai/MAI-DS-R1": { "input_cost_per_token": 0.00000135, "litellm_provider": "azure_ai", @@ -3089,7 +3368,6 @@ "output_cost_per_token": 0.0000275, "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", "supports_function_calling": true, - "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_web_search": true @@ -3117,7 +3395,6 @@ "mode": "chat", "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/announcing-the-grok-4-fast-models-from-xai-now-available-in-azure-ai-foundry/4456701", "supports_function_calling": true, - "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_web_search": true @@ -3911,6 +4188,26 @@ "mode": "chat", "output_cost_per_token": 0.0000015 }, + "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 
0.0000045, + "cache_read_input_token_cost": 3.6e-7, + "input_cost_per_token": 0.0000036, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.000018, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.0000036, "litellm_provider": "bedrock", @@ -4317,6 +4614,48 @@ "supports_web_search": true, "tool_use_system_prompt_tokens": 264 }, + "claude-haiku-4-5-20251001": { + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "claude-haiku-4-5": { + "cache_creation_input_token_cost": 0.00000125, + "cache_creation_input_token_cost_above_1hr": 0.000002, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "claude-3-5-sonnet-20240620": { "cache_creation_input_token_cost": 0.00000375, "cache_creation_input_token_cost_above_1hr": 0.000006, @@ -4398,7 +4737,7 @@ "cache_creation_input_token_cost": 0.00000375, "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, - "deprecation_date": "2026-02-01", + "deprecation_date": "2026-02-19", "input_cost_per_token": 0.000003, "litellm_provider": "anthropic", "max_input_tokens": 200000, @@ -4455,7 +4794,6 @@ "cache_creation_input_token_cost": 3e-7, "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-8, - "deprecation_date": "2025-03-01", "input_cost_per_token": 2.5e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, @@ -4475,7 +4813,7 @@ "cache_creation_input_token_cost": 0.00001875, "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 0.0000015, - "deprecation_date": "2025-03-01", + "deprecation_date": "2026-05-01", "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", "max_input_tokens": 200000, @@ -4578,7 +4916,7 @@ "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, "search_context_cost_per_query": { @@ -4608,7 +4946,7 @@ "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, "search_context_cost_per_query": { @@ -4625,6 +4963,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, + "supports_web_search": true, "tool_use_system_prompt_tokens": 346 }, "claude-opus-4-1": { @@ -4659,6 +4998,7 @@ "cache_creation_input_token_cost_above_1hr": 
0.00003, "cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000015, + "deprecation_date": "2026-08-05", "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, @@ -4686,6 +5026,7 @@ "cache_creation_input_token_cost_above_1hr": 0.00003, "cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000015, + "deprecation_date": "2026-05-14", "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 32000, @@ -4709,6 +5050,7 @@ "tool_use_system_prompt_tokens": 159 }, "claude-sonnet-4-20250514": { + "deprecation_date": "2026-05-14", "cache_creation_input_token_cost": 0.00000375, "cache_creation_input_token_cost_above_1hr": 0.000006, "cache_read_input_token_cost": 3e-7, @@ -5068,6 +5410,16 @@ "output_vector_size": 1536, "supports_embedding_image_input": true }, + "cohere/embed-v4.0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "cohere", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, "cohere.rerank-v3-5:0": { "input_cost_per_query": 0.002, "input_cost_per_token": 0, @@ -5860,6 +6212,11 @@ "source": "https://www.databricks.com/product/pricing/foundation-model-serving", "supports_tool_choice": true }, + "dataforseo/search": { + "input_cost_per_query": 0.003, + "litellm_provider": "dataforseo", + "mode": "search" + }, "davinci-002": { "input_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -6363,7 +6720,8 @@ "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, "deepinfra/Qwen/Qwen3-14B": { "max_tokens": 40960, @@ -7128,6 +7486,82 @@ "output_cost_per_token": 0, "output_vector_size": 2560 }, + "exa_ai/search": { + "litellm_provider": "exa_ai", + "mode": "search", + "tiered_pricing": [ + { + 
"input_cost_per_query": 0.005, + "max_results_range": [0, 25] + }, + { + "input_cost_per_query": 0.025, + "max_results_range": [26, 100] + } + ] + }, + "firecrawl/search": { + "litellm_provider": "firecrawl", + "mode": "search", + "tiered_pricing": [ + { + "input_cost_per_query": 0.00166, + "max_results_range": [1, 10] + }, + { + "input_cost_per_query": 0.00332, + "max_results_range": [11, 20] + }, + { + "input_cost_per_query": 0.00498, + "max_results_range": [21, 30] + }, + { + "input_cost_per_query": 0.00664, + "max_results_range": [31, 40] + }, + { + "input_cost_per_query": 0.0083, + "max_results_range": [41, 50] + }, + { + "input_cost_per_query": 0.00996, + "max_results_range": [51, 60] + }, + { + "input_cost_per_query": 0.01162, + "max_results_range": [61, 70] + }, + { + "input_cost_per_query": 0.01328, + "max_results_range": [71, 80] + }, + { + "input_cost_per_query": 0.01494, + "max_results_range": [81, 90] + }, + { + "input_cost_per_query": 0.0166, + "max_results_range": [91, 100] + } + ], + "metadata": { + "notes": "Firecrawl search pricing: $83 for 100,000 credits, 2 credits per 10 results. Cost = ceiling(limit/10) * 2 * $0.00083" + } + }, + "perplexity/search": { + "input_cost_per_query": 0.005, + "litellm_provider": "perplexity", + "mode": "search" + }, + "searxng/search": { + "litellm_provider": "searxng", + "mode": "search", + "input_cost_per_query": 0, + "metadata": { + "notes": "SearXNG is an open-source metasearch engine. Free to use when self-hosted or using public instances." 
+ } + }, "elevenlabs/scribe_v1": { "input_cost_per_second": 0.0000611, "litellm_provider": "elevenlabs", @@ -7265,6 +7699,26 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.000001375, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 0.0000011, + "deprecation_date": "2026-10-15", + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000055, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", @@ -7448,7 +7902,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { @@ -7500,6 +7954,36 @@ "supports_function_calling": true, "supports_tool_choice": false }, + "fal_ai/bria/text-to-image/3.2": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/flux-pro/v1.1-ultra": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/imagen4/preview": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + 
"fal_ai/fal-ai/recraft/v3/text-to-image": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, + "fal_ai/fal-ai/stable-diffusion-v35-medium": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": ["/v1/images/generations"] + }, "featherless_ai/featherless-ai/Qwerky-72B": { "litellm_provider": "featherless_ai", "max_input_tokens": 32768, @@ -8883,7 +9367,7 @@ "supports_web_search": true }, "gemini-2.5-flash": { - "cache_read_input_token_cost": 7.5e-8, + "cache_read_input_token_cost": 3e-8, "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", @@ -8916,6 +9400,42 @@ "supports_vision": true, "supports_web_search": true }, + "gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": false, + "tpm": 8000000 + }, "gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-8, "input_cost_per_audio_token": 0.000001, @@ -9054,6 +9574,76 @@ "supports_vision": true, "supports_web_search": true }, + "gemini-live-2.5-flash-preview-native-audio-09-2025": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini/gemini-live-2.5-flash-preview-native-audio-09-2025": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + 
"max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_audio_token": 0.000012, + "output_cost_per_token": 0.000002, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, "gemini-2.5-flash-lite-preview-06-17": { "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 5e-7, @@ -9156,7 +9746,8 @@ "supports_web_search": true }, "gemini-2.5-pro": { - "cache_read_input_token_cost": 3.125e-7, + "cache_read_input_token_cost": 1.25e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", @@ -9430,6 +10021,18 @@ "supports_tool_choice": true, "supports_vision": true }, + "gemini/gemini-embedding-001": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "gemini", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/embeddings#model-versions", + "tpm": 10000000 + }, "gemini/gemini-1.5-flash": { "input_cost_per_token": 7.5e-8, "input_cost_per_token_above_128k_tokens": 1.5e-7, @@ -10113,7 +10716,7 @@ "tpm": 1000000 }, "gemini/gemini-2.5-flash": { - 
"cache_read_input_token_cost": 7.5e-8, + "cache_read_input_token_cost": 3e-8, "input_cost_per_audio_token": 0.000001, "input_cost_per_token": 3e-7, "litellm_provider": "gemini", @@ -10148,6 +10751,42 @@ "supports_web_search": true, "tpm": 8000000 }, + "gemini/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, "gemini/gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-8, "input_cost_per_audio_token": 0.000001, @@ -10943,6 +11582,31 @@ "supported_modalities": ["text"], "supported_output_modalities": ["video"] }, + "gemini/veo-3.1-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": 
"https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.1-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "google_pse/search": { + "input_cost_per_query": 0.005, + "litellm_provider": "google_pse", + "mode": "search" + }, "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, @@ -10954,7 +11618,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, "search_context_cost_per_query": { @@ -11003,11 +11667,32 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "gpt-3.5-turbo": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openai", - "max_input_tokens": 16385, - "max_output_tokens": 4096, + "global.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.000001375, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000055, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "gpt-3.5-turbo": 
{ + "input_cost_per_token": 5e-7, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, "max_tokens": 4097, "mode": "chat", "output_cost_per_token": 0.0000015, @@ -11056,6 +11741,7 @@ "supports_tool_choice": true }, "gpt-3.5-turbo-1106": { + "deprecation_date": "2026-09-28", "input_cost_per_token": 0.000001, "litellm_provider": "openai", "max_input_tokens": 16385, @@ -11125,6 +11811,7 @@ "supports_tool_choice": true }, "gpt-4-0125-preview": { + "deprecation_date": "2026-03-26", "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, @@ -11165,6 +11852,7 @@ "supports_tool_choice": true }, "gpt-4-1106-preview": { + "deprecation_date": "2026-03-26", "input_cost_per_token": 0.00001, "litellm_provider": "openai", "max_input_tokens": 128000, @@ -11316,6 +12004,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4.1-2025-04-14": { @@ -11340,6 +12029,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4.1-mini": { @@ -11367,6 +12057,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4.1-mini-2025-04-14": { @@ -11391,6 +12082,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4.1-nano": { @@ -11418,6 +12110,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4.1-nano-2025-04-14": { @@ -11442,6 +12135,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, 
"supports_vision": true }, "gpt-4.5-preview": { @@ -11506,6 +12200,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4o-2024-05-13": { @@ -11546,6 +12241,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4o-2024-11-20": { @@ -11566,6 +12262,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4o-audio-preview": { @@ -11657,6 +12354,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4o-mini-2024-07-18": { @@ -11682,6 +12380,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-4o-mini-audio-preview": { @@ -11979,6 +12678,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-5-pro": { @@ -12120,11 +12820,11 @@ "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_function_calling": true, - "supports_native_streaming": false, + "supports_native_streaming": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": false, "supports_tool_choice": true, @@ -12157,6 +12857,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-5-mini-2025-08-07": { @@ -12186,6 
+12887,7 @@ "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "gpt-5-nano": { @@ -12453,8 +13155,56 @@ "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { "input_cost_per_token": 0, "litellm_provider": "lemonade", - "max_tokens": 32768, - "max_input_tokens": 32768, + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/gpt-oss-20b-mxfp4-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/gpt-oss-120b-mxfp-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/Gemma-3-4b-it-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/Qwen3-4B-Instruct-2507-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 262144, + "max_input_tokens": 262144, "max_output_tokens": 32768, "mode": "chat", "output_cost_per_token": 0, @@ -13244,7 +13994,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, - 
"max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { @@ -13263,6 +14013,25 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, + "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.000001375, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000055, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-7, "litellm_provider": "lambda_ai", @@ -14032,6 +14801,22 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/mistral-ocr-latest": { + "litellm_provider": "mistral", + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://mistral.ai/pricing#api-pricing" + }, + "mistral/mistral-ocr-2505-completion": { + "litellm_provider": "mistral", + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": ["/v1/ocr"], + "source": "https://mistral.ai/pricing#api-pricing" + }, "mistral/magistral-medium-latest": { "input_cost_per_token": 0.000002, "litellm_provider": "mistral", @@ -14084,6 +14869,20 @@ "max_tokens": 8192, "mode": "embedding" }, + "mistral/codestral-embed": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, + "mistral/codestral-embed-2505": { + 
"input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, "mistral/mistral-large-2402": { "input_cost_per_token": 0.000004, "litellm_provider": "mistral", @@ -14801,6 +15600,7 @@ "supports_vision": true }, "o1-mini-2024-09-12": { + "deprecation_date": "2025-10-27", "cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000003, "litellm_provider": "openai", @@ -14920,6 +15720,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "o3-2025-04-16": { @@ -14946,6 +15747,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "o3-deep-research": { @@ -15096,6 +15898,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "o4-mini-2025-04-16": { @@ -15114,6 +15917,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, + "supports_service_tier": true, "supports_vision": true }, "o4-mini-deep-research": { @@ -15802,6 +16606,8 @@ }, "openrouter/anthropic/claude-opus-4": { "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, @@ -15812,6 +16618,7 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": true, @@ -15819,6 +16626,9 @@ }, "openrouter/anthropic/claude-opus-4.1": { "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, + 
"cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000015, "litellm_provider": "openrouter", "max_input_tokens": 200000, @@ -15829,6 +16639,7 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": true, @@ -15836,6 +16647,10 @@ }, "openrouter/anthropic/claude-sonnet-4": { "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token": 0.000003, "input_cost_per_token_above_200k_tokens": 0.000006, "output_cost_per_token_above_200k_tokens": 0.0000225, @@ -15848,6 +16663,7 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": true, @@ -15855,9 +16671,13 @@ }, "openrouter/anthropic/claude-sonnet-4.5": { "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "input_cost_per_token_above_200k_tokens": 0.000006, "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "openrouter", "max_input_tokens": 1000000, "max_output_tokens": 1000000, @@ -15867,11 +16687,31 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "openrouter/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 
0.00000125, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "openrouter/bytedance/ui-tars-1.5-7b": { "input_cost_per_token": 1e-7, "litellm_provider": "openrouter", @@ -16629,7 +17469,8 @@ "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 6.3e-7, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, "openrouter/qwen/qwen3-coder": { "input_cost_per_token": 0.000001, @@ -16951,6 +17792,16 @@ "output_cost_per_token": 1.25e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "parallel_ai/search": { + "input_cost_per_query": 0.004, + "litellm_provider": "parallel_ai", + "mode": "search" + }, + "parallel_ai/search-pro": { + "input_cost_per_query": 0.009, + "litellm_provider": "parallel_ai", + "mode": "search" + }, "perplexity/codellama-34b-instruct": { "input_cost_per_token": 3.5e-7, "litellm_provider": "perplexity", @@ -17726,40 +18577,6 @@ "supports_reasoning": true, "source": "https://cloud.sambanova.ai/plans/pricing" }, - "sample_spec": { - "code_interpreter_cost_per_session": 0, - "computer_use_input_cost_per_1k_tokens": 0, - "computer_use_output_cost_per_1k_tokens": 0, - "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD", - "file_search_cost_per_1k_calls": 0, - "file_search_cost_per_gb_per_day": 0, - "input_cost_per_audio_token": 0, - "input_cost_per_token": 0, - "litellm_provider": "one of https://docs.litellm.ai/docs/providers", - "max_input_tokens": "max 
input tokens, if the provider specifies it. if not default to max_tokens", - "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", - "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", - "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank", - "output_cost_per_reasoning_token": 0, - "output_cost_per_token": 0, - "search_context_cost_per_query": { - "search_context_size_high": 0, - "search_context_size_low": 0, - "search_context_size_medium": 0 - }, - "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], - "supports_audio_input": true, - "supports_audio_output": true, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_vision": true, - "supports_web_search": true, - "vector_store_cost_per_gb_per_day": 0 - }, "snowflake/claude-3-5-sonnet": { "litellm_provider": "snowflake", "max_input_tokens": 18000, @@ -17990,6 +18807,16 @@ "mode": "image_generation", "output_cost_per_pixel": 0 }, + "tavily/search": { + "input_cost_per_query": 0.008, + "litellm_provider": "tavily", + "mode": "search" + }, + "tavily/search-advanced": { + "input_cost_per_query": 0.016, + "litellm_provider": "tavily", + "mode": "search" + }, "text-bison": { "input_cost_per_character": 2.5e-7, "litellm_provider": "vertex_ai-text-models", @@ -18707,6 +19534,25 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "us.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.000001375, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000055, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", @@ -18838,7 +19684,7 @@ "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { @@ -18857,6 +19703,25 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346 }, + "au.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 0.000001375, + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 0.0000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000055, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "us.anthropic.claude-opus-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00001875, "cache_read_input_token_cost": 0.0000015, @@ -19940,6 +20805,20 @@ "mode": "chat", "output_cost_per_token": 0.0000011 }, + "vercel_ai_gateway/zai/glm-4.6": { + "litellm_provider": "vercel_ai_gateway", + "cache_read_input_token_cost": 1.1e-7, + "input_cost_per_token": 4.5e-7, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 
200000, + "mode": "chat", + "output_cost_per_token": 0.0000018, + "source": "https://vercel.com/ai-gateway/models/glm-4.6", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, "vertex_ai/claude-3-5-haiku": { "input_cost_per_token": 0.000001, "litellm_provider": "vertex_ai-anthropic_models", @@ -19966,6 +20845,25 @@ "supports_pdf_input": true, "supports_tool_choice": true }, + "vertex_ai/claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.000005, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "vertex_ai/claude-3-5-sonnet": { "input_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-anthropic_models", @@ -20158,8 +21056,8 @@ "input_cost_per_token_batches": 0.0000075, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, + "max_output_tokens": 32000, + "max_tokens": 32000, "mode": "chat", "output_cost_per_token": 0.000075, "output_cost_per_token_batches": 0.0000375, @@ -20175,8 +21073,8 @@ "input_cost_per_token_batches": 0.0000075, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, + "max_output_tokens": 32000, + "max_tokens": 32000, "mode": "chat", "output_cost_per_token": 0.000075, "output_cost_per_token_batches": 0.0000375, @@ -20197,7 +21095,7 @@ "litellm_provider": 
"vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, "output_cost_per_token_batches": 0.0000075, @@ -20223,7 +21121,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, "max_output_tokens": 64000, - "max_tokens": 200000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, "output_cost_per_token_batches": 0.0000075, @@ -20323,6 +21221,50 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "vertex_ai/mistralai/codestral-2@001": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2@001": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/codestral-2": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true + }, "vertex_ai/codestral-2501": { "input_cost_per_token": 2e-7, "litellm_provider": 
"vertex_ai-mistral_models", @@ -20626,6 +21568,62 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", "supports_tool_choice": true }, + "vertex_ai/minimaxai/minimax-m2-maas": { + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-minimax_models", + "max_input_tokens": 196608, + "max_output_tokens": 196608, + "max_tokens": 196608, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-medium-3": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000002, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-medium-3@001": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000002, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/mistral-medium-3": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000002, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/mistral-medium-3@001": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000002, + "supports_function_calling": true, + "supports_tool_choice": true + }, "vertex_ai/mistral-large-2411": { "input_cost_per_token": 0.000002, 
"litellm_provider": "vertex_ai-mistral_models", @@ -20715,6 +21713,13 @@ "supports_function_calling": true, "supports_tool_choice": true }, + "vertex_ai/mistral-ocr-2505": { + "litellm_provider": "vertex_ai", + "mode": "ocr", + "ocr_cost_per_page": 0.0005, + "supported_endpoints": ["/v1/ocr"], + "source": "https://cloud.google.com/generative-ai-app-builder/pricing" + }, "vertex_ai/openai/gpt-oss-120b-maas": { "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-openai_models", @@ -20815,6 +21820,26 @@ "supported_modalities": ["text"], "supported_output_modalities": ["video"] }, + "vertex_ai/veo-3.1-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.1-fast-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, "input_cost_per_token": 5e-8, @@ -21068,13 +22093,13 @@ "mode": "chat" }, "watsonx/ibm/granite-3-8b-instruct": { - "input_cost_per_token": 0.0002, + "input_cost_per_token": 2e-7, "litellm_provider": "watsonx", "max_input_tokens": 8192, "max_output_tokens": 1024, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0002, + "output_cost_per_token": 2e-7, "supports_audio_input": false, "supports_audio_output": false, "supports_function_calling": true, @@ -21131,8 +22156,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0001, 
- "output_cost_per_token": 0.00025, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21143,8 +22168,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0005, - "output_cost_per_token": 0.002, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21155,8 +22180,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0005, - "output_cost_per_token": 0.002, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21167,8 +22192,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.00025, - "output_cost_per_token": 0.001, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21179,8 +22204,8 @@ "max_tokens": 20480, "max_input_tokens": 20480, "max_output_tokens": 20480, - "input_cost_per_token": 0.000625, - "output_cost_per_token": 0.0025, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21191,8 +22216,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.00015, - "output_cost_per_token": 0.0006, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21203,8 +22228,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.00025, - "output_cost_per_token": 0.001, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, 
"litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21215,8 +22240,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.000625, - "output_cost_per_token": 0.000625, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21227,8 +22252,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.000625, - "output_cost_per_token": 0.000625, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21239,8 +22264,8 @@ "max_tokens": 512, "max_input_tokens": 512, "max_output_tokens": 512, - "input_cost_per_token": 0.000625, - "output_cost_per_token": 0.000625, + "input_cost_per_token": 3.8e-7, + "output_cost_per_token": 3.8e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21251,8 +22276,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.00015, - "output_cost_per_token": 0.0006, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21263,8 +22288,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.00025, - "output_cost_per_token": 0.001, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21275,8 +22300,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.0001, - "output_cost_per_token": 0.0002, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ 
-21287,8 +22312,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.00015, - "output_cost_per_token": 0.0006, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21299,8 +22324,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.002, - "output_cost_per_token": 0.008, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21311,8 +22336,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.002, - "output_cost_per_token": 0.006, + "input_cost_per_token": 7.1e-7, + "output_cost_per_token": 7.1e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21323,8 +22348,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.0005, - "output_cost_per_token": 0.002, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.0000014, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21335,8 +22360,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.00025, - "output_cost_per_token": 0.001, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21347,8 +22372,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.00225, - "output_cost_per_token": 0.00675, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.00001, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21359,8 +22384,20 @@ "max_tokens": 32000, 
"max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.0002, - "output_cost_per_token": 0.0006, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-3-1-24b-instruct-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -21371,8 +22408,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.00015, - "output_cost_per_token": 0.00015, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21383,8 +22420,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.004, - "output_cost_per_token": 0.016, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21395,8 +22432,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0005, - "output_cost_per_token": 0.002, + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.0000018, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": false, @@ -21658,7 +22695,6 @@ "output_cost_per_token": 0.000015, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, - "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, @@ -21669,11 +22705,12 @@ "max_tokens": 2000000, "mode": "chat", "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, 
"output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "cache_read_input_token_cost": 5e-8, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, - "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, @@ -21685,7 +22722,9 @@ "max_tokens": 2000000, "mode": "chat", "input_cost_per_token": 2e-7, + "input_cost_per_token_above_128k_tokens": 4e-7, "output_cost_per_token": 5e-7, + "output_cost_per_token_above_128k_tokens": 0.000001, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, @@ -21693,29 +22732,31 @@ }, "xai/grok-4-0709": { "input_cost_per_token": 0.000003, + "input_cost_per_token_above_128k_tokens": 0.000006, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_128k_tokens": 0.00003, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, - "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-latest": { "input_cost_per_token": 0.000003, + "input_cost_per_token_above_128k_tokens": 0.000006, "litellm_provider": "xai", "max_input_tokens": 256000, "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_128k_tokens": 0.00003, "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, - "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, @@ -21787,5 +22828,60 @@ "supports_tool_choice": true, "supports_vision": true, "supports_web_search": true + }, + "vertex_ai/search_api": { + "input_cost_per_query": 0.0015, + "litellm_provider": "vertex_ai", + "mode": "vector_store" + }, + "openai/container": { + "code_interpreter_cost_per_session": 0.03, + "litellm_provider": "openai", + "mode": "chat" + 
}, + "openai/sora-2": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "openai/sora-2-pro": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "azure/sora-2": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "azure/sora-2-pro": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "azure/sora-2-pro-high-res": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1024x1792", "1792x1024"] } }