diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index e03abea6352..ba039459202 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -3,13 +3,31 @@ vitest.mock("vscode", () => ({})) import { Anthropic } from "@anthropic-ai/sdk" -import OpenAI from "openai" import { OpenRouterHandler } from "../openrouter" import { ApiHandlerOptions } from "../../../shared/api" -import { Package } from "../../../shared/package" -vitest.mock("openai") +// Mock the AI SDK +const mockStreamText = vitest.fn() +const mockGenerateText = vitest.fn() +const mockCreateOpenRouter = vitest.fn() + +vitest.mock("ai", () => ({ + streamText: (...args: unknown[]) => mockStreamText(...args), + generateText: (...args: unknown[]) => mockGenerateText(...args), + tool: vitest.fn((t) => t), + jsonSchema: vitest.fn((s) => s), +})) + +vitest.mock("@openrouter/ai-sdk-provider", () => ({ + createOpenRouter: (...args: unknown[]) => { + mockCreateOpenRouter(...args) + return { + chat: vitest.fn((modelId: string) => ({ modelId })), + } + }, +})) + vitest.mock("delay", () => ({ default: vitest.fn(() => Promise.resolve()) })) const mockCaptureException = vitest.fn() @@ -60,6 +78,16 @@ vitest.mock("../fetchers/modelCache", () => ({ cacheReadsPrice: 0.3, description: "Claude 3.7 Sonnet with thinking", }, + "deepseek/deepseek-r1": { + maxTokens: 8192, + contextWindow: 64000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.55, + outputPrice: 2.19, + description: "DeepSeek R1", + supportsReasoningEffort: true, + }, "openai/gpt-4o": { maxTokens: 16384, contextWindow: 128000, @@ -80,8 +108,32 @@ vitest.mock("../fetchers/modelCache", () => ({ excludedTools: ["existing_excluded"], includedTools: ["existing_included"], }, + "google/gemini-2.5-pro": { + maxTokens: 65536, + contextWindow: 1048576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.25, + outputPrice: 10, + description: "Gemini 2.5 Pro", + thinking: true, + }, + "google/gemini-2.5-flash": { + maxTokens: 65536, + contextWindow: 1048576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 0.6, + description: "Gemini 2.5 Flash", + }, }) }), + getModelsFromCache: vitest.fn().mockReturnValue(null), +})) + +vitest.mock("../fetchers/modelEndpointCache", () => ({ + getModelEndpoints: vitest.fn().mockResolvedValue({}), })) describe("OpenRouterHandler", () => { @@ -90,21 +142,13 @@ describe("OpenRouterHandler", () => { openRouterModelId: "anthropic/claude-sonnet-4", } - beforeEach(() => vitest.clearAllMocks()) + beforeEach(() => { + vitest.clearAllMocks() + }) it("initializes with correct options", () => { const handler = new OpenRouterHandler(mockOptions) expect(handler).toBeInstanceOf(OpenRouterHandler) - - expect(OpenAI).toHaveBeenCalledWith({ - baseURL: "https://openrouter.ai/api/v1", - apiKey: mockOptions.openRouterApiKey, - defaultHeaders: { - "HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline", - "X-Title": "Roo Code", - "User-Agent": `RooCode/${Package.version}`, - }, - }) }) describe("fetchModel", () => { @@ -204,29 +248,24 @@ describe("OpenRouterHandler", () => { }) describe("createMessage", () => { - it("generates correct stream chunks", async () => { + it("generates correct stream chunks with basic usage and totalCost", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { - id: 
mockOptions.openRouterModelId, - choices: [{ delta: { content: "test response" } }], - } - yield { - id: "test-id", - choices: [{ delta: {} }], - usage: { prompt_tokens: 10, completion_tokens: 20, cost: 0.001 }, - } - }, - } + // Create mock async iterator for fullStream + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test response", id: "1" } + })() - // Mock OpenAI chat.completions.create - const mockCreate = vitest.fn().mockResolvedValue(mockStream) + // Mock usage promises + const mockUsage = Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }) + const mockTotalUsage = Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: mockUsage, + totalUsage: mockTotalUsage, + providerMetadata: Promise.resolve(undefined), + }) const systemPrompt = "test system prompt" const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }] @@ -238,462 +277,891 @@ describe("OpenRouterHandler", () => { chunks.push(chunk) } - // Verify stream chunks - expect(chunks).toHaveLength(2) // One text chunk and one usage chunk + // Verify stream chunks - should have text and usage chunks + expect(chunks).toHaveLength(2) expect(chunks[0]).toEqual({ type: "text", text: "test response" }) - expect(chunks[1]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20, totalCost: 0.001 }) + // Usage chunk should include totalCost calculated from model pricing + // Model: anthropic/claude-sonnet-4 with inputPrice: 3, outputPrice: 15 (per million) + // Cost = (10 * 3 / 1_000_000) + (20 * 15 / 1_000_000) = 0.00003 + 0.0003 = 0.00033 + expect(chunks[1]).toMatchObject({ + type: "usage", + inputTokens: 10, + outputTokens: 20, + totalCost: expect.any(Number), + }) + expect((chunks[1] as any).totalCost).toBeCloseTo(0.00033, 6) - // Verify OpenAI client was called with correct parameters. 
- expect(mockCreate).toHaveBeenCalledWith( + // Verify streamText was called with correct parameters + expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - max_tokens: 8192, - messages: [ - { - content: [ - { cache_control: { type: "ephemeral" }, text: "test system prompt", type: "text" }, - ], - role: "system", - }, - { - content: [{ cache_control: { type: "ephemeral" }, text: "test message", type: "text" }], - role: "user", - }, - ], - model: "anthropic/claude-sonnet-4", - stream: true, - stream_options: { include_usage: true }, + messages: expect.any(Array), + maxOutputTokens: 8192, temperature: 0, - top_p: undefined, }), - { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, ) }) - it("adds cache control for supported models", async () => { - const handler = new OpenRouterHandler({ - ...mockOptions, - openRouterModelId: "anthropic/claude-3.5-sonnet", + it("includes cache read tokens in usage when provider metadata contains them", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + totalUsage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + providerMetadata: Promise.resolve({ + openrouter: { + cachedInputTokens: 30, + }, + }), }) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { - id: "test-id", - choices: [{ delta: { content: "test response" } }], - } - }, - } + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + for await (const chunk of generator) { + chunks.push(chunk) + } - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "message 1" }, - { role: "assistant", content: "response 1" }, - { role: "user", content: "message 2" }, - ] + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 30, + totalCost: expect.any(Number), + }) + }) - await handler.createMessage("test system", messages).next() + it("includes reasoning tokens in usage when provider metadata contains them", async () => { + const handler = new OpenRouterHandler(mockOptions) - expect(mockCreate).toHaveBeenCalledWith( - expect.objectContaining({ - messages: expect.arrayContaining([ - expect.objectContaining({ - role: "system", - content: expect.arrayContaining([ - expect.objectContaining({ cache_control: { type: "ephemeral" } }), - ]), - }), - ]), + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 100, outputTokens: 150, totalTokens: 250 }), + totalUsage: Promise.resolve({ inputTokens: 100, outputTokens: 150, totalTokens: 250 }), + providerMetadata: Promise.resolve({ + openrouter: { + reasoningOutputTokens: 50, + }, }), - { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, - ) + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] 
+ + for await (const chunk of generator) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 150, + reasoningTokens: 50, + totalCost: expect.any(Number), + }) }) - it("handles API errors and captures telemetry", async () => { + it("includes all detailed usage metrics when provider metadata contains them", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { error: { message: "API Error", code: 500 } } - }, + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 200, outputTokens: 100, totalTokens: 300 }), + totalUsage: Promise.resolve({ inputTokens: 200, outputTokens: 100, totalTokens: 300 }), + providerMetadata: Promise.resolve({ + openrouter: { + cachedInputTokens: 50, + cacheCreationInputTokens: 20, + reasoningOutputTokens: 30, + }, + }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) } - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 200, + outputTokens: 100, + cacheReadTokens: 50, + cacheWriteTokens: 20, + reasoningTokens: 30, + totalCost: expect.any(Number), + }) + }) - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("OpenRouter API Error 500: API Error") + it("handles experimental_providerMetadata fallback", async () => { + const handler = new OpenRouterHandler(mockOptions) - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "API Error", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - errorCode: 500, - status: 500, + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + totalUsage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + providerMetadata: Promise.resolve(undefined), + experimental_providerMetadata: Promise.resolve({ + openrouter: { + cachedInputTokens: 25, + }, }), - ) + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 25, + totalCost: expect.any(Number), + }) }) - it("captures telemetry when createMessage throws an exception", async () => { + it("handles reasoning delta chunks", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockCreate = vitest.fn().mockRejectedValue(new Error("Connection failed")) - ;(OpenAI as any).prototype.chat = { - completions: { 
create: mockCreate }, - } as any - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow() + const mockFullStream = (async function* () { + yield { type: "reasoning-delta", text: "thinking...", id: "1" } + yield { type: "text-delta", text: "result", id: "2" } + })() - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Connection failed", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + expect(chunks[0]).toEqual({ type: "reasoning", text: "thinking..." }) + expect(chunks[1]).toEqual({ type: "text", text: "result" }) }) - it("passes SDK exceptions with status 429 to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("accumulates reasoning details for getReasoningDetails()", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Rate limit exceeded: free-models-per-day") as any - error.status = 429 - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const mockFullStream = (async function* () { + yield { type: "reasoning-delta", text: "step 1...", id: "1" } + yield { type: "reasoning-delta", text: "step 2...", id: "2" } + yield { type: "text-delta", text: "result", id: "3" } + })() - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("Rate limit exceeded") + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume all chunks + } + + // After streaming, getReasoningDetails should return accumulated reasoning + const reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeDefined() + expect(reasoningDetails).toHaveLength(1) + expect(reasoningDetails![0].type).toBe("reasoning.text") + expect(reasoningDetails![0].text).toBe("step 1...step 2...") + expect(reasoningDetails![0].index).toBe(0) }) - it("passes SDK exceptions with 429 in message to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles tool call streaming", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("429 Rate limit exceeded: free-models-per-day") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("429 Rate limit exceeded") + 
const mockFullStream = (async function* () { + yield { type: "tool-input-start", id: "call_1", toolName: "read_file" } + yield { type: "tool-input-delta", id: "call_1", delta: '{"path":' } + yield { type: "tool-input-delta", id: "call_1", delta: '"test.ts"}' } + yield { type: "tool-input-end", id: "call_1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "429 Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + expect(chunks[1]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":' }) + expect(chunks[2]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '"test.ts"}' }) + expect(chunks[3]).toEqual({ type: "tool_call_end", id: "call_1" }) }) - it("passes SDK exceptions containing 'rate limit' to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("ignores tool-call events (handled by tool-input-start/delta/end)", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Request failed due to rate limit") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("rate limit") + const mockFullStream = (async function* () { + yield { + type: "tool-call", + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + // tool-call is intentionally ignored by processAiSdkStreamPart, + // only usage chunk should be present + expect(chunks).toHaveLength(1) + expect(chunks[0]).toMatchObject({ type: "usage" }) + }) + + it("handles API errors gracefully", async () => { + const handler = new OpenRouterHandler(mockOptions) + + mockStreamText.mockImplementation(() => { + throw new Error("API Error") + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + expect(chunks[0]).toEqual({ + type: "error", + error: "OpenRouterError", + message: "OpenRouter API Error: API Error", + }) + // Verify telemetry was called + expect(mockCaptureException).toHaveBeenCalledTimes(1) expect(mockCaptureException).toHaveBeenCalledWith( expect.objectContaining({ - message: "Request failed due to rate limit", + message: "API Error", provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, operation: "createMessage", }), ) }) - it("passes 429 
rate limit errors from stream to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles stream errors", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { error: { message: "Rate limit exceeded", code: 429 } } - }, + + const mockFullStream = (async function* () { + yield { type: "error", error: new Error("Stream error") } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0, totalTokens: 0 }), + totalUsage: Promise.resolve({ inputTokens: 0, outputTokens: 0, totalTokens: 0 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) } - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + expect(chunks[0]).toEqual({ + type: "error", + error: "StreamError", + message: "Stream error", + }) + }) + + it("passes tools to streamText when provided", async () => { + const handler = new OpenRouterHandler(mockOptions) - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("OpenRouter API Error 429: Rate limit exceeded") + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() - expect(mockCaptureException).toHaveBeenCalledWith( + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const tools = [ + { + type: "function" as const, + function: { + name: "read_file", + description: "Read a file", + parameters: { type: "object", properties: { path: { type: "string" } } }, + }, + }, + ] + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }], { + taskId: "test", + tools, + }) + + for await (const _ of generator) { + // consume + } + + expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - message: "Rate limit exceeded", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - errorCode: 429, - status: 429, + tools: expect.objectContaining({ + read_file: expect.any(Object), + }), }), ) }) - it("yields tool_call_end events when finish_reason is tool_calls", async () => { - // Import NativeToolCallParser to set up state - const { NativeToolCallParser } = await import("../../../core/assistant-message/NativeToolCallParser") + it("passes reasoning parameters via extraBody when reasoning effort is enabled", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + reasoningEffort: "high", + enableReasoningEffort: true, + }) - // Clear any previous state - NativeToolCallParser.clearRawChunkState() + const mockFullStream = (async function* () { + yield { type: "reasoning-delta", text: "thinking...", id: "1" } + yield { type: "text-delta", text: "result", id: "2" } + })() - const handler = new OpenRouterHandler(mockOptions) + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - 
const mockStream = { - async *[Symbol.asyncIterator]() { - yield { - id: "test-id", - choices: [ - { - delta: { - tool_calls: [ - { - index: 0, - id: "call_openrouter_test", - function: { name: "read_file", arguments: '{"path":"test.ts"}' }, - }, - ], - }, - index: 0, - }, - ], - } - yield { - id: "test-id", - choices: [ - { - delta: {}, - finish_reason: "tool_calls", - index: 0, - }, - ], - usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, - } - }, + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume } - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + // Verify that reasoning was passed via extraBody when creating the provider + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + reasoning: expect.objectContaining({ + effort: "high", + }), + }), + }), + ) - const generator = handler.createMessage("test", []) - const chunks = [] + // Verify that providerOptions does NOT contain extended_thinking + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + providerOptions: undefined, + }), + ) + }) - for await (const chunk of generator) { - // Simulate what Task.ts does: when we receive tool_call_partial, - // process it through NativeToolCallParser to populate rawChunkTracker - if (chunk.type === "tool_call_partial") { - NativeToolCallParser.processRawChunk({ - index: chunk.index, - id: chunk.id, - name: chunk.name, - arguments: chunk.arguments, - }) - } - chunks.push(chunk) + it("does not pass reasoning via extraBody when reasoning is disabled", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + }) + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume } - // Should have tool_call_partial and tool_call_end - const partialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial") - const endChunks = chunks.filter((chunk) => chunk.type === "tool_call_end") + // Verify that createOpenRouter was called with correct base config + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: "test-key", + baseURL: "https://openrouter.ai/api/v1", + }), + ) - expect(partialChunks).toHaveLength(1) - expect(endChunks).toHaveLength(1) - expect(endChunks[0].id).toBe("call_openrouter_test") + // Verify that providerOptions is undefined when no provider routing + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + providerOptions: undefined, + }), + ) }) }) describe("completePrompt", () => { it("returns correct response", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockResponse = { choices: [{ message: { content: "test completion" } }] } - const mockCreate = vitest.fn().mockResolvedValue(mockResponse) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + 
mockGenerateText.mockResolvedValue({ + text: "test completion", + }) const result = await handler.completePrompt("test prompt") expect(result).toBe("test completion") - - expect(mockCreate).toHaveBeenCalledWith( - { - model: mockOptions.openRouterModelId, - max_tokens: 8192, + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: "test prompt", + maxOutputTokens: 8192, temperature: 0, - messages: [{ role: "user", content: "test prompt" }], - stream: false, - }, - { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, + }), ) }) - it("handles API errors and captures telemetry", async () => { - const handler = new OpenRouterHandler(mockOptions) - const mockError = { - error: { - message: "API Error", - code: 500, - }, - } + it("passes reasoning parameters via extraBody when reasoning effort is enabled", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + reasoningEffort: "medium", + enableReasoningEffort: true, + }) + + mockGenerateText.mockResolvedValue({ + text: "test completion with reasoning", + }) - const mockCreate = vitest.fn().mockResolvedValue(mockError) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const result = await handler.completePrompt("test prompt") - await expect(handler.completePrompt("test prompt")).rejects.toThrow("OpenRouter API Error 500: API Error") + expect(result).toBe("test completion with reasoning") - // Verify telemetry was captured - expect(mockCaptureException).toHaveBeenCalledWith( + // Verify that reasoning was passed via extraBody when creating the provider + expect(mockCreateOpenRouter).toHaveBeenCalledWith( expect.objectContaining({ - message: "API Error", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - errorCode: 500, - status: 500, + extraBody: expect.objectContaining({ + reasoning: expect.objectContaining({ + effort: "medium", + }), + }), + }), + ) + + // Verify that providerOptions does NOT contain extended_thinking + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + providerOptions: undefined, }), ) }) - it("handles unexpected errors and captures telemetry", async () => { + it("handles API errors", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Unexpected error") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error") + mockGenerateText.mockRejectedValue(new Error("API Error")) + + await expect(handler.completePrompt("test prompt")).rejects.toThrow( + "OpenRouter completion error: API Error", + ) - // Verify telemetry was captured (filtering now happens inside PostHogTelemetryClient) + // Verify telemetry was called + expect(mockCaptureException).toHaveBeenCalledTimes(1) expect(mockCaptureException).toHaveBeenCalledWith( expect.objectContaining({ - message: "Unexpected error", + message: "API Error", provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, operation: "completePrompt", }), ) }) - it("passes SDK exceptions with status 429 to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles rate limit errors", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Rate limit exceeded: 
free-models-per-day") as any - error.status = 429 - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - await expect(handler.completePrompt("test prompt")).rejects.toThrow("Rate limit exceeded") + mockGenerateText.mockRejectedValue(new Error("Rate limit exceeded")) + + await expect(handler.completePrompt("test prompt")).rejects.toThrow( + "OpenRouter completion error: Rate limit exceeded", + ) - // captureException is called, but PostHogTelemetryClient filters out 429 errors internally + // Verify telemetry was called + expect(mockCaptureException).toHaveBeenCalledTimes(1) expect(mockCaptureException).toHaveBeenCalledWith( expect.objectContaining({ - message: "Rate limit exceeded: free-models-per-day", + message: "Rate limit exceeded", provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, operation: "completePrompt", }), ) }) + }) - it("passes SDK exceptions with 429 in message to telemetry (filtering happens in PostHogTelemetryClient)", async () => { - const handler = new OpenRouterHandler(mockOptions) - const error = new Error("429 Rate limit exceeded: free-models-per-day") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + describe("provider configuration", () => { + it("creates OpenRouter provider with correct API key and base URL", async () => { + const customOptions: ApiHandlerOptions = { + openRouterApiKey: "custom-key", + openRouterBaseUrl: "https://custom.openrouter.ai/api/v1", + openRouterModelId: "anthropic/claude-sonnet-4", + } - await expect(handler.completePrompt("test prompt")).rejects.toThrow("429 Rate limit exceeded") + const handler = new OpenRouterHandler(customOptions) - // captureException is called, but PostHogTelemetryClient filters out 429 errors internally - expect(mockCaptureException).toHaveBeenCalledWith( + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume + } + + expect(mockCreateOpenRouter).toHaveBeenCalledWith( expect.objectContaining({ - message: "429 Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", + apiKey: "custom-key", + baseURL: "https://custom.openrouter.ai/api/v1", }), ) }) - it("passes SDK exceptions containing 'rate limit' to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("uses default base URL when not specified", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Request failed due to rate limit") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - await expect(handler.completePrompt("test prompt")).rejects.toThrow("rate limit") + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() - // captureException is called, but PostHogTelemetryClient filters out rate limit errors internally - 
expect(mockCaptureException).toHaveBeenCalledWith( + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume + } + + expect(mockCreateOpenRouter).toHaveBeenCalledWith( expect.objectContaining({ - message: "Request failed due to rate limit", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", + apiKey: "test-key", + baseURL: "https://openrouter.ai/api/v1", }), ) }) + }) - it("passes 429 rate limit errors from response to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + describe("getReasoningDetails", () => { + it("returns undefined when no reasoning was captured", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockError = { - error: { - message: "Rate limit exceeded", - code: 429, - }, + + // Stream with no reasoning + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "just text", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume all chunks } - const mockCreate = vitest.fn().mockResolvedValue(mockError) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + // No reasoning was captured, should return undefined + const reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeUndefined() + }) - await expect(handler.completePrompt("test prompt")).rejects.toThrow( - "OpenRouter API Error 429: Rate limit exceeded", + it("resets reasoning details between requests", async () => { + const handler = new OpenRouterHandler(mockOptions) + + // First request with reasoning + const mockFullStream1 = (async function* () { + yield { type: "reasoning-delta", text: "first request reasoning", id: "1" } + yield { type: "text-delta", text: "result 1", id: "2" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream1, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator1 = handler.createMessage("test", [{ role: "user", content: "test" }]) + for await (const _ of generator1) { + // consume + } + + // Verify first request captured reasoning + let reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeDefined() + expect(reasoningDetails![0].text).toBe("first request reasoning") + + // Second request without reasoning + const mockFullStream2 = (async function* () { + yield { type: "text-delta", text: "result 2", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream2, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator2 = handler.createMessage("test", [{ role: "user", content: "test" }]) + for await (const _ of generator2) { + 
// consume + } + + // Reasoning details should be reset (undefined since second request had no reasoning) + reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeUndefined() + }) + + it("returns undefined before any streaming occurs", () => { + const handler = new OpenRouterHandler(mockOptions) + + // getReasoningDetails before any createMessage call + const reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeUndefined() + }) + }) + + describe("model-specific handling", () => { + const mockStreamResult = () => { + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "response", id: "1" } + })() + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + } + + const consumeGenerator = async ( + handler: any, + system = "test", + msgs: any[] = [{ role: "user", content: "test" }], + ) => { + const generator = handler.createMessage(system, msgs) + for await (const _ of generator) { + // consume + } + } + + it("passes topP for DeepSeek R1 models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + topP: 0.95, + }), ) + }) - // captureException is called, but PostHogTelemetryClient filters out 429 errors internally - expect(mockCaptureException).toHaveBeenCalledWith( + it("does not pass topP for non-R1 models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "openai/gpt-4o", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - message: "Rate limit exceeded", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - errorCode: 429, - status: 429, + topP: undefined, + }), + ) + }) + + it("does not use R1 format for DeepSeek R1 models (uses standard AI SDK path)", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + }) + mockStreamResult() + await consumeGenerator(handler, "system prompt") + + // R1 models should NOT pass extraBody.messages (R1 format conversion removed) + const providerCall = mockCreateOpenRouter.mock.calls[0][0] + expect(providerCall?.extraBody?.messages).toBeUndefined() + + // System prompt should be passed normally via streamText + const streamTextCall = mockStreamText.mock.calls[0][0] + expect(streamTextCall.system).toBe("system prompt") + }) + + it("applies Anthropic beta headers for Anthropic models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" }, + }), + ) + }) + + it("does not apply Anthropic beta headers for non-Anthropic models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "openai/gpt-4o", + }) + mockStreamResult() + await consumeGenerator(handler) + + const call 
= mockCreateOpenRouter.mock.calls[0][0] + expect(call.headers).toBeUndefined() + }) + + it("passes system prompt directly for Anthropic models (no caching transform)", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + }) + mockStreamResult() + await consumeGenerator(handler) + + // System prompt should be passed directly via streamText + const streamTextCall = mockStreamText.mock.calls[0][0] + expect(streamTextCall.system).toBe("test") + + // Messages should be the converted AI SDK messages (no system-role message injected) + const systemMsgs = streamTextCall.messages.filter((m: any) => m.role === "system") + expect(systemMsgs).toHaveLength(0) + }) + + it("disables reasoning for Gemini 2.5 Pro when not explicitly configured", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "google/gemini-2.5-pro", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + reasoning: { exclude: true }, + }), + }), + ) + }) + + it("passes system prompt directly for Gemini models (no caching transform)", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "google/gemini-2.5-flash", + }) + mockStreamResult() + await consumeGenerator(handler) + + // System prompt should be passed directly via streamText + const streamTextCall = mockStreamText.mock.calls[0][0] + expect(streamTextCall.system).toBe("test") + + // No system-role message should be injected + const systemMsgs = streamTextCall.messages.filter((m: any) => m.role === "system") + expect(systemMsgs).toHaveLength(0) + }) + + it("does not use extraBody.messages for Gemini models outside caching set", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "google/gemini-3-pro-preview", + }) + mockStreamResult() + await consumeGenerator(handler) + + // Non-caching Gemini models should go through the AI SDK natively + // (no extraBody.messages — reasoning_details are wired via providerOptions) + const callArgs = mockCreateOpenRouter.mock.calls[0]?.[0] ?? {} + const extraBody = callArgs.extraBody ?? 
{} + expect(extraBody.messages).toBeUndefined() + }) + + it("passes topP to completePrompt for R1 models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + }) + mockGenerateText.mockResolvedValue({ text: "completion" }) + + await handler.completePrompt("test prompt") + + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + topP: 0.95, }), ) }) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 7fcc24b15f6..d48fc4bb430 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -1,161 +1,44 @@ import { Anthropic } from "@anthropic-ai/sdk" -import OpenAI from "openai" -import { z } from "zod" +import { createOpenRouter } from "@openrouter/ai-sdk-provider" +import { streamText, generateText } from "ai" import { type ModelRecord, - ApiProviderError, + type ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo, OPENROUTER_DEFAULT_PROVIDER_NAME, - OPEN_ROUTER_PROMPT_CACHING_MODELS, DEEP_SEEK_DEFAULT_TEMPERATURE, + ApiProviderError, } from "@roo-code/types" import { TelemetryService } from "@roo-code/telemetry" -import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser" - import type { ApiHandlerOptions } from "../../shared/api" +import { calculateApiCostOpenAI } from "../../shared/cost" -import { - convertToOpenAiMessages, - sanitizeGeminiMessages, - consolidateReasoningDetails, -} from "../transform/openai-format" -import { normalizeMistralToolCallId } from "../transform/mistral-format" -import { ApiStreamChunk } from "../transform/stream" -import { convertToR1Format } from "../transform/r1-format" -import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic" -import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" -import type { OpenRouterReasoningParams } from "../transform/reasoning" +import { type ReasoningDetail } from "../transform/openai-format" import { getModelParams } from "../transform/model-params" +import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk" -import { getModels } from "./fetchers/modelCache" -import { getModelEndpoints } from "./fetchers/modelEndpointCache" - -import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" -import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" -import { handleOpenAIError } from "./utils/openai-error-handler" -import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" +import { getModels, getModelsFromCache } from "./fetchers/modelCache" +import { getModelEndpoints } from "./fetchers/modelEndpointCache" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" -// Add custom interface for OpenRouter params. 
-type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { - transforms?: string[] - include_reasoning?: boolean - // https://openrouter.ai/docs/use-cases/reasoning-tokens - reasoning?: OpenRouterReasoningParams -} - -// Zod schema for OpenRouter error response structure (for caught exceptions) -const OpenRouterErrorResponseSchema = z.object({ - error: z - .object({ - message: z.string().optional(), - code: z.number().optional(), - metadata: z - .object({ - raw: z.string().optional(), - }) - .optional(), - }) - .optional(), -}) - -// OpenRouter error structure that may include error.metadata.raw with actual upstream error -// This is for caught exceptions which have the error wrapped in an "error" property -interface OpenRouterErrorResponse { - error?: { - message?: string - code?: number - metadata?: { raw?: string } - } -} - -// Direct error object structure (for streaming errors passed directly) -interface OpenRouterError { - message?: string - code?: number - metadata?: { raw?: string } -} - -/** - * Helper function to parse and extract error message from metadata.raw - * metadata.raw is often a JSON encoded string that may contain .message or .error fields - * Example structures: - * - {"message": "Error text"} - * - {"error": "Error text"} - * - {"error": {"message": "Error text"}} - * - {"type":"error","error":{"type":"invalid_request_error","message":"tools: Tool names must be unique."}} - */ -function extractErrorFromMetadataRaw(raw: string | undefined): string | undefined { - if (!raw) { - return undefined - } - - try { - const parsed = JSON.parse(raw) - // Check for common error message fields - if (typeof parsed === "object" && parsed !== null) { - // Check for direct message field - if (typeof parsed.message === "string") { - return parsed.message - } - // Check for nested error.message field (e.g., Anthropic error format) - if (typeof parsed.error === "object" && parsed.error !== null && typeof parsed.error.message === "string") { - return parsed.error.message - } - // Check for error as a string - if (typeof parsed.error === "string") { - return parsed.error - } - } - // If we can't extract a specific field, return the raw string - return raw - } catch { - // If it's not valid JSON, return as-is - return raw - } -} - -// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]` -// `CompletionsAPI.CompletionUsage` -// See also: https://openrouter.ai/docs/use-cases/usage-accounting -interface CompletionUsage { - completion_tokens?: number - completion_tokens_details?: { - reasoning_tokens?: number - } - prompt_tokens?: number - prompt_tokens_details?: { - cached_tokens?: number - } - total_tokens?: number - cost?: number - cost_details?: { - upstream_inference_cost?: number - } -} +import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" +import type { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream" export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions - private client: OpenAI protected models: ModelRecord = {} protected endpoints: ModelRecord = {} private readonly providerName = "OpenRouter" - private currentReasoningDetails: any[] = [] + private currentReasoningDetails: ReasoningDetail[] = [] constructor(options: ApiHandlerOptions) { super() this.options = options - - const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" - const apiKey = this.options.openRouterApiKey ?? 
"not-provided" - - this.client = new OpenAI({ baseURL, apiKey, defaultHeaders: DEFAULT_HEADERS }) - - // Load models asynchronously to populate cache before getModel() is called this.loadDynamicModels().catch((error) => { console.error("[OpenRouterHandler] Failed to load dynamic models:", error) }) @@ -171,7 +54,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH endpoint: this.options.openRouterSpecificProvider, }), ]) - this.models = models this.endpoints = endpoints } catch (error) { @@ -182,28 +64,68 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } - getReasoningDetails(): any[] | undefined { + private createOpenRouterProvider(options?: { + reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean } + headers?: Record + }) { + const apiKey = this.options.openRouterApiKey ?? "not-provided" + const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" + const extraBody: Record = {} + if (options?.reasoning) { + extraBody.reasoning = options.reasoning + } + return createOpenRouter({ + apiKey, + baseURL, + ...(Object.keys(extraBody).length > 0 && { extraBody }), + ...(options?.headers && { headers: options.headers }), + }) + } + + getReasoningDetails(): ReasoningDetail[] | undefined { return this.currentReasoningDetails.length > 0 ? this.currentReasoningDetails : undefined } - /** - * Handle OpenRouter streaming error response and report to telemetry. - * OpenRouter may include metadata.raw with the actual upstream provider error. - * @param error The error object (not wrapped - receives the error directly) - */ - private handleStreamingError(error: OpenRouterError, modelId: string, operation: string): never { - const rawString = error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError(rawErrorMessage, this.providerName, modelId, operation, error?.code), - { status: error?.code, error }, + private normalizeUsage( + usage: { inputTokens: number; outputTokens: number }, + providerMetadata: Record | undefined, + modelInfo: ModelInfo, + ): ApiStreamUsageChunk { + const inputTokens = usage.inputTokens ?? 0 + const outputTokens = usage.outputTokens ?? 0 + const openrouterMeta = providerMetadata?.openrouter ?? {} + const cacheReadTokens = + openrouterMeta.cachedInputTokens ?? + openrouterMeta.cache_read_input_tokens ?? + openrouterMeta.cacheReadTokens ?? + openrouterMeta.cached_tokens ?? + 0 + const cacheWriteTokens = + openrouterMeta.cacheCreationInputTokens ?? + openrouterMeta.cache_creation_input_tokens ?? + openrouterMeta.cacheWriteTokens ?? + 0 + const reasoningTokens = + openrouterMeta.reasoningOutputTokens ?? + openrouterMeta.reasoning_tokens ?? + openrouterMeta.output_tokens_details?.reasoning_tokens ?? + undefined + const { totalCost } = calculateApiCostOpenAI( + modelInfo, + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, ) - - TelemetryService.instance.captureException(apiError) - - throw new Error(`OpenRouter API Error ${error?.code}: ${rawErrorMessage}`) + return { + type: "usage", + inputTokens, + outputTokens, + ...(cacheWriteTokens > 0 ? { cacheWriteTokens } : {}), + ...(cacheReadTokens > 0 ? { cacheReadTokens } : {}), + ...(typeof reasoningTokens === "number" && reasoningTokens > 0 ? 
{ reasoningTokens } : {}), + totalCost, + } } override async *createMessage( @@ -211,19 +133,10 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): AsyncGenerator { + this.currentReasoningDetails = [] const model = await this.fetchModel() - let { id: modelId, maxTokens, temperature, topP, reasoning } = model - // Reset reasoning_details accumulator for this request - this.currentReasoningDetails = [] - - // OpenRouter sends reasoning tokens by default for Gemini 2.5 Pro models - // even if you don't request them. This is not the default for - // other providers (including Gemini), so we need to explicitly disable - // them unless the user has explicitly configured reasoning. - // Note: Gemini 3 models use reasoning_details format with thought signatures, - // but we handle this via skip_thought_signature_validator injection below. if ( (modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") && typeof reasoning === "undefined" @@ -231,304 +144,97 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH reasoning = { exclude: true } } - // Convert Anthropic messages to OpenAI format. - // Pass normalization function for Mistral compatibility (requires 9-char alphanumeric IDs) - const isMistral = modelId.toLowerCase().includes("mistral") - let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...convertToOpenAiMessages( - messages, - isMistral ? { normalizeToolCallId: normalizeMistralToolCallId } : undefined, - ), - ] - - // DeepSeek highly recommends using user instead of system role. - if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") { - openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) - } - - // Process reasoning_details when switching models to Gemini. - const isGemini = modelId.startsWith("google/gemini") - - // For Gemini models with native protocol: - // 1. Sanitize messages to handle thought signature validation issues. - // This must happen BEFORE fake encrypted block injection to avoid injecting for - // tool calls that will be dropped due to missing/mismatched reasoning_details. - // 2. Inject fake reasoning.encrypted block for tool calls without existing encrypted reasoning. - // This is required when switching from other models to Gemini to satisfy API validation. 
- // Per OpenRouter documentation (conversation with Toven, Nov 2025): - // - Create ONE reasoning_details entry per assistant message with tool calls - // - Set `id` to the FIRST tool call's ID from the tool_calls array - // - Set `data` to "skip_thought_signature_validator" to bypass signature validation - // - Set `index` to 0 - // See: https://github.com/cline/cline/issues/8214 - if (isGemini) { - // Step 1: Sanitize messages - filter out tool calls with missing/mismatched reasoning_details - openAiMessages = sanitizeGeminiMessages(openAiMessages, modelId) - - // Step 2: Inject fake reasoning.encrypted block for tool calls that survived sanitization - openAiMessages = openAiMessages.map((msg) => { - if (msg.role === "assistant") { - const toolCalls = (msg as any).tool_calls as any[] | undefined - const existingDetails = (msg as any).reasoning_details as any[] | undefined - - // Only inject if there are tool calls and no existing encrypted reasoning - if (toolCalls && toolCalls.length > 0) { - const hasEncrypted = existingDetails?.some((d) => d.type === "reasoning.encrypted") ?? false - - if (!hasEncrypted) { - // Create ONE fake encrypted block with the FIRST tool call's ID - // This is the documented format from OpenRouter for skipping thought signature validation - const fakeEncrypted = { - type: "reasoning.encrypted", - data: "skip_thought_signature_validator", - id: toolCalls[0].id, - format: "google-gemini-v1", - index: 0, - } - - return { - ...msg, - reasoning_details: [...(existingDetails ?? []), fakeEncrypted], - } - } - } - } - return msg - }) - } - - // https://openrouter.ai/docs/features/prompt-caching - // TODO: Add a `promptCacheStratey` field to `ModelInfo`. - if (OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)) { - if (modelId.startsWith("google")) { - addGeminiCacheBreakpoints(systemPrompt, openAiMessages) - } else { - addAnthropicCacheBreakpoints(systemPrompt, openAiMessages) - } - } - - // https://openrouter.ai/docs/transforms - const completionParams: OpenRouterChatCompletionParams = { - model: modelId, - ...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }), - temperature, - top_p: topP, - messages: openAiMessages, - stream: true, - stream_options: { include_usage: true }, - // Only include provider if openRouterSpecificProvider is not "[default]". - ...(this.options.openRouterSpecificProvider && - this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME && { - provider: { - order: [this.options.openRouterSpecificProvider], - only: [this.options.openRouterSpecificProvider], - allow_fallbacks: false, - }, - }), - ...(reasoning && { reasoning }), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - } - - // Add Anthropic beta header for fine-grained tool streaming when using Anthropic models - const requestOptions = modelId.startsWith("anthropic/") - ? { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } } + const isAnthropic = modelId.startsWith("anthropic/") + const headers: Record | undefined = isAnthropic + ? 
{ "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } : undefined - let stream - try { - stream = await this.client.chat.completions.create(completionParams, requestOptions) - } catch (error) { - // Try to parse as OpenRouter error structure using Zod - const parseResult = OpenRouterErrorResponseSchema.safeParse(error) - - if (parseResult.success && parseResult.data.error) { - const openRouterError = parseResult.data - const rawString = openRouterError.error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError( - rawErrorMessage, - this.providerName, - modelId, - "createMessage", - openRouterError.error?.code, - ), - { - status: openRouterError.error?.code, - error: openRouterError.error, - }, - ) - - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } - } + const aiSdkMessages = convertToAiSdkMessages(messages) - let lastUsage: CompletionUsage | undefined = undefined - // Accumulator for reasoning_details FROM the API. - // We preserve the original shape of reasoning_details to prevent malformed responses. - const reasoningDetailsAccumulator = new Map< - string, - { - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index: number - } - >() + const openrouter = this.createOpenRouterProvider({ reasoning, headers }) - // Track whether we've yielded displayable text from reasoning_details. - // When reasoning_details has displayable content (reasoning.text or reasoning.summary), - // we skip yielding the top-level reasoning field to avoid duplicate display. - let hasYieldedReasoningFromDetails = false + const tools = convertToolsForAiSdk(metadata?.tools) - for await (const chunk of stream) { - // OpenRouter returns an error object instead of the OpenAI SDK throwing an error. - if ("error" in chunk) { - this.handleStreamingError(chunk.error as OpenRouterError, modelId, "createMessage") - } - - const delta = chunk.choices[0]?.delta - const finishReason = chunk.choices[0]?.finish_reason - - if (delta) { - // Handle reasoning_details array format (used by Gemini 3, Claude, OpenAI o-series, etc.) - // See: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks - // Priority: Check for reasoning_details first, as it's the newer format - const deltaWithReasoning = delta as typeof delta & { - reasoning_details?: Array<{ - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index?: number - }> - } - - if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { - for (const detail of deltaWithReasoning.reasoning_details) { - const index = detail.index ?? 
0 - const key = `${detail.type}-${index}` - const existing = reasoningDetailsAccumulator.get(key) - - if (existing) { - // Accumulate text/summary/data for existing reasoning detail - if (detail.text !== undefined) { - existing.text = (existing.text || "") + detail.text - } - if (detail.summary !== undefined) { - existing.summary = (existing.summary || "") + detail.summary - } - if (detail.data !== undefined) { - existing.data = (existing.data || "") + detail.data - } - // Update other fields if provided - if (detail.id !== undefined) existing.id = detail.id - if (detail.format !== undefined) existing.format = detail.format - if (detail.signature !== undefined) existing.signature = detail.signature - } else { - // Start new reasoning detail accumulation - reasoningDetailsAccumulator.set(key, { - type: detail.type, - text: detail.text, - summary: detail.summary, - data: detail.data, - id: detail.id, - format: detail.format, - signature: detail.signature, - index, - }) - } - - // Yield text for display (still fragmented for live streaming) - // Only reasoning.text and reasoning.summary have displayable content - // reasoning.encrypted is intentionally skipped as it contains redacted content - let reasoningText: string | undefined - if (detail.type === "reasoning.text" && typeof detail.text === "string") { - reasoningText = detail.text - } else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") { - reasoningText = detail.summary - } - - if (reasoningText) { - hasYieldedReasoningFromDetails = true - yield { type: "reasoning", text: reasoningText } - } + const providerOptions: + | { + openrouter?: { + provider?: { order: string[]; only: string[]; allow_fallbacks: boolean } } - } - - // Handle top-level reasoning field for UI display. - // Skip if we've already yielded from reasoning_details to avoid duplicate display. - if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { - if (!hasYieldedReasoningFromDetails) { - yield { type: "reasoning", text: delta.reasoning } + } + | undefined = + this.options.openRouterSpecificProvider && + this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME + ? { + openrouter: { + provider: { + order: [this.options.openRouterSpecificProvider], + only: [this.options.openRouterSpecificProvider], + allow_fallbacks: false, + }, + }, } - } + : undefined - // Emit raw tool call chunks - NativeToolCallParser handles state management - if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { - for (const toolCall of delta.tool_calls) { - yield { - type: "tool_call_partial", - index: toolCall.index, - id: toolCall.id, - name: toolCall.function?.name, - arguments: toolCall.function?.arguments, - } - } - } + let accumulatedReasoningText = "" - if (delta.content) { - yield { type: "text", text: delta.content } - } - } + try { + const result = streamText({ + model: openrouter.chat(modelId), + system: systemPrompt, + messages: aiSdkMessages, + maxOutputTokens: maxTokens && maxTokens > 0 ? 
maxTokens : undefined, + temperature, + topP, + tools, + toolChoice: metadata?.tool_choice as any, + providerOptions, + }) - // Process finish_reason to emit tool_call_end events - // This ensures tool calls are finalized even if the stream doesn't properly close - if (finishReason) { - const endEvents = NativeToolCallParser.processFinishReason(finishReason) - for (const event of endEvents) { - yield event + for await (const part of result.fullStream) { + if (part.type === "reasoning-delta" && part.text !== "[REDACTED]") { + accumulatedReasoningText += part.text } + yield* processAiSdkStreamPart(part) } - if (chunk.usage) { - lastUsage = chunk.usage + if (accumulatedReasoningText) { + this.currentReasoningDetails.push({ + type: "reasoning.text", + text: accumulatedReasoningText, + index: 0, + }) } - } - // After streaming completes, consolidate and store reasoning_details from the API. - // This filters out corrupted encrypted blocks (missing `data`) and consolidates by index. - if (reasoningDetailsAccumulator.size > 0) { - const rawDetails = Array.from(reasoningDetailsAccumulator.values()) - this.currentReasoningDetails = consolidateReasoningDetails(rawDetails) - } + const providerMetadata = + (await result.providerMetadata) ?? (await (result as any).experimental_providerMetadata) + + const providerReasoningDetails = providerMetadata?.openrouter?.reasoning_details as + | ReasoningDetail[] + | undefined - if (lastUsage) { + if (providerReasoningDetails && providerReasoningDetails.length > 0) { + this.currentReasoningDetails = providerReasoningDetails + } + + const usage = await result.usage + const totalUsage = await result.totalUsage + const usageChunk = this.normalizeUsage( + { + inputTokens: totalUsage.inputTokens ?? usage.inputTokens ?? 0, + outputTokens: totalUsage.outputTokens ?? usage.outputTokens ?? 0, + }, + providerMetadata, + model.info, + ) + yield usageChunk + } catch (error: any) { + const errorMessage = error instanceof Error ? error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") + TelemetryService.instance.captureException(apiError) yield { - type: "usage", - inputTokens: lastUsage.prompt_tokens || 0, - outputTokens: lastUsage.completion_tokens || 0, - cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, - reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, - totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), + type: "error", + error: "OpenRouterError", + message: `${this.providerName} API Error: ${errorMessage}`, } } } @@ -542,27 +248,29 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH endpoint: this.options.openRouterSpecificProvider, }), ]) - this.models = models this.endpoints = endpoints - return this.getModel() } override getModel() { const id = this.options.openRouterModelId ?? openRouterDefaultModelId - let info = this.models[id] ?? openRouterDefaultModelInfo - - // If a specific provider is requested, use the endpoint for that provider. 
+ let info = this.models[id]
+ if (!info) {
+ const cachedModels = getModelsFromCache("openrouter")
+ if (cachedModels?.[id]) {
+ this.models = cachedModels
+ info = cachedModels[id]
+ }
+ }
 if (this.options.openRouterSpecificProvider && this.endpoints[this.options.openRouterSpecificProvider]) {
 info = this.endpoints[this.options.openRouterSpecificProvider]
 }
-
- // Apply tool preferences for models accessed through routers (OpenAI, Gemini)
+ if (!info) {
+ info = openRouterDefaultModelInfo
+ }
 info = applyRouterToolPreferences(id, info)
-
 const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning"
-
 const params = getModelParams({
 format: "openrouter",
 modelId: id,
@@ -570,92 +278,64 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 settings: this.options,
 defaultTemperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0,
 })
-
 return { id, info, topP: isDeepSeekR1 ? 0.95 : undefined, ...params }
 }
- async completePrompt(prompt: string) {
- let { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel()
-
- const completionParams: OpenRouterChatCompletionParams = {
- model: modelId,
- max_tokens: maxTokens,
- temperature,
- messages: [{ role: "user", content: prompt }],
- stream: false,
- // Only include provider if openRouterSpecificProvider is not "[default]".
- ...(this.options.openRouterSpecificProvider &&
- this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME && {
- provider: {
- order: [this.options.openRouterSpecificProvider],
- only: [this.options.openRouterSpecificProvider],
- allow_fallbacks: false,
- },
- }),
- ...(reasoning && { reasoning }),
+ async completePrompt(prompt: string): Promise<string> {
+ let { id: modelId, maxTokens, temperature, topP, reasoning } = await this.fetchModel()
+
+ if (
+ (modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") &&
+ typeof reasoning === "undefined"
+ ) {
+ reasoning = { exclude: true }
 }
- // Add Anthropic beta header for fine-grained tool streaming when using Anthropic models
- const requestOptions = modelId.startsWith("anthropic/")
- ? { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }
+ const isAnthropic = modelId.startsWith("anthropic/")
+ const headers: Record<string, string> | undefined = isAnthropic
+ ? { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" }
 : undefined
- let response
+ const openrouter = this.createOpenRouterProvider({ reasoning, headers })
+
+ const providerOptions:
+ | {
+ openrouter?: {
+ provider?: { order: string[]; only: string[]; allow_fallbacks: boolean }
+ }
+ }
+ | undefined =
+ this.options.openRouterSpecificProvider &&
+ this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME
+ ? {
+ openrouter: {
+ provider: {
+ order: [this.options.openRouterSpecificProvider],
+ only: [this.options.openRouterSpecificProvider],
+ allow_fallbacks: false,
+ },
+ },
+ }
+ : undefined
 try {
- response = await this.client.chat.completions.create(completionParams, requestOptions)
+ const result = await generateText({
+ model: openrouter.chat(modelId),
+ prompt,
+ maxOutputTokens: maxTokens && maxTokens > 0 ?
maxTokens : undefined, + temperature, + topP, + providerOptions, + }) + return result.text } catch (error) { - // Try to parse as OpenRouter error structure using Zod - const parseResult = OpenRouterErrorResponseSchema.safeParse(error) - - if (parseResult.success && parseResult.data.error) { - const openRouterError = parseResult.data - const rawString = openRouterError.error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError( - rawErrorMessage, - this.providerName, - modelId, - "completePrompt", - openRouterError.error?.code, - ), - { - status: openRouterError.error?.code, - error: openRouterError.error, - }, - ) - - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "completePrompt") - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } - } - - if ("error" in response) { - this.handleStreamingError(response.error as OpenRouterError, modelId, "completePrompt") + const errorMessage = error instanceof Error ? error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "completePrompt") + TelemetryService.instance.captureException(apiError) + throw new Error(`${this.providerName} completion error: ${errorMessage}`) } - - const completion = response as OpenAI.Chat.ChatCompletion - return completion.choices[0]?.message?.content || "" } - /** - * Generate an image using OpenRouter's image generation API (chat completions with modalities) - * Note: OpenRouter only supports the chat completions approach, not the /images/generations endpoint - * @param prompt The text prompt for image generation - * @param model The model to use for generation - * @param apiKey The OpenRouter API key (must be explicitly provided) - * @param inputImage Optional base64 encoded input image data URL - * @returns The generated image data and format, or an error - */ async generateImage( prompt: string, model: string, @@ -668,10 +348,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH error: "OpenRouter API key is required for image generation", } } - const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" - - // OpenRouter only supports chat completions approach for image generation return generateImageWithProvider({ baseURL, authToken: apiKey, diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index f973fc85a6d..0676f59a2c2 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -501,6 +501,132 @@ describe("AI SDK conversion utilities", () => { expect(toolCallPart).toBeDefined() expect(toolCallPart.providerOptions).toBeUndefined() }) + + it("attaches valid reasoning_details as providerOptions.openrouter, filtering invalid entries", () => { + const validEncrypted = { + type: "reasoning.encrypted", + data: "encrypted_blob_data", + id: "tool_call_123", + format: "google-gemini-v1", + index: 0, + } + const invalidEncrypted = { + // type is "reasoning.encrypted" but has text instead of data — + // this is a plaintext summary 
mislabeled as encrypted by Gemini/OpenRouter. + // The provider's ReasoningDetailEncryptedSchema requires `data: string`, + // so including this causes the entire Zod safeParse to fail. + type: "reasoning.encrypted", + text: "Plaintext reasoning summary", + id: "tool_call_123", + format: "google-gemini-v1", + index: 0, + } + const textWithSignature = { + type: "reasoning.text", + text: "Some reasoning content", + signature: "stale-signature-from-previous-model", + } + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { type: "text", text: "Using a tool" }, + { + type: "tool_use", + id: "tool_call_123", + name: "attempt_completion", + input: { result: "done" }, + }, + ], + reasoning_details: [validEncrypted, invalidEncrypted, textWithSignature], + } as any, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + expect(assistantMsg.role).toBe("assistant") + expect(assistantMsg.providerOptions).toBeDefined() + expect(assistantMsg.providerOptions.openrouter).toBeDefined() + const details = assistantMsg.providerOptions.openrouter.reasoning_details + // Only the valid entries should survive filtering (invalidEncrypted dropped) + expect(details).toHaveLength(2) + expect(details[0]).toEqual(validEncrypted) + // Signatures should be preserved as-is for same-model Anthropic conversations via OpenRouter + expect(details[1]).toEqual(textWithSignature) + }) + + it("does not attach providerOptions when no reasoning_details are present", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [{ type: "text", text: "Just text" }], + }, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + expect(assistantMsg.providerOptions).toBeUndefined() + }) + + it("does not attach providerOptions when reasoning_details is an empty array", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [{ type: "text", text: "Just text" }], + reasoning_details: [], + } as any, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + expect(assistantMsg.providerOptions).toBeUndefined() + }) + + it("preserves both reasoning_details and thoughtSignature providerOptions", () => { + const reasoningDetails = [ + { + type: "reasoning.encrypted", + data: "encrypted_data", + id: "tool_call_abc", + format: "google-gemini-v1", + index: 0, + }, + ] + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { type: "thoughtSignature", thoughtSignature: "sig-xyz" } as any, + { type: "text", text: "Using tool" }, + { + type: "tool_use", + id: "tool_call_abc", + name: "read_file", + input: { path: "test.ts" }, + }, + ], + reasoning_details: reasoningDetails, + } as any, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + // Message-level providerOptions carries reasoning_details + expect(assistantMsg.providerOptions.openrouter.reasoning_details).toEqual(reasoningDetails) + // Part-level providerOptions carries thoughtSignature on the first tool-call + const toolCallPart = assistantMsg.content.find((p: any) => p.type === "tool-call") + expect(toolCallPart.providerOptions.google.thoughtSignature).toBe("sig-xyz") + }) }) describe("convertToolsForAiSdk", () => { @@ 
-686,6 +812,27 @@ describe("AI SDK conversion utilities", () => { expect(chunks).toHaveLength(0) } }) + it("should filter [REDACTED] from reasoning-delta parts", () => { + const redactedPart = { type: "reasoning-delta" as const, text: "[REDACTED]" } + const normalPart = { type: "reasoning-delta" as const, text: "actual reasoning" } + + const redactedResult = [...processAiSdkStreamPart(redactedPart as any)] + const normalResult = [...processAiSdkStreamPart(normalPart as any)] + + expect(redactedResult).toEqual([]) + expect(normalResult).toEqual([{ type: "reasoning", text: "actual reasoning" }]) + }) + + it("should filter [REDACTED] from reasoning (fullStream format) parts", () => { + const redactedPart = { type: "reasoning" as const, text: "[REDACTED]" } + const normalPart = { type: "reasoning" as const, text: "actual reasoning" } + + const redactedResult = [...processAiSdkStreamPart(redactedPart as any)] + const normalResult = [...processAiSdkStreamPart(normalPart as any)] + + expect(redactedResult).toEqual([]) + expect(normalResult).toEqual([{ type: "reasoning", text: "actual reasoning" }]) + }) }) describe("mapToolChoice", () => { diff --git a/src/api/transform/__tests__/model-params.spec.ts b/src/api/transform/__tests__/model-params.spec.ts index a50f1291bef..f32cbc606b8 100644 --- a/src/api/transform/__tests__/model-params.spec.ts +++ b/src/api/transform/__tests__/model-params.spec.ts @@ -830,9 +830,29 @@ describe("getModelParams", () => { expect(result.maxTokens).toBe(20000) expect(result.reasoningBudget).toBe(10000) - expect(result.temperature).toBe(1.0) // Overridden for reasoning budget models + expect(result.temperature).toBe(0.8) // User-specified temperature is respected expect(result.reasoningEffort).toBeUndefined() // Budget takes precedence }) + + it("should default to temperature 1.0 for reasoning budget models when no custom temperature is set", () => { + const model: ModelInfo = { + ...baseModel, + maxTokens: 16000, + supportsReasoningBudget: true, + } + + const result = getModelParams({ + ...anthropicParams, + settings: { + enableReasoningEffort: true, + modelMaxTokens: 20000, + }, + model, + }) + + expect(result.temperature).toBe(1.0) // Defaults to 1.0 when no custom temperature + expect(result.reasoningBudget).toBeDefined() + }) }) describe("Provider-specific reasoning behavior", () => { diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index c673fad3d27..3c4bc6d8f9c 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -205,7 +205,7 @@ export function convertToAiSdkMessages( if (typeof thinkingPart.thinking === "string" && thinkingPart.thinking.length > 0) { reasoningParts.push(thinkingPart.thinking) } - // Capture the signature for round-tripping (Anthropic/Bedrock thinking) + // Capture the signature for round-tripping (Anthropic/Bedrock thinking). if (thinkingPart.signature) { thinkingSignature = thinkingPart.signature } @@ -249,10 +249,40 @@ export function convertToAiSdkMessages( } content.push(...toolCalls) - modelMessages.push({ + // Carry reasoning_details through to providerOptions for OpenRouter round-tripping + // (used by Gemini 3, xAI, etc. for encrypted reasoning chain continuity). + // The @openrouter/ai-sdk-provider reads message-level providerOptions.openrouter.reasoning_details + // and validates them against ReasoningDetailUnionSchema (a strict Zod union). + // Invalid entries (e.g. 
type "reasoning.encrypted" without a `data` field) must be + // filtered out here, otherwise the entire safeParse fails and NO reasoning_details + // are included in the outgoing request. + const rawReasoningDetails = (message as unknown as { reasoning_details?: Record[] }) + .reasoning_details + const validReasoningDetails = rawReasoningDetails?.filter((detail) => { + switch (detail.type) { + case "reasoning.encrypted": + return typeof detail.data === "string" && detail.data.length > 0 + case "reasoning.text": + return typeof detail.text === "string" + case "reasoning.summary": + return typeof detail.summary === "string" + default: + return false + } + }) + + const assistantMessage: Record = { role: "assistant", content: content.length > 0 ? content : [{ type: "text", text: "" }], - } as ModelMessage) + } + + if (validReasoningDetails && validReasoningDetails.length > 0) { + assistantMessage.providerOptions = { + openrouter: { reasoning_details: validReasoningDetails }, + } + } + + modelMessages.push(assistantMessage as ModelMessage) } } } @@ -387,9 +417,13 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator