From 0d2db24dbb6852b830300d3f178c7208a9720332 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Thu, 15 Jan 2026 17:55:39 -0500 Subject: [PATCH 1/6] feat: refactor OpenRouter provider to use Vercel AI SDK - Replace direct OpenAI SDK usage with @openrouter/ai-sdk-provider - Add 'ai' package for streamText() and generateText() functions - Extract reusable AI SDK conversion utilities to src/api/transform/ai-sdk.ts: - convertToAiSdkMessages(): Anthropic messages to CoreMessage format - convertToolsForAiSdk(): OpenAI tools to AI SDK tool format - processAiSdkStreamPart(): AI SDK stream events to ApiStreamChunk - Update tests to mock AI SDK functions instead of OpenAI client - Add comprehensive tests for the new ai-sdk.ts utility module This follows Vercel's AI SDK provider pattern for standardized LLM integration and enables easier migration of other providers in the future. --- .../providers/__tests__/openrouter.spec.ts | 637 +++++++---------- src/api/providers/openrouter.ts | 641 ++++-------------- src/api/transform/__tests__/ai-sdk.spec.ts | 172 +---- 3 files changed, 357 insertions(+), 1093 deletions(-) diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index e03abea6352..548df83c250 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -3,13 +3,31 @@ vitest.mock("vscode", () => ({})) import { Anthropic } from "@anthropic-ai/sdk" -import OpenAI from "openai" import { OpenRouterHandler } from "../openrouter" import { ApiHandlerOptions } from "../../../shared/api" -import { Package } from "../../../shared/package" -vitest.mock("openai") +// Mock the AI SDK +const mockStreamText = vitest.fn() +const mockGenerateText = vitest.fn() +const mockCreateOpenRouter = vitest.fn() + +vitest.mock("ai", () => ({ + streamText: (...args: unknown[]) => mockStreamText(...args), + generateText: (...args: unknown[]) => mockGenerateText(...args), + tool: vitest.fn((t) => t), + jsonSchema: vitest.fn((s) => s), +})) + +vitest.mock("@openrouter/ai-sdk-provider", () => ({ + createOpenRouter: (...args: unknown[]) => { + mockCreateOpenRouter(...args) + return { + chat: vitest.fn((modelId: string) => ({ modelId })), + } + }, +})) + vitest.mock("delay", () => ({ default: vitest.fn(() => Promise.resolve()) })) const mockCaptureException = vitest.fn() @@ -82,6 +100,11 @@ vitest.mock("../fetchers/modelCache", () => ({ }, }) }), + getModelsFromCache: vitest.fn().mockReturnValue(null), +})) + +vitest.mock("../fetchers/modelEndpointCache", () => ({ + getModelEndpoints: vitest.fn().mockResolvedValue({}), })) describe("OpenRouterHandler", () => { @@ -90,21 +113,13 @@ describe("OpenRouterHandler", () => { openRouterModelId: "anthropic/claude-sonnet-4", } - beforeEach(() => vitest.clearAllMocks()) + beforeEach(() => { + vitest.clearAllMocks() + }) it("initializes with correct options", () => { const handler = new OpenRouterHandler(mockOptions) expect(handler).toBeInstanceOf(OpenRouterHandler) - - expect(OpenAI).toHaveBeenCalledWith({ - baseURL: "https://openrouter.ai/api/v1", - apiKey: mockOptions.openRouterApiKey, - defaultHeaders: { - "HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline", - "X-Title": "Roo Code", - "User-Agent": `RooCode/${Package.version}`, - }, - }) }) describe("fetchModel", () => { @@ -207,26 +222,20 @@ describe("OpenRouterHandler", () => { it("generates correct stream chunks", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async 
*[Symbol.asyncIterator]() { - yield { - id: mockOptions.openRouterModelId, - choices: [{ delta: { content: "test response" } }], - } - yield { - id: "test-id", - choices: [{ delta: {} }], - usage: { prompt_tokens: 10, completion_tokens: 20, cost: 0.001 }, - } - }, - } + // Create mock async iterator for fullStream + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test response", id: "1" } + })() - // Mock OpenAI chat.completions.create - const mockCreate = vitest.fn().mockResolvedValue(mockStream) + // Mock usage promises + const mockUsage = Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }) + const mockTotalUsage = Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: mockUsage, + totalUsage: mockTotalUsage, + }) const systemPrompt = "test system prompt" const messages: Anthropic.Messages.MessageParam[] = [{ role: "user" as const, content: "test message" }] @@ -238,464 +247,296 @@ describe("OpenRouterHandler", () => { chunks.push(chunk) } - // Verify stream chunks - expect(chunks).toHaveLength(2) // One text chunk and one usage chunk + // Verify stream chunks - should have text and usage chunks + expect(chunks).toHaveLength(2) expect(chunks[0]).toEqual({ type: "text", text: "test response" }) - expect(chunks[1]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20, totalCost: 0.001 }) + expect(chunks[1]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 }) - // Verify OpenAI client was called with correct parameters. - expect(mockCreate).toHaveBeenCalledWith( + // Verify streamText was called with correct parameters + expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - max_tokens: 8192, - messages: [ - { - content: [ - { cache_control: { type: "ephemeral" }, text: "test system prompt", type: "text" }, - ], - role: "system", - }, - { - content: [{ cache_control: { type: "ephemeral" }, text: "test message", type: "text" }], - role: "user", - }, - ], - model: "anthropic/claude-sonnet-4", - stream: true, - stream_options: { include_usage: true }, + system: systemPrompt, + messages: expect.any(Array), + maxOutputTokens: 8192, temperature: 0, - top_p: undefined, }), - { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, ) }) - it("adds cache control for supported models", async () => { - const handler = new OpenRouterHandler({ - ...mockOptions, - openRouterModelId: "anthropic/claude-3.5-sonnet", - }) + it("handles reasoning delta chunks", async () => { + const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { - id: "test-id", - choices: [{ delta: { content: "test response" } }], - } - }, - } + const mockFullStream = (async function* () { + yield { type: "reasoning-delta", text: "thinking...", id: "1" } + yield { type: "text-delta", text: "result", id: "2" } + })() - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "message 1" 
}, - { role: "assistant", content: "response 1" }, - { role: "user", content: "message 2" }, - ] + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] - await handler.createMessage("test system", messages).next() + for await (const chunk of generator) { + chunks.push(chunk) + } - expect(mockCreate).toHaveBeenCalledWith( - expect.objectContaining({ - messages: expect.arrayContaining([ - expect.objectContaining({ - role: "system", - content: expect.arrayContaining([ - expect.objectContaining({ cache_control: { type: "ephemeral" } }), - ]), - }), - ]), - }), - { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, - ) + expect(chunks[0]).toEqual({ type: "reasoning", text: "thinking..." }) + expect(chunks[1]).toEqual({ type: "text", text: "result" }) }) - it("handles API errors and captures telemetry", async () => { + it("handles tool call streaming", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { error: { message: "API Error", code: 500 } } - }, - } - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("OpenRouter API Error 500: API Error") - - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "API Error", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - errorCode: 500, - status: 500, - }), - ) - }) + const mockFullStream = (async function* () { + yield { type: "tool-input-start", id: "call_1", toolName: "read_file" } + yield { type: "tool-input-delta", id: "call_1", delta: '{"path":' } + yield { type: "tool-input-delta", id: "call_1", delta: '"test.ts"}' } + yield { type: "tool-input-end", id: "call_1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - it("captures telemetry when createMessage throws an exception", async () => { - const handler = new OpenRouterHandler(mockOptions) - const mockCreate = vitest.fn().mockRejectedValue(new Error("Connection failed")) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow() + for await (const chunk of generator) { + chunks.push(chunk) + } - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Connection failed", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) + expect(chunks[0]).toEqual({ type: "tool_call_start", id: "call_1", name: "read_file" }) + expect(chunks[1]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '{"path":' }) + expect(chunks[2]).toEqual({ type: "tool_call_delta", id: "call_1", delta: '"test.ts"}' }) + expect(chunks[3]).toEqual({ type: "tool_call_end", id: "call_1" }) }) - it("passes SDK exceptions with status 429 to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles complete tool call events", async () => { 
const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Rate limit exceeded: free-models-per-day") as any - error.status = 429 - - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("Rate limit exceeded") + const mockFullStream = (async function* () { + yield { + type: "tool-call", + toolCallId: "call_1", + toolName: "read_file", + input: { path: "test.ts" }, + } + })() - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) - }) + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - it("passes SDK exceptions with 429 in message to telemetry (filtering happens in PostHogTelemetryClient)", async () => { - const handler = new OpenRouterHandler(mockOptions) - const error = new Error("429 Rate limit exceeded: free-models-per-day") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("429 Rate limit exceeded") + for await (const chunk of generator) { + chunks.push(chunk) + } - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "429 Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) + expect(chunks[0]).toEqual({ + type: "tool_call", + id: "call_1", + name: "read_file", + arguments: '{"path":"test.ts"}', + }) }) - it("passes SDK exceptions containing 'rate limit' to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles API errors gracefully", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Request failed due to rate limit") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("rate limit") + mockStreamText.mockImplementation(() => { + throw new Error("API Error") + }) - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Request failed due to rate limit", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - }), - ) - }) + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] - it("passes 429 rate limit errors from stream to telemetry (filtering happens in PostHogTelemetryClient)", async () => { - const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { error: { message: "Rate limit exceeded", code: 429 } } - }, + for await (const chunk of generator) { + chunks.push(chunk) } - const mockCreate = 
vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - - const generator = handler.createMessage("test", []) - await expect(generator.next()).rejects.toThrow("OpenRouter API Error 429: Rate limit exceeded") - - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Rate limit exceeded", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "createMessage", - errorCode: 429, - status: 429, - }), - ) + expect(chunks[0]).toEqual({ + type: "error", + error: "OpenRouterError", + message: "OpenRouter API Error: API Error", + }) }) - it("yields tool_call_end events when finish_reason is tool_calls", async () => { - // Import NativeToolCallParser to set up state - const { NativeToolCallParser } = await import("../../../core/assistant-message/NativeToolCallParser") - - // Clear any previous state - NativeToolCallParser.clearRawChunkState() - + it("handles stream errors", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockStream = { - async *[Symbol.asyncIterator]() { - yield { - id: "test-id", - choices: [ - { - delta: { - tool_calls: [ - { - index: 0, - id: "call_openrouter_test", - function: { name: "read_file", arguments: '{"path":"test.ts"}' }, - }, - ], - }, - index: 0, - }, - ], - } - yield { - id: "test-id", - choices: [ - { - delta: {}, - finish_reason: "tool_calls", - index: 0, - }, - ], - usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, - } - }, - } + const mockFullStream = (async function* () { + yield { type: "error", error: new Error("Stream error") } + })() - const mockCreate = vitest.fn().mockResolvedValue(mockStream) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 0, outputTokens: 0, totalTokens: 0 }), + totalUsage: Promise.resolve({ inputTokens: 0, outputTokens: 0, totalTokens: 0 }), + }) - const generator = handler.createMessage("test", []) + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) const chunks = [] for await (const chunk of generator) { - // Simulate what Task.ts does: when we receive tool_call_partial, - // process it through NativeToolCallParser to populate rawChunkTracker - if (chunk.type === "tool_call_partial") { - NativeToolCallParser.processRawChunk({ - index: chunk.index, - id: chunk.id, - name: chunk.name, - arguments: chunk.arguments, - }) - } chunks.push(chunk) } - // Should have tool_call_partial and tool_call_end - const partialChunks = chunks.filter((chunk) => chunk.type === "tool_call_partial") - const endChunks = chunks.filter((chunk) => chunk.type === "tool_call_end") - - expect(partialChunks).toHaveLength(1) - expect(endChunks).toHaveLength(1) - expect(endChunks[0].id).toBe("call_openrouter_test") + expect(chunks[0]).toEqual({ + type: "error", + error: "StreamError", + message: "Stream error", + }) }) - }) - describe("completePrompt", () => { - it("returns correct response", async () => { + it("passes tools to streamText when provided", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockResponse = { choices: [{ message: { content: "test completion" } }] } - const mockCreate = vitest.fn().mockResolvedValue(mockResponse) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const mockFullStream = (async function* () { + yield { type: 
"text-delta", text: "test", id: "1" } + })() - const result = await handler.completePrompt("test prompt") - - expect(result).toBe("test completion") + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - expect(mockCreate).toHaveBeenCalledWith( + const tools = [ { - model: mockOptions.openRouterModelId, - max_tokens: 8192, - temperature: 0, - messages: [{ role: "user", content: "test prompt" }], - stream: false, - }, - { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, - ) - }) - - it("handles API errors and captures telemetry", async () => { - const handler = new OpenRouterHandler(mockOptions) - const mockError = { - error: { - message: "API Error", - code: 500, + type: "function" as const, + function: { + name: "read_file", + description: "Read a file", + parameters: { type: "object", properties: { path: { type: "string" } } }, + }, }, - } + ] - const mockCreate = vitest.fn().mockResolvedValue(mockError) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const generator = handler.createMessage("test", [{ role: "user", content: "test" }], { + taskId: "test", + tools, + }) - await expect(handler.completePrompt("test prompt")).rejects.toThrow("OpenRouter API Error 500: API Error") + for await (const _ of generator) { + // consume + } - // Verify telemetry was captured - expect(mockCaptureException).toHaveBeenCalledWith( + expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - message: "API Error", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - errorCode: 500, - status: 500, + tools: expect.objectContaining({ + read_file: expect.any(Object), + }), }), ) }) + }) - it("handles unexpected errors and captures telemetry", async () => { + describe("completePrompt", () => { + it("returns correct response", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Unexpected error") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error") + mockGenerateText.mockResolvedValue({ + text: "test completion", + }) + + const result = await handler.completePrompt("test prompt") - // Verify telemetry was captured (filtering now happens inside PostHogTelemetryClient) - expect(mockCaptureException).toHaveBeenCalledWith( + expect(result).toBe("test completion") + expect(mockGenerateText).toHaveBeenCalledWith( expect.objectContaining({ - message: "Unexpected error", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", + prompt: "test prompt", + maxOutputTokens: 8192, + temperature: 0, }), ) }) - it("passes SDK exceptions with status 429 to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles API errors", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Rate limit exceeded: free-models-per-day") as any - error.status = 429 - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - await expect(handler.completePrompt("test prompt")).rejects.toThrow("Rate limit exceeded") + 
mockGenerateText.mockRejectedValue(new Error("API Error")) - // captureException is called, but PostHogTelemetryClient filters out 429 errors internally - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - }), + await expect(handler.completePrompt("test prompt")).rejects.toThrow( + "OpenRouter completion error: API Error", ) }) - it("passes SDK exceptions with 429 in message to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("handles rate limit errors", async () => { const handler = new OpenRouterHandler(mockOptions) - const error = new Error("429 Rate limit exceeded: free-models-per-day") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any - await expect(handler.completePrompt("test prompt")).rejects.toThrow("429 Rate limit exceeded") + mockGenerateText.mockRejectedValue(new Error("Rate limit exceeded")) - // captureException is called, but PostHogTelemetryClient filters out 429 errors internally - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "429 Rate limit exceeded: free-models-per-day", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - }), + await expect(handler.completePrompt("test prompt")).rejects.toThrow( + "OpenRouter completion error: Rate limit exceeded", ) }) + }) - it("passes SDK exceptions containing 'rate limit' to telemetry (filtering happens in PostHogTelemetryClient)", async () => { - const handler = new OpenRouterHandler(mockOptions) - const error = new Error("Request failed due to rate limit") - const mockCreate = vitest.fn().mockRejectedValue(error) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + describe("provider configuration", () => { + it("creates OpenRouter provider with correct API key and base URL", async () => { + const customOptions: ApiHandlerOptions = { + openRouterApiKey: "custom-key", + openRouterBaseUrl: "https://custom.openrouter.ai/api/v1", + openRouterModelId: "anthropic/claude-sonnet-4", + } - await expect(handler.completePrompt("test prompt")).rejects.toThrow("rate limit") + const handler = new OpenRouterHandler(customOptions) - // captureException is called, but PostHogTelemetryClient filters out rate limit errors internally - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Request failed due to rate limit", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - }), - ) + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume + } + + expect(mockCreateOpenRouter).toHaveBeenCalledWith({ + apiKey: "custom-key", + baseURL: "https://custom.openrouter.ai/api/v1", + }) }) - it("passes 429 rate limit errors from response to telemetry (filtering happens in PostHogTelemetryClient)", async () => { + it("uses 
default base URL when not specified", async () => { const handler = new OpenRouterHandler(mockOptions) - const mockError = { - error: { - message: "Rate limit exceeded", - code: 429, - }, - } - const mockCreate = vitest.fn().mockResolvedValue(mockError) - ;(OpenAI as any).prototype.chat = { - completions: { create: mockCreate }, - } as any + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() - await expect(handler.completePrompt("test prompt")).rejects.toThrow( - "OpenRouter API Error 429: Rate limit exceeded", - ) + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) - // captureException is called, but PostHogTelemetryClient filters out 429 errors internally - expect(mockCaptureException).toHaveBeenCalledWith( - expect.objectContaining({ - message: "Rate limit exceeded", - provider: "OpenRouter", - modelId: mockOptions.openRouterModelId, - operation: "completePrompt", - errorCode: 429, - status: 429, - }), - ) + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume + } + + expect(mockCreateOpenRouter).toHaveBeenCalledWith({ + apiKey: "test-key", + baseURL: "https://openrouter.ai/api/v1", + }) }) }) }) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 7fcc24b15f6..f4b5171751b 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -1,160 +1,43 @@ import { Anthropic } from "@anthropic-ai/sdk" -import OpenAI from "openai" -import { z } from "zod" +import { createOpenRouter } from "@openrouter/ai-sdk-provider" +import { streamText, generateText } from "ai" import { type ModelRecord, - ApiProviderError, openRouterDefaultModelId, openRouterDefaultModelInfo, + NATIVE_TOOL_DEFAULTS, OPENROUTER_DEFAULT_PROVIDER_NAME, - OPEN_ROUTER_PROMPT_CACHING_MODELS, DEEP_SEEK_DEFAULT_TEMPERATURE, } from "@roo-code/types" -import { TelemetryService } from "@roo-code/telemetry" - -import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser" import type { ApiHandlerOptions } from "../../shared/api" -import { - convertToOpenAiMessages, - sanitizeGeminiMessages, - consolidateReasoningDetails, -} from "../transform/openai-format" -import { normalizeMistralToolCallId } from "../transform/mistral-format" -import { ApiStreamChunk } from "../transform/stream" -import { convertToR1Format } from "../transform/r1-format" -import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic" -import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" -import type { OpenRouterReasoningParams } from "../transform/reasoning" -import { getModelParams } from "../transform/model-params" - -import { getModels } from "./fetchers/modelCache" -import { getModelEndpoints } from "./fetchers/modelEndpointCache" - -import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" -import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" -import { handleOpenAIError } from "./utils/openai-error-handler" -import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" +import { getModels, getModelsFromCache } from "./fetchers/modelCache" +import { getModelEndpoints } from 
"./fetchers/modelEndpointCache" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { getModelParams } from "../transform/model-params" +import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk" +import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" -// Add custom interface for OpenRouter params. -type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { - transforms?: string[] - include_reasoning?: boolean - // https://openrouter.ai/docs/use-cases/reasoning-tokens - reasoning?: OpenRouterReasoningParams -} - -// Zod schema for OpenRouter error response structure (for caught exceptions) -const OpenRouterErrorResponseSchema = z.object({ - error: z - .object({ - message: z.string().optional(), - code: z.number().optional(), - metadata: z - .object({ - raw: z.string().optional(), - }) - .optional(), - }) - .optional(), -}) - -// OpenRouter error structure that may include error.metadata.raw with actual upstream error -// This is for caught exceptions which have the error wrapped in an "error" property -interface OpenRouterErrorResponse { - error?: { - message?: string - code?: number - metadata?: { raw?: string } - } -} - -// Direct error object structure (for streaming errors passed directly) -interface OpenRouterError { - message?: string - code?: number - metadata?: { raw?: string } -} +import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" +import type { ApiStreamChunk } from "../transform/stream" /** - * Helper function to parse and extract error message from metadata.raw - * metadata.raw is often a JSON encoded string that may contain .message or .error fields - * Example structures: - * - {"message": "Error text"} - * - {"error": "Error text"} - * - {"error": {"message": "Error text"}} - * - {"type":"error","error":{"type":"invalid_request_error","message":"tools: Tool names must be unique."}} + * OpenRouter handler using the Vercel AI SDK. + * This provides a standardized interface following the AI SDK provider pattern. 
*/ -function extractErrorFromMetadataRaw(raw: string | undefined): string | undefined { - if (!raw) { - return undefined - } - - try { - const parsed = JSON.parse(raw) - // Check for common error message fields - if (typeof parsed === "object" && parsed !== null) { - // Check for direct message field - if (typeof parsed.message === "string") { - return parsed.message - } - // Check for nested error.message field (e.g., Anthropic error format) - if (typeof parsed.error === "object" && parsed.error !== null && typeof parsed.error.message === "string") { - return parsed.error.message - } - // Check for error as a string - if (typeof parsed.error === "string") { - return parsed.error - } - } - // If we can't extract a specific field, return the raw string - return raw - } catch { - // If it's not valid JSON, return as-is - return raw - } -} - -// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]` -// `CompletionsAPI.CompletionUsage` -// See also: https://openrouter.ai/docs/use-cases/usage-accounting -interface CompletionUsage { - completion_tokens?: number - completion_tokens_details?: { - reasoning_tokens?: number - } - prompt_tokens?: number - prompt_tokens_details?: { - cached_tokens?: number - } - total_tokens?: number - cost?: number - cost_details?: { - upstream_inference_cost?: number - } -} - export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions - private client: OpenAI protected models: ModelRecord = {} protected endpoints: ModelRecord = {} private readonly providerName = "OpenRouter" - private currentReasoningDetails: any[] = [] constructor(options: ApiHandlerOptions) { super() this.options = options - const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" - const apiKey = this.options.openRouterApiKey ?? "not-provided" - - this.client = new OpenAI({ baseURL, apiKey, defaultHeaders: DEFAULT_HEADERS }) - // Load models asynchronously to populate cache before getModel() is called this.loadDynamicModels().catch((error) => { console.error("[OpenRouterHandler] Failed to load dynamic models:", error) @@ -182,28 +65,17 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } - getReasoningDetails(): any[] | undefined { - return this.currentReasoningDetails.length > 0 ? this.currentReasoningDetails : undefined - } - /** - * Handle OpenRouter streaming error response and report to telemetry. - * OpenRouter may include metadata.raw with the actual upstream provider error. - * @param error The error object (not wrapped - receives the error directly) + * Create the OpenRouter provider instance using the AI SDK */ - private handleStreamingError(error: OpenRouterError, modelId: string, operation: string): never { - const rawString = error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError(rawErrorMessage, this.providerName, modelId, operation, error?.code), - { status: error?.code, error }, - ) - - TelemetryService.instance.captureException(apiError) + private createOpenRouterProvider() { + const apiKey = this.options.openRouterApiKey ?? 
"not-provided" + const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" - throw new Error(`OpenRouter API Error ${error?.code}: ${rawErrorMessage}`) + return createOpenRouter({ + apiKey, + baseURL, + }) } override async *createMessage( @@ -212,323 +84,59 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH metadata?: ApiHandlerCreateMessageMetadata, ): AsyncGenerator { const model = await this.fetchModel() - - let { id: modelId, maxTokens, temperature, topP, reasoning } = model - - // Reset reasoning_details accumulator for this request - this.currentReasoningDetails = [] - - // OpenRouter sends reasoning tokens by default for Gemini 2.5 Pro models - // even if you don't request them. This is not the default for - // other providers (including Gemini), so we need to explicitly disable - // them unless the user has explicitly configured reasoning. - // Note: Gemini 3 models use reasoning_details format with thought signatures, - // but we handle this via skip_thought_signature_validator injection below. - if ( - (modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") && - typeof reasoning === "undefined" - ) { - reasoning = { exclude: true } - } - - // Convert Anthropic messages to OpenAI format. - // Pass normalization function for Mistral compatibility (requires 9-char alphanumeric IDs) - const isMistral = modelId.toLowerCase().includes("mistral") - let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...convertToOpenAiMessages( - messages, - isMistral ? { normalizeToolCallId: normalizeMistralToolCallId } : undefined, - ), - ] - - // DeepSeek highly recommends using user instead of system role. - if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") { - openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) - } - - // Process reasoning_details when switching models to Gemini. - const isGemini = modelId.startsWith("google/gemini") - - // For Gemini models with native protocol: - // 1. Sanitize messages to handle thought signature validation issues. - // This must happen BEFORE fake encrypted block injection to avoid injecting for - // tool calls that will be dropped due to missing/mismatched reasoning_details. - // 2. Inject fake reasoning.encrypted block for tool calls without existing encrypted reasoning. - // This is required when switching from other models to Gemini to satisfy API validation. 
- // Per OpenRouter documentation (conversation with Toven, Nov 2025): - // - Create ONE reasoning_details entry per assistant message with tool calls - // - Set `id` to the FIRST tool call's ID from the tool_calls array - // - Set `data` to "skip_thought_signature_validator" to bypass signature validation - // - Set `index` to 0 - // See: https://github.com/cline/cline/issues/8214 - if (isGemini) { - // Step 1: Sanitize messages - filter out tool calls with missing/mismatched reasoning_details - openAiMessages = sanitizeGeminiMessages(openAiMessages, modelId) - - // Step 2: Inject fake reasoning.encrypted block for tool calls that survived sanitization - openAiMessages = openAiMessages.map((msg) => { - if (msg.role === "assistant") { - const toolCalls = (msg as any).tool_calls as any[] | undefined - const existingDetails = (msg as any).reasoning_details as any[] | undefined - - // Only inject if there are tool calls and no existing encrypted reasoning - if (toolCalls && toolCalls.length > 0) { - const hasEncrypted = existingDetails?.some((d) => d.type === "reasoning.encrypted") ?? false - - if (!hasEncrypted) { - // Create ONE fake encrypted block with the FIRST tool call's ID - // This is the documented format from OpenRouter for skipping thought signature validation - const fakeEncrypted = { - type: "reasoning.encrypted", - data: "skip_thought_signature_validator", - id: toolCalls[0].id, - format: "google-gemini-v1", - index: 0, - } - - return { - ...msg, - reasoning_details: [...(existingDetails ?? []), fakeEncrypted], - } - } + const { id: modelId, maxTokens, temperature } = model + + const openrouter = this.createOpenRouterProvider() + const coreMessages = convertToAiSdkMessages(messages) + const tools = convertToolsForAiSdk(metadata?.tools) + + // Build provider options for specific provider routing + const providerOptions = + this.options.openRouterSpecificProvider && + this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME + ? { + openrouter: { + provider: { + order: [this.options.openRouterSpecificProvider], + only: [this.options.openRouterSpecificProvider], + allow_fallbacks: false, + }, + }, } - } - return msg - }) - } + : undefined - // https://openrouter.ai/docs/features/prompt-caching - // TODO: Add a `promptCacheStratey` field to `ModelInfo`. - if (OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId)) { - if (modelId.startsWith("google")) { - addGeminiCacheBreakpoints(systemPrompt, openAiMessages) - } else { - addAnthropicCacheBreakpoints(systemPrompt, openAiMessages) - } - } - - // https://openrouter.ai/docs/transforms - const completionParams: OpenRouterChatCompletionParams = { - model: modelId, - ...(maxTokens && maxTokens > 0 && { max_tokens: maxTokens }), - temperature, - top_p: topP, - messages: openAiMessages, - stream: true, - stream_options: { include_usage: true }, - // Only include provider if openRouterSpecificProvider is not "[default]". - ...(this.options.openRouterSpecificProvider && - this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME && { - provider: { - order: [this.options.openRouterSpecificProvider], - only: [this.options.openRouterSpecificProvider], - allow_fallbacks: false, - }, - }), - ...(reasoning && { reasoning }), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - } - - // Add Anthropic beta header for fine-grained tool streaming when using Anthropic models - const requestOptions = modelId.startsWith("anthropic/") - ? 
{ headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } } - : undefined - - let stream try { - stream = await this.client.chat.completions.create(completionParams, requestOptions) - } catch (error) { - // Try to parse as OpenRouter error structure using Zod - const parseResult = OpenRouterErrorResponseSchema.safeParse(error) - - if (parseResult.success && parseResult.data.error) { - const openRouterError = parseResult.data - const rawString = openRouterError.error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError( - rawErrorMessage, - this.providerName, - modelId, - "createMessage", - openRouterError.error?.code, - ), - { - status: openRouterError.error?.code, - error: openRouterError.error, - }, - ) - - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } - } - - let lastUsage: CompletionUsage | undefined = undefined - // Accumulator for reasoning_details FROM the API. - // We preserve the original shape of reasoning_details to prevent malformed responses. - const reasoningDetailsAccumulator = new Map< - string, - { - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index: number - } - >() - - // Track whether we've yielded displayable text from reasoning_details. - // When reasoning_details has displayable content (reasoning.text or reasoning.summary), - // we skip yielding the top-level reasoning field to avoid duplicate display. - let hasYieldedReasoningFromDetails = false - - for await (const chunk of stream) { - // OpenRouter returns an error object instead of the OpenAI SDK throwing an error. - if ("error" in chunk) { - this.handleStreamingError(chunk.error as OpenRouterError, modelId, "createMessage") - } - - const delta = chunk.choices[0]?.delta - const finishReason = chunk.choices[0]?.finish_reason - - if (delta) { - // Handle reasoning_details array format (used by Gemini 3, Claude, OpenAI o-series, etc.) - // See: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks - // Priority: Check for reasoning_details first, as it's the newer format - const deltaWithReasoning = delta as typeof delta & { - reasoning_details?: Array<{ - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index?: number - }> - } - - if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { - for (const detail of deltaWithReasoning.reasoning_details) { - const index = detail.index ?? 
0 - const key = `${detail.type}-${index}` - const existing = reasoningDetailsAccumulator.get(key) - - if (existing) { - // Accumulate text/summary/data for existing reasoning detail - if (detail.text !== undefined) { - existing.text = (existing.text || "") + detail.text - } - if (detail.summary !== undefined) { - existing.summary = (existing.summary || "") + detail.summary - } - if (detail.data !== undefined) { - existing.data = (existing.data || "") + detail.data - } - // Update other fields if provided - if (detail.id !== undefined) existing.id = detail.id - if (detail.format !== undefined) existing.format = detail.format - if (detail.signature !== undefined) existing.signature = detail.signature - } else { - // Start new reasoning detail accumulation - reasoningDetailsAccumulator.set(key, { - type: detail.type, - text: detail.text, - summary: detail.summary, - data: detail.data, - id: detail.id, - format: detail.format, - signature: detail.signature, - index, - }) - } - - // Yield text for display (still fragmented for live streaming) - // Only reasoning.text and reasoning.summary have displayable content - // reasoning.encrypted is intentionally skipped as it contains redacted content - let reasoningText: string | undefined - if (detail.type === "reasoning.text" && typeof detail.text === "string") { - reasoningText = detail.text - } else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") { - reasoningText = detail.summary - } - - if (reasoningText) { - hasYieldedReasoningFromDetails = true - yield { type: "reasoning", text: reasoningText } - } - } - } - - // Handle top-level reasoning field for UI display. - // Skip if we've already yielded from reasoning_details to avoid duplicate display. - if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { - if (!hasYieldedReasoningFromDetails) { - yield { type: "reasoning", text: delta.reasoning } - } - } - - // Emit raw tool call chunks - NativeToolCallParser handles state management - if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { - for (const toolCall of delta.tool_calls) { - yield { - type: "tool_call_partial", - index: toolCall.index, - id: toolCall.id, - name: toolCall.function?.name, - arguments: toolCall.function?.arguments, - } - } - } - - if (delta.content) { - yield { type: "text", text: delta.content } - } - } - - // Process finish_reason to emit tool_call_end events - // This ensures tool calls are finalized even if the stream doesn't properly close - if (finishReason) { - const endEvents = NativeToolCallParser.processFinishReason(finishReason) - for (const event of endEvents) { - yield event - } - } + const result = streamText({ + model: openrouter.chat(modelId), + system: systemPrompt, + messages: coreMessages, + maxOutputTokens: maxTokens && maxTokens > 0 ? maxTokens : undefined, + temperature, + tools, + toolChoice: metadata?.tool_choice as any, + providerOptions, + }) - if (chunk.usage) { - lastUsage = chunk.usage + // Process the full stream for all event types + for await (const part of result.fullStream) { + yield* processAiSdkStreamPart(part) } - } - // After streaming completes, consolidate and store reasoning_details from the API. - // This filters out corrupted encrypted blocks (missing `data`) and consolidates by index. 
- if (reasoningDetailsAccumulator.size > 0) { - const rawDetails = Array.from(reasoningDetailsAccumulator.values()) - this.currentReasoningDetails = consolidateReasoningDetails(rawDetails) - } + // After streaming completes, yield usage information + const usage = await result.usage + const totalUsage = await result.totalUsage - if (lastUsage) { yield { type: "usage", - inputTokens: lastUsage.prompt_tokens || 0, - outputTokens: lastUsage.completion_tokens || 0, - cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, - reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, - totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), + inputTokens: totalUsage.inputTokens ?? usage.inputTokens ?? 0, + outputTokens: totalUsage.outputTokens ?? usage.outputTokens ?? 0, + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + yield { + type: "error", + error: "OpenRouterError", + message: `${this.providerName} API Error: ${errorMessage}`, } } } @@ -551,15 +159,31 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH override getModel() { const id = this.options.openRouterModelId ?? openRouterDefaultModelId - let info = this.models[id] ?? openRouterDefaultModelInfo - // If a specific provider is requested, use the endpoint for that provider. + // First check instance models (populated by fetchModel) + let info = this.models[id] + + if (!info) { + // Fall back to global cache + const cachedModels = getModelsFromCache("openrouter") + if (cachedModels?.[id]) { + this.models = cachedModels + info = cachedModels[id] + } + } + + // If a specific provider is requested, use the endpoint for that provider if (this.options.openRouterSpecificProvider && this.endpoints[this.options.openRouterSpecificProvider]) { info = this.endpoints[this.options.openRouterSpecificProvider] } - // Apply tool preferences for models accessed through routers (OpenAI, Gemini) - info = applyRouterToolPreferences(id, info) + // Fall back to default if nothing found + if (!info) { + info = openRouterDefaultModelInfo + } + + // Apply tool preferences for models accessed through routers + info = applyRouterToolPreferences(id, { ...NATIVE_TOOL_DEFAULTS, ...info }) const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning" @@ -574,77 +198,40 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH return { id, info, topP: isDeepSeekR1 ? 0.95 : undefined, ...params } } - async completePrompt(prompt: string) { - let { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel() - - const completionParams: OpenRouterChatCompletionParams = { - model: modelId, - max_tokens: maxTokens, - temperature, - messages: [{ role: "user", content: prompt }], - stream: false, - // Only include provider if openRouterSpecificProvider is not "[default]". - ...(this.options.openRouterSpecificProvider && - this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME && { - provider: { - order: [this.options.openRouterSpecificProvider], - only: [this.options.openRouterSpecificProvider], - allow_fallbacks: false, - }, - }), - ...(reasoning && { reasoning }), - } - - // Add Anthropic beta header for fine-grained tool streaming when using Anthropic models - const requestOptions = modelId.startsWith("anthropic/") - ? 
{ headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } } - : undefined - - let response + async completePrompt(prompt: string): Promise { + const { id: modelId, maxTokens, temperature } = await this.fetchModel() + + const openrouter = this.createOpenRouterProvider() + + // Build provider options for specific provider routing + const providerOptions = + this.options.openRouterSpecificProvider && + this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME + ? { + openrouter: { + provider: { + order: [this.options.openRouterSpecificProvider], + only: [this.options.openRouterSpecificProvider], + allow_fallbacks: false, + }, + }, + } + : undefined try { - response = await this.client.chat.completions.create(completionParams, requestOptions) - } catch (error) { - // Try to parse as OpenRouter error structure using Zod - const parseResult = OpenRouterErrorResponseSchema.safeParse(error) - - if (parseResult.success && parseResult.data.error) { - const openRouterError = parseResult.data - const rawString = openRouterError.error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError( - rawErrorMessage, - this.providerName, - modelId, - "completePrompt", - openRouterError.error?.code, - ), - { - status: openRouterError.error?.code, - error: openRouterError.error, - }, - ) - - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "completePrompt") - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } - } + const result = await generateText({ + model: openrouter.chat(modelId), + prompt, + maxOutputTokens: maxTokens && maxTokens > 0 ? maxTokens : undefined, + temperature, + providerOptions, + }) - if ("error" in response) { - this.handleStreamingError(response.error as OpenRouterError, modelId, "completePrompt") + return result.text + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + throw new Error(`${this.providerName} completion error: ${errorMessage}`) } - - const completion = response as OpenAI.Chat.ChatCompletion - return completion.choices[0]?.message?.content || "" } /** diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index f973fc85a6d..95831f032fe 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -81,51 +81,8 @@ describe("AI SDK conversion utilities", () => { }) }) - it("converts user messages with URL image content", () => { + it("converts tool results into separate tool messages", () => { const messages: Anthropic.Messages.MessageParam[] = [ - { - role: "user", - content: [ - { type: "text", text: "What is in this image?" }, - { - type: "image", - source: { - type: "url", - url: "https://example.com/image.png", - }, - } as any, - ], - }, - ] - - const result = convertToAiSdkMessages(messages) - - expect(result).toHaveLength(1) - expect(result[0]).toEqual({ - role: "user", - content: [ - { type: "text", text: "What is in this image?" 
}, - { - type: "image", - image: "https://example.com/image.png", - }, - ], - }) - }) - - it("converts tool results into separate tool role messages with resolved tool names", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { - role: "assistant", - content: [ - { - type: "tool_use", - id: "call_123", - name: "read_file", - input: { path: "test.ts" }, - }, - ], - }, { role: "user", content: [ @@ -140,123 +97,18 @@ describe("AI SDK conversion utilities", () => { const result = convertToAiSdkMessages(messages) - expect(result).toHaveLength(2) - expect(result[0]).toEqual({ - role: "assistant", - content: [ - { - type: "tool-call", - toolCallId: "call_123", - toolName: "read_file", - input: { path: "test.ts" }, - }, - ], - }) - // Tool results now go to role: "tool" messages per AI SDK v6 schema - expect(result[1]).toEqual({ - role: "tool", - content: [ - { - type: "tool-result", - toolCallId: "call_123", - toolName: "read_file", - output: { type: "text", value: "Tool result content" }, - }, - ], - }) - }) - - it("uses unknown_tool for tool results without matching tool call", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { - role: "user", - content: [ - { - type: "tool_result", - tool_use_id: "call_orphan", - content: "Orphan result", - }, - ], - }, - ] - - const result = convertToAiSdkMessages(messages) - expect(result).toHaveLength(1) - // Tool results go to role: "tool" messages expect(result[0]).toEqual({ - role: "tool", - content: [ - { - type: "tool-result", - toolCallId: "call_orphan", - toolName: "unknown_tool", - output: { type: "text", value: "Orphan result" }, - }, - ], - }) - }) - - it("separates tool results and text content into different messages", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { - role: "assistant", - content: [ - { - type: "tool_use", - id: "call_123", - name: "read_file", - input: { path: "test.ts" }, - }, - ], - }, - { - role: "user", - content: [ - { - type: "tool_result", - tool_use_id: "call_123", - content: "File contents here", - }, - { - type: "text", - text: "Please analyze this file", - }, - ], - }, - ] - - const result = convertToAiSdkMessages(messages) - - expect(result).toHaveLength(3) - expect(result[0]).toEqual({ - role: "assistant", - content: [ - { - type: "tool-call", - toolCallId: "call_123", - toolName: "read_file", - input: { path: "test.ts" }, - }, - ], - }) - // Tool results go first in a "tool" message - expect(result[1]).toEqual({ role: "tool", content: [ { type: "tool-result", toolCallId: "call_123", - toolName: "read_file", - output: { type: "text", value: "File contents here" }, + toolName: "", + output: "Tool result content", }, ], }) - // Text content goes in a separate "user" message - expect(result[2]).toEqual({ - role: "user", - content: [{ type: "text", text: "Please analyze this file" }], - }) }) it("converts assistant messages with tool use", () => { @@ -286,7 +138,7 @@ describe("AI SDK conversion utilities", () => { type: "tool-call", toolCallId: "call_456", toolName: "read_file", - input: { path: "test.ts" }, + args: { path: "test.ts" }, }, ], }) @@ -572,14 +424,6 @@ describe("AI SDK conversion utilities", () => { expect(chunks[0]).toEqual({ type: "text", text: "Hello" }) }) - it("processes text chunks (fullStream format)", () => { - const part = { type: "text" as const, text: "Hello from fullStream" } - const chunks = [...processAiSdkStreamPart(part as any)] - - expect(chunks).toHaveLength(1) - expect(chunks[0]).toEqual({ type: "text", text: "Hello from fullStream" 
}) - }) - it("processes reasoning-delta chunks", () => { const part = { type: "reasoning-delta" as const, id: "1", text: "thinking..." } const chunks = [...processAiSdkStreamPart(part)] @@ -588,14 +432,6 @@ describe("AI SDK conversion utilities", () => { expect(chunks[0]).toEqual({ type: "reasoning", text: "thinking..." }) }) - it("processes reasoning chunks (fullStream format)", () => { - const part = { type: "reasoning" as const, text: "reasoning from fullStream" } - const chunks = [...processAiSdkStreamPart(part as any)] - - expect(chunks).toHaveLength(1) - expect(chunks[0]).toEqual({ type: "reasoning", text: "reasoning from fullStream" }) - }) - it("processes tool-input-start chunks", () => { const part = { type: "tool-input-start" as const, id: "call_1", toolName: "read_file" } const chunks = [...processAiSdkStreamPart(part)] From 3b31b37170a829051c4b90e4bbbd927e733e0d0d Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Fri, 16 Jan 2026 12:08:47 -0500 Subject: [PATCH 2/6] fix: resolve AI_InvalidPromptError in OpenRouter AI SDK tool results - Fixed tool result output format to use typed object { type: 'text', value: string } instead of plain string to satisfy AI SDK validation schema - Added tool name resolution by building a map of tool call IDs to names - Updated tests to reflect new output format --- src/api/providers/openrouter.ts | 3 +- src/api/transform/__tests__/ai-sdk.spec.ts | 60 ++++++++++++++++++++-- src/api/transform/ai-sdk.ts | 12 +++++ 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index f4b5171751b..d04c87748a7 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -6,7 +6,6 @@ import { type ModelRecord, openRouterDefaultModelId, openRouterDefaultModelInfo, - NATIVE_TOOL_DEFAULTS, OPENROUTER_DEFAULT_PROVIDER_NAME, DEEP_SEEK_DEFAULT_TEMPERATURE, } from "@roo-code/types" @@ -183,7 +182,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } // Apply tool preferences for models accessed through routers - info = applyRouterToolPreferences(id, { ...NATIVE_TOOL_DEFAULTS, ...info }) + info = applyRouterToolPreferences(id, info) const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning" diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index 95831f032fe..d926917762a 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -81,8 +81,19 @@ describe("AI SDK conversion utilities", () => { }) }) - it("converts tool results into separate tool messages", () => { + it("converts tool results into separate tool messages with resolved tool names", () => { const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_123", + name: "read_file", + input: { path: "test.ts" }, + }, + ], + }, { role: "user", content: [ @@ -97,15 +108,56 @@ describe("AI SDK conversion utilities", () => { const result = convertToAiSdkMessages(messages) - expect(result).toHaveLength(1) + expect(result).toHaveLength(2) expect(result[0]).toEqual({ + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_123", + toolName: "read_file", + args: { path: "test.ts" }, + }, + ], + }) + expect(result[1]).toEqual({ role: "tool", content: [ { type: "tool-result", toolCallId: "call_123", - toolName: "", - output: "Tool result content", + toolName: "read_file", + 
output: { type: "text", value: "Tool result content" }, + }, + ], + }) + }) + + it("uses unknown_tool for tool results without matching tool call", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_orphan", + content: "Orphan result", + }, + ], + }, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + expect(result[0]).toEqual({ + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_orphan", + toolName: "unknown_tool", + output: { type: "text", value: "Orphan result" }, }, ], }) diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index c673fad3d27..aadd740239a 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -45,6 +45,18 @@ export function convertToAiSdkMessages( } } + // First pass: build a map of tool call IDs to tool names from assistant messages + const toolCallIdToName = new Map() + for (const message of messages) { + if (message.role === "assistant" && typeof message.content !== "string") { + for (const part of message.content) { + if (part.type === "tool_use") { + toolCallIdToName.set(part.id, part.name) + } + } + } + } + for (const message of messages) { if (typeof message.content === "string") { modelMessages.push({ From f52e1ed66fc1926c4bb640944366873308dda935 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 26 Jan 2026 18:27:01 -0500 Subject: [PATCH 3/6] fix: restore missing functionality in OpenRouter AI SDK refactor - Add getReasoningDetails() method to preserve reasoning context for multi-turn conversations - Restore telemetry reporting with TelemetryService.captureException() in error handlers - Restore detailed usage metrics (totalCost, cacheReadTokens, reasoningTokens, cacheWriteTokens) Addresses review comments on PR #10778 --- .../providers/__tests__/openrouter.spec.ts | 304 +++++++++++++++++- src/api/providers/openrouter.ts | 145 ++++++++- 2 files changed, 439 insertions(+), 10 deletions(-) diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index 548df83c250..6cc9c06281a 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -219,7 +219,7 @@ describe("OpenRouterHandler", () => { }) describe("createMessage", () => { - it("generates correct stream chunks", async () => { + it("generates correct stream chunks with basic usage and totalCost", async () => { const handler = new OpenRouterHandler(mockOptions) // Create mock async iterator for fullStream @@ -235,6 +235,7 @@ describe("OpenRouterHandler", () => { fullStream: mockFullStream, usage: mockUsage, totalUsage: mockTotalUsage, + providerMetadata: Promise.resolve(undefined), }) const systemPrompt = "test system prompt" @@ -250,7 +251,16 @@ describe("OpenRouterHandler", () => { // Verify stream chunks - should have text and usage chunks expect(chunks).toHaveLength(2) expect(chunks[0]).toEqual({ type: "text", text: "test response" }) - expect(chunks[1]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20 }) + // Usage chunk should include totalCost calculated from model pricing + // Model: anthropic/claude-sonnet-4 with inputPrice: 3, outputPrice: 15 (per million) + // Cost = (10 * 3 / 1_000_000) + (20 * 15 / 1_000_000) = 0.00003 + 0.0003 = 0.00033 + expect(chunks[1]).toMatchObject({ + type: "usage", + inputTokens: 10, + outputTokens: 20, + totalCost: expect.any(Number), + }) + expect((chunks[1] 
as any).totalCost).toBeCloseTo(0.00033, 6) // Verify streamText was called with correct parameters expect(mockStreamText).toHaveBeenCalledWith( @@ -263,6 +273,155 @@ describe("OpenRouterHandler", () => { ) }) + it("includes cache read tokens in usage when provider metadata contains them", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + totalUsage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + providerMetadata: Promise.resolve({ + openrouter: { + cachedInputTokens: 30, + }, + }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 30, + totalCost: expect.any(Number), + }) + }) + + it("includes reasoning tokens in usage when provider metadata contains them", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 100, outputTokens: 150, totalTokens: 250 }), + totalUsage: Promise.resolve({ inputTokens: 100, outputTokens: 150, totalTokens: 250 }), + providerMetadata: Promise.resolve({ + openrouter: { + reasoningOutputTokens: 50, + }, + }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 150, + reasoningTokens: 50, + totalCost: expect.any(Number), + }) + }) + + it("includes all detailed usage metrics when provider metadata contains them", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 200, outputTokens: 100, totalTokens: 300 }), + totalUsage: Promise.resolve({ inputTokens: 200, outputTokens: 100, totalTokens: 300 }), + providerMetadata: Promise.resolve({ + openrouter: { + cachedInputTokens: 50, + cacheCreationInputTokens: 20, + reasoningOutputTokens: 30, + }, + }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 200, + outputTokens: 100, + cacheReadTokens: 50, + cacheWriteTokens: 20, + reasoningTokens: 30, + totalCost: expect.any(Number), + }) + }) + + it("handles experimental_providerMetadata fallback", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const 
mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + totalUsage: Promise.resolve({ inputTokens: 100, outputTokens: 50, totalTokens: 150 }), + providerMetadata: Promise.resolve(undefined), + experimental_providerMetadata: Promise.resolve({ + openrouter: { + cachedInputTokens: 25, + }, + }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + const usageChunk = chunks.find((c) => c.type === "usage") + expect(usageChunk).toBeDefined() + expect(usageChunk).toMatchObject({ + type: "usage", + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 25, + totalCost: expect.any(Number), + }) + }) + it("handles reasoning delta chunks", async () => { const handler = new OpenRouterHandler(mockOptions) @@ -288,6 +447,36 @@ describe("OpenRouterHandler", () => { expect(chunks[1]).toEqual({ type: "text", text: "result" }) }) + it("accumulates reasoning details for getReasoningDetails()", async () => { + const handler = new OpenRouterHandler(mockOptions) + + const mockFullStream = (async function* () { + yield { type: "reasoning-delta", text: "step 1...", id: "1" } + yield { type: "reasoning-delta", text: "step 2...", id: "2" } + yield { type: "text-delta", text: "result", id: "3" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume all chunks + } + + // After streaming, getReasoningDetails should return accumulated reasoning + const reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeDefined() + expect(reasoningDetails).toHaveLength(1) + expect(reasoningDetails![0].type).toBe("reasoning.text") + expect(reasoningDetails![0].text).toBe("step 1...step 2...") + expect(reasoningDetails![0].index).toBe(0) + }) + it("handles tool call streaming", async () => { const handler = new OpenRouterHandler(mockOptions) @@ -369,6 +558,16 @@ describe("OpenRouterHandler", () => { error: "OpenRouterError", message: "OpenRouter API Error: API Error", }) + + // Verify telemetry was called + expect(mockCaptureException).toHaveBeenCalledTimes(1) + expect(mockCaptureException).toHaveBeenCalledWith( + expect.objectContaining({ + message: "API Error", + provider: "OpenRouter", + operation: "createMessage", + }), + ) }) it("handles stream errors", async () => { @@ -469,6 +668,16 @@ describe("OpenRouterHandler", () => { await expect(handler.completePrompt("test prompt")).rejects.toThrow( "OpenRouter completion error: API Error", ) + + // Verify telemetry was called + expect(mockCaptureException).toHaveBeenCalledTimes(1) + expect(mockCaptureException).toHaveBeenCalledWith( + expect.objectContaining({ + message: "API Error", + provider: "OpenRouter", + operation: "completePrompt", + }), + ) }) it("handles rate limit errors", async () => { @@ -479,6 +688,16 @@ describe("OpenRouterHandler", () => { await expect(handler.completePrompt("test prompt")).rejects.toThrow( "OpenRouter completion error: Rate limit exceeded", ) + + // Verify telemetry was 
called + expect(mockCaptureException).toHaveBeenCalledTimes(1) + expect(mockCaptureException).toHaveBeenCalledWith( + expect.objectContaining({ + message: "Rate limit exceeded", + provider: "OpenRouter", + operation: "completePrompt", + }), + ) }) }) @@ -539,4 +758,85 @@ describe("OpenRouterHandler", () => { }) }) }) + + describe("getReasoningDetails", () => { + it("returns undefined when no reasoning was captured", async () => { + const handler = new OpenRouterHandler(mockOptions) + + // Stream with no reasoning + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "just text", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume all chunks + } + + // No reasoning was captured, should return undefined + const reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeUndefined() + }) + + it("resets reasoning details between requests", async () => { + const handler = new OpenRouterHandler(mockOptions) + + // First request with reasoning + const mockFullStream1 = (async function* () { + yield { type: "reasoning-delta", text: "first request reasoning", id: "1" } + yield { type: "text-delta", text: "result 1", id: "2" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream1, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator1 = handler.createMessage("test", [{ role: "user", content: "test" }]) + for await (const _ of generator1) { + // consume + } + + // Verify first request captured reasoning + let reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeDefined() + expect(reasoningDetails![0].text).toBe("first request reasoning") + + // Second request without reasoning + const mockFullStream2 = (async function* () { + yield { type: "text-delta", text: "result 2", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream2, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator2 = handler.createMessage("test", [{ role: "user", content: "test" }]) + for await (const _ of generator2) { + // consume + } + + // Reasoning details should be reset (undefined since second request had no reasoning) + reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeUndefined() + }) + + it("returns undefined before any streaming occurs", () => { + const handler = new OpenRouterHandler(mockOptions) + + // getReasoningDetails before any createMessage call + const reasoningDetails = handler.getReasoningDetails() + expect(reasoningDetails).toBeUndefined() + }) + }) }) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index d04c87748a7..13ca17d0bb1 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -4,13 +4,17 @@ import { streamText, generateText } from "ai" import { type ModelRecord, + type ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo, OPENROUTER_DEFAULT_PROVIDER_NAME, 
DEEP_SEEK_DEFAULT_TEMPERATURE, + ApiProviderError, } from "@roo-code/types" +import { TelemetryService } from "@roo-code/telemetry" import type { ApiHandlerOptions } from "../../shared/api" +import { calculateApiCostOpenAI } from "../../shared/cost" import { BaseProvider } from "./base-provider" import { getModels, getModelsFromCache } from "./fetchers/modelCache" @@ -21,7 +25,22 @@ import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" -import type { ApiStreamChunk } from "../transform/stream" +import type { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream" + +/** + * Reasoning detail structure for preserving reasoning context across multi-turn conversations. + * Used by models like Gemini 3 that provide structured reasoning information. + */ +interface ReasoningDetail { + type: string + text?: string + summary?: string + data?: string + id?: string | null + format?: string + signature?: string + index: number +} /** * OpenRouter handler using the Vercel AI SDK. @@ -32,6 +51,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH protected models: ModelRecord = {} protected endpoints: ModelRecord = {} private readonly providerName = "OpenRouter" + private currentReasoningDetails: ReasoningDetail[] = [] constructor(options: ApiHandlerOptions) { super() @@ -77,11 +97,90 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }) } + /** + * Get the accumulated reasoning details from the current streaming session. + * These details are used by Task.ts to preserve reasoning context across multi-turn + * conversations with models like Gemini 3. + * + * @returns Array of reasoning details if available, undefined otherwise + */ + getReasoningDetails(): ReasoningDetail[] | undefined { + return this.currentReasoningDetails.length > 0 ? this.currentReasoningDetails : undefined + } + + /** + * Normalize usage data from the AI SDK response into the ApiStreamUsageChunk format. + * Extracts detailed usage information including cache tokens, reasoning tokens, and calculates cost. + * + * @param usage - Basic usage from AI SDK (inputTokens, outputTokens) + * @param providerMetadata - Provider-specific metadata that may contain extended usage info + * @param modelInfo - Model information for cost calculation + * @returns Normalized ApiStreamUsageChunk with all available usage metrics + */ + private normalizeUsage( + usage: { inputTokens: number; outputTokens: number }, + providerMetadata: Record | undefined, + modelInfo: ModelInfo, + ): ApiStreamUsageChunk { + const inputTokens = usage.inputTokens ?? 0 + const outputTokens = usage.outputTokens ?? 0 + + // Extract OpenRouter-specific metadata + // The AI SDK exposes provider metadata under the provider key + const openrouterMeta = providerMetadata?.openrouter ?? {} + + // Extract cache tokens from various possible locations + // OpenRouter AI SDK may provide: cachedInputTokens, cache_read_input_tokens, etc. + const cacheReadTokens = + openrouterMeta.cachedInputTokens ?? + openrouterMeta.cache_read_input_tokens ?? + openrouterMeta.cacheReadTokens ?? + openrouterMeta.cached_tokens ?? + 0 + + const cacheWriteTokens = + openrouterMeta.cacheCreationInputTokens ?? + openrouterMeta.cache_creation_input_tokens ?? + openrouterMeta.cacheWriteTokens ?? 
+ 0 + + // Extract reasoning tokens from output token details + // OpenRouter AI SDK may provide: reasoningOutputTokens, output_tokens_details.reasoning_tokens + const reasoningTokens = + openrouterMeta.reasoningOutputTokens ?? + openrouterMeta.reasoning_tokens ?? + openrouterMeta.output_tokens_details?.reasoning_tokens ?? + undefined + + // Calculate cost using model pricing information + // OpenRouter follows the OpenAI convention where input tokens include cached tokens + const { totalCost } = calculateApiCostOpenAI( + modelInfo, + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + ) + + return { + type: "usage", + inputTokens, + outputTokens, + ...(cacheWriteTokens > 0 ? { cacheWriteTokens } : {}), + ...(cacheReadTokens > 0 ? { cacheReadTokens } : {}), + ...(typeof reasoningTokens === "number" && reasoningTokens > 0 ? { reasoningTokens } : {}), + totalCost, + } + } + override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): AsyncGenerator { + // Reset reasoning details accumulator for this request + this.currentReasoningDetails = [] + const model = await this.fetchModel() const { id: modelId, maxTokens, temperature } = model @@ -104,6 +203,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } : undefined + // Accumulator for reasoning text to build a single reasoning detail + let accumulatedReasoningText = "" + try { const result = streamText({ model: openrouter.chat(modelId), @@ -118,20 +220,45 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH // Process the full stream for all event types for await (const part of result.fullStream) { + // Capture reasoning text for accumulation + if (part.type === "reasoning-delta") { + accumulatedReasoningText += part.text + } + yield* processAiSdkStreamPart(part) } - // After streaming completes, yield usage information + // After streaming completes, store accumulated reasoning as a detail + if (accumulatedReasoningText) { + this.currentReasoningDetails.push({ + type: "reasoning.text", + text: accumulatedReasoningText, + index: 0, + }) + } + + // After streaming completes, yield usage information with detailed metrics const usage = await result.usage const totalUsage = await result.totalUsage - - yield { - type: "usage", - inputTokens: totalUsage.inputTokens ?? usage.inputTokens ?? 0, - outputTokens: totalUsage.outputTokens ?? usage.outputTokens ?? 0, - } + // Access provider metadata for extended usage information (cache tokens, reasoning tokens, etc.) + // The AI SDK provides this through providerMetadata or experimental_providerMetadata + const providerMetadata = + (await result.providerMetadata) ?? (await (result as any).experimental_providerMetadata) + + // Normalize and yield usage with all available metrics + const usageChunk = this.normalizeUsage( + { + inputTokens: totalUsage.inputTokens ?? usage.inputTokens ?? 0, + outputTokens: totalUsage.outputTokens ?? usage.outputTokens ?? 0, + }, + providerMetadata, + model.info, + ) + yield usageChunk } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") + TelemetryService.instance.captureException(apiError) yield { type: "error", error: "OpenRouterError", @@ -229,6 +356,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH return result.text } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "completePrompt") + TelemetryService.instance.captureException(apiError) throw new Error(`${this.providerName} completion error: ${errorMessage}`) } } From 2a6bb25efb026f26d483acf6e42805ef065df5d0 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Wed, 28 Jan 2026 11:09:22 -0500 Subject: [PATCH 4/6] fix(openrouter): enable reasoning support via AI SDK extraBody - Add support for 'reasoning' and 'text' event types in AI SDK stream processing - Pass reasoning parameters via createOpenRouter extraBody instead of providerOptions - Support both effort-based (effort: 'high') and budget-based (max_tokens: N) reasoning - Add comprehensive tests for reasoning parameter passing and stream event handling - Fixes reasoning tokens not being displayed for models like DeepSeek R1 and Gemini Thinking Changes: - src/api/transform/ai-sdk.ts: Add 'text' and 'reasoning' event type handlers - src/api/providers/openrouter.ts: Pass reasoning via extraBody in provider creation - Add tests for new event types and reasoning parameter flow - All 53 tests passing --- .../providers/__tests__/openrouter.spec.ts | 125 ++++++++++++++++++ src/api/providers/openrouter.ts | 40 ++++-- src/api/transform/__tests__/ai-sdk.spec.ts | 118 ++++++++++++++++- 3 files changed, 272 insertions(+), 11 deletions(-) diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index 6cc9c06281a..7dbe069771c 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -78,6 +78,16 @@ vitest.mock("../fetchers/modelCache", () => ({ cacheReadsPrice: 0.3, description: "Claude 3.7 Sonnet with thinking", }, + "deepseek/deepseek-r1": { + maxTokens: 8192, + contextWindow: 64000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0.55, + outputPrice: 2.19, + description: "DeepSeek R1", + supportsReasoningEffort: true, + }, "openai/gpt-4o": { maxTokens: 16384, contextWindow: 128000, @@ -638,6 +648,86 @@ describe("OpenRouterHandler", () => { }), ) }) + + it("passes reasoning parameters via extraBody when reasoning effort is enabled", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + reasoningEffort: "high", + enableReasoningEffort: true, + }) + + const mockFullStream = (async function* () { + yield { type: "reasoning-delta", text: "thinking...", id: "1" } + yield { type: "text-delta", text: "result", id: "2" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume + } + + // Verify that reasoning was passed via extraBody when creating the provider + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + 
expect.objectContaining({ + extraBody: expect.objectContaining({ + reasoning: expect.objectContaining({ + effort: "high", + }), + }), + }), + ) + + // Verify that providerOptions does NOT contain extended_thinking + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + providerOptions: undefined, + }), + ) + }) + + it("does not pass reasoning via extraBody when reasoning is disabled", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + }) + + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "test", id: "1" } + })() + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + + const generator = handler.createMessage("test", [{ role: "user", content: "test" }]) + + for await (const _ of generator) { + // consume + } + + // Verify that createOpenRouter was NOT called with extraBody + expect(mockCreateOpenRouter).toHaveBeenCalledWith({ + apiKey: "test-key", + baseURL: "https://openrouter.ai/api/v1", + }) + + // Verify that providerOptions is undefined when no provider routing + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + providerOptions: undefined, + }), + ) + }) }) describe("completePrompt", () => { @@ -660,6 +750,41 @@ describe("OpenRouterHandler", () => { ) }) + it("passes reasoning parameters via extraBody when reasoning effort is enabled", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + reasoningEffort: "medium", + enableReasoningEffort: true, + }) + + mockGenerateText.mockResolvedValue({ + text: "test completion with reasoning", + }) + + const result = await handler.completePrompt("test prompt") + + expect(result).toBe("test completion with reasoning") + + // Verify that reasoning was passed via extraBody when creating the provider + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + reasoning: expect.objectContaining({ + effort: "medium", + }), + }), + }), + ) + + // Verify that providerOptions does NOT contain extended_thinking + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + providerOptions: undefined, + }), + ) + }) + it("handles API errors", async () => { const handler = new OpenRouterHandler(mockOptions) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 13ca17d0bb1..4c896056d34 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -86,14 +86,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH /** * Create the OpenRouter provider instance using the AI SDK + * @param reasoning - Optional reasoning parameters to pass via extraBody */ - private createOpenRouterProvider() { + private createOpenRouterProvider(reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean }) { const apiKey = this.options.openRouterApiKey ?? 
"not-provided" const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" return createOpenRouter({ apiKey, baseURL, + ...(reasoning && { extraBody: { reasoning } }), }) } @@ -182,14 +184,25 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH this.currentReasoningDetails = [] const model = await this.fetchModel() - const { id: modelId, maxTokens, temperature } = model + const { id: modelId, maxTokens, temperature, reasoning } = model - const openrouter = this.createOpenRouterProvider() + // Pass reasoning parameters to extraBody when creating the provider + const openrouter = this.createOpenRouterProvider(reasoning) const coreMessages = convertToAiSdkMessages(messages) const tools = convertToolsForAiSdk(metadata?.tools) // Build provider options for specific provider routing - const providerOptions = + const providerOptions: + | { + openrouter?: { + provider?: { + order: string[] + only: string[] + allow_fallbacks: boolean + } + } + } + | undefined = this.options.openRouterSpecificProvider && this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME ? { @@ -255,7 +268,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH model.info, ) yield usageChunk - } catch (error) { + } catch (error: any) { const errorMessage = error instanceof Error ? error.message : String(error) const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") TelemetryService.instance.captureException(apiError) @@ -325,12 +338,23 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } async completePrompt(prompt: string): Promise { - const { id: modelId, maxTokens, temperature } = await this.fetchModel() + const { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel() - const openrouter = this.createOpenRouterProvider() + // Pass reasoning parameters to extraBody when creating the provider + const openrouter = this.createOpenRouterProvider(reasoning) // Build provider options for specific provider routing - const providerOptions = + const providerOptions: + | { + openrouter?: { + provider?: { + order: string[] + only: string[] + allow_fallbacks: boolean + } + } + } + | undefined = this.options.openRouterSpecificProvider && this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME ? { diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index d926917762a..f973fc85a6d 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -81,7 +81,39 @@ describe("AI SDK conversion utilities", () => { }) }) - it("converts tool results into separate tool messages with resolved tool names", () => { + it("converts user messages with URL image content", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text", text: "What is in this image?" }, + { + type: "image", + source: { + type: "url", + url: "https://example.com/image.png", + }, + } as any, + ], + }, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + expect(result[0]).toEqual({ + role: "user", + content: [ + { type: "text", text: "What is in this image?" 
}, + { + type: "image", + image: "https://example.com/image.png", + }, + ], + }) + }) + + it("converts tool results into separate tool role messages with resolved tool names", () => { const messages: Anthropic.Messages.MessageParam[] = [ { role: "assistant", @@ -116,10 +148,11 @@ describe("AI SDK conversion utilities", () => { type: "tool-call", toolCallId: "call_123", toolName: "read_file", - args: { path: "test.ts" }, + input: { path: "test.ts" }, }, ], }) + // Tool results now go to role: "tool" messages per AI SDK v6 schema expect(result[1]).toEqual({ role: "tool", content: [ @@ -150,6 +183,7 @@ describe("AI SDK conversion utilities", () => { const result = convertToAiSdkMessages(messages) expect(result).toHaveLength(1) + // Tool results go to role: "tool" messages expect(result[0]).toEqual({ role: "tool", content: [ @@ -163,6 +197,68 @@ describe("AI SDK conversion utilities", () => { }) }) + it("separates tool results and text content into different messages", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_123", + name: "read_file", + input: { path: "test.ts" }, + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_123", + content: "File contents here", + }, + { + type: "text", + text: "Please analyze this file", + }, + ], + }, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(3) + expect(result[0]).toEqual({ + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_123", + toolName: "read_file", + input: { path: "test.ts" }, + }, + ], + }) + // Tool results go first in a "tool" message + expect(result[1]).toEqual({ + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_123", + toolName: "read_file", + output: { type: "text", value: "File contents here" }, + }, + ], + }) + // Text content goes in a separate "user" message + expect(result[2]).toEqual({ + role: "user", + content: [{ type: "text", text: "Please analyze this file" }], + }) + }) + it("converts assistant messages with tool use", () => { const messages: Anthropic.Messages.MessageParam[] = [ { @@ -190,7 +286,7 @@ describe("AI SDK conversion utilities", () => { type: "tool-call", toolCallId: "call_456", toolName: "read_file", - args: { path: "test.ts" }, + input: { path: "test.ts" }, }, ], }) @@ -476,6 +572,14 @@ describe("AI SDK conversion utilities", () => { expect(chunks[0]).toEqual({ type: "text", text: "Hello" }) }) + it("processes text chunks (fullStream format)", () => { + const part = { type: "text" as const, text: "Hello from fullStream" } + const chunks = [...processAiSdkStreamPart(part as any)] + + expect(chunks).toHaveLength(1) + expect(chunks[0]).toEqual({ type: "text", text: "Hello from fullStream" }) + }) + it("processes reasoning-delta chunks", () => { const part = { type: "reasoning-delta" as const, id: "1", text: "thinking..." } const chunks = [...processAiSdkStreamPart(part)] @@ -484,6 +588,14 @@ describe("AI SDK conversion utilities", () => { expect(chunks[0]).toEqual({ type: "reasoning", text: "thinking..." 
}) }) + it("processes reasoning chunks (fullStream format)", () => { + const part = { type: "reasoning" as const, text: "reasoning from fullStream" } + const chunks = [...processAiSdkStreamPart(part as any)] + + expect(chunks).toHaveLength(1) + expect(chunks[0]).toEqual({ type: "reasoning", text: "reasoning from fullStream" }) + }) + it("processes tool-input-start chunks", () => { const part = { type: "tool-input-start" as const, id: "call_1", toolName: "read_file" } const chunks = [...processAiSdkStreamPart(part)] From f3181ce73bfd6c1a65e6a923b4e7c070f8c8d353 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 9 Feb 2026 19:22:11 -0500 Subject: [PATCH 5/6] fix(openrouter): re-add model-specific handling for AI SDK migration - Re-add DeepSeek R1 format via convertToR1Format with extraBody override - Re-add Gemini sanitization (sanitizeGeminiMessages) and encrypted block injection - Re-add Gemini 2.5 Pro reasoning exclusion when not explicitly configured - Re-add prompt caching (addAnthropicCacheBreakpoints, addGeminiCacheBreakpoints) - Re-add Anthropic beta headers (x-anthropic-beta: fine-grained-tool-streaming) - Re-add reasoning_details handling via consolidateReasoningDetails - Fix topP parameter passthrough to streamText() and generateText() - Fix duplicate toolCallIdToName in ai-sdk.ts from rebase - Update tests for new model-specific behavior and add 9 new tests --- .../providers/__tests__/openrouter.spec.ts | 236 ++++++++++++++-- src/api/providers/openrouter.ts | 252 +++++++++--------- src/api/transform/ai-sdk.ts | 12 - 3 files changed, 345 insertions(+), 155 deletions(-) diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index 7dbe069771c..2db157f3b4e 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -108,6 +108,25 @@ vitest.mock("../fetchers/modelCache", () => ({ excludedTools: ["existing_excluded"], includedTools: ["existing_included"], }, + "google/gemini-2.5-pro": { + maxTokens: 65536, + contextWindow: 1048576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.25, + outputPrice: 10, + description: "Gemini 2.5 Pro", + thinking: true, + }, + "google/gemini-2.5-flash": { + maxTokens: 65536, + contextWindow: 1048576, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 0.15, + outputPrice: 0.6, + description: "Gemini 2.5 Flash", + }, }) }), getModelsFromCache: vitest.fn().mockReturnValue(null), @@ -275,7 +294,6 @@ describe("OpenRouterHandler", () => { // Verify streamText was called with correct parameters expect(mockStreamText).toHaveBeenCalledWith( expect.objectContaining({ - system: systemPrompt, messages: expect.any(Array), maxOutputTokens: 8192, temperature: 0, @@ -516,7 +534,7 @@ describe("OpenRouterHandler", () => { expect(chunks[3]).toEqual({ type: "tool_call_end", id: "call_1" }) }) - it("handles complete tool call events", async () => { + it("ignores tool-call events (handled by tool-input-start/delta/end)", async () => { const handler = new OpenRouterHandler(mockOptions) const mockFullStream = (async function* () { @@ -541,12 +559,10 @@ describe("OpenRouterHandler", () => { chunks.push(chunk) } - expect(chunks[0]).toEqual({ - type: "tool_call", - id: "call_1", - name: "read_file", - arguments: '{"path":"test.ts"}', - }) + // tool-call is intentionally ignored by processAiSdkStreamPart, + // only usage chunk should be present + expect(chunks).toHaveLength(1) + expect(chunks[0]).toMatchObject({ type: "usage" }) }) 
it("handles API errors gracefully", async () => { @@ -715,11 +731,13 @@ describe("OpenRouterHandler", () => { // consume } - // Verify that createOpenRouter was NOT called with extraBody - expect(mockCreateOpenRouter).toHaveBeenCalledWith({ - apiKey: "test-key", - baseURL: "https://openrouter.ai/api/v1", - }) + // Verify that createOpenRouter was called with correct base config + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: "test-key", + baseURL: "https://openrouter.ai/api/v1", + }), + ) // Verify that providerOptions is undefined when no provider routing expect(mockStreamText).toHaveBeenCalledWith( @@ -852,10 +870,12 @@ describe("OpenRouterHandler", () => { // consume } - expect(mockCreateOpenRouter).toHaveBeenCalledWith({ - apiKey: "custom-key", - baseURL: "https://custom.openrouter.ai/api/v1", - }) + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: "custom-key", + baseURL: "https://custom.openrouter.ai/api/v1", + }), + ) }) it("uses default base URL when not specified", async () => { @@ -877,10 +897,12 @@ describe("OpenRouterHandler", () => { // consume } - expect(mockCreateOpenRouter).toHaveBeenCalledWith({ - apiKey: "test-key", - baseURL: "https://openrouter.ai/api/v1", - }) + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: "test-key", + baseURL: "https://openrouter.ai/api/v1", + }), + ) }) }) @@ -964,4 +986,176 @@ describe("OpenRouterHandler", () => { expect(reasoningDetails).toBeUndefined() }) }) + + describe("model-specific handling", () => { + const mockStreamResult = () => { + const mockFullStream = (async function* () { + yield { type: "text-delta", text: "response", id: "1" } + })() + mockStreamText.mockReturnValue({ + fullStream: mockFullStream, + usage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + totalUsage: Promise.resolve({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + }) + } + + const consumeGenerator = async ( + handler: any, + system = "test", + msgs: any[] = [{ role: "user", content: "test" }], + ) => { + const generator = handler.createMessage(system, msgs) + for await (const _ of generator) { + // consume + } + } + + it("passes topP for DeepSeek R1 models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + topP: 0.95, + }), + ) + }) + + it("does not pass topP for non-R1 models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "openai/gpt-4o", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockStreamText).toHaveBeenCalledWith( + expect.objectContaining({ + topP: undefined, + }), + ) + }) + + it("uses R1 format for DeepSeek R1 models (extraBody.messages)", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + }) + mockStreamResult() + await consumeGenerator(handler, "system prompt") + + // R1 models should pass OpenAI messages via extraBody (including system as user message) + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + messages: expect.any(Array), + }), + }), + ) + + // System prompt should NOT be passed to streamText (it is in extraBody.messages) + 
const streamTextCall = mockStreamText.mock.calls[0][0] + expect(streamTextCall.system).toBeUndefined() + }) + + it("applies Anthropic beta headers for Anthropic models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" }, + }), + ) + }) + + it("does not apply Anthropic beta headers for non-Anthropic models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "openai/gpt-4o", + }) + mockStreamResult() + await consumeGenerator(handler) + + const call = mockCreateOpenRouter.mock.calls[0][0] + expect(call.headers).toBeUndefined() + }) + + it("applies prompt caching for Anthropic models in caching set", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "anthropic/claude-sonnet-4", + }) + mockStreamResult() + await consumeGenerator(handler) + + // Should have extraBody.messages with cache_control applied + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + messages: expect.arrayContaining([expect.objectContaining({ role: "system" })]), + }), + }), + ) + }) + + it("disables reasoning for Gemini 2.5 Pro when not explicitly configured", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "google/gemini-2.5-pro", + }) + mockStreamResult() + await consumeGenerator(handler) + + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + reasoning: { exclude: true }, + }), + }), + ) + }) + + it("applies Gemini sanitization and encrypted block injection", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "google/gemini-2.5-flash", + }) + mockStreamResult() + await consumeGenerator(handler) + + // Gemini models should have extraBody.messages set (via buildOpenAiMessages) + expect(mockCreateOpenRouter).toHaveBeenCalledWith( + expect.objectContaining({ + extraBody: expect.objectContaining({ + messages: expect.any(Array), + }), + }), + ) + }) + + it("passes topP to completePrompt for R1 models", async () => { + const handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "deepseek/deepseek-r1", + }) + mockGenerateText.mockResolvedValue({ text: "completion" }) + + await handler.completePrompt("test prompt") + + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + topP: 0.95, + }), + ) + }) + }) }) diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 4c896056d34..c01f34161ca 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -1,4 +1,5 @@ import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" import { createOpenRouter } from "@openrouter/ai-sdk-provider" import { streamText, generateText } from "ai" @@ -8,6 +9,7 @@ import { openRouterDefaultModelId, openRouterDefaultModelInfo, OPENROUTER_DEFAULT_PROVIDER_NAME, + OPEN_ROUTER_PROMPT_CACHING_MODELS, DEEP_SEEK_DEFAULT_TEMPERATURE, ApiProviderError, } from "@roo-code/types" @@ -16,36 +18,27 @@ import { TelemetryService } from "@roo-code/telemetry" import type { 
ApiHandlerOptions } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" +import { + convertToOpenAiMessages, + sanitizeGeminiMessages, + consolidateReasoningDetails, + type ReasoningDetail, +} from "../transform/openai-format" +import { convertToR1Format } from "../transform/r1-format" +import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic" +import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" +import { getModelParams } from "../transform/model-params" +import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk" + import { BaseProvider } from "./base-provider" import { getModels, getModelsFromCache } from "./fetchers/modelCache" import { getModelEndpoints } from "./fetchers/modelEndpointCache" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" -import { getModelParams } from "../transform/model-params" -import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk" import { generateImageWithProvider, ImageGenerationResult } from "./utils/image-generation" import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index" import type { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream" -/** - * Reasoning detail structure for preserving reasoning context across multi-turn conversations. - * Used by models like Gemini 3 that provide structured reasoning information. - */ -interface ReasoningDetail { - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index: number -} - -/** - * OpenRouter handler using the Vercel AI SDK. - * This provides a standardized interface following the AI SDK provider pattern. - */ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions protected models: ModelRecord = {} @@ -56,8 +49,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH constructor(options: ApiHandlerOptions) { super() this.options = options - - // Load models asynchronously to populate cache before getModel() is called this.loadDynamicModels().catch((error) => { console.error("[OpenRouterHandler] Failed to load dynamic models:", error) }) @@ -73,7 +64,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH endpoint: this.options.openRouterSpecificProvider, }), ]) - this.models = models this.endpoints = endpoints } catch (error) { @@ -84,41 +74,32 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } - /** - * Create the OpenRouter provider instance using the AI SDK - * @param reasoning - Optional reasoning parameters to pass via extraBody - */ - private createOpenRouterProvider(reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean }) { + private createOpenRouterProvider(options?: { + reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean } + headers?: Record + openAiMessages?: OpenAI.Chat.ChatCompletionMessageParam[] + }) { const apiKey = this.options.openRouterApiKey ?? 
"not-provided" const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" - + const extraBody: Record = {} + if (options?.reasoning) { + extraBody.reasoning = options.reasoning + } + if (options?.openAiMessages) { + extraBody.messages = options.openAiMessages + } return createOpenRouter({ apiKey, baseURL, - ...(reasoning && { extraBody: { reasoning } }), + ...(Object.keys(extraBody).length > 0 && { extraBody }), + ...(options?.headers && { headers: options.headers }), }) } - /** - * Get the accumulated reasoning details from the current streaming session. - * These details are used by Task.ts to preserve reasoning context across multi-turn - * conversations with models like Gemini 3. - * - * @returns Array of reasoning details if available, undefined otherwise - */ getReasoningDetails(): ReasoningDetail[] | undefined { return this.currentReasoningDetails.length > 0 ? this.currentReasoningDetails : undefined } - /** - * Normalize usage data from the AI SDK response into the ApiStreamUsageChunk format. - * Extracts detailed usage information including cache tokens, reasoning tokens, and calculates cost. - * - * @param usage - Basic usage from AI SDK (inputTokens, outputTokens) - * @param providerMetadata - Provider-specific metadata that may contain extended usage info - * @param modelInfo - Model information for cost calculation - * @returns Normalized ApiStreamUsageChunk with all available usage metrics - */ private normalizeUsage( usage: { inputTokens: number; outputTokens: number }, providerMetadata: Record | undefined, @@ -126,36 +107,23 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH ): ApiStreamUsageChunk { const inputTokens = usage.inputTokens ?? 0 const outputTokens = usage.outputTokens ?? 0 - - // Extract OpenRouter-specific metadata - // The AI SDK exposes provider metadata under the provider key const openrouterMeta = providerMetadata?.openrouter ?? {} - - // Extract cache tokens from various possible locations - // OpenRouter AI SDK may provide: cachedInputTokens, cache_read_input_tokens, etc. const cacheReadTokens = openrouterMeta.cachedInputTokens ?? openrouterMeta.cache_read_input_tokens ?? openrouterMeta.cacheReadTokens ?? openrouterMeta.cached_tokens ?? 0 - const cacheWriteTokens = openrouterMeta.cacheCreationInputTokens ?? openrouterMeta.cache_creation_input_tokens ?? openrouterMeta.cacheWriteTokens ?? 0 - - // Extract reasoning tokens from output token details - // OpenRouter AI SDK may provide: reasoningOutputTokens, output_tokens_details.reasoning_tokens const reasoningTokens = openrouterMeta.reasoningOutputTokens ?? openrouterMeta.reasoning_tokens ?? openrouterMeta.output_tokens_details?.reasoning_tokens ?? 
undefined - - // Calculate cost using model pricing information - // OpenRouter follows the OpenAI convention where input tokens include cached tokens const { totalCost } = calculateApiCostOpenAI( modelInfo, inputTokens, @@ -163,7 +131,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH cacheWriteTokens, cacheReadTokens, ) - return { type: "usage", inputTokens, @@ -175,31 +142,93 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } + private buildOpenAiMessages( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + modelId: string, + ): OpenAI.Chat.ChatCompletionMessageParam[] | undefined { + const isR1 = modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning" + const isGemini = modelId.startsWith("google/gemini") + const needsCaching = OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId) + if (!isR1 && !isGemini && !needsCaching) { + return undefined + } + let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] + if (isR1) { + openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) + } else { + openAiMessages = [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)] + } + if (isGemini) { + openAiMessages = sanitizeGeminiMessages(openAiMessages, modelId) + openAiMessages = openAiMessages.map((msg) => { + if (msg.role === "assistant") { + const toolCalls = (msg as any).tool_calls as any[] | undefined + const existingDetails = (msg as any).reasoning_details as any[] | undefined + if (toolCalls && toolCalls.length > 0) { + const hasEncrypted = existingDetails?.some((d) => d.type === "reasoning.encrypted") ?? false + if (!hasEncrypted) { + const fakeEncrypted = { + type: "reasoning.encrypted", + data: "skip_thought_signature_validator", + id: toolCalls[0].id, + format: "google-gemini-v1", + index: 0, + } + return { + ...msg, + reasoning_details: [...(existingDetails ?? []), fakeEncrypted], + } + } + } + } + return msg + }) + } + if (needsCaching) { + if (modelId.startsWith("google/")) { + addGeminiCacheBreakpoints(systemPrompt, openAiMessages) + } else { + addAnthropicCacheBreakpoints(systemPrompt, openAiMessages) + } + } + return openAiMessages + } + override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): AsyncGenerator { - // Reset reasoning details accumulator for this request this.currentReasoningDetails = [] - const model = await this.fetchModel() - const { id: modelId, maxTokens, temperature, reasoning } = model + let { id: modelId, maxTokens, temperature, topP, reasoning } = model + + if ( + (modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") && + typeof reasoning === "undefined" + ) { + reasoning = { exclude: true } + } + + const isAnthropic = modelId.startsWith("anthropic/") + const headers: Record | undefined = isAnthropic + ? { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } + : undefined + + const openAiMessages = this.buildOpenAiMessages(systemPrompt, messages, modelId) + const openrouter = this.createOpenRouterProvider({ reasoning, headers, openAiMessages }) + + const coreMessages = openAiMessages + ? convertToAiSdkMessages([{ role: "user", content: "." 
}]) + : convertToAiSdkMessages(messages) - // Pass reasoning parameters to extraBody when creating the provider - const openrouter = this.createOpenRouterProvider(reasoning) - const coreMessages = convertToAiSdkMessages(messages) const tools = convertToolsForAiSdk(metadata?.tools) - // Build provider options for specific provider routing const providerOptions: | { openrouter?: { - provider?: { - order: string[] - only: string[] - allow_fallbacks: boolean - } + provider?: { order: string[]; only: string[]; allow_fallbacks: boolean } } } | undefined = @@ -216,32 +245,28 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } : undefined - // Accumulator for reasoning text to build a single reasoning detail let accumulatedReasoningText = "" try { const result = streamText({ model: openrouter.chat(modelId), - system: systemPrompt, + ...(openAiMessages ? {} : { system: systemPrompt }), messages: coreMessages, maxOutputTokens: maxTokens && maxTokens > 0 ? maxTokens : undefined, temperature, + topP, tools, toolChoice: metadata?.tool_choice as any, providerOptions, }) - // Process the full stream for all event types for await (const part of result.fullStream) { - // Capture reasoning text for accumulation if (part.type === "reasoning-delta") { accumulatedReasoningText += part.text } - yield* processAiSdkStreamPart(part) } - // After streaming completes, store accumulated reasoning as a detail if (accumulatedReasoningText) { this.currentReasoningDetails.push({ type: "reasoning.text", @@ -250,15 +275,19 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }) } - // After streaming completes, yield usage information with detailed metrics - const usage = await result.usage - const totalUsage = await result.totalUsage - // Access provider metadata for extended usage information (cache tokens, reasoning tokens, etc.) - // The AI SDK provides this through providerMetadata or experimental_providerMetadata const providerMetadata = (await result.providerMetadata) ?? (await (result as any).experimental_providerMetadata) - // Normalize and yield usage with all available metrics + const providerReasoningDetails = providerMetadata?.openrouter?.reasoning_details as + | ReasoningDetail[] + | undefined + + if (providerReasoningDetails && providerReasoningDetails.length > 0) { + this.currentReasoningDetails = consolidateReasoningDetails(providerReasoningDetails) + } + + const usage = await result.usage + const totalUsage = await result.totalUsage const usageChunk = this.normalizeUsage( { inputTokens: totalUsage.inputTokens ?? usage.inputTokens ?? 0, @@ -289,43 +318,29 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH endpoint: this.options.openRouterSpecificProvider, }), ]) - this.models = models this.endpoints = endpoints - return this.getModel() } override getModel() { const id = this.options.openRouterModelId ?? 
openRouterDefaultModelId - - // First check instance models (populated by fetchModel) let info = this.models[id] - if (!info) { - // Fall back to global cache const cachedModels = getModelsFromCache("openrouter") if (cachedModels?.[id]) { this.models = cachedModels info = cachedModels[id] } } - - // If a specific provider is requested, use the endpoint for that provider if (this.options.openRouterSpecificProvider && this.endpoints[this.options.openRouterSpecificProvider]) { info = this.endpoints[this.options.openRouterSpecificProvider] } - - // Fall back to default if nothing found if (!info) { info = openRouterDefaultModelInfo } - - // Apply tool preferences for models accessed through routers info = applyRouterToolPreferences(id, info) - const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning" - const params = getModelParams({ format: "openrouter", modelId: id, @@ -333,25 +348,30 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH settings: this.options, defaultTemperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0, }) - return { id, info, topP: isDeepSeekR1 ? 0.95 : undefined, ...params } } async completePrompt(prompt: string): Promise { - const { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel() + let { id: modelId, maxTokens, temperature, topP, reasoning } = await this.fetchModel() + + if ( + (modelId === "google/gemini-2.5-pro-preview" || modelId === "google/gemini-2.5-pro") && + typeof reasoning === "undefined" + ) { + reasoning = { exclude: true } + } + + const isAnthropic = modelId.startsWith("anthropic/") + const headers: Record | undefined = isAnthropic + ? { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } + : undefined - // Pass reasoning parameters to extraBody when creating the provider - const openrouter = this.createOpenRouterProvider(reasoning) + const openrouter = this.createOpenRouterProvider({ reasoning, headers }) - // Build provider options for specific provider routing const providerOptions: | { openrouter?: { - provider?: { - order: string[] - only: string[] - allow_fallbacks: boolean - } + provider?: { order: string[]; only: string[]; allow_fallbacks: boolean } } } | undefined = @@ -374,9 +394,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH prompt, maxOutputTokens: maxTokens && maxTokens > 0 ? maxTokens : undefined, temperature, + topP, providerOptions, }) - return result.text } catch (error) { const errorMessage = error instanceof Error ? 
error.message : String(error) @@ -386,15 +406,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } - /** - * Generate an image using OpenRouter's image generation API (chat completions with modalities) - * Note: OpenRouter only supports the chat completions approach, not the /images/generations endpoint - * @param prompt The text prompt for image generation - * @param model The model to use for generation - * @param apiKey The OpenRouter API key (must be explicitly provided) - * @param inputImage Optional base64 encoded input image data URL - * @returns The generated image data and format, or an error - */ async generateImage( prompt: string, model: string, @@ -407,10 +418,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH error: "OpenRouter API key is required for image generation", } } - const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" - - // OpenRouter only supports chat completions approach for image generation return generateImageWithProvider({ baseURL, authToken: apiKey, diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index aadd740239a..c673fad3d27 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -45,18 +45,6 @@ export function convertToAiSdkMessages( } } - // First pass: build a map of tool call IDs to tool names from assistant messages - const toolCallIdToName = new Map() - for (const message of messages) { - if (message.role === "assistant" && typeof message.content !== "string") { - for (const part of message.content) { - if (part.type === "tool_use") { - toolCallIdToName.set(part.id, part.name) - } - } - } - } - for (const message of messages) { if (typeof message.content === "string") { modelMessages.push({ From 3f935af5f313ce405046f346978335da793e589a Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Mon, 9 Feb 2026 20:14:03 -0700 Subject: [PATCH 6/6] refactor: remove all manual OpenAI message transforms from OpenRouter provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove buildOpenAiMessages(), extraBody.messages hack, and dummy messages - Wire reasoning_details through AI SDK natively via providerOptions.openrouter - Filter schema-invalid reasoning_details entries (malformed encrypted blocks) - Filter [REDACTED] from thinking UI stream (upstream provider behavior) - Remove unused imports: convertToOpenAiMessages, sanitizeGeminiMessages, consolidateReasoningDetails, convertToR1Format, addAnthropicCacheBreakpoints, addGeminiCacheBreakpoints - All models now use convertToAiSdkMessages() → streamText() natively - Prompt caching deferred (needs providerOptions.openrouter.cacheControl impl) --- .../providers/__tests__/openrouter.spec.ts | 66 ++++---- src/api/providers/openrouter.ts | 84 +--------- src/api/transform/__tests__/ai-sdk.spec.ts | 147 ++++++++++++++++++ .../transform/__tests__/model-params.spec.ts | 22 ++- src/api/transform/ai-sdk.ts | 44 +++++- src/api/transform/model-params.ts | 10 +- 6 files changed, 258 insertions(+), 115 deletions(-) diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index 2db157f3b4e..ba039459202 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -1040,7 +1040,7 @@ describe("OpenRouterHandler", () => { ) }) - it("uses R1 format for DeepSeek R1 models (extraBody.messages)", async () => { + it("does not use R1 format for 
DeepSeek R1 models (uses standard AI SDK path)", async () => { const handler = new OpenRouterHandler({ openRouterApiKey: "test-key", openRouterModelId: "deepseek/deepseek-r1", @@ -1048,18 +1048,13 @@ describe("OpenRouterHandler", () => { mockStreamResult() await consumeGenerator(handler, "system prompt") - // R1 models should pass OpenAI messages via extraBody (including system as user message) - expect(mockCreateOpenRouter).toHaveBeenCalledWith( - expect.objectContaining({ - extraBody: expect.objectContaining({ - messages: expect.any(Array), - }), - }), - ) + // R1 models should NOT pass extraBody.messages (R1 format conversion removed) + const providerCall = mockCreateOpenRouter.mock.calls[0][0] + expect(providerCall?.extraBody?.messages).toBeUndefined() - // System prompt should NOT be passed to streamText (it is in extraBody.messages) + // System prompt should be passed normally via streamText const streamTextCall = mockStreamText.mock.calls[0][0] - expect(streamTextCall.system).toBeUndefined() + expect(streamTextCall.system).toBe("system prompt") }) it("applies Anthropic beta headers for Anthropic models", async () => { @@ -1089,7 +1084,7 @@ describe("OpenRouterHandler", () => { expect(call.headers).toBeUndefined() }) - it("applies prompt caching for Anthropic models in caching set", async () => { + it("passes system prompt directly for Anthropic models (no caching transform)", async () => { const handler = new OpenRouterHandler({ openRouterApiKey: "test-key", openRouterModelId: "anthropic/claude-sonnet-4", @@ -1097,14 +1092,13 @@ describe("OpenRouterHandler", () => { mockStreamResult() await consumeGenerator(handler) - // Should have extraBody.messages with cache_control applied - expect(mockCreateOpenRouter).toHaveBeenCalledWith( - expect.objectContaining({ - extraBody: expect.objectContaining({ - messages: expect.arrayContaining([expect.objectContaining({ role: "system" })]), - }), - }), - ) + // System prompt should be passed directly via streamText + const streamTextCall = mockStreamText.mock.calls[0][0] + expect(streamTextCall.system).toBe("test") + + // Messages should be the converted AI SDK messages (no system-role message injected) + const systemMsgs = streamTextCall.messages.filter((m: any) => m.role === "system") + expect(systemMsgs).toHaveLength(0) }) it("disables reasoning for Gemini 2.5 Pro when not explicitly configured", async () => { @@ -1124,7 +1118,7 @@ describe("OpenRouterHandler", () => { ) }) - it("applies Gemini sanitization and encrypted block injection", async () => { + it("passes system prompt directly for Gemini models (no caching transform)", async () => { const handler = new OpenRouterHandler({ openRouterApiKey: "test-key", openRouterModelId: "google/gemini-2.5-flash", @@ -1132,14 +1126,28 @@ describe("OpenRouterHandler", () => { mockStreamResult() await consumeGenerator(handler) - // Gemini models should have extraBody.messages set (via buildOpenAiMessages) - expect(mockCreateOpenRouter).toHaveBeenCalledWith( - expect.objectContaining({ - extraBody: expect.objectContaining({ - messages: expect.any(Array), - }), - }), - ) + // System prompt should be passed directly via streamText + const streamTextCall = mockStreamText.mock.calls[0][0] + expect(streamTextCall.system).toBe("test") + + // No system-role message should be injected + const systemMsgs = streamTextCall.messages.filter((m: any) => m.role === "system") + expect(systemMsgs).toHaveLength(0) + }) + + it("does not use extraBody.messages for Gemini models outside caching set", async () => { + const 
handler = new OpenRouterHandler({ + openRouterApiKey: "test-key", + openRouterModelId: "google/gemini-3-pro-preview", + }) + mockStreamResult() + await consumeGenerator(handler) + + // Non-caching Gemini models should go through the AI SDK natively + // (no extraBody.messages — reasoning_details are wired via providerOptions) + const callArgs = mockCreateOpenRouter.mock.calls[0]?.[0] ?? {} + const extraBody = callArgs.extraBody ?? {} + expect(extraBody.messages).toBeUndefined() }) it("passes topP to completePrompt for R1 models", async () => { diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index c01f34161ca..d48fc4bb430 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -1,5 +1,4 @@ import { Anthropic } from "@anthropic-ai/sdk" -import OpenAI from "openai" import { createOpenRouter } from "@openrouter/ai-sdk-provider" import { streamText, generateText } from "ai" @@ -9,7 +8,6 @@ import { openRouterDefaultModelId, openRouterDefaultModelInfo, OPENROUTER_DEFAULT_PROVIDER_NAME, - OPEN_ROUTER_PROMPT_CACHING_MODELS, DEEP_SEEK_DEFAULT_TEMPERATURE, ApiProviderError, } from "@roo-code/types" @@ -18,15 +16,7 @@ import { TelemetryService } from "@roo-code/telemetry" import type { ApiHandlerOptions } from "../../shared/api" import { calculateApiCostOpenAI } from "../../shared/cost" -import { - convertToOpenAiMessages, - sanitizeGeminiMessages, - consolidateReasoningDetails, - type ReasoningDetail, -} from "../transform/openai-format" -import { convertToR1Format } from "../transform/r1-format" -import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic" -import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" +import { type ReasoningDetail } from "../transform/openai-format" import { getModelParams } from "../transform/model-params" import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk" @@ -77,7 +67,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH private createOpenRouterProvider(options?: { reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean } headers?: Record - openAiMessages?: OpenAI.Chat.ChatCompletionMessageParam[] }) { const apiKey = this.options.openRouterApiKey ?? 
"not-provided" const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1" @@ -85,9 +74,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH if (options?.reasoning) { extraBody.reasoning = options.reasoning } - if (options?.openAiMessages) { - extraBody.messages = options.openAiMessages - } return createOpenRouter({ apiKey, baseURL, @@ -142,59 +128,6 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } } - private buildOpenAiMessages( - systemPrompt: string, - messages: Anthropic.Messages.MessageParam[], - modelId: string, - ): OpenAI.Chat.ChatCompletionMessageParam[] | undefined { - const isR1 = modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning" - const isGemini = modelId.startsWith("google/gemini") - const needsCaching = OPEN_ROUTER_PROMPT_CACHING_MODELS.has(modelId) - if (!isR1 && !isGemini && !needsCaching) { - return undefined - } - let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] - if (isR1) { - openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) - } else { - openAiMessages = [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)] - } - if (isGemini) { - openAiMessages = sanitizeGeminiMessages(openAiMessages, modelId) - openAiMessages = openAiMessages.map((msg) => { - if (msg.role === "assistant") { - const toolCalls = (msg as any).tool_calls as any[] | undefined - const existingDetails = (msg as any).reasoning_details as any[] | undefined - if (toolCalls && toolCalls.length > 0) { - const hasEncrypted = existingDetails?.some((d) => d.type === "reasoning.encrypted") ?? false - if (!hasEncrypted) { - const fakeEncrypted = { - type: "reasoning.encrypted", - data: "skip_thought_signature_validator", - id: toolCalls[0].id, - format: "google-gemini-v1", - index: 0, - } - return { - ...msg, - reasoning_details: [...(existingDetails ?? []), fakeEncrypted], - } - } - } - } - return msg - }) - } - if (needsCaching) { - if (modelId.startsWith("google/")) { - addGeminiCacheBreakpoints(systemPrompt, openAiMessages) - } else { - addAnthropicCacheBreakpoints(systemPrompt, openAiMessages) - } - } - return openAiMessages - } - override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], @@ -216,12 +149,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH ? { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } : undefined - const openAiMessages = this.buildOpenAiMessages(systemPrompt, messages, modelId) - const openrouter = this.createOpenRouterProvider({ reasoning, headers, openAiMessages }) + const aiSdkMessages = convertToAiSdkMessages(messages) - const coreMessages = openAiMessages - ? convertToAiSdkMessages([{ role: "user", content: "." }]) - : convertToAiSdkMessages(messages) + const openrouter = this.createOpenRouterProvider({ reasoning, headers }) const tools = convertToolsForAiSdk(metadata?.tools) @@ -250,8 +180,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH try { const result = streamText({ model: openrouter.chat(modelId), - ...(openAiMessages ? {} : { system: systemPrompt }), - messages: coreMessages, + system: systemPrompt, + messages: aiSdkMessages, maxOutputTokens: maxTokens && maxTokens > 0 ? 
maxTokens : undefined, temperature, topP, @@ -261,7 +191,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }) for await (const part of result.fullStream) { - if (part.type === "reasoning-delta") { + if (part.type === "reasoning-delta" && part.text !== "[REDACTED]") { accumulatedReasoningText += part.text } yield* processAiSdkStreamPart(part) @@ -283,7 +213,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH | undefined if (providerReasoningDetails && providerReasoningDetails.length > 0) { - this.currentReasoningDetails = consolidateReasoningDetails(providerReasoningDetails) + this.currentReasoningDetails = providerReasoningDetails } const usage = await result.usage diff --git a/src/api/transform/__tests__/ai-sdk.spec.ts b/src/api/transform/__tests__/ai-sdk.spec.ts index f973fc85a6d..0676f59a2c2 100644 --- a/src/api/transform/__tests__/ai-sdk.spec.ts +++ b/src/api/transform/__tests__/ai-sdk.spec.ts @@ -501,6 +501,132 @@ describe("AI SDK conversion utilities", () => { expect(toolCallPart).toBeDefined() expect(toolCallPart.providerOptions).toBeUndefined() }) + + it("attaches valid reasoning_details as providerOptions.openrouter, filtering invalid entries", () => { + const validEncrypted = { + type: "reasoning.encrypted", + data: "encrypted_blob_data", + id: "tool_call_123", + format: "google-gemini-v1", + index: 0, + } + const invalidEncrypted = { + // type is "reasoning.encrypted" but has text instead of data — + // this is a plaintext summary mislabeled as encrypted by Gemini/OpenRouter. + // The provider's ReasoningDetailEncryptedSchema requires `data: string`, + // so including this causes the entire Zod safeParse to fail. + type: "reasoning.encrypted", + text: "Plaintext reasoning summary", + id: "tool_call_123", + format: "google-gemini-v1", + index: 0, + } + const textWithSignature = { + type: "reasoning.text", + text: "Some reasoning content", + signature: "stale-signature-from-previous-model", + } + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { type: "text", text: "Using a tool" }, + { + type: "tool_use", + id: "tool_call_123", + name: "attempt_completion", + input: { result: "done" }, + }, + ], + reasoning_details: [validEncrypted, invalidEncrypted, textWithSignature], + } as any, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + expect(assistantMsg.role).toBe("assistant") + expect(assistantMsg.providerOptions).toBeDefined() + expect(assistantMsg.providerOptions.openrouter).toBeDefined() + const details = assistantMsg.providerOptions.openrouter.reasoning_details + // Only the valid entries should survive filtering (invalidEncrypted dropped) + expect(details).toHaveLength(2) + expect(details[0]).toEqual(validEncrypted) + // Signatures should be preserved as-is for same-model Anthropic conversations via OpenRouter + expect(details[1]).toEqual(textWithSignature) + }) + + it("does not attach providerOptions when no reasoning_details are present", () => { + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [{ type: "text", text: "Just text" }], + }, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + expect(assistantMsg.providerOptions).toBeUndefined() + }) + + it("does not attach providerOptions when reasoning_details is an empty array", () => { + const messages: 
Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [{ type: "text", text: "Just text" }], + reasoning_details: [], + } as any, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + expect(assistantMsg.providerOptions).toBeUndefined() + }) + + it("preserves both reasoning_details and thoughtSignature providerOptions", () => { + const reasoningDetails = [ + { + type: "reasoning.encrypted", + data: "encrypted_data", + id: "tool_call_abc", + format: "google-gemini-v1", + index: 0, + }, + ] + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "assistant", + content: [ + { type: "thoughtSignature", thoughtSignature: "sig-xyz" } as any, + { type: "text", text: "Using tool" }, + { + type: "tool_use", + id: "tool_call_abc", + name: "read_file", + input: { path: "test.ts" }, + }, + ], + reasoning_details: reasoningDetails, + } as any, + ] + + const result = convertToAiSdkMessages(messages) + + expect(result).toHaveLength(1) + const assistantMsg = result[0] as any + // Message-level providerOptions carries reasoning_details + expect(assistantMsg.providerOptions.openrouter.reasoning_details).toEqual(reasoningDetails) + // Part-level providerOptions carries thoughtSignature on the first tool-call + const toolCallPart = assistantMsg.content.find((p: any) => p.type === "tool-call") + expect(toolCallPart.providerOptions.google.thoughtSignature).toBe("sig-xyz") + }) }) describe("convertToolsForAiSdk", () => { @@ -686,6 +812,27 @@ describe("AI SDK conversion utilities", () => { expect(chunks).toHaveLength(0) } }) + it("should filter [REDACTED] from reasoning-delta parts", () => { + const redactedPart = { type: "reasoning-delta" as const, text: "[REDACTED]" } + const normalPart = { type: "reasoning-delta" as const, text: "actual reasoning" } + + const redactedResult = [...processAiSdkStreamPart(redactedPart as any)] + const normalResult = [...processAiSdkStreamPart(normalPart as any)] + + expect(redactedResult).toEqual([]) + expect(normalResult).toEqual([{ type: "reasoning", text: "actual reasoning" }]) + }) + + it("should filter [REDACTED] from reasoning (fullStream format) parts", () => { + const redactedPart = { type: "reasoning" as const, text: "[REDACTED]" } + const normalPart = { type: "reasoning" as const, text: "actual reasoning" } + + const redactedResult = [...processAiSdkStreamPart(redactedPart as any)] + const normalResult = [...processAiSdkStreamPart(normalPart as any)] + + expect(redactedResult).toEqual([]) + expect(normalResult).toEqual([{ type: "reasoning", text: "actual reasoning" }]) + }) }) describe("mapToolChoice", () => { diff --git a/src/api/transform/__tests__/model-params.spec.ts b/src/api/transform/__tests__/model-params.spec.ts index a50f1291bef..f32cbc606b8 100644 --- a/src/api/transform/__tests__/model-params.spec.ts +++ b/src/api/transform/__tests__/model-params.spec.ts @@ -830,9 +830,29 @@ describe("getModelParams", () => { expect(result.maxTokens).toBe(20000) expect(result.reasoningBudget).toBe(10000) - expect(result.temperature).toBe(1.0) // Overridden for reasoning budget models + expect(result.temperature).toBe(0.8) // User-specified temperature is respected expect(result.reasoningEffort).toBeUndefined() // Budget takes precedence }) + + it("should default to temperature 1.0 for reasoning budget models when no custom temperature is set", () => { + const model: ModelInfo = { + ...baseModel, + maxTokens: 16000, + supportsReasoningBudget: true, + } + + const 
result = getModelParams({ + ...anthropicParams, + settings: { + enableReasoningEffort: true, + modelMaxTokens: 20000, + }, + model, + }) + + expect(result.temperature).toBe(1.0) // Defaults to 1.0 when no custom temperature + expect(result.reasoningBudget).toBeDefined() + }) }) describe("Provider-specific reasoning behavior", () => { diff --git a/src/api/transform/ai-sdk.ts b/src/api/transform/ai-sdk.ts index c673fad3d27..3c4bc6d8f9c 100644 --- a/src/api/transform/ai-sdk.ts +++ b/src/api/transform/ai-sdk.ts @@ -205,7 +205,7 @@ export function convertToAiSdkMessages( if (typeof thinkingPart.thinking === "string" && thinkingPart.thinking.length > 0) { reasoningParts.push(thinkingPart.thinking) } - // Capture the signature for round-tripping (Anthropic/Bedrock thinking) + // Capture the signature for round-tripping (Anthropic/Bedrock thinking). if (thinkingPart.signature) { thinkingSignature = thinkingPart.signature } @@ -249,10 +249,40 @@ export function convertToAiSdkMessages( } content.push(...toolCalls) - modelMessages.push({ + // Carry reasoning_details through to providerOptions for OpenRouter round-tripping + // (used by Gemini 3, xAI, etc. for encrypted reasoning chain continuity). + // The @openrouter/ai-sdk-provider reads message-level providerOptions.openrouter.reasoning_details + // and validates them against ReasoningDetailUnionSchema (a strict Zod union). + // Invalid entries (e.g. type "reasoning.encrypted" without a `data` field) must be + // filtered out here, otherwise the entire safeParse fails and NO reasoning_details + // are included in the outgoing request. + const rawReasoningDetails = (message as unknown as { reasoning_details?: Record[] }) + .reasoning_details + const validReasoningDetails = rawReasoningDetails?.filter((detail) => { + switch (detail.type) { + case "reasoning.encrypted": + return typeof detail.data === "string" && detail.data.length > 0 + case "reasoning.text": + return typeof detail.text === "string" + case "reasoning.summary": + return typeof detail.summary === "string" + default: + return false + } + }) + + const assistantMessage: Record = { role: "assistant", content: content.length > 0 ? content : [{ type: "text", text: "" }], - } as ModelMessage) + } + + if (validReasoningDetails && validReasoningDetails.length > 0) { + assistantMessage.providerOptions = { + openrouter: { reasoning_details: validReasoningDetails }, + } + } + + modelMessages.push(assistantMessage as ModelMessage) } } } @@ -387,9 +417,13 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator
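// Illustrative sketch (reviewer note, not part of the patch): how the reasoning_details
// round-tripping described in PATCH 6/6 behaves. The helper name filterReasoningDetails,
// the ReasoningDetailLike type, and the sample objects below are assumptions for
// illustration only; the filtering rules (encrypted entries need a non-empty `data` string,
// text/summary entries need their respective string fields) and the
// providerOptions.openrouter.reasoning_details placement mirror the change to ai-sdk.ts above.

type ReasoningDetailLike = { type?: string; data?: unknown; text?: unknown; summary?: unknown }

// Keep only entries the provider's schema will accept, so a single malformed block
// (e.g. "reasoning.encrypted" carrying text instead of data) does not cause the whole
// reasoning_details array to be rejected and dropped from the outgoing request.
function filterReasoningDetails(details?: ReasoningDetailLike[]): ReasoningDetailLike[] {
	return (details ?? []).filter((detail) => {
		switch (detail.type) {
			case "reasoning.encrypted":
				return typeof detail.data === "string" && detail.data.length > 0
			case "reasoning.text":
				return typeof detail.text === "string"
			case "reasoning.summary":
				return typeof detail.summary === "string"
			default:
				return false
		}
	})
}

// Usage sketch: attach the surviving entries at the message level so OpenRouter can replay
// the reasoning chain on the next turn.
const kept = filterReasoningDetails([
	{ type: "reasoning.encrypted", data: "opaque-blob" },
	{ type: "reasoning.encrypted", text: "mislabeled plaintext summary" }, // dropped: no `data`
])
const assistantMessage = {
	role: "assistant" as const,
	content: [{ type: "text" as const, text: "Using a tool" }],
	...(kept.length > 0 ? { providerOptions: { openrouter: { reasoning_details: kept } } } : {}),
}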