From 57f29fccae4ad31bf311a84fb4295544e814a42a Mon Sep 17 00:00:00 2001
From: Danilo
Date: Mon, 1 Jun 2026 16:11:06 -0300
Subject: [PATCH] fix(opencode): ignore tool calls emitted inside reasoning blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some reasoning models (Qwen, Kimi K2, GLM, ...) occasionally emit
tool-call markup inside their reasoning block while still thinking. The
inference server promotes that to a structured tool call, which
streamText then executes prematurely — running a side effect and ending
the turn before the model produces its real answer.

Add a language-model middleware that runs over the provider stream
before tool calls are interpreted. It drops any tool call that begins
while a reasoning block is still open (and that call's input/result
parts) and downgrades a resulting tool-calls finish reason to stop. Tool
calls emitted after reasoning-end pass through untouched, and the
transform is a no-op for streams that never emit reasoning parts.

Opt out per model with options.suppressToolCallsInReasoning: false.

Refs: #8851, #6708, #10996
---
 packages/opencode/src/session/llm.ts               |   7 ++
 .../src/session/llm/reasoning-tool-guard.ts        |  93 ++++++++++++++++
 .../test/session/reasoning-tool-guard.test.ts      | 100 ++++++++++++++++++
 3 files changed, 200 insertions(+)
 create mode 100644 packages/opencode/src/session/llm/reasoning-tool-guard.ts
 create mode 100644 packages/opencode/test/session/reasoning-tool-guard.test.ts

diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
index ebaad3e9306d..cf926a7ffbfe 100644
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@@ -26,6 +26,7 @@ import { RuntimeFlags } from "@/effect/runtime-flags"
 import * as Option from "effect/Option"
 import * as OtelTracer from "@effect/opentelemetry/Tracer"
 import { LLMAISDK } from "./llm/ai-sdk"
+import { ReasoningToolGuard } from "./llm/reasoning-tool-guard"
 import { LLMNativeRuntime } from "./llm/native-runtime"
 import { LLMRequestPrep } from "./llm/request"
 
@@ -335,6 +336,12 @@ const live: Layer.Layer<
               return args.params
             },
           },
+          // Drop tool calls a reasoning model emits *inside* its reasoning block so
+          // they are never executed prematurely. No-op unless the stream emits
+          // reasoning parts; opt out per model with options.suppressToolCallsInReasoning: false.
+          ...(input.model.options?.["suppressToolCallsInReasoning"] === false
+            ? []
+            : [ReasoningToolGuard.middleware()]),
         ],
       }),
       experimental_telemetry: {
diff --git a/packages/opencode/src/session/llm/reasoning-tool-guard.ts b/packages/opencode/src/session/llm/reasoning-tool-guard.ts
new file mode 100644
index 000000000000..08bcb22628ee
--- /dev/null
+++ b/packages/opencode/src/session/llm/reasoning-tool-guard.ts
@@ -0,0 +1,93 @@
+import type { LanguageModelV3Middleware, LanguageModelV3StreamPart } from "@ai-sdk/provider"
+
+// Some reasoning models (Qwen, Kimi K2, GLM, ...) occasionally emit tool-call
+// markup *inside* their reasoning block while still "thinking". The inference
+// server promotes that to a structured tool call, and the AI SDK would then
+// execute it prematurely — running a side effect and ending the turn before the
+// model ever produces its real answer (see anomalyco/opencode#8851, #6708,
+// #10996).
+//
+// This transform sits in the language-model middleware, *before* streamText
+// interprets and executes tool calls. It drops any tool call that begins while a
+// reasoning block is still open, along with that call's input/result parts, and
+// downgrades a resulting `tool-calls` finish reason to `stop` so the session loop
+// does not wait on a tool that never runs.
+//
+// It only suppresses tool calls that begin before `reasoning-end`. Legitimate
+// post-reasoning tool calls pass through untouched, and the transform is a no-op
+// for any stream that never emits reasoning parts.
+export function transform(): TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart> {
+  let reasoningDepth = 0
+  const suppressedIDs = new Set<string>()
+  let suppressedToolCall = false
+  let survivingToolCall = false
+
+  const suppress = (id: string) => {
+    suppressedIDs.add(id)
+    suppressedToolCall = true
+  }
+
+  return new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
+    transform(part, controller) {
+      switch (part.type) {
+        case "reasoning-start":
+          reasoningDepth++
+          break
+
+        case "reasoning-end":
+          if (reasoningDepth > 0) reasoningDepth--
+          break
+
+        case "tool-input-start":
+          if (reasoningDepth > 0) {
+            suppress(part.id)
+            return
+          }
+          break
+
+        case "tool-input-delta":
+        case "tool-input-end":
+          if (suppressedIDs.has(part.id)) return
+          break
+
+        case "tool-call":
+          if (reasoningDepth > 0 || suppressedIDs.has(part.toolCallId)) {
+            suppress(part.toolCallId)
+            return
+          }
+          survivingToolCall = true
+          break
+
+        case "tool-result":
+        case "tool-approval-request":
+          if (suppressedIDs.has(part.toolCallId)) return
+          break
+
+        case "finish":
+          // Only rewrite when every tool call this stream produced was suppressed.
+          // If a real tool call survived (emitted after reasoning closed), keep the
+          // original finish reason so the agent loop still runs it.
+          if (suppressedToolCall && !survivingToolCall && part.finishReason.unified === "tool-calls") {
+            controller.enqueue({ ...part, finishReason: { ...part.finishReason, unified: "stop" } })
+            return
+          }
+          break
+      }
+
+      controller.enqueue(part)
+    },
+  })
+}
+
+// Language-model middleware that runs `transform()` over the provider stream.
+export function middleware(): LanguageModelV3Middleware {
+  return {
+    specificationVersion: "v3",
+    async wrapStream({ doStream }) {
+      const { stream, ...rest } = await doStream()
+      return { stream: stream.pipeThrough(transform()), ...rest }
+    },
+  }
+}
+
+export * as ReasoningToolGuard from "./reasoning-tool-guard"
diff --git a/packages/opencode/test/session/reasoning-tool-guard.test.ts b/packages/opencode/test/session/reasoning-tool-guard.test.ts
new file mode 100644
index 000000000000..f870f18ba6e9
--- /dev/null
+++ b/packages/opencode/test/session/reasoning-tool-guard.test.ts
@@ -0,0 +1,100 @@
+import { describe, expect, test } from "bun:test"
+import type { LanguageModelV3StreamPart, LanguageModelV3Usage } from "@ai-sdk/provider"
+import { ReasoningToolGuard } from "@/session/llm/reasoning-tool-guard"
+
+const usage: LanguageModelV3Usage = {
+  inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
+  outputTokens: { total: 1, text: 1, reasoning: undefined },
+}
+
+async function guard(parts: LanguageModelV3StreamPart[]): Promise<LanguageModelV3StreamPart[]> {
+  const input = new ReadableStream<LanguageModelV3StreamPart>({
+    start(controller) {
+      for (const part of parts) controller.enqueue(part)
+      controller.close()
+    },
+  })
+  const out: LanguageModelV3StreamPart[] = []
+  const reader = input.pipeThrough(ReasoningToolGuard.transform()).getReader()
+  for (;;) {
+    const { done, value } = await reader.read()
+    if (done) break
+    out.push(value)
+  }
+  return out
+}
+
+describe("session.llm.reasoning-tool-guard", () => {
+  test("suppresses a tool call emitted inside an open reasoning block", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "Let me read the file" },
+      { type: "tool-input-start", id: "c1", toolName: "read" },
+      { type: "tool-input-delta", id: "c1", delta: '{"path":"a.ts"}' },
+      { type: "tool-input-end", id: "c1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
+      { type: "reasoning-end", id: "r1" },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    // No tool lifecycle parts survive.
+    expect(out.some((p) => p.type.startsWith("tool-"))).toBe(false)
+    // Finish reason downgraded so the session loop does not wait on a tool.
+    expect(out.find((p) => p.type === "finish")).toMatchObject({
+      type: "finish",
+      finishReason: { unified: "stop", raw: "tool_calls" },
+    })
+    // Reasoning parts are preserved untouched.
+    expect(out.map((p) => p.type)).toEqual(["reasoning-start", "reasoning-delta", "reasoning-end", "finish"])
+  })
+
+  test("preserves a legitimate tool call emitted after reasoning-end", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "I should read the file" },
+      { type: "reasoning-end", id: "r1" },
+      { type: "tool-input-start", id: "c1", toolName: "read" },
+      { type: "tool-input-end", id: "c1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    expect(out.filter((p) => p.type === "tool-call")).toHaveLength(1)
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "tool-calls" } })
+  })
+
+  test("keeps tool-calls finish when an in-reasoning call is suppressed but a later call survives", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" }, // inside reasoning -> dropped
+      { type: "reasoning-end", id: "r1" },
+      { type: "tool-call", toolCallId: "c2", toolName: "bash", input: "{}" }, // after reasoning -> kept
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    const calls = out.flatMap((p) => (p.type === "tool-call" ? [p.toolCallId] : []))
+    expect(calls).toEqual(["c2"])
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "tool-calls" } })
+  })
+
+  test("is a no-op for a normal text stream with no reasoning", async () => {
+    const parts: LanguageModelV3StreamPart[] = [
+      { type: "text-start", id: "t1" },
+      { type: "text-delta", id: "t1", delta: "Hello" },
+      { type: "text-end", id: "t1" },
+      { type: "finish", usage, finishReason: { unified: "stop", raw: "stop" } },
+    ]
+    expect(await guard(parts)).toEqual(parts)
+  })
+
+  test("suppresses a tool call when reasoning never closes (model stops mid-think)", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "I'll just call the tool" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+    expect(out.some((p) => p.type === "tool-call")).toBe(false)
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "stop" } })
+  })
+})