From 57f29fccae4ad31bf311a84fb4295544e814a42a Mon Sep 17 00:00:00 2001
From: Danilo <danmaxis@gmail.com>
Date: Mon, 1 Jun 2026 16:11:06 -0300
Subject: [PATCH] fix(opencode): ignore tool calls emitted inside reasoning
 blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some reasoning models (Qwen, Kimi K2, GLM, ...) occasionally emit tool-call
markup inside their <think> reasoning block while still thinking. The inference
server promotes that to a structured tool call, which streamText then executes
prematurely — running a side effect and ending the turn before the model
produces its real answer.

Add a language-model middleware that runs over the provider stream before tool
calls are interpreted. It drops any tool call that begins while a reasoning
block is still open (and that call's input/result parts) and downgrades a
resulting tool-calls finish reason to stop when no other tool call survives.
Tool calls that begin after reasoning-end pass through untouched, and the
transform is a no-op for streams that never emit reasoning parts. Opt out per
model with options.suppressToolCallsInReasoning: false.

Refs: #8851, #6708, #10996
---
 packages/opencode/src/session/llm.ts          |   7 ++
 .../src/session/llm/reasoning-tool-guard.ts   |  93 ++++++++++++++++
 .../test/session/reasoning-tool-guard.test.ts | 100 ++++++++++++++++++
 3 files changed, 200 insertions(+)
 create mode 100644 packages/opencode/src/session/llm/reasoning-tool-guard.ts
 create mode 100644 packages/opencode/test/session/reasoning-tool-guard.test.ts
diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
index ebaad3e9306d..cf926a7ffbfe 100644
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@@ -26,6 +26,7 @@ import { RuntimeFlags } from "@/effect/runtime-flags"
 import * as Option from "effect/Option"
 import * as OtelTracer from "@effect/opentelemetry/Tracer"
 import { LLMAISDK } from "./llm/ai-sdk"
+import { ReasoningToolGuard } from "./llm/reasoning-tool-guard"
 import { LLMNativeRuntime } from "./llm/native-runtime"
 import { LLMRequestPrep } from "./llm/request"
 
@@ -335,6 +336,12 @@ const live: Layer.Layer<
                   return args.params
                 },
               },
+              // Drop tool calls a reasoning model emits *inside* its <think> block so
+              // they are never executed prematurely. No-op unless the stream emits
+              // reasoning parts; opt out per model with options.suppressToolCallsInReasoning: false.
+              ...(input.model.options?.["suppressToolCallsInReasoning"] === false
+                ? []
+                : [ReasoningToolGuard.middleware()]),
             ],
           }),
           experimental_telemetry: {
diff --git a/packages/opencode/src/session/llm/reasoning-tool-guard.ts b/packages/opencode/src/session/llm/reasoning-tool-guard.ts
new file mode 100644
index 000000000000..08bcb22628ee
--- /dev/null
+++ b/packages/opencode/src/session/llm/reasoning-tool-guard.ts
@@ -0,0 +1,93 @@
+import type { LanguageModelV3Middleware, LanguageModelV3StreamPart } from "@ai-sdk/provider"
+
+// Some reasoning models (Qwen, Kimi K2, GLM, ...) occasionally emit tool-call
+// markup *inside* their reasoning block while still "thinking". The inference
+// server promotes that to a structured tool call, and the AI SDK would then
+// execute it prematurely — running a side effect and ending the turn before the
+// model ever produces its real answer (see anomalyco/opencode#8851, #6708,
+// #10996).
+//
+// This transform sits in the language-model middleware, *before* streamText
+// interprets and executes tool calls. It drops any tool call that begins while a
+// reasoning block is still open, along with that call's input/result parts, and
+// downgrades a resulting `tool-calls` finish reason to `stop` so the session loop
+// does not wait on a tool that never runs.
+//
+// It only suppresses tool calls that begin before `reasoning-end`. Legitimate
+// post-reasoning tool calls pass through untouched, and the transform is a no-op
+// for any stream that never emits reasoning parts.
+export function transform(): TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart> {
+  let reasoningDepth = 0 // number of reasoning blocks currently open
+  const suppressedIDs = new Set<string>() // ids of tool calls being dropped
+  let suppressedToolCall = false // set once any tool call has been dropped
+  let survivingToolCall = false // set once any `tool-call` part has been forwarded
+
+  const suppress = (id: string) => {
+    suppressedIDs.add(id) // later parts carrying this id are dropped too
+    suppressedToolCall = true
+  }
+
+  return new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
+    transform(part, controller) {
+      switch (part.type) {
+        case "reasoning-start":
+          reasoningDepth++
+          break // the reasoning part itself is still forwarded below
+
+        case "reasoning-end":
+          if (reasoningDepth > 0) reasoningDepth-- // an unmatched end never drives the depth negative
+          break
+
+        case "tool-input-start":
+          if (reasoningDepth > 0) { // NOTE(review): assumes providers emit reasoning-end before post-reasoning calls — confirm
+            suppress(part.id)
+            return // dropped: the call began while reasoning was open
+          }
+          break
+
+        case "tool-input-delta":
+        case "tool-input-end":
+          if (suppressedIDs.has(part.id)) return // rest of a suppressed call's input
+          break
+
+        case "tool-call":
+          if (reasoningDepth > 0 || suppressedIDs.has(part.toolCallId)) { // reasoning open now, or the input began inside it
+            suppress(part.toolCallId)
+            return
+          }
+          survivingToolCall = true
+          break
+
+        case "tool-result":
+        case "tool-approval-request":
+          if (suppressedIDs.has(part.toolCallId)) return // follow-up parts of a suppressed call
+          break
+
+        case "finish":
+          // Rewrite `tool-calls` to `stop` only when at least one tool call was
+          // suppressed and none was forwarded. If a real tool call survived, keep
+          // the original finish reason so the agent loop still runs it.
+          if (suppressedToolCall && !survivingToolCall && part.finishReason.unified === "tool-calls") {
+            controller.enqueue({ ...part, finishReason: { ...part.finishReason, unified: "stop" } }) // other finishReason fields are kept
+            return
+          }
+          break
+      }
+
+      controller.enqueue(part) // anything not dropped or rewritten above passes through unchanged
+    },
+  })
+}
+
+// Language-model middleware that pipes the provider stream through `transform()`; only `wrapStream` is defined.
+export function middleware(): LanguageModelV3Middleware {
+  return {
+    specificationVersion: "v3",
+    async wrapStream({ doStream }) {
+      const { stream, ...rest } = await doStream() // every other field of the result is returned as-is
+      return { stream: stream.pipeThrough(transform()), ...rest } // new transform, and so new state, per call
+    },
+  }
+}
+
+export * as ReasoningToolGuard from "./reasoning-tool-guard"
diff --git a/packages/opencode/test/session/reasoning-tool-guard.test.ts b/packages/opencode/test/session/reasoning-tool-guard.test.ts
new file mode 100644
index 000000000000..f870f18ba6e9
--- /dev/null
+++ b/packages/opencode/test/session/reasoning-tool-guard.test.ts
@@ -0,0 +1,100 @@
+import { describe, expect, test } from "bun:test"
+import type { LanguageModelV3StreamPart, LanguageModelV3Usage } from "@ai-sdk/provider"
+import { ReasoningToolGuard } from "@/session/llm/reasoning-tool-guard"
+
+const usage: LanguageModelV3Usage = { // token-usage fixture reused by every `finish` part below
+  inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
+  outputTokens: { total: 1, text: 1, reasoning: undefined },
+}
+
+async function guard(parts: LanguageModelV3StreamPart[]): Promise<LanguageModelV3StreamPart[]> {
+  const input = new ReadableStream<LanguageModelV3StreamPart>({
+    start(controller) {
+      for (const part of parts) controller.enqueue(part) // queue the whole fixture up front
+      controller.close()
+    },
+  })
+  const out: LanguageModelV3StreamPart[] = [] // parts emitted by the guard, in order
+  const reader = input.pipeThrough(ReasoningToolGuard.transform()).getReader() // new transform per call
+  for (;;) {
+    const { done, value } = await reader.read()
+    if (done) break // stream drained
+    out.push(value)
+  }
+  return out
+}
+
+describe("session.llm.reasoning-tool-guard", () => {
+  test("suppresses a tool call emitted inside an open reasoning block", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "Let me read the file" },
+      { type: "tool-input-start", id: "c1", toolName: "read" }, // begins while r1 is still open
+      { type: "tool-input-delta", id: "c1", delta: '{"path":"a.ts"}' },
+      { type: "tool-input-end", id: "c1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
+      { type: "reasoning-end", id: "r1" },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    // No tool lifecycle parts survive.
+    expect(out.some((p) => p.type.startsWith("tool-"))).toBe(false)
+    // Finish reason downgraded so the session loop does not wait on a tool.
+    expect(out.find((p) => p.type === "finish")).toMatchObject({
+      type: "finish",
+      finishReason: { unified: "stop", raw: "tool_calls" }, // only `unified` is rewritten
+    })
+    // Reasoning parts are preserved untouched.
+    expect(out.map((p) => p.type)).toEqual(["reasoning-start", "reasoning-delta", "reasoning-end", "finish"])
+  })
+
+  test("preserves a legitimate tool call emitted after reasoning-end", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "I should read the file" },
+      { type: "reasoning-end", id: "r1" }, // reasoning closes before the tool call begins
+      { type: "tool-input-start", id: "c1", toolName: "read" },
+      { type: "tool-input-end", id: "c1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    expect(out.filter((p) => p.type === "tool-call")).toHaveLength(1)
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "tool-calls" } })
+  })
+
+  test("keeps tool-calls finish when an in-reasoning call is suppressed but a later call survives", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" }, // inside reasoning -> dropped
+      { type: "reasoning-end", id: "r1" },
+      { type: "tool-call", toolCallId: "c2", toolName: "bash", input: "{}" }, // after reasoning -> kept
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    const calls = out.flatMap((p) => (p.type === "tool-call" ? [p.toolCallId] : [])) // ids of forwarded tool calls
+    expect(calls).toEqual(["c2"])
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "tool-calls" } })
+  })
+
+  test("is a no-op for a normal text stream with no reasoning", async () => {
+    const parts: LanguageModelV3StreamPart[] = [
+      { type: "text-start", id: "t1" },
+      { type: "text-delta", id: "t1", delta: "Hello" },
+      { type: "text-end", id: "t1" },
+      { type: "finish", usage, finishReason: { unified: "stop", raw: "stop" } },
+    ]
+    expect(await guard(parts)).toEqual(parts) // output equals input, part for part
+  })
+
+  test("suppresses a tool call when reasoning never closes (model stops mid-think)", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "I'll just call the tool" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" }, // no reasoning-end precedes this
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+    expect(out.some((p) => p.type === "tool-call")).toBe(false)
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "stop" } })
+  })
+})