anomalyco · danmaxis · Jun 1, 2026
diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
@@ -26,6 +26,7 @@ import { RuntimeFlags } from "@/effect/runtime-flags"
 import * as Option from "effect/Option"
 import * as OtelTracer from "@effect/opentelemetry/Tracer"
 import { LLMAISDK } from "./llm/ai-sdk"
+import { ReasoningToolGuard } from "./llm/reasoning-tool-guard"
 import { LLMNativeRuntime } from "./llm/native-runtime"
 import { LLMRequestPrep } from "./llm/request"
 
@@ -335,6 +336,12 @@ const live: Layer.Layer<
                   return args.params
                 },
               },
+              // Drop tool calls a reasoning model emits *inside* its <think> block so
+              // they are never executed prematurely. No-op unless the stream emits
+              // reasoning parts; opt out per model with options.suppressToolCallsInReasoning: false.
+              ...(input.model.options?.["suppressToolCallsInReasoning"] === false
+                ? []
+                : [ReasoningToolGuard.middleware()]),
             ],
           }),
           experimental_telemetry: {

diff --git a/packages/opencode/src/session/llm/reasoning-tool-guard.ts b/packages/opencode/src/session/llm/reasoning-tool-guard.ts
@@ -0,0 +1,93 @@
+import type { LanguageModelV3Middleware, LanguageModelV3StreamPart } from "@ai-sdk/provider"
+
+// Some reasoning models (Qwen, Kimi K2, GLM, ...) occasionally emit tool-call
+// markup *inside* their reasoning block while still "thinking". The inference
+// server promotes that to a structured tool call, and the AI SDK would then
+// execute it prematurely — running a side effect and ending the turn before the
+// model ever produces its real answer (see anomalyco/opencode#8851, #6708,
+// #10996).
+//
+// This transform sits in the language-model middleware, *before* streamText
+// interprets and executes tool calls. It drops any tool call that begins while a
+// reasoning block is still open, along with that call's input/result parts, and
+// downgrades a resulting `tool-calls` finish reason to `stop` so the session loop
+// does not wait on a tool that never runs.
+//
+// It only suppresses tool calls that begin before `reasoning-end`. Legitimate
+// post-reasoning tool calls pass through untouched, and the transform is a no-op
+// for any stream that never emits reasoning parts.
+export function transform(): TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart> {
+  let reasoningDepth = 0 // reasoning-start parts seen minus reasoning-end parts seen (never below 0); part ids are not compared
+  const suppressedIDs = new Set<string>() // ids of dropped tool calls; later parts carrying one of these ids are dropped too
+  let suppressedToolCall = false // becomes true once at least one tool call has been dropped
+  let survivingToolCall = false // becomes true once at least one tool-call part has been forwarded
+
+  const suppress = (id: string) => { // records a dropped call: remembers its id and flags that a suppression happened
+    suppressedIDs.add(id)
+    suppressedToolCall = true
+  }
+
+  return new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
+    transform(part, controller) { // an early `return` drops the part; leaving the switch via `break` forwards it unchanged
+      switch (part.type) {
+        case "reasoning-start":
+          reasoningDepth++ // the reasoning part itself is still forwarded below
+          break
+
+        case "reasoning-end":
+          if (reasoningDepth > 0) reasoningDepth-- // the guard keeps an unmatched reasoning-end from driving the depth negative
+          break
+
+        case "tool-input-start":
+          if (reasoningDepth > 0) { // NOTE(review): assumes providers emit reasoning-end before post-reasoning tool calls; otherwise those are dropped too — confirm per provider
+            suppress(part.id)
+            return
+          }
+          break
+
+        case "tool-input-delta":
+        case "tool-input-end":
+          if (suppressedIDs.has(part.id)) return // follow-up input parts of a call whose start was dropped
+          break
+
+        case "tool-call":
+          if (reasoningDepth > 0 || suppressedIDs.has(part.toolCallId)) { // dropped if it arrives mid-reasoning OR its input started mid-reasoning
+            suppress(part.toolCallId) // NOTE(review): a call whose tool-input-start was forwarded before reasoning opened is also dropped here, leaving input parts without a tool-call — confirm intended
+            return
+          }
+          survivingToolCall = true
+          break
+
+        case "tool-result":
+        case "tool-approval-request":
+          if (suppressedIDs.has(part.toolCallId)) return // results and approval requests are matched by toolCallId
+          break
+
+        case "finish":
+          // Only rewrite when every tool call this stream produced was suppressed.
+          // If a real tool call survived (emitted after reasoning closed), keep the
+          // original finish reason so the agent loop still runs it.
+          if (suppressedToolCall && !survivingToolCall && part.finishReason.unified === "tool-calls") {
+            controller.enqueue({ ...part, finishReason: { ...part.finishReason, unified: "stop" } }) // only `unified` is overwritten; every other field is copied as-is
+            return
+          }
+          break
+      }
+
+      controller.enqueue(part) // default path: part types not listed above also land here and pass through untouched
+    },
+  })
+}
+
+// Language-model middleware that pipes the provider stream through a fresh `transform()` on every wrapStream call.
+export function middleware(): LanguageModelV3Middleware {
+  return {
+    specificationVersion: "v3",
+    async wrapStream({ doStream }) { // only the streaming hook is defined on this middleware object
+      const { stream, ...rest } = await doStream()
+      return { stream: stream.pipeThrough(transform()), ...rest } // everything except `stream` is returned exactly as doStream() produced it
+    },
+  }
+}
+
+export * as ReasoningToolGuard from "./reasoning-tool-guard"
diff --git a/packages/opencode/test/session/reasoning-tool-guard.test.ts b/packages/opencode/test/session/reasoning-tool-guard.test.ts
@@ -0,0 +1,100 @@
+import { describe, expect, test } from "bun:test"
+import type { LanguageModelV3StreamPart, LanguageModelV3Usage } from "@ai-sdk/provider"
+import { ReasoningToolGuard } from "@/session/llm/reasoning-tool-guard"
+
+const usage: LanguageModelV3Usage = { // minimal token-usage fixture for the `finish` parts built in this file
+  inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
+  outputTokens: { total: 1, text: 1, reasoning: undefined },
+}
+
+async function guard(parts: LanguageModelV3StreamPart[]): Promise<LanguageModelV3StreamPart[]> { // feeds `parts` through ReasoningToolGuard.transform() and resolves with the parts that come out, in order
+  const input = new ReadableStream<LanguageModelV3StreamPart>({
+    start(controller) { // enqueue every part up front, then close so the read loop below can finish
+      for (const part of parts) controller.enqueue(part)
+      controller.close()
+    },
+  })
+  const out: LanguageModelV3StreamPart[] = []
+  const reader = input.pipeThrough(ReasoningToolGuard.transform()).getReader() // a new transform instance per guard() call
+  for (;;) { // drain the transformed stream until it reports done
+    const { done, value } = await reader.read()
+    if (done) break
+    out.push(value)
+  }
+  return out
+}
+
+describe("session.llm.reasoning-tool-guard", () => { // exercises ReasoningToolGuard.transform() through the guard() helper
+  test("suppresses a tool call emitted inside an open reasoning block", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "Let me read the file" },
+      { type: "tool-input-start", id: "c1", toolName: "read" }, // full tool lifecycle for c1 sits between reasoning-start and reasoning-end
+      { type: "tool-input-delta", id: "c1", delta: '{"path":"a.ts"}' },
+      { type: "tool-input-end", id: "c1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
+      { type: "reasoning-end", id: "r1" },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    // No tool lifecycle parts survive.
+    expect(out.some((p) => p.type.startsWith("tool-"))).toBe(false)
+    // Finish reason downgraded so the session loop does not wait on a tool.
+    expect(out.find((p) => p.type === "finish")).toMatchObject({
+      type: "finish",
+      finishReason: { unified: "stop", raw: "tool_calls" }, // `raw` must keep the provider's original value
+    })
+    // Reasoning parts are preserved untouched.
+    expect(out.map((p) => p.type)).toEqual(["reasoning-start", "reasoning-delta", "reasoning-end", "finish"])
+  })
+
+  test("preserves a legitimate tool call emitted after reasoning-end", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "I should read the file" },
+      { type: "reasoning-end", id: "r1" },
+      { type: "tool-input-start", id: "c1", toolName: "read" }, // the tool lifecycle starts only after the reasoning block has closed
+      { type: "tool-input-end", id: "c1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    expect(out.filter((p) => p.type === "tool-call")).toHaveLength(1)
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "tool-calls" } }) // finish reason is left alone
+  })
+
+  test("keeps tool-calls finish when an in-reasoning call is suppressed but a later call survives", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" }, // inside reasoning -> dropped
+      { type: "reasoning-end", id: "r1" },
+      { type: "tool-call", toolCallId: "c2", toolName: "bash", input: "{}" }, // after reasoning -> kept
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } },
+    ])
+
+    const calls = out.flatMap((p) => (p.type === "tool-call" ? [p.toolCallId] : [])) // ids of the tool-call parts that made it through
+    expect(calls).toEqual(["c2"])
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "tool-calls" } })
+  })
+
+  test("is a no-op for a normal text stream with no reasoning", async () => {
+    const parts: LanguageModelV3StreamPart[] = [
+      { type: "text-start", id: "t1" },
+      { type: "text-delta", id: "t1", delta: "Hello" },
+      { type: "text-end", id: "t1" },
+      { type: "finish", usage, finishReason: { unified: "stop", raw: "stop" } },
+    ]
+    expect(await guard(parts)).toEqual(parts) // same parts in the same order, nothing dropped or rewritten
+  })
+
+  test("suppresses a tool call when reasoning never closes (model stops mid-think)", async () => {
+    const out = await guard([
+      { type: "reasoning-start", id: "r1" },
+      { type: "reasoning-delta", id: "r1", delta: "I'll just call the tool" },
+      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" },
+      { type: "finish", usage, finishReason: { unified: "tool-calls", raw: "tool_calls" } }, // no reasoning-end precedes this finish
+    ])
+    expect(out.some((p) => p.type === "tool-call")).toBe(false)
+    expect(out.find((p) => p.type === "finish")).toMatchObject({ finishReason: { unified: "stop" } })
+  })
+})