Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions packages/opencode/src/session/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import { RuntimeFlags } from "@/effect/runtime-flags"
import * as Option from "effect/Option"
import * as OtelTracer from "@effect/opentelemetry/Tracer"
import { LLMAISDK } from "./llm/ai-sdk"
import { ReasoningToolGuard } from "./llm/reasoning-tool-guard"
import { LLMNativeRuntime } from "./llm/native-runtime"
import { LLMRequestPrep } from "./llm/request"

Expand Down Expand Up @@ -335,6 +336,12 @@ const live: Layer.Layer<
return args.params
},
},
// Drop tool calls a reasoning model emits *inside* its <think> block so
// they are never executed prematurely. No-op unless the stream emits
// reasoning parts; opt out per model with options.suppressToolCallsInReasoning: false.
...(input.model.options?.["suppressToolCallsInReasoning"] === false
? []
: [ReasoningToolGuard.middleware()]),
],
}),
experimental_telemetry: {
Expand Down
93 changes: 93 additions & 0 deletions packages/opencode/src/session/llm/reasoning-tool-guard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import type { LanguageModelV3Middleware, LanguageModelV3StreamPart } from "@ai-sdk/provider"

// Some reasoning models (Qwen, Kimi K2, GLM, ...) occasionally emit tool-call
// markup *inside* their reasoning block while still "thinking". The inference
// server promotes that to a structured tool call, and the AI SDK would then
// execute it prematurely — running a side effect and ending the turn before the
// model ever produces its real answer (see anomalyco/opencode#8851, #6708,
// #10996).
//
// This transform sits in the language-model middleware, *before* streamText
// interprets and executes tool calls. It drops any tool call that begins while a
// reasoning block is still open, along with that call's input/result parts, and
// downgrades a resulting `tool-calls` finish reason to `stop` so the session loop
// does not wait on a tool that never runs.
//
// A call "begins" at its `tool-input-start` part, or at its `tool-call` part when
// no input-start was streamed for it. A call whose `tool-input-start` was already
// forwarded while no reasoning block was open is never dropped afterwards, even
// if its `tool-call` part arrives while a reasoning block is open: its input
// parts are already downstream, so dropping only the final `tool-call` would
// leave an orphaned, half-streamed call behind.
//
// Legitimate post-reasoning tool calls pass through untouched, and the transform
// is a no-op for any stream that never emits reasoning parts.
//
// Returns a fresh TransformStream; all bookkeeping is per returned stream, so a
// new one must be created for every provider stream.
export function transform(): TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart> {
  // Number of reasoning blocks currently open (`reasoning-start` seen without a
  // matching `reasoning-end`).
  let reasoningDepth = 0
  // IDs of dropped tool calls; later parts carrying one of these IDs are dropped too.
  const suppressedIDs = new Set<string>()
  // IDs whose `tool-input-start` was forwarded downstream (began outside reasoning).
  const forwardedIDs = new Set<string>()
  // Whether at least one tool call was dropped / at least one `tool-call` was forwarded.
  let suppressedToolCall = false
  let survivingToolCall = false

  const suppress = (id: string) => {
    suppressedIDs.add(id)
    suppressedToolCall = true
  }

  return new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
    transform(part, controller) {
      switch (part.type) {
        case "reasoning-start":
          reasoningDepth++
          break

        case "reasoning-end":
          // Clamp at zero so a stray `reasoning-end` cannot make the depth negative.
          if (reasoningDepth > 0) reasoningDepth--
          break

        case "tool-input-start":
          if (reasoningDepth > 0) {
            suppress(part.id)
            return
          }
          // Remember that this call's input is already visible downstream.
          forwardedIDs.add(part.id)
          break

        case "tool-input-delta":
        case "tool-input-end":
          if (suppressedIDs.has(part.id)) return
          break

        case "tool-call": {
          // A call that began inside reasoning stays suppressed even if its
          // `tool-call` part only arrives after `reasoning-end`.
          const alreadySuppressed = suppressedIDs.has(part.toolCallId)
          // A call with no forwarded input-start begins here, so the current
          // reasoning state decides its fate.
          const beginsInsideReasoning = reasoningDepth > 0 && !forwardedIDs.has(part.toolCallId)
          if (alreadySuppressed || beginsInsideReasoning) {
            suppress(part.toolCallId)
            return
          }
          survivingToolCall = true
          break
        }

        case "tool-result":
        case "tool-approval-request":
          if (suppressedIDs.has(part.toolCallId)) return
          break

        case "finish":
          // Only rewrite when every tool call this stream produced was suppressed.
          // If a real tool call survived, keep the original finish reason so the
          // agent loop still runs it.
          if (suppressedToolCall && !survivingToolCall && part.finishReason.unified === "tool-calls") {
            controller.enqueue({ ...part, finishReason: { ...part.finishReason, unified: "stop" } })
            return
          }
          break
      }

      controller.enqueue(part)
    },
  })
}

// Builds a v3 language-model middleware whose only hook, `wrapStream`, pipes the
// provider's stream through a fresh `transform()` and returns every other field
// of the `doStream()` result unchanged.
export function middleware(): LanguageModelV3Middleware {
  return {
    specificationVersion: "v3",
    wrapStream: async ({ doStream }) => {
      const { stream: upstream, ...passthrough } = await doStream()
      const guarded = upstream.pipeThrough(transform())
      return { stream: guarded, ...passthrough }
    },
  }
}

export * as ReasoningToolGuard from "./reasoning-tool-guard"
100 changes: 100 additions & 0 deletions packages/opencode/test/session/reasoning-tool-guard.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { describe, expect, test } from "bun:test"
import type { LanguageModelV3StreamPart, LanguageModelV3Usage } from "@ai-sdk/provider"
import { ReasoningToolGuard } from "@/session/llm/reasoning-tool-guard"

// Minimal token-usage fixture reused by every `finish` part built in these tests.
const usage: LanguageModelV3Usage = {
inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
outputTokens: { total: 1, text: 1, reasoning: undefined },
}

// Test helper: feeds `parts` through `ReasoningToolGuard.transform()` as a stream
// and resolves with every part that comes out the other side, in order.
async function guard(parts: LanguageModelV3StreamPart[]): Promise<LanguageModelV3StreamPart[]> {
  const source = new ReadableStream<LanguageModelV3StreamPart>({
    start(controller) {
      // Queue everything up front, then close so the reader eventually sees `done`.
      parts.forEach((part) => controller.enqueue(part))
      controller.close()
    },
  })
  const reader = source.pipeThrough(ReasoningToolGuard.transform()).getReader()
  const collected: LanguageModelV3StreamPart[] = []
  let chunk = await reader.read()
  while (!chunk.done) {
    collected.push(chunk.value)
    chunk = await reader.read()
  }
  return collected
}

// Behavioural tests for the reasoning tool guard: each case pushes a hand-built
// provider stream through `guard()` and inspects what survives.
describe("session.llm.reasoning-tool-guard", () => {
  // Finish part as a provider reports it when the turn ended on tool calls.
  const toolCallsFinish: LanguageModelV3StreamPart = {
    type: "finish",
    usage,
    finishReason: { unified: "tool-calls", raw: "tool_calls" },
  }

  // The first `finish` part of a stream, if any.
  const finishOf = (parts: LanguageModelV3StreamPart[]) => parts.find((part) => part.type === "finish")

  // IDs of all `tool-call` parts, in stream order.
  const toolCallIDs = (parts: LanguageModelV3StreamPart[]) =>
    parts.flatMap((part) => (part.type === "tool-call" ? [part.toolCallId] : []))

  test("suppresses a tool call emitted inside an open reasoning block", async () => {
    const result = await guard([
      { type: "reasoning-start", id: "r1" },
      { type: "reasoning-delta", id: "r1", delta: "Let me read the file" },
      { type: "tool-input-start", id: "c1", toolName: "read" },
      { type: "tool-input-delta", id: "c1", delta: '{"path":"a.ts"}' },
      { type: "tool-input-end", id: "c1" },
      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
      { type: "reasoning-end", id: "r1" },
      toolCallsFinish,
    ])

    // Every tool lifecycle part is gone.
    expect(result.filter((part) => part.type.startsWith("tool-"))).toEqual([])
    // The finish reason is downgraded so the session loop does not wait on a tool.
    expect(finishOf(result)).toMatchObject({
      type: "finish",
      finishReason: { unified: "stop", raw: "tool_calls" },
    })
    // Reasoning parts come through in their original order.
    const kinds = result.map((part) => part.type)
    expect(kinds).toEqual(["reasoning-start", "reasoning-delta", "reasoning-end", "finish"])
  })

  test("preserves a legitimate tool call emitted after reasoning-end", async () => {
    const result = await guard([
      { type: "reasoning-start", id: "r1" },
      { type: "reasoning-delta", id: "r1", delta: "I should read the file" },
      { type: "reasoning-end", id: "r1" },
      { type: "tool-input-start", id: "c1", toolName: "read" },
      { type: "tool-input-end", id: "c1" },
      { type: "tool-call", toolCallId: "c1", toolName: "read", input: '{"path":"a.ts"}' },
      toolCallsFinish,
    ])

    expect(toolCallIDs(result)).toHaveLength(1)
    expect(finishOf(result)).toMatchObject({ finishReason: { unified: "tool-calls" } })
  })

  test("keeps tool-calls finish when an in-reasoning call is suppressed but a later call survives", async () => {
    const result = await guard([
      { type: "reasoning-start", id: "r1" },
      // Inside reasoning: dropped.
      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" },
      { type: "reasoning-end", id: "r1" },
      // After reasoning: kept.
      { type: "tool-call", toolCallId: "c2", toolName: "bash", input: "{}" },
      toolCallsFinish,
    ])

    expect(toolCallIDs(result)).toEqual(["c2"])
    expect(finishOf(result)).toMatchObject({ finishReason: { unified: "tool-calls" } })
  })

  test("is a no-op for a normal text stream with no reasoning", async () => {
    const plain: LanguageModelV3StreamPart[] = [
      { type: "text-start", id: "t1" },
      { type: "text-delta", id: "t1", delta: "Hello" },
      { type: "text-end", id: "t1" },
      { type: "finish", usage, finishReason: { unified: "stop", raw: "stop" } },
    ]
    const result = await guard(plain)
    expect(result).toEqual(plain)
  })

  test("suppresses a tool call when reasoning never closes (model stops mid-think)", async () => {
    const result = await guard([
      { type: "reasoning-start", id: "r1" },
      { type: "reasoning-delta", id: "r1", delta: "I'll just call the tool" },
      { type: "tool-call", toolCallId: "c1", toolName: "read", input: "{}" },
      toolCallsFinish,
    ])
    expect(toolCallIDs(result)).toHaveLength(0)
    expect(finishOf(result)).toMatchObject({ finishReason: { unified: "stop" } })
  })
})
Loading