From c9b1d5e1cf2279200857fe4f56a6aecc738d08a7 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Mon, 9 Feb 2026 18:43:16 +0000
Subject: [PATCH] fix: detect and abort repetitive reasoning loops in model
 thinking output

Adds a ReasoningRepetitionDetector that monitors streaming reasoning/thinking
content for repetitive patterns. When the same line has been seen a threshold
number of times (default 5), the stream is aborted early to save tokens and
the model receives guidance to try a different approach.

This addresses an issue where some models (particularly Gemini) get stuck
in reasoning loops, repeating the same lines like "I'll use
attempt_completion" or "I'll mention that I verified with tests" indefinitely.

Changes:
- New ReasoningRepetitionDetector class with streaming chunk support
- Integration into Task.ts reasoning streaming handler
- New reasoningRepetitionDetected response format for model guidance
- 19 unit tests covering detection, normalization, reset, and edge cases

Closes #11337
---
 src/core/prompts/responses.ts                 |  17 ++
 src/core/task/Task.ts                         |  47 ++++
 src/core/tools/ReasoningRepetitionDetector.ts | 127 +++++++++
 .../ReasoningRepetitionDetector.spec.ts       | 273 ++++++++++++++++++
 4 files changed, 464 insertions(+)
 create mode 100644 src/core/tools/ReasoningRepetitionDetector.ts
 create mode 100644 src/core/tools/__tests__/ReasoningRepetitionDetector.spec.ts

diff --git a/src/core/prompts/responses.ts b/src/core/prompts/responses.ts
index 60b5b4123ac..86bd8da4f56 100644
--- a/src/core/prompts/responses.ts
+++ b/src/core/prompts/responses.ts
@@ -54,6 +54,23 @@ Otherwise, if you have not completed the task and do not need additional informa
 (This is an automated message, so do not respond to it conversationally.)`
 	},
 
+	reasoningRepetitionDetected: () => {
+		const instructions = getToolInstructionsReminder()
+
+		return `[ERROR] Your reasoning/thinking output was stuck in a repetitive loop, repeating the same lines over and over. The response was aborted to save tokens.
+
+IMPORTANT: Do NOT repeat the same thoughts or plans. Take a different approach or proceed directly with action.
+
+${instructions}
+
+# Next Steps
+
+If you have completed the user's task, use the attempt_completion tool.
+If you require additional information from the user, use the ask_followup_question tool.
+Otherwise, proceed with the next step of the task using a tool call. Do NOT repeat your previous reasoning.
+(This is an automated message, so do not respond to it conversationally.)`
+	},
+
 	tooManyMistakes: (feedback?: string) =>
 		JSON.stringify({
 			status: "guidance",
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 6d9426384ce..613b432b2a5 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -100,6 +100,7 @@ import { buildNativeToolsArrayWithRestrictions } from "./build-tools"
 
 // core modules
 import { ToolRepetitionDetector } from "../tools/ToolRepetitionDetector"
+import { ReasoningRepetitionDetector } from "../tools/ReasoningRepetitionDetector"
 import { restoreTodoListForTask } from "../tools/UpdateTodoListTool"
 import { FileContextTracker } from "../context-tracking/FileContextTracker"
 import { RooIgnoreController } from "../ignore/RooIgnoreController"
@@ -298,6 +299,8 @@ export class Task extends EventEmitter implements TaskLike {
 	}
 
 	toolRepetitionDetector: ToolRepetitionDetector
+	reasoningRepetitionDetector: ReasoningRepetitionDetector
+	reasoningRepetitionAborted: boolean = false
 	rooIgnoreController?: RooIgnoreController
 	rooProtectedController?: RooProtectedController
 	fileContextTracker: FileContextTracker
@@ -689,6 +692,7 @@ export class Task extends EventEmitter implements TaskLike {
 		this.diffStrategy = new MultiSearchReplaceDiffStrategy()
 
 		this.toolRepetitionDetector = new ToolRepetitionDetector(this.consecutiveMistakeLimit)
+		this.reasoningRepetitionDetector = new ReasoningRepetitionDetector()
 
 		// Initialize todo list if provided
 		if (initialTodos && initialTodos.length > 0) {
@@ -2935,6 +2939,8 @@ export class Task extends EventEmitter implements TaskLike {
 				this.didToolFailInCurrentTurn = false
 				this.presentAssistantMessageLocked = false
 				this.presentAssistantMessageHasPendingUpdates = false
+				this.reasoningRepetitionDetector.reset()
+				this.reasoningRepetitionAborted = false
 				// No legacy text-stream tool parser.
 				this.streamingToolCallIndices.clear()
 				// Clear any leftover streaming tool call state from previous interrupted streams
@@ -2997,6 +3003,22 @@ export class Task extends EventEmitter implements TaskLike {
 					switch (chunk.type) {
 						case "reasoning": {
 							reasoningMessage += chunk.text
+
+							// Detect repetitive reasoning during streaming to abort early
+							// and save tokens when the model gets stuck in a loop.
+							if (this.reasoningRepetitionDetector.addChunk(chunk.text)) {
+								console.warn(
+									`[Task#${this.taskId}.${this.instanceId}] Reasoning repetition detected, aborting stream`,
+								)
+								await this.say(
+									"error",
+									"Repetitive reasoning detected - the model's thinking got stuck in a loop. Aborting to save tokens.",
+								)
+								this.reasoningRepetitionAborted = true
+								this.cancelCurrentRequest()
+								break
+							}
+
 							// Only apply formatting if the message contains sentence-ending punctuation followed by **
 							let formattedReasoning = reasoningMessage
 							if (reasoningMessage.includes("**")) {
@@ -3310,6 +3332,31 @@ export class Task extends EventEmitter implements TaskLike {
 					// Cline instance to finish aborting (error is thrown here when
 					// any function in the for loop throws due to this.abort).
 					if (!this.abandoned) {
+						// Check if this abort was triggered by reasoning repetition detection.
+						// In this case we don't retry - instead we continue the task loop with
+						// a guidance message to break the model out of the loop.
+						if (this.reasoningRepetitionAborted) {
+							this.reasoningRepetitionAborted = false
+							this.consecutiveMistakeCount++
+
+							// Clean up partial state without treating it as a streaming failure
+							await abortStream("streaming_failed")
+
+							// Push guidance message onto the stack so the model gets feedback
+							// about the repetition and can try a different approach
+							stack.push({
+								userContent: [
+									{
+										type: "text" as const,
+										text: formatResponse.reasoningRepetitionDetected(),
+									},
+								],
+								includeFileDetails: false,
+							})
+
+							continue
+						}
+
 						// Determine cancellation reason
 						const cancelReason: ClineApiReqCancelReason = this.abort ? "user_cancelled" : "streaming_failed"
 
diff --git a/src/core/tools/ReasoningRepetitionDetector.ts b/src/core/tools/ReasoningRepetitionDetector.ts
new file mode 100644
index 00000000000..361da08d422
--- /dev/null
+++ b/src/core/tools/ReasoningRepetitionDetector.ts
@@ -0,0 +1,127 @@
+/**
+ * Detects repetitive patterns in model reasoning/thinking output during streaming.
+ *
+ * Some models (particularly Gemini) can get stuck in a loop where their
+ * thinking/reasoning output repeats the same lines over and over, e.g.:
+ *   "I'll mention that I verified with tests."
+ *   "I'll mention that I reverted the tests."
+ *   "I'll mention that I verified with tests."
+ *   "I'll mention that I reverted the tests."
+ *   ...
+ *
+ * This detector tracks lines as they stream in and flags when any single
+ * line has been seen at least the configured threshold number of times.
+ */
+export class ReasoningRepetitionDetector {
+	private lineCounts: Map<string, number> = new Map()
+	private buffer: string = ""
+	private readonly repetitionThreshold: number
+	private readonly minLineLength: number
+
+	/**
+	 * @param repetitionThreshold Number of times a line must repeat to be considered a loop (default: 5)
+	 * @param minLineLength Minimum line length to track - short lines are ignored (default: 20)
+	 */
+	constructor(repetitionThreshold: number = 5, minLineLength: number = 20) {
+		this.repetitionThreshold = repetitionThreshold
+		this.minLineLength = minLineLength
+	}
+
+	/**
+	 * Feed a new chunk of reasoning text and check for repetition.
+	 *
+	 * @param chunk A new piece of reasoning/thinking text from the stream
+	 * @returns true if repetitive looping has been detected
+	 */
+	public addChunk(chunk: string): boolean {
+		this.buffer += chunk
+
+		// Split buffer into complete lines (keeping incomplete last line in buffer)
+		const lines = this.buffer.split("\n")
+
+		// Keep the last element as the buffer (it may be an incomplete line)
+		this.buffer = lines.pop() ?? ""
+
+		for (const rawLine of lines) {
+			const line = this.normalizeLine(rawLine)
+
+			if (line.length < this.minLineLength) {
+				continue
+			}
+
+			const count = (this.lineCounts.get(line) ?? 0) + 1
+			this.lineCounts.set(line, count)
+
+			if (count >= this.repetitionThreshold) {
+				return true
+			}
+		}
+
+		return false
+	}
+
+	/**
+	 * Check if any line in the accumulated reasoning has hit the repetition threshold.
+	 * Useful for checking after a stream is complete but before tool processing.
+	 *
+	 * This is a read-only query: an unterminated line still held in the buffer is
+	 * included in the check but not recorded, so repeated calls give the same answer
+	 * and addChunk() does not count that line a second time once it is completed.
+	 */
+	public isRepetitive(): boolean {
+		for (const count of this.lineCounts.values()) {
+			if (count >= this.repetitionThreshold) {
+				return true
+			}
+		}
+
+		// Also consider any remaining buffer content (without storing it)
+		if (this.buffer.length === 0) {
+			return false
+		}
+
+		const pending = this.normalizeLine(this.buffer)
+		if (pending.length < this.minLineLength) {
+			return false
+		}
+
+		return (this.lineCounts.get(pending) ?? 0) + 1 >= this.repetitionThreshold
+	}
+
+	/**
+	 * Get the most repeated line and its count, useful for diagnostics.
+	 */
+	public getMostRepeatedLine(): { line: string; count: number } | undefined {
+		let maxLine: string | undefined
+		let maxCount = 0
+
+		for (const [line, count] of this.lineCounts.entries()) {
+			if (count > maxCount) {
+				maxCount = count
+				maxLine = line
+			}
+		}
+
+		if (maxLine !== undefined) {
+			return { line: maxLine, count: maxCount }
+		}
+
+		return undefined
+	}
+
+	/**
+	 * Reset the detector state. Called at the start of each new API request.
+	 */
+	public reset(): void {
+		this.lineCounts.clear()
+		this.buffer = ""
+	}
+
+	/**
+	 * Normalize a line for comparison: trim whitespace, collapse internal
+	 * whitespace, and lowercase.
+	 */
+	private normalizeLine(line: string): string {
+		return line.trim().replace(/\s+/g, " ").toLowerCase()
+	}
+}
diff --git a/src/core/tools/__tests__/ReasoningRepetitionDetector.spec.ts b/src/core/tools/__tests__/ReasoningRepetitionDetector.spec.ts
new file mode 100644
index 00000000000..8172f0a44be
--- /dev/null
+++ b/src/core/tools/__tests__/ReasoningRepetitionDetector.spec.ts
@@ -0,0 +1,273 @@
+import { ReasoningRepetitionDetector } from "../ReasoningRepetitionDetector"
+
+describe("ReasoningRepetitionDetector", () => {
+	describe("addChunk", () => {
+		it("should not flag non-repetitive reasoning", () => {
+			const detector = new ReasoningRepetitionDetector()
+
+			expect(detector.addChunk("I need to analyze the code.\n")).toBe(false)
+			expect(detector.addChunk("Let me look at the file structure.\n")).toBe(false)
+			expect(detector.addChunk("The function needs to be refactored.\n")).toBe(false)
+			expect(detector.addChunk("I'll use the read_file tool.\n")).toBe(false)
+		})
+
+		it("should detect repetitive lines when threshold is reached", () => {
+			const detector = new ReasoningRepetitionDetector(3) // threshold of 3
+
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(true)
+		})
+
+		it("should detect the pattern from the original issue report", () => {
+			const detector = new ReasoningRepetitionDetector(5)
+
+			const lines = [
+				"The code is correct.\n",
+				"I'll complete the task.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that the fix is in packages/rev18/envar/src/env-runner.js.\n",
+				"I'll mention that it injects arguments immediately after the command.\n",
+				"I'll mention that this handles both explicit -- and implicit command starts.\n",
+				"I'll mention that this supports get command as well.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+			]
+
+			let detected = false
+			for (const line of lines) {
+				if (detector.addChunk(line)) {
+					detected = true
+					break
+				}
+			}
+
+			expect(detected).toBe(true)
+		})
+
+		it("should handle chunks that span multiple lines", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			// Feed a chunk containing multiple lines at once
+			const chunk =
+				"I'll use attempt_completion.\nSome other text here.\nI'll use attempt_completion.\nAnother line.\nI'll use attempt_completion.\n"
+			expect(detector.addChunk(chunk)).toBe(true)
+		})
+
+		it("should handle chunks that split lines across calls", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			// Line split across chunks
+			expect(detector.addChunk("I'll mention that I ver")).toBe(false)
+			expect(detector.addChunk("ified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(true)
+		})
+
+		it("should ignore short lines", () => {
+			const detector = new ReasoningRepetitionDetector(3, 20)
+
+			// Short lines should not trigger detection
+			expect(detector.addChunk("Yes.\n")).toBe(false)
+			expect(detector.addChunk("Yes.\n")).toBe(false)
+			expect(detector.addChunk("Yes.\n")).toBe(false)
+			expect(detector.addChunk("Yes.\n")).toBe(false)
+			expect(detector.addChunk("Yes.\n")).toBe(false)
+		})
+
+		it("should normalize whitespace when comparing lines", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			expect(detector.addChunk("  I'll  mention  that  I  verified  with  tests.  \n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(true)
+		})
+
+		it("should be case-insensitive when comparing lines", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'LL MENTION THAT I VERIFIED WITH TESTS.\n")).toBe(false)
+			expect(detector.addChunk("i'll Mention That I Verified With Tests.\n")).toBe(true)
+		})
+	})
+
+	describe("isRepetitive", () => {
+		it("should return false for non-repetitive content", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			detector.addChunk("Line one is unique and long enough.\n")
+			detector.addChunk("Line two is also unique and long.\n")
+			detector.addChunk("Line three is different from others.\n")
+
+			expect(detector.isRepetitive()).toBe(false)
+		})
+
+		it("should return true when repetition threshold is met", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			detector.addChunk("This line repeats a lot of times.\n")
+			detector.addChunk("This line repeats a lot of times.\n")
+			detector.addChunk("This line repeats a lot of times.\n")
+
+			expect(detector.isRepetitive()).toBe(true)
+		})
+
+		it("should process remaining buffer content", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			// Feed content without trailing newline - it stays in buffer
+			detector.addChunk("This line repeats a lot of times.\n")
+			detector.addChunk("This line repeats a lot of times.\n")
+			detector.addChunk("This line repeats a lot of times.") // No trailing newline
+
+			// The buffer hasn't been processed by addChunk, but isRepetitive should check it
+			expect(detector.isRepetitive()).toBe(true)
+		})
+
+		it("should not mutate state when called repeatedly", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			// One unterminated occurrence sits in the buffer
+			detector.addChunk("This line repeats a lot of times.")
+
+			// Repeated queries must not count the buffered line again
+			expect(detector.isRepetitive()).toBe(false)
+			expect(detector.isRepetitive()).toBe(false)
+			expect(detector.isRepetitive()).toBe(false)
+			expect(detector.getMostRepeatedLine()).toBeUndefined()
+
+			// Completing the line records exactly one occurrence
+			expect(detector.addChunk("\n")).toBe(false)
+			expect(detector.getMostRepeatedLine()?.count).toBe(1)
+		})
+	})
+
+	describe("getMostRepeatedLine", () => {
+		it("should return undefined when no lines have been processed", () => {
+			const detector = new ReasoningRepetitionDetector()
+			expect(detector.getMostRepeatedLine()).toBeUndefined()
+		})
+
+		it("should return the most repeated line", () => {
+			const detector = new ReasoningRepetitionDetector()
+
+			detector.addChunk("I'll mention that I verified with tests.\n")
+			detector.addChunk("I'll mention that I reverted the tests.\n")
+			detector.addChunk("I'll mention that I verified with tests.\n")
+			detector.addChunk("I'll mention that I verified with tests.\n")
+			detector.addChunk("I'll mention that I reverted the tests.\n")
+
+			const result = detector.getMostRepeatedLine()
+			expect(result).toBeDefined()
+			expect(result!.line).toBe("i'll mention that i verified with tests.")
+			expect(result!.count).toBe(3)
+		})
+	})
+
+	describe("reset", () => {
+		it("should clear all state", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			// Add some repetitive content
+			detector.addChunk("I'll mention that I verified with tests.\n")
+			detector.addChunk("I'll mention that I verified with tests.\n")
+
+			// Reset
+			detector.reset()
+
+			// Should start fresh - previous counts should be gone
+			expect(detector.isRepetitive()).toBe(false)
+			expect(detector.getMostRepeatedLine()).toBeUndefined()
+
+			// Should need full threshold again
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(false)
+			expect(detector.addChunk("I'll mention that I verified with tests.\n")).toBe(true)
+		})
+	})
+
+	describe("default threshold", () => {
+		it("should use default threshold of 5", () => {
+			const detector = new ReasoningRepetitionDetector()
+
+			expect(detector.addChunk("I'll use attempt_completion to finish.\n")).toBe(false)
+			expect(detector.addChunk("I'll use attempt_completion to finish.\n")).toBe(false)
+			expect(detector.addChunk("I'll use attempt_completion to finish.\n")).toBe(false)
+			expect(detector.addChunk("I'll use attempt_completion to finish.\n")).toBe(false)
+			// 5th time should trigger
+			expect(detector.addChunk("I'll use attempt_completion to finish.\n")).toBe(true)
+		})
+	})
+
+	describe("mixed content patterns", () => {
+		it("should detect alternating repetitive lines (A-B-A-B pattern)", () => {
+			const detector = new ReasoningRepetitionDetector(4)
+
+			const lines = [
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that I verified with tests.\n",
+				"I'll mention that I reverted the tests.\n",
+				"I'll mention that I verified with tests.\n",
+			]
+
+			let detected = false
+			for (const line of lines) {
+				if (detector.addChunk(line)) {
+					detected = true
+					break
+				}
+			}
+
+			expect(detected).toBe(true)
+		})
+
+		it("should not flag lines that appear below threshold among varied content", () => {
+			const detector = new ReasoningRepetitionDetector(5)
+
+			const lines = [
+				"I need to analyze the codebase structure.\n",
+				"Let me check the implementation details.\n",
+				"I'll look at the test coverage next.\n",
+				"I need to analyze the codebase structure.\n", // 2nd
+				"The function signature looks correct to me.\n",
+				"Let me verify the error handling path.\n",
+				"I need to analyze the codebase structure.\n", // 3rd
+				"The return type should be Promise.\n",
+				"I need to analyze the codebase structure.\n", // 4th - still below threshold of 5
+			]
+
+			let detected = false
+			for (const line of lines) {
+				if (detector.addChunk(line)) {
+					detected = true
+					break
+				}
+			}
+
+			expect(detected).toBe(false)
+		})
+
+		it("should handle streaming chunks of varying sizes", () => {
+			const detector = new ReasoningRepetitionDetector(3)
+
+			// Simulate realistic streaming with small and large chunks
+			expect(detector.addChunk("I'll")).toBe(false)
+			expect(detector.addChunk(" use attempt_completion")).toBe(false)
+			expect(detector.addChunk(" to finish the task.\nI'll use attempt_completion to finish the task.\nI")).toBe(
+				false,
+			)
+			expect(detector.addChunk("'ll use attempt_completion to finish the task.\n")).toBe(true)
+		})
+	})
+})