56 changes: 4 additions & 52 deletions src/api/providers/anthropic-vertex.ts
@@ -26,6 +26,7 @@ import {
handleAiSdkError,
} from "../transform/ai-sdk"
import { calculateApiCostAnthropic } from "../../shared/cost"
import { applyCacheBreakpoints } from "../transform/cache-breakpoints"

import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
@@ -124,36 +125,10 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
anthropicProviderOptions.disableParallelToolUse = true
}

/**
* Vertex API has specific limitations for prompt caching:
* 1. Maximum of 4 blocks can have cache_control
* 2. Only text blocks can be cached (images and other content types cannot)
* 3. Cache control can only be applied to user messages, not assistant messages
*
* Our caching strategy:
* - Cache the system prompt (1 block)
* - Cache the last text block of the second-to-last user message (1 block)
* - Cache the last text block of the last user message (1 block)
* This ensures we stay under the 4-block limit while maintaining effective caching
* for the most relevant context.
*/
// Apply cache control to user messages (Vertex allows up to 4 cache_control blocks;
// 1 for system prompt + 2 for the last 2 user message batches).
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

const userMsgIndices = messages.reduce(
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
}
applyCacheBreakpoints(messages, { cacheProviderOption })

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
@@ -260,29 +235,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
}
}

/**
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache control lands on the right message.
*/
private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId
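The shared helper referenced above lives in src/api/transform/cache-breakpoints.ts, which is not part of this diff. The following is a minimal sketch of its default behavior, reconstructed from the inline logic it replaces in anthropic-vertex.ts and anthropic.ts; the RooMessage shape and the options interface are assumptions inferred from the call sites, not the actual implementation.

// Sketch only: the real helper (src/api/transform/cache-breakpoints.ts) is not shown in this diff.
// Default behavior inferred from the removed inline logic: apply the provider-specific cache
// option to the last two user messages (write-to-cache + read-from-cache).
type RooMessage = { role: string; providerOptions?: Record<string, Record<string, unknown>> }

interface CacheBreakpointOptions {
	cacheProviderOption: Record<string, Record<string, unknown>>
	maxMessageBreakpoints?: number // assumed to default to 2
	useAnchor?: boolean
	anchorThreshold?: number
}

export function applyCacheBreakpoints(messages: RooMessage[], options: CacheBreakpointOptions): void {
	const { cacheProviderOption } = options

	// Indices of user messages, in conversation order.
	const userIndices = messages.reduce<number[]>(
		(acc, msg, idx) => (msg.role === "user" ? [...acc, idx] : acc),
		[],
	)

	// Mark the last two user messages, mirroring the per-provider logic this PR removes.
	const targets = new Set(userIndices.slice(-2))

	for (const idx of targets) {
		messages[idx].providerOptions = { ...messages[idx].providerOptions, ...cacheProviderOption }
	}
}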
41 changes: 2 additions & 39 deletions src/api/providers/anthropic.ts
@@ -25,6 +25,7 @@ import {
handleAiSdkError,
} from "../transform/ai-sdk"
import { calculateApiCostAnthropic } from "../../shared/cost"
import { applyCacheBreakpoints } from "../transform/cache-breakpoints"

import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
@@ -114,22 +115,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
// Apply cache control to user messages
// Strategy: cache the last 2 user messages (write-to-cache + read-from-cache)
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

const userMsgIndices = messages.reduce(
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
}
applyCacheBreakpoints(messages, { cacheProviderOption })

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
@@ -236,29 +222,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
}
}

/**
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache control lands on the right message.
*/
private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
70 changes: 7 additions & 63 deletions src/api/providers/bedrock.ts
@@ -32,6 +32,7 @@ import {
handleAiSdkError,
} from "../transform/ai-sdk"
import { getModelParams } from "../transform/model-params"
import { applyCacheBreakpoints } from "../transform/cache-breakpoints"
import { shouldUseReasoningBudget } from "../../shared/api"
import { BaseProvider } from "./base-provider"
import { DEFAULT_HEADERS } from "./constants"
@@ -276,46 +277,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH

if (usePromptCache) {
const cachePointOption = { bedrock: { cachePoint: { type: "default" as const } } }

// Find all user message indices in the original (pre-conversion) message array.
const originalUserIndices = filteredMessages.reduce<number[]>(
(acc, msg, idx) => ("role" in msg && msg.role === "user" ? [...acc, idx] : acc),
[],
)

// Select up to 3 user messages for cache points (system prompt uses the 4th):
// - Last user message: write to cache for next request
// - Second-to-last user message: read from cache for current request
// - An "anchor" message earlier in the conversation for 20-block window coverage
const targetOriginalIndices = new Set<number>()
const numUserMsgs = originalUserIndices.length

if (numUserMsgs >= 1) {
// Always cache the last user message
targetOriginalIndices.add(originalUserIndices[numUserMsgs - 1])
}
if (numUserMsgs >= 2) {
// Cache the second-to-last user message
targetOriginalIndices.add(originalUserIndices[numUserMsgs - 2])
}
if (numUserMsgs >= 5) {
// Add an anchor cache point roughly in the first third of user messages.
// This ensures that the 20-block lookback from the second-to-last breakpoint
// can find a stable cache entry, covering all the assistant and tool messages
// in the middle of the conversation. We pick the user message at ~1/3 position.
const anchorIdx = Math.floor(numUserMsgs / 3)
// Only add if it's not already one of the last-2 targets
if (!targetOriginalIndices.has(originalUserIndices[anchorIdx])) {
targetOriginalIndices.add(originalUserIndices[anchorIdx])
}
}

// Apply cachePoint to the correct AI SDK messages by walking both arrays in parallel.
// A single original user message with tool_results becomes [tool-role msg, user-role msg]
// in the AI SDK array, while a plain user message becomes [user-role msg].
if (targetOriginalIndices.size > 0) {
this.applyCachePointsToAiSdkMessages(aiSdkMessages, targetOriginalIndices, cachePointOption)
}
applyCacheBreakpoints(aiSdkMessages as RooMessage[], {
cacheProviderOption: cachePointOption,
maxMessageBreakpoints: 3,
useAnchor: true,
anchorThreshold: 5,
})
}

// Build streamText request
@@ -734,29 +701,6 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
)
}

/**
* Apply cachePoint providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache points land on the right message.
*/
private applyCachePointsToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cachePointOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cachePointOption,
}
}
}
}

/************************************************************************************
*
* AMAZON REGIONS
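bedrock.ts calls the same helper with additional options (maxMessageBreakpoints: 3, useAnchor: true, anchorThreshold: 5). Extending the sketch above, the anchor handling reconstructed from the Bedrock-specific code this PR removes might look roughly like the following; the option semantics are assumptions based on the old inline comments, not the actual implementation.

// Sketch only: continues the applyCacheBreakpoints sketch above. When useAnchor is set and the
// conversation has at least anchorThreshold user messages, add one more breakpoint on the user
// message roughly one third into the conversation, so the 20-block cache lookback from the
// second-to-last breakpoint can find a stable cache entry covering the middle of the conversation.
if (options.useAnchor && userIndices.length >= (options.anchorThreshold ?? 5)) {
	const anchorIdx = userIndices[Math.floor(userIndices.length / 3)]
	// Skip if the anchor already coincides with one of the last-two targets, and stay within the
	// breakpoint budget (assumed meaning of maxMessageBreakpoints).
	if (!targets.has(anchorIdx) && targets.size < (options.maxMessageBreakpoints ?? 3)) {
		targets.add(anchorIdx)
	}
}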
32 changes: 2 additions & 30 deletions src/api/providers/minimax.ts
@@ -16,6 +16,7 @@ import {
handleAiSdkError,
} from "../transform/ai-sdk"
import { calculateApiCostAnthropic } from "../../shared/cost"
import { applyCacheBreakpoints } from "../transform/cache-breakpoints"

import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
@@ -95,21 +96,7 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
}

const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
const userMsgIndices = mergedMessages.reduce(
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(aiSdkMessages, targetIndices, cacheProviderOption)
}
applyCacheBreakpoints(aiSdkMessages as RooMessage[], { cacheProviderOption })

const requestOptions = {
model: this.client(modelConfig.id),
@@ -212,21 +199,6 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
}
}

private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
