diff --git a/src/api/transform/caching/__tests__/anthropic.spec.ts b/src/api/transform/caching/__tests__/anthropic.spec.ts
deleted file mode 100644
index b0a6269cd81..00000000000
--- a/src/api/transform/caching/__tests__/anthropic.spec.ts
+++ /dev/null
@@ -1,181 +0,0 @@
-// npx vitest run src/api/transform/caching/__tests__/anthropic.spec.ts
-
-import OpenAI from "openai"
-
-import { addCacheBreakpoints } from "../anthropic"
-
-describe("addCacheBreakpoints (Anthropic)", () => {
-	const systemPrompt = "You are a helpful assistant."
-
-	it("should always add a cache breakpoint to the system prompt", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "Hello" },
-		]
-
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[0].content).toEqual([
-			{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should not add breakpoints to user messages if there are none", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "system", content: systemPrompt }]
-		const originalMessages = JSON.parse(JSON.stringify(messages))
-
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[0].content).toEqual([
-			{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } },
-		])
-
-		expect(messages.length).toBe(originalMessages.length)
-	})
-
-	it("should add a breakpoint to the only user message if only one exists", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" },
-		]
-
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[1].content).toEqual([
-			{ type: "text", text: "User message 1", cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should add breakpoints to both user messages if only two exist", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" },
-			{ role: "user", content: "User message 2" },
-		]
-
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[1].content).toEqual([
-			{ type: "text", text: "User message 1", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(messages[2].content).toEqual([
-			{ type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should add breakpoints to the last two user messages when more than two exist", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" }, // Should not get breakpoint.
-			{ role: "user", content: "User message 2" }, // Should get breakpoint.
-			{ role: "user", content: "User message 3" }, // Should get breakpoint.
-		]
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[1].content).toEqual([{ type: "text", text: "User message 1" }])
-
-		expect(messages[2].content).toEqual([
-			{ type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(messages[3].content).toEqual([
-			{ type: "text", text: "User message 3", cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should handle assistant messages correctly when finding last two user messages", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" }, // Should not get breakpoint.
-			{ role: "assistant", content: "Assistant response 1" },
-			{ role: "user", content: "User message 2" }, // Should get breakpoint (second to last user).
-			{ role: "assistant", content: "Assistant response 2" },
-			{ role: "user", content: "User message 3" }, // Should get breakpoint (last user).
-			{ role: "assistant", content: "Assistant response 3" },
-		]
-		addCacheBreakpoints(systemPrompt, messages)
-
-		const userMessages = messages.filter((m) => m.role === "user")
-
-		expect(userMessages[0].content).toEqual([{ type: "text", text: "User message 1" }])
-
-		expect(userMessages[1].content).toEqual([
-			{ type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(userMessages[2].content).toEqual([
-			{ type: "text", text: "User message 3", cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should add breakpoint to the last text part if content is an array", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" },
-			{
-				role: "user",
-				content: [
-					{ type: "text", text: "This is the last user message." },
-					{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
-					{ type: "text", text: "This part should get the breakpoint." },
-				],
-			},
-		]
-
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[1].content).toEqual([
-			{ type: "text", text: "User message 1", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(messages[2].content).toEqual([
-			{ type: "text", text: "This is the last user message." },
-			{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
-			{ type: "text", text: "This part should get the breakpoint.", cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should add a placeholder text part if the target message has no text parts", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" },
-			{
-				role: "user",
-				content: [{ type: "image_url", image_url: { url: "data:image/png;base64,..." } }],
-			},
-		]
-
-		addCacheBreakpoints(systemPrompt, messages)
-
-		expect(messages[1].content).toEqual([
-			{ type: "text", text: "User message 1", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(messages[2].content).toEqual([
-			{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
-			{ type: "text", text: "...", cache_control: { type: "ephemeral" } }, // Placeholder added.
-		])
-	})
-
-	it("should ensure content is array format even if no breakpoint added", () => {
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			{ role: "user", content: "User message 1" }, // String content, no breakpoint.
- { role: "user", content: "User message 2" }, // Gets breakpoint. - { role: "user", content: "User message 3" }, // Gets breakpoint. - ] - - addCacheBreakpoints(systemPrompt, messages) - - expect(messages[1].content).toEqual([{ type: "text", text: "User message 1" }]) - - expect(messages[2].content).toEqual([ - { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } }, - ]) - - expect(messages[3].content).toEqual([ - { type: "text", text: "User message 3", cache_control: { type: "ephemeral" } }, - ]) - }) -}) diff --git a/src/api/transform/caching/__tests__/gemini.spec.ts b/src/api/transform/caching/__tests__/gemini.spec.ts deleted file mode 100644 index e7268da7fbb..00000000000 --- a/src/api/transform/caching/__tests__/gemini.spec.ts +++ /dev/null @@ -1,266 +0,0 @@ -// npx vitest run src/api/transform/caching/__tests__/gemini.spec.ts - -import OpenAI from "openai" - -import { addCacheBreakpoints } from "../gemini" - -describe("addCacheBreakpoints", () => { - const systemPrompt = "You are a helpful assistant." - - it("should always add a cache breakpoint to the system prompt", () => { - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - { role: "user", content: "Hello" }, - ] - addCacheBreakpoints(systemPrompt, messages, 10) // Pass frequency - expect(messages[0].content).toEqual([ - { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, - ]) - }) - - it("should not add breakpoints for fewer than N user messages", () => { - const frequency = 5 - - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...Array.from({ length: frequency - 1 }, (_, i) => ({ - role: "user" as const, - content: `User message ${i + 1}`, - })), - ] - - const originalMessages = JSON.parse(JSON.stringify(messages)) - - addCacheBreakpoints(systemPrompt, messages, frequency) - - expect(messages[0].content).toEqual([ - { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, - ]) - - for (let i = 1; i < messages.length; i++) { - const originalContent = originalMessages[i].content - - const expectedContent = - typeof originalContent === "string" ? [{ type: "text", text: originalContent }] : originalContent - - expect(messages[i].content).toEqual(expectedContent) - } - }) - - it("should add a breakpoint to the Nth user message", () => { - const frequency = 5 - - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...Array.from({ length: frequency }, (_, i) => ({ - role: "user" as const, - content: `User message ${i + 1}`, - })), - ] - - addCacheBreakpoints(systemPrompt, messages, frequency) - - // Check Nth user message (index 'frequency' in the full array). - expect(messages[frequency].content).toEqual([ - { type: "text", text: `User message ${frequency}`, cache_control: { type: "ephemeral" } }, - ]) - - // Check (N-1)th user message (index frequency-1) - should be unchanged. 
-		expect(messages[frequency - 1].content).toEqual([{ type: "text", text: `User message ${frequency - 1}` }])
-	})
-
-	it("should add breakpoints to the Nth and 2*Nth user messages", () => {
-		const frequency = 5
-
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...Array.from({ length: frequency * 2 }, (_, i) => ({
-				role: "user" as const,
-				content: `User message ${i + 1}`,
-			})),
-		]
-
-		expect(messages.length).toEqual(frequency * 2 + 1)
-
-		addCacheBreakpoints(systemPrompt, messages, frequency)
-
-		const indices = []
-
-		for (let i = 0; i < messages.length; i++) {
-			const content = messages[i].content?.[0]
-
-			if (typeof content === "object" && "cache_control" in content) {
-				indices.push(i)
-			}
-		}
-
-		expect(indices).toEqual([0, 5, 10])
-
-		// Check Nth user message (index frequency)
-		expect(messages[frequency].content).toEqual([
-			{ type: "text", text: `User message ${frequency}`, cache_control: { type: "ephemeral" } },
-		])
-
-		// Check (2*N-1)th user message (index 2*frequency-1) - unchanged
-		expect(messages[frequency * 2 - 1].content).toEqual([
-			{ type: "text", text: `User message ${frequency * 2 - 1}` },
-		])
-
-		// Check 2*Nth user message (index 2*frequency)
-		expect(messages[frequency * 2].content).toEqual([
-			{ type: "text", text: `User message ${frequency * 2}`, cache_control: { type: "ephemeral" } },
-		])
-	})
-
-	it("should handle assistant messages correctly when counting user messages", () => {
-		const frequency = 5
-
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			// N-1 user messages
-			...Array.from({ length: frequency - 1 }, (_, i) => ({
-				role: "user" as const,
-				content: `User message ${i + 1}`,
-			})),
-			{ role: "assistant", content: "Assistant response" },
-			{ role: "user", content: `User message ${frequency}` }, // This is the Nth user message.
-			{ role: "assistant", content: "Another response" },
-			{ role: "user", content: `User message ${frequency + 1}` },
-		]
-
-		addCacheBreakpoints(systemPrompt, messages, frequency)
-
-		// Find the Nth user message.
-		const nthUserMessage = messages.filter((m) => m.role === "user")[frequency - 1]
-		expect(nthUserMessage.content).toEqual([
-			{ type: "text", text: `User message ${frequency}`, cache_control: { type: "ephemeral" } },
-		])
-
-		// Check the (N+1)th user message is unchanged.
-		const nPlusOneUserMessage = messages.filter((m) => m.role === "user")[frequency]
-		expect(nPlusOneUserMessage.content).toEqual([{ type: "text", text: `User message ${frequency + 1}` }])
-	})
-
-	it("should add breakpoint to the last text part if content is an array", () => {
-		const frequency = 5
-
-		const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...Array.from({ length: frequency - 1 }, (_, i) => ({
-				role: "user" as const,
-				content: `User message ${i + 1}`,
-			})),
-			{
-				role: "user", // Nth user message
-				content: [
-					{ type: "text", text: `This is the ${frequency}th user message.` },
-					{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
-					{ type: "text", text: "This part should get the breakpoint." },
-				],
-			},
-		]
-
-		addCacheBreakpoints(systemPrompt, messages, frequency)
-
-		expect(messages[frequency].content).toEqual([
-			{ type: "text", text: `This is the ${frequency}th user message.` },
-			{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
} }, - { type: "text", text: "This part should get the breakpoint.", cache_control: { type: "ephemeral" } }, - ]) - }) - - it("should add a placeholder text part if the target message has no text parts", () => { - const frequency = 5 - - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...Array.from({ length: frequency - 1 }, (_, i) => ({ - role: "user" as const, - content: `User message ${i + 1}`, - })), - { - role: "user", // Nth user message. - content: [{ type: "image_url", image_url: { url: "data:image/png;base64,..." } }], - }, - ] - - addCacheBreakpoints(systemPrompt, messages, frequency) - - expect(messages[frequency].content).toEqual([ - { type: "image_url", image_url: { url: "data:image/png;base64,..." } }, - { type: "text", text: "...", cache_control: { type: "ephemeral" } }, - ]) - }) - - it("should add breakpoints correctly with frequency 5", () => { - const frequency = 5 - - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...Array.from({ length: 12 }, (_, i) => ({ - role: "user" as const, - content: `User message ${i + 1}`, - })), - ] - - addCacheBreakpoints(systemPrompt, messages, frequency) - - // Check 5th user message (index 5). - expect(messages[5].content).toEqual([ - { type: "text", text: "User message 5", cache_control: { type: "ephemeral" } }, - ]) - - // Check 9th user message (index 9) - unchanged - expect(messages[9].content).toEqual([{ type: "text", text: "User message 9" }]) - - // Check 10th user message (index 10). - expect(messages[10].content).toEqual([ - { type: "text", text: "User message 10", cache_control: { type: "ephemeral" } }, - ]) - - // Check 11th user message (index 11) - unchanged - expect(messages[11].content).toEqual([{ type: "text", text: "User message 11" }]) - }) - - it("should not add breakpoints (except system) if frequency is 0", () => { - const frequency = 0 - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ - { role: "system", content: systemPrompt }, - ...Array.from({ length: 15 }, (_, i) => ({ - role: "user" as const, - content: `User message ${i + 1}`, - })), - ] - const originalMessages = JSON.parse(JSON.stringify(messages)) - - addCacheBreakpoints(systemPrompt, messages, frequency) - - // Check system prompt. - expect(messages[0].content).toEqual([ - { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, - ]) - - // Check all user messages - none should have cache_control - for (let i = 1; i < messages.length; i++) { - const originalContent = originalMessages[i].content - - const expectedContent = - typeof originalContent === "string" ? [{ type: "text", text: originalContent }] : originalContent - - expect(messages[i].content).toEqual(expectedContent) // Should match original (after string->array conversion). - - // Ensure no cache_control was added to user messages. - const content = messages[i].content - - if (Array.isArray(content)) { - // Assign to new variable after type check. - const contentParts = content - - contentParts.forEach((part: any) => { - // Iterate over the correctly typed variable. 
-					expect(part).not.toHaveProperty("cache_control")
-				})
-			}
-		}
-	})
-})
diff --git a/src/api/transform/caching/__tests__/vercel-ai-gateway.spec.ts b/src/api/transform/caching/__tests__/vercel-ai-gateway.spec.ts
deleted file mode 100644
index 86dc593f4f3..00000000000
--- a/src/api/transform/caching/__tests__/vercel-ai-gateway.spec.ts
+++ /dev/null
@@ -1,233 +0,0 @@
-// npx vitest run src/api/transform/caching/__tests__/vercel-ai-gateway.spec.ts
-
-import OpenAI from "openai"
-import { addCacheBreakpoints } from "../vercel-ai-gateway"
-
-describe("Vercel AI Gateway Caching", () => {
-	describe("addCacheBreakpoints", () => {
-		it("adds cache control to system message", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{ role: "user", content: "Hello" },
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			expect(messages[0]).toEqual({
-				role: "system",
-				content: systemPrompt,
-				cache_control: { type: "ephemeral" },
-			})
-		})
-
-		it("adds cache control to last two user messages with string content", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{ role: "user", content: "First message" },
-				{ role: "assistant", content: "First response" },
-				{ role: "user", content: "Second message" },
-				{ role: "assistant", content: "Second response" },
-				{ role: "user", content: "Third message" },
-				{ role: "assistant", content: "Third response" },
-				{ role: "user", content: "Fourth message" },
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			const lastUserMessage = messages[7]
-			expect(Array.isArray(lastUserMessage.content)).toBe(true)
-			if (Array.isArray(lastUserMessage.content)) {
-				const textPart = lastUserMessage.content.find((part) => part.type === "text")
-				expect(textPart).toEqual({
-					type: "text",
-					text: "Fourth message",
-					cache_control: { type: "ephemeral" },
-				})
-			}
-
-			const secondLastUserMessage = messages[5]
-			expect(Array.isArray(secondLastUserMessage.content)).toBe(true)
-			if (Array.isArray(secondLastUserMessage.content)) {
-				const textPart = secondLastUserMessage.content.find((part) => part.type === "text")
-				expect(textPart).toEqual({
-					type: "text",
-					text: "Third message",
-					cache_control: { type: "ephemeral" },
-				})
-			}
-		})
-
-		it("handles messages with existing array content", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{
-					role: "user",
-					content: [
-						{ type: "text", text: "Hello with image" },
-						{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
-					],
-				},
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			const userMessage = messages[1]
-			expect(Array.isArray(userMessage.content)).toBe(true)
-			if (Array.isArray(userMessage.content)) {
-				const textPart = userMessage.content.find((part) => part.type === "text")
-				expect(textPart).toEqual({
-					type: "text",
-					text: "Hello with image",
-					cache_control: { type: "ephemeral" },
-				})
-
-				const imagePart = userMessage.content.find((part) => part.type === "image_url")
-				expect(imagePart).toEqual({
-					type: "image_url",
-					image_url: { url: "data:image/png;base64,..." },
-				})
-			}
-		})
-
-		it("handles empty string content gracefully", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{ role: "user", content: "" },
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			const userMessage = messages[1]
-			expect(userMessage.content).toBe("")
-		})
-
-		it("handles messages with no text parts", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{
-					role: "user",
-					content: [{ type: "image_url", image_url: { url: "data:image/png;base64,..." } }],
-				},
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			const userMessage = messages[1]
-			expect(Array.isArray(userMessage.content)).toBe(true)
-			if (Array.isArray(userMessage.content)) {
-				const textPart = userMessage.content.find((part) => part.type === "text")
-				expect(textPart).toBeUndefined()
-
-				const imagePart = userMessage.content.find((part) => part.type === "image_url")
-				expect(imagePart).toEqual({
-					type: "image_url",
-					image_url: { url: "data:image/png;base64,..." },
-				})
-			}
-		})
-
-		it("processes only user messages for conversation caching", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{ role: "user", content: "First user" },
-				{ role: "assistant", content: "Assistant response" },
-				{ role: "user", content: "Second user" },
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			expect(messages[2]).toEqual({
-				role: "assistant",
-				content: "Assistant response",
-			})
-
-			const firstUser = messages[1]
-			const secondUser = messages[3]
-
-			expect(Array.isArray(firstUser.content)).toBe(true)
-			expect(Array.isArray(secondUser.content)).toBe(true)
-		})
-
-		it("handles case with only one user message", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{ role: "user", content: "Only message" },
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			const userMessage = messages[1]
-			expect(Array.isArray(userMessage.content)).toBe(true)
-			if (Array.isArray(userMessage.content)) {
-				const textPart = userMessage.content.find((part) => part.type === "text")
-				expect(textPart).toEqual({
-					type: "text",
-					text: "Only message",
-					cache_control: { type: "ephemeral" },
-				})
-			}
-		})
-
-		it("handles case with no user messages", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{ role: "assistant", content: "Assistant only" },
-			]
-
-			addCacheBreakpoints(systemPrompt, messages)
-
-			expect(messages[0]).toEqual({
-				role: "system",
-				content: systemPrompt,
-				cache_control: { type: "ephemeral" },
-			})
-
-			expect(messages[1]).toEqual({
-				role: "assistant",
-				content: "Assistant only",
-			})
-		})
-
-		it("handles messages with multiple text parts", () => {
-			const systemPrompt = "You are a helpful assistant."
-			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-				{ role: "system", content: systemPrompt },
-				{
-					role: "user",
-					content: [
-						{ type: "text", text: "First part" },
-						{ type: "image_url", image_url: { url: "data:image/png;base64,..." } },
} }, - { type: "text", text: "Second part" }, - ], - }, - ] - - addCacheBreakpoints(systemPrompt, messages) - - const userMessage = messages[1] - if (Array.isArray(userMessage.content)) { - const textParts = userMessage.content.filter((part) => part.type === "text") - expect(textParts).toHaveLength(2) - - expect(textParts[0]).toEqual({ - type: "text", - text: "First part", - }) - - expect(textParts[1]).toEqual({ - type: "text", - text: "Second part", - cache_control: { type: "ephemeral" }, - }) - } - }) - }) -}) diff --git a/src/api/transform/caching/__tests__/vertex.spec.ts b/src/api/transform/caching/__tests__/vertex.spec.ts deleted file mode 100644 index 92489649bc1..00000000000 --- a/src/api/transform/caching/__tests__/vertex.spec.ts +++ /dev/null @@ -1,178 +0,0 @@ -// npx vitest run src/api/transform/caching/__tests__/vertex.spec.ts - -import { Anthropic } from "@anthropic-ai/sdk" - -import { addCacheBreakpoints } from "../vertex" - -describe("addCacheBreakpoints (Vertex)", () => { - it("should return an empty array if input is empty", () => { - const messages: Anthropic.Messages.MessageParam[] = [] - const result = addCacheBreakpoints(messages) - expect(result).toEqual([]) - expect(result).not.toBe(messages) // Ensure new array. - }) - - it("should not add breakpoints if there are no user messages", () => { - const messages: Anthropic.Messages.MessageParam[] = [{ role: "assistant", content: "Hello" }] - const originalMessages = JSON.parse(JSON.stringify(messages)) - const result = addCacheBreakpoints(messages) - expect(result).toEqual(originalMessages) // Should be unchanged. - expect(result).not.toBe(messages) // Ensure new array. - }) - - it("should add a breakpoint to the only user message if only one exists", () => { - const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "User message 1" }] - const result = addCacheBreakpoints(messages) - - expect(result).toHaveLength(1) - - expect(result[0].content).toEqual([ - { type: "text", text: "User message 1", cache_control: { type: "ephemeral" } }, - ]) - - expect(result).not.toBe(messages) // Ensure new array. - }) - - it("should add breakpoints to both user messages if only two exist", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "User message 1" }, - { role: "user", content: "User message 2" }, - ] - - const result = addCacheBreakpoints(messages) - expect(result).toHaveLength(2) - - expect(result[0].content).toEqual([ - { type: "text", text: "User message 1", cache_control: { type: "ephemeral" } }, - ]) - - expect(result[1].content).toEqual([ - { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } }, - ]) - - expect(result).not.toBe(messages) // Ensure new array. - }) - - it("should add breakpoints only to the last two user messages when more than two exist", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "User message 1" }, // Should not get breakpoint. - { role: "user", content: "User message 2" }, // Should get breakpoint. - { role: "user", content: "User message 3" }, // Should get breakpoint. 
-		]
-
-		const originalMessage1 = JSON.parse(JSON.stringify(messages[0]))
-		const result = addCacheBreakpoints(messages)
-
-		expect(result).toHaveLength(3)
-		expect(result[0]).toEqual(originalMessage1)
-
-		expect(result[1].content).toEqual([
-			{ type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(result[2].content).toEqual([
-			{ type: "text", text: "User message 3", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(result).not.toBe(messages) // Ensure new array.
-	})
-
-	it("should handle assistant messages correctly when finding last two user messages", () => {
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{ role: "user", content: "User message 1" }, // Should not get breakpoint.
-			{ role: "assistant", content: "Assistant response 1" }, // Should be unchanged.
-			{ role: "user", content: "User message 2" }, // Should get breakpoint (second to last user).
-			{ role: "assistant", content: "Assistant response 2" }, // Should be unchanged.
-			{ role: "user", content: "User message 3" }, // Should get breakpoint (last user).
-			{ role: "assistant", content: "Assistant response 3" }, // Should be unchanged.
-		]
-		const originalMessage1 = JSON.parse(JSON.stringify(messages[0]))
-		const originalAssistant1 = JSON.parse(JSON.stringify(messages[1]))
-		const originalAssistant2 = JSON.parse(JSON.stringify(messages[3]))
-		const originalAssistant3 = JSON.parse(JSON.stringify(messages[5]))
-
-		const result = addCacheBreakpoints(messages)
-		expect(result).toHaveLength(6)
-
-		expect(result[0]).toEqual(originalMessage1)
-		expect(result[1]).toEqual(originalAssistant1)
-
-		expect(result[2].content).toEqual([
-			{ type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(result[3]).toEqual(originalAssistant2)
-
-		expect(result[4].content).toEqual([
-			{ type: "text", text: "User message 3", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(result[5]).toEqual(originalAssistant3)
-		expect(result).not.toBe(messages) // Ensure new array.
-	})
-
-	it("should add breakpoint only to the last text part if content is an array", () => {
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{ role: "user", content: "User message 1" }, // Gets breakpoint.
-			{
-				role: "user", // Gets breakpoint.
-				content: [
-					{ type: "text", text: "First text part." }, // No breakpoint.
-					{ type: "image", source: { type: "base64", media_type: "image/png", data: "..." } },
-					{ type: "text", text: "Last text part." }, // Gets breakpoint.
-				],
-			},
-		]
-
-		const result = addCacheBreakpoints(messages)
-		expect(result).toHaveLength(2)
-
-		expect(result[0].content).toEqual([
-			{ type: "text", text: "User message 1", cache_control: { type: "ephemeral" } },
-		])
-
-		expect(result[1].content).toEqual([
-			{ type: "text", text: "First text part." }, // Unchanged.
-			{ type: "image", source: { type: "base64", media_type: "image/png", data: "..." } }, // Unchanged.
-			{ type: "text", text: "Last text part.", cache_control: { type: "ephemeral" } }, // Breakpoint added.
-		])
-
-		expect(result).not.toBe(messages) // Ensure new array.
-	})
-
-	it("should handle array content with no text parts gracefully", () => {
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{ role: "user", content: "User message 1" }, // Gets breakpoint.
-			{
-				role: "user", // Gets breakpoint, but has no text part to add it to.
-				content: [{ type: "image", source: { type: "base64", media_type: "image/png", data: "..." } }],
-			},
-		]
-
-		const originalMessage2 = JSON.parse(JSON.stringify(messages[1]))
-
-		const result = addCacheBreakpoints(messages)
-		expect(result).toHaveLength(2)
-
-		expect(result[0].content).toEqual([
-			{ type: "text", text: "User message 1", cache_control: { type: "ephemeral" } },
-		])
-
-		// Check second user message - should be unchanged as no text part found.
-		expect(result[1]).toEqual(originalMessage2)
-		expect(result).not.toBe(messages) // Ensure new array.
-	})
-
-	it("should not modify the original messages array", () => {
-		const messages: Anthropic.Messages.MessageParam[] = [
-			{ role: "user", content: "User message 1" },
-			{ role: "user", content: "User message 2" },
-		]
-		const originalMessagesCopy = JSON.parse(JSON.stringify(messages))
-
-		addCacheBreakpoints(messages)
-
-		// Verify original array is untouched.
-		expect(messages).toEqual(originalMessagesCopy)
-	})
-})
diff --git a/src/api/transform/caching/anthropic.ts b/src/api/transform/caching/anthropic.ts
deleted file mode 100644
index cff671a56ce..00000000000
--- a/src/api/transform/caching/anthropic.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-import OpenAI from "openai"
-
-export function addCacheBreakpoints(systemPrompt: string, messages: OpenAI.Chat.ChatCompletionMessageParam[]) {
-	messages[0] = {
-		role: "system",
-		// @ts-ignore-next-line
-		content: [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }],
-	}
-
-	// Ensure all user messages have content in array format first
-	for (const msg of messages) {
-		if (msg.role === "user" && typeof msg.content === "string") {
-			msg.content = [{ type: "text", text: msg.content }]
-		}
-	}
-
-	// Add `cache_control: ephemeral` to the last two user messages.
-	// (Note: this works because we only ever add one user message at a
-	// time, but if we added multiple we'd need to mark the user message
-	// before the last assistant message.)
-	messages
-		.filter((msg) => msg.role === "user")
-		.slice(-2)
-		.forEach((msg) => {
-			if (Array.isArray(msg.content)) {
-				// NOTE: This is fine since env details will always be added
-				// at the end. But if it wasn't there, and the user added a
-				// image_url type message, it would pop a text part before
-				// it and then move it after to the end.
-				let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
-
-				if (!lastTextPart) {
-					lastTextPart = { type: "text", text: "..." }
-					msg.content.push(lastTextPart)
-				}
-
-				// @ts-ignore-next-line
-				lastTextPart["cache_control"] = { type: "ephemeral" }
-			}
-		})
-}
diff --git a/src/api/transform/caching/gemini.ts b/src/api/transform/caching/gemini.ts
deleted file mode 100644
index 66d43e85553..00000000000
--- a/src/api/transform/caching/gemini.ts
+++ /dev/null
@@ -1,47 +0,0 @@
-import OpenAI from "openai"
-
-export function addCacheBreakpoints(
-	systemPrompt: string,
-	messages: OpenAI.Chat.ChatCompletionMessageParam[],
-	frequency: number = 10,
-) {
-	// *Always* cache the system prompt.
-	messages[0] = {
-		role: "system",
-		// @ts-ignore-next-line
-		content: [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }],
-	}
-
-	// Add breakpoints every N user messages based on frequency.
-	let count = 0
-
-	for (const msg of messages) {
-		if (msg.role !== "user") {
-			continue
-		}
-
-		// Ensure content is in array format for potential modification.
-		if (typeof msg.content === "string") {
-			msg.content = [{ type: "text", text: msg.content }]
-		}
-
-		const isNthMessage = count % frequency === frequency - 1
-
-		if (isNthMessage) {
-			if (Array.isArray(msg.content)) {
-				// Find the last text part to add the cache control to.
-				let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
-
-				if (!lastTextPart) {
-					lastTextPart = { type: "text", text: "..." } // Add a placeholder if no text part exists.
-					msg.content.push(lastTextPart)
-				}
-
-				// @ts-ignore-next-line - Add cache control property
-				lastTextPart["cache_control"] = { type: "ephemeral" }
-			}
-		}
-
-		count++
-	}
-}
diff --git a/src/api/transform/caching/vercel-ai-gateway.ts b/src/api/transform/caching/vercel-ai-gateway.ts
deleted file mode 100644
index 82eff0cd7bf..00000000000
--- a/src/api/transform/caching/vercel-ai-gateway.ts
+++ /dev/null
@@ -1,30 +0,0 @@
-import OpenAI from "openai"
-
-export function addCacheBreakpoints(systemPrompt: string, messages: OpenAI.Chat.ChatCompletionMessageParam[]) {
-	// Apply cache_control to system message at the message level
-	messages[0] = {
-		role: "system",
-		content: systemPrompt,
-		// @ts-ignore-next-line
-		cache_control: { type: "ephemeral" },
-	}
-
-	// Add cache_control to the last two user messages for conversation context caching
-	const lastTwoUserMessages = messages.filter((msg) => msg.role === "user").slice(-2)
-
-	lastTwoUserMessages.forEach((msg) => {
-		if (typeof msg.content === "string" && msg.content.length > 0) {
-			msg.content = [{ type: "text", text: msg.content }]
-		}
-
-		if (Array.isArray(msg.content)) {
-			// Find the last text part in the message content
-			let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
-
-			if (lastTextPart && lastTextPart.text && lastTextPart.text.length > 0) {
-				// @ts-ignore-next-line
-				lastTextPart["cache_control"] = { type: "ephemeral" }
-			}
-		}
-	})
-}
diff --git a/src/api/transform/caching/vertex.ts b/src/api/transform/caching/vertex.ts
deleted file mode 100644
index 48bf2615873..00000000000
--- a/src/api/transform/caching/vertex.ts
+++ /dev/null
@@ -1,49 +0,0 @@
-import { Anthropic } from "@anthropic-ai/sdk"
-
-export function addCacheBreakpoints(messages: Anthropic.Messages.MessageParam[]) {
-	// Find indices of user messages that we want to cache.
-	// We only cache the last two user messages to stay within the 4-block limit
-	// (1 block for system + 1 block each for last two user messages = 3 total).
-	const indices = messages.reduce((acc, msg, i) => (msg.role === "user" ? [...acc, i] : acc), [] as number[])
-
-	// Only cache the last two user messages.
-	const lastIndex = indices[indices.length - 1] ?? -1
-	const secondLastIndex = indices[indices.length - 2] ?? -1
-
-	return messages.map((message, index) =>
-		message.role !== "assistant" && (index === lastIndex || index === secondLastIndex)
-			? cachedMessage(message)
-			: message,
-	)
-}
-
-function cachedMessage(message: Anthropic.Messages.MessageParam): Anthropic.Messages.MessageParam {
-	// For string content, we convert to array format with optional cache control.
-	if (typeof message.content === "string") {
-		return {
-			...message,
-			// For string content, we only have one block so it's always the last block.
-			content: [{ type: "text" as const, text: message.content, cache_control: { type: "ephemeral" } }],
-		}
-	}
-
-	// For array content, find the last text block index once before mapping.
-	const lastTextBlockIndex = message.content.reduce(
-		(lastIndex, content, index) => (content.type === "text" ? index : lastIndex),
-		-1,
-	)
-
-	// Then use this pre-calculated index in the map function.
-	return {
-		...message,
-		content: message.content.map((content, index) =>
-			content.type === "text"
-				? {
-						...content,
-						// Check if this is the last text block using our pre-calculated index.
-						...(index === lastTextBlockIndex && { cache_control: { type: "ephemeral" } }),
-					}
-				: content,
-		),
-	}
-}