diff --git a/core/config/onboarding.ts b/core/config/onboarding.ts index dde0fefe70a..e54f73665c7 100644 --- a/core/config/onboarding.ts +++ b/core/config/onboarding.ts @@ -23,6 +23,30 @@ const GEMINI_MODEL_CONFIG = { apiKeyInputName: "GEMINI_API_KEY", }; +const DEEPSEEK_MODEL_CONFIG = { + apiKeyInputName: "DEEPSEEK_API_KEY", + models: [ + { + slug: "deepseek/deepseek-chat", + model: "deepseek-chat", + name: "DeepSeek Chat", + contextLength: 131072, + maxTokens: 8192, + apiBase: "https://api.deepseek.com/", + roles: undefined, + }, + { + slug: "deepseek/deepseek-reasoner", + model: "deepseek-reasoner", + name: "DeepSeek Reasoner", + contextLength: 131072, + maxTokens: 32000, + apiBase: "https://api.deepseek.com/", + roles: undefined, + }, + ], +}; + /** * We set the "best" chat + autocopmlete models by default * whenever a user doesn't have a config.json @@ -97,6 +121,25 @@ export function setupProviderConfig( }, })); break; + case "deepseek": + newModels = DEEPSEEK_MODEL_CONFIG.models.map((modelConfig) => { + const model: any = { + name: modelConfig.name, + provider: "deepseek", + model: modelConfig.model, + apiKey, + contextLength: modelConfig.contextLength, + defaultCompletionOptions: { + maxTokens: modelConfig.maxTokens, + }, + roles: modelConfig.roles, + }; + if (modelConfig.apiBase) { + model.apiBase = modelConfig.apiBase; + } + return model; + }); + break; default: throw new Error(`Unknown provider: ${provider}`); } diff --git a/core/config/yaml/models.ts b/core/config/yaml/models.ts index 9e7a9ccdde4..4955e61e3e4 100644 --- a/core/config/yaml/models.ts +++ b/core/config/yaml/models.ts @@ -2,6 +2,7 @@ import { mergeConfigYamlRequestOptions, ModelConfig, } from "@continuedev/config-yaml"; +import { findLlmInfo } from "@continuedev/llm-info"; import { ContinueConfig, ILLMLogger, LLMOptions } from "../.."; import { BaseLLM } from "../../llm"; @@ -15,6 +16,82 @@ function getModelClass( return LLMClasses.find((llm) => llm.providerName === model.provider); } 
+function applyCapabilities(options: LLMOptions, model: ModelConfig): void { + const { capabilities } = model; + // Model capabilities - need to be undefined if not found + // To fallback to our autodetection + if (capabilities?.find((c) => c === "tool_use")) { + options.capabilities = { + ...options.capabilities, + tools: true, + }; + } + + if (capabilities?.find((c) => c === "image_input")) { + options.capabilities = { + ...options.capabilities, + uploadImage: true, + }; + } +} + +function applyEmbedOptions(options: LLMOptions, model: ModelConfig): void { + if (model.embedOptions?.maxBatchSize) { + options.maxEmbeddingBatchSize = model.embedOptions.maxBatchSize; + } + if (model.embedOptions?.maxChunkSize) { + options.maxEmbeddingChunkSize = model.embedOptions.maxChunkSize; + } +} + +function applyEnvOptions( + options: LLMOptions, + env: Record, +): void { + if ( + "useLegacyCompletionsEndpoint" in env && + typeof env.useLegacyCompletionsEndpoint === "boolean" + ) { + options.useLegacyCompletionsEndpoint = env.useLegacyCompletionsEndpoint; + } + if ("apiType" in env && typeof env.apiType === "string") { + options.apiType = env.apiType; + } + if ("apiVersion" in env && typeof env.apiVersion === "string") { + options.apiVersion = env.apiVersion; + } + if ("deployment" in env && typeof env.deployment === "string") { + options.deployment = env.deployment; + } + if ("deploymentId" in env && typeof env.deploymentId === "string") { + options.deploymentId = env.deploymentId; + } + if ("projectId" in env && typeof env.projectId === "string") { + options.projectId = env.projectId; + } + if ("region" in env && typeof env.region === "string") { + options.region = env.region; + } + if ("profile" in env && typeof env.profile === "string") { + options.profile = env.profile; + } + if ("accessKeyId" in env && typeof env.accessKeyId === "string") { + options.accessKeyId = env.accessKeyId; + } + if ("secretAccessKey" in env && typeof env.secretAccessKey === "string") { + 
options.secretAccessKey = env.secretAccessKey; + } + if ("modelArn" in env && typeof env.modelArn === "string") { + options.modelArn = env.modelArn; + } + if ("aiGatewaySlug" in env && typeof env.aiGatewaySlug === "string") { + options.aiGatewaySlug = env.aiGatewaySlug; + } + if ("accountId" in env && typeof env.accountId === "string") { + options.accountId = env.accountId; + } +} + // function getContinueProxyModelName( // ownerSlug: string, // packageSlug: string, @@ -49,24 +126,40 @@ async function modelConfigToBaseLLM({ config.requestOptions, ); + const llmInfo = findLlmInfo(model.model, model.provider); + const contextLength = + model.defaultCompletionOptions?.contextLength ?? llmInfo?.contextLength; + const maxCompletionTokens = llmInfo?.maxCompletionTokens; + const defaultMaxTokens = + maxCompletionTokens && contextLength + ? Math.min(maxCompletionTokens, contextLength / 4) + : undefined; + let options: LLMOptions = { ...rest, - contextLength: model.defaultCompletionOptions?.contextLength, + contextLength: contextLength, completionOptions: { ...(model.defaultCompletionOptions ?? {}), model: model.model, maxTokens: model.defaultCompletionOptions?.maxTokens ?? - cls.defaultOptions?.completionOptions?.maxTokens, + cls.defaultOptions?.completionOptions?.maxTokens ?? + defaultMaxTokens, }, logger: llmLogger, uniqueId, title: model.name, template: model.promptTemplates?.chat, promptTemplates: model.promptTemplates, - baseAgentSystemMessage: model.chatOptions?.baseAgentSystemMessage, - basePlanSystemMessage: model.chatOptions?.basePlanSystemMessage, - baseChatSystemMessage: model.chatOptions?.baseSystemMessage, + baseAgentSystemMessage: + model.chatOptions?.baseAgentSystemMessage ?? + cls.defaultOptions?.baseAgentSystemMessage, + basePlanSystemMessage: + model.chatOptions?.basePlanSystemMessage ?? + cls.defaultOptions?.basePlanSystemMessage, + baseChatSystemMessage: + model.chatOptions?.baseSystemMessage ?? 
+ cls.defaultOptions?.baseChatSystemMessage, toolOverrides: model.chatOptions?.toolOverrides ? Object.entries(model.chatOptions.toolOverrides).map(([name, o]) => ({ name, @@ -83,76 +176,14 @@ async function modelConfigToBaseLLM({ requestOptions: mergedRequestOptions, }; - // Model capabilities - need to be undefined if not found - // To fallback to our autodetection - if (capabilities?.find((c) => c === "tool_use")) { - options.capabilities = { - ...options.capabilities, - tools: true, - }; - } + // Apply capabilities from model config + applyCapabilities(options, model); - if (capabilities?.find((c) => c === "image_input")) { - options.capabilities = { - ...options.capabilities, - uploadImage: true, - }; - } + applyEmbedOptions(options, model); - if (model.embedOptions?.maxBatchSize) { - options.maxEmbeddingBatchSize = model.embedOptions.maxBatchSize; - } - if (model.embedOptions?.maxChunkSize) { - options.maxEmbeddingChunkSize = model.embedOptions.maxChunkSize; - } - - // These are params that are at model config level in JSON - // But we decided to move to nested `env` in YAML - // Since types vary and we don't want to blindly spread env for now, - // Each one is handled individually here + // Apply environment-specific options const env = model.env ?? 
{}; - if ( - "useLegacyCompletionsEndpoint" in env && - typeof env.useLegacyCompletionsEndpoint === "boolean" - ) { - options.useLegacyCompletionsEndpoint = env.useLegacyCompletionsEndpoint; - } - if ("apiType" in env && typeof env.apiType === "string") { - options.apiType = env.apiType; - } - if ("apiVersion" in env && typeof env.apiVersion === "string") { - options.apiVersion = env.apiVersion; - } - if ("deployment" in env && typeof env.deployment === "string") { - options.deployment = env.deployment; - } - if ("deploymentId" in env && typeof env.deploymentId === "string") { - options.deploymentId = env.deploymentId; - } - if ("projectId" in env && typeof env.projectId === "string") { - options.projectId = env.projectId; - } - if ("region" in env && typeof env.region === "string") { - options.region = env.region; - } - if ("profile" in env && typeof env.profile === "string") { - options.profile = env.profile; - } - if ("accessKeyId" in env && typeof env.accessKeyId === "string") { - options.accessKeyId = env.accessKeyId; - } - if ("secretAccessKey" in env && typeof env.secretAccessKey === "string") { - options.secretAccessKey = env.secretAccessKey; - } - if ("modelArn" in env && typeof env.modelArn === "string") { - options.modelArn = env.modelArn; - } - if ("aiGatewaySlug" in env && typeof env.aiGatewaySlug === "string") { - options.aiGatewaySlug = env.aiGatewaySlug; - } - if ("accountId" in env && typeof env.accountId === "string") { - options.accountId = env.accountId; - } + applyEnvOptions(options, env); const llm = new cls(options); return llm; diff --git a/core/edit/recursiveStream.ts b/core/edit/recursiveStream.ts index 5c9f2fef6a6..142d764ac2e 100644 --- a/core/edit/recursiveStream.ts +++ b/core/edit/recursiveStream.ts @@ -39,7 +39,6 @@ export async function* recursiveStream( const injectApplyToken = type === "apply" && shouldInjectApplyToken(llm); if (typeof prompt === "string") { const finalPrompt = injectApplyToken ? 
prompt + APPLY_UNIQUE_TOKEN : prompt; - const generator = llm.streamComplete(finalPrompt, abortController.signal, { raw: true, prediction: undefined, @@ -84,9 +83,11 @@ export async function* recursiveStream( }); for await (const chunk of generator) { - yield chunk; - const rendered = renderChatMessage(chunk); - buffer += rendered; + if (chunk.role === "assistant") { + yield chunk; + const rendered = renderChatMessage(chunk); + buffer += rendered; + } totalTokens += countTokens(chunk.content); if (totalTokens >= safeTokens) { diff --git a/core/llm/autodetect.ts b/core/llm/autodetect.ts index 4085d3798b7..71db3549521 100644 --- a/core/llm/autodetect.ts +++ b/core/llm/autodetect.ts @@ -248,6 +248,7 @@ const PARALLEL_PROVIDERS: string[] = [ "vertexai", "function-network", "scaleway", + "deepseek", ]; function llmCanGenerateInParallel(provider: string, model: string): boolean { @@ -273,6 +274,9 @@ function isProviderHandlesTemplatingOrNoTemplateTypeRequired( modelName.includes("kimi") || modelName.includes("mercury") || modelName.includes("glm") || + modelName.includes("deepseek-chat") || + modelName.includes("deepseek-reasoner") || + modelName.includes("deepseek-fim-beta") || /^o\d/.test(modelName) ); } @@ -512,6 +516,10 @@ function autodetectPromptTemplates( editTemplate = gptEditPrompt; } else if (model.includes("codestral")) { editTemplate = osModelsEditPrompt; + } else if ( + ["deepseek-chat", "deepseek-reasoner", "deepseek-fim-beta"].includes(model) + ) { + editTemplate = osModelsEditPrompt; } if (editTemplate !== null) { diff --git a/core/llm/countTokens.test.ts b/core/llm/countTokens.test.ts index 4cbe13a5b59..17a029b0c82 100644 --- a/core/llm/countTokens.test.ts +++ b/core/llm/countTokens.test.ts @@ -366,3 +366,57 @@ describe("extractToolSequence", () => { expect(messages).toHaveLength(1); // User message should remain }); }); + +describe("compileChatMessages with prefill scenarios", () => { + test("should handle prefill scenario (last message is assistant)", 
() => { + const messages: ChatMessage[] = [ + { + role: "user", + content: "Please edit this code", + }, + { + role: "assistant", + content: "Sure! Here's the edited code:", + }, + ]; + + // This should not throw an error about missing user/tool message + expect(() => { + compileChatMessages({ + modelName: "gpt-4", + msgs: messages, + knownContextLength: 1000, + maxTokens: 100, + supportsImages: false, + }); + }).not.toThrow(); + }); + + test("should handle prefill scenario with system message", () => { + const messages: ChatMessage[] = [ + { + role: "system", + content: "You are a helpful assistant", + }, + { + role: "user", + content: "Please edit this code", + }, + { + role: "assistant", + content: "Sure! Here's the edited code:", + }, + ]; + + // This should not throw an error about missing user/tool message + expect(() => { + compileChatMessages({ + modelName: "gpt-4", + msgs: messages, + knownContextLength: 1000, + maxTokens: 100, + supportsImages: false, + }); + }).not.toThrow(); + }); +}); diff --git a/core/llm/countTokens.ts b/core/llm/countTokens.ts index 11cfc6cb2d9..60f07cb657c 100644 --- a/core/llm/countTokens.ts +++ b/core/llm/countTokens.ts @@ -448,8 +448,19 @@ function compileChatMessages({ msgsCopy = addSpaceToAnyEmptyMessages(msgsCopy); - // Extract the tool sequence from the end of the message array - const toolSequence = extractToolSequence(msgsCopy); + // Check if this is a prefill scenario (last message is from assistant) + // In prefill scenarios, last assistant message should be treated as non-negotiable + const lastMsg = msgsCopy[msgsCopy.length - 1]; + const isPrefillScenario = lastMsg && lastMsg.role === "assistant"; + + let toolSequence: ChatMessage[] = []; + if (!isPrefillScenario) { + // Extract the tool sequence from the end of the message array + toolSequence = extractToolSequence(msgsCopy); + } else { + // For prefill scenarios, just take the last assistant message as the "tool sequence" + toolSequence = [msgsCopy.pop()!]; + } // 
Count tokens for all messages in the tool sequence let lastMessagesTokens = 0; @@ -471,7 +482,13 @@ function compileChatMessages({ const contextLength = knownContextLength ?? DEFAULT_PRUNING_LENGTH; const countingSafetyBuffer = getTokenCountingBufferSafety(contextLength); - const minOutputTokens = Math.min(MIN_RESPONSE_TOKENS, maxTokens); + + // For DeepSeek models, we need to reserve the full maxTokens to prevent API overflow + // DeepSeek API rejects requests that exceed contextLength - maxToken + const isDeepSeekModel = modelName.toLowerCase().includes("deepseek-"); + const minOutputTokens = isDeepSeekModel + ? maxTokens + : Math.min(MIN_RESPONSE_TOKENS, maxTokens); let inputTokensAvailable = contextLength; diff --git a/core/llm/getAdjustedTokenCount.ts b/core/llm/getAdjustedTokenCount.ts index d700a8a762b..233922502c0 100644 --- a/core/llm/getAdjustedTokenCount.ts +++ b/core/llm/getAdjustedTokenCount.ts @@ -7,6 +7,7 @@ const ANTHROPIC_TOKEN_MULTIPLIER = 1.23; const GEMINI_TOKEN_MULTIPLIER = 1.18; const MISTRAL_TOKEN_MULTIPLIER = 1.26; +const DEEPSEEK_TOKEN_MULTIPLIER = 1.05; // DeepSeek's tokenizer is very efficient, 5% padding just for safety /** * Adjusts token count based on model-specific tokenizer differences. 
@@ -33,6 +34,8 @@ export function getAdjustedTokenCountFromModel( ) { // Mistral family models: mistral, mixtral, codestral, devstral, etc multiplier = MISTRAL_TOKEN_MULTIPLIER; + } else if (lowerModelName.includes("deepseek")) { + multiplier = DEEPSEEK_TOKEN_MULTIPLIER; } return Math.ceil(baseTokens * multiplier); } diff --git a/core/llm/index.ts b/core/llm/index.ts index dd6dd9c00aa..b6dc9738f12 100644 --- a/core/llm/index.ts +++ b/core/llm/index.ts @@ -39,6 +39,7 @@ import { isOllamaInstalled } from "../util/ollamaHelper.js"; import { TokensBatchingService } from "../util/TokensBatchingService.js"; import { withExponentialBackoff } from "../util/withExponentialBackoff.js"; +import { applyToolOverrides } from "../tools/applyToolOverrides.js"; import { autodetectPromptTemplates, autodetectTemplateFunction, @@ -66,7 +67,6 @@ import { toCompleteBody, toFimBody, } from "./openaiTypeConverters.js"; -import { applyToolOverrides } from "../tools/applyToolOverrides.js"; export class LLMError extends Error { constructor( @@ -962,7 +962,10 @@ export abstract class BaseLLM implements ILLM { ) { let completion = ""; for await (const message of this.streamChat(messages, signal, options)) { - completion += renderChatMessage(message); + // Only accumulate assistant messages, not thinking or other message types + if (message.role === "assistant") { + completion += renderChatMessage(message); + } } return { role: "assistant" as const, content: completion }; } diff --git a/core/llm/llms/DeepSeek.integration.test.ts b/core/llm/llms/DeepSeek.integration.test.ts new file mode 100644 index 00000000000..f575ee4783e --- /dev/null +++ b/core/llm/llms/DeepSeek.integration.test.ts @@ -0,0 +1,65 @@ +import { afterEach, describe, expect, jest, test } from "@jest/globals"; +import DeepSeek from "./DeepSeek.js"; + +describe.skip("DeepSeek Integration Tests", () => { + afterEach(() => { + jest.clearAllMocks(); + }); + + test("should handle abort signal", async () => { + const deepSeek = new 
DeepSeek({ + apiKey: "test-api-key", + model: "deepseek-chat", + apiBase: "https://api.deepseek.com", + }); + + const abortController = new AbortController(); + + // Mock fetch that rejects when aborted + const mockFetch = jest.fn<() => Promise>(); + mockFetch.mockImplementation(() => { + return Promise.reject(new DOMException("Aborted", "AbortError")); + }); + + (deepSeek as any).fetch = mockFetch; + (deepSeek as any).useOpenAIAdapterFor = []; + + // Abort immediately + abortController.abort(); + + await expect( + (deepSeek as any) + .streamChat( + [{ role: "user", content: "Hello" }], + abortController.signal, + ) + .next(), + ).rejects.toThrow("Aborted"); + }); + + test("should handle API errors gracefully", async () => { + const deepSeek = new DeepSeek({ + apiKey: "test-api-key", + model: "deepseek-chat", + apiBase: "https://api.deepseek.com", + }); + + const mockFetch = jest.fn<() => Promise>(); + mockFetch.mockResolvedValue( + new Response(JSON.stringify({ error: { message: "Invalid API key" } }), { + status: 401, + headers: { "Content-Type": "application/json" }, + }), + ); + + (deepSeek as any).fetch = mockFetch; + (deepSeek as any).useOpenAIAdapterFor = []; + + const stream = (deepSeek as any).streamChat( + [{ role: "user", content: "Hello" }], + new AbortController().signal, + ); + + await expect(stream.next()).rejects.toThrow(); + }); +}); diff --git a/core/llm/llms/DeepSeek.tools.test.ts b/core/llm/llms/DeepSeek.tools.test.ts new file mode 100644 index 00000000000..48bb78191e9 --- /dev/null +++ b/core/llm/llms/DeepSeek.tools.test.ts @@ -0,0 +1,386 @@ +import { beforeEach, describe, expect, it, jest } from "@jest/globals"; +import { ChatMessage, ThinkingChatMessage } from "../../index.js"; +import DeepSeek from "./DeepSeek.js"; +import { runLlmTest } from "./llmTestHarness.js"; + +describe.skip("DeepSeek Tools and Thinking Integration Tests", () => { + let deepSeek: DeepSeek; + + beforeEach(() => { + deepSeek = new DeepSeek({ + model: 
"deepseek-reasoner", + apiKey: "test-api-key", + apiBase: "https://api.deepseek.com", + }); + }); + + describe("Tool calls with thinking messages", () => { + it("should handle thinking followed by tool call", async () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Get the weather" }, + { + role: "thinking", + content: "I need to call the weather API", + } as ThinkingChatMessage, + ]; + + const tools = [ + { + type: "function" as const, + function: { + name: "get_weather", + description: "Get weather information", + parameters: { + type: "object", + properties: { + location: { type: "string" }, + }, + }, + }, + }, + ]; + + await runLlmTest({ + llm: deepSeek, + methodToTest: "streamChat", + params: [messages, new AbortController().signal, { tools }], + expectedRequest: { + url: "https://api.deepseek.com/chat/completions", + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + "api-key": "test-api-key", + }, + body: { + model: "deepseek-reasoner", + messages: [ + { role: "user", content: "Get the weather" }, + { role: "thinking", content: "I need to call the weather API" }, + { role: "assistant", content: "" }, // Auto-inserted + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 2048, + tools, + }, + }, + mockStream: [ + 'data: {"choices":[{"delta":{"tool_calls":[{"id":"1","type":"function","function":{"name":"get_weather","arguments":"{\\"location\\": \\"NYC\\"}"}}]}}]}\n\n', + ], + }); + }); + + it("should handle thinking -> assistant -> tool -> thinking -> assistant pattern", async () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Calculate something" }, + { + role: "thinking", + content: "I need to use the calculator", + } as ThinkingChatMessage, + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { + name: "calculate", + arguments: '{"expression": "2+2"}', + }, + }, + ], + }, + { role: 
"tool", content: "4", toolCallId: "1" }, + { role: "thinking", content: "The result is 4" } as ThinkingChatMessage, + ]; + + await runLlmTest({ + llm: deepSeek, + methodToTest: "streamChat", + params: [messages, new AbortController().signal], + expectedRequest: { + url: "https://api.deepseek.com/chat/completions", + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + "api-key": "test-api-key", + }, + body: { + model: "deepseek-reasoner", + messages: [ + { role: "user", content: "Calculate something" }, + { role: "thinking", content: "I need to use the calculator" }, + { role: "assistant", content: "" }, // Auto-inserted + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { + name: "calculate", + arguments: '{"expression": "2+2"}', + }, + }, + ], + }, + { role: "tool", content: "4", toolCallId: "1" }, + { role: "thinking", content: "The result is 4" }, + { role: "assistant", content: "" }, // Auto-inserted + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 2048, + }, + }, + mockStream: [ + 'data: {"choices":[{"delta":{"content":"The result is 4"}}]}\n\n', + ], + }); + }); + + it("should not insert assistant message when thinking is already followed by assistant", async () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Hello" }, + { + role: "thinking", + content: "I should respond", + } as ThinkingChatMessage, + { role: "assistant", content: "Hello there!" 
}, + ]; + + await runLlmTest({ + llm: deepSeek, + methodToTest: "streamChat", + params: [messages, new AbortController().signal], + expectedRequest: { + url: "https://api.deepseek.com/chat/completions", + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + "api-key": "test-api-key", + }, + body: { + model: "deepseek-reasoner", + messages: [ + { role: "user", content: "Hello" }, + { role: "thinking", content: "I should respond" }, + { role: "assistant", content: "Hello there!" }, + // No extra assistant message inserted + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 2048, + }, + }, + mockStream: [ + 'data: {"choices":[{"delta":{"content":"How can I help?"}}]}\n\n', + ], + }); + }); + + it("should handle complex multi-tool scenarios with thinking", async () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Get weather and time" }, + { + role: "thinking", + content: "I need to call both weather and time APIs", + } as ThinkingChatMessage, + ]; + + const tools = [ + { + type: "function" as const, + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: {} }, + }, + }, + { + type: "function" as const, + function: { + name: "get_time", + description: "Get current time", + parameters: { type: "object", properties: {} }, + }, + }, + ]; + + await runLlmTest({ + llm: deepSeek, + methodToTest: "streamChat", + params: [messages, new AbortController().signal, { tools }], + expectedRequest: { + url: "https://api.deepseek.com/chat/completions", + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + "api-key": "test-api-key", + }, + body: { + model: "deepseek-reasoner", + messages: [ + { role: "user", content: "Get weather and time" }, + { + role: "thinking", + content: "I need to call both weather and time APIs", + }, + { role: "assistant", content: "" }, // 
Auto-inserted + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 2048, + tools, + }, + }, + mockStream: [ + 'data: {"choices":[{"delta":{"tool_calls":[{"id":"1","type":"function","function":{"name":"get_weather","arguments":"{}"}}]}}]}\n\n', + 'data: {"choices":[{"delta":{"tool_calls":[{"id":"2","type":"function","function":{"name":"get_time","arguments":"{}"}}]}}]}\n\n', + ], + }); + }); + }); + + describe("Edge cases with tools and thinking", () => { + it("should handle empty thinking content", async () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Hello" }, + { role: "thinking", content: "" } as ThinkingChatMessage, + ]; + + await runLlmTest({ + llm: deepSeek, + methodToTest: "streamChat", + params: [messages, new AbortController().signal], + expectedRequest: { + url: "https://api.deepseek.com/chat/completions", + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + "api-key": "test-api-key", + }, + body: { + model: "deepseek-reasoner", + messages: [ + { role: "user", content: "Hello" }, + { role: "thinking", content: "" }, + { role: "assistant", content: "" }, // Auto-inserted + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 2048, + }, + }, + mockStream: ['data: {"choices":[{"delta":{"content":"Hello!"}}]}\n\n'], + }); + }); + + it("should handle system messages with thinking and tools", async () => { + const messages: ChatMessage[] = [ + { role: "system", content: "You are a helpful assistant" }, + { role: "user", content: "Use the calculator" }, + { + role: "thinking", + content: "I need to calculate something", + } as ThinkingChatMessage, + ]; + + const tools = [ + { + type: "function" as const, + function: { + name: "calculate", + description: "Calculate expressions", + parameters: { type: "object", properties: {} }, + }, + }, + ]; + + await runLlmTest({ + llm: deepSeek, + methodToTest: "streamChat", + params: [messages, new 
AbortController().signal, { tools }], + expectedRequest: { + url: "https://api.deepseek.com/chat/completions", + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer test-api-key", + "api-key": "test-api-key", + }, + body: { + model: "deepseek-reasoner", + messages: [ + { role: "system", content: "You are a helpful assistant" }, + { role: "user", content: "Use the calculator" }, + { role: "thinking", content: "I need to calculate something" }, + { role: "assistant", content: "" }, // Auto-inserted + ], + stream: true, + stream_options: { include_usage: true }, + max_tokens: 2048, + tools, + }, + }, + mockStream: [ + 'data: {"choices":[{"delta":{"tool_calls":[{"id":"1","type":"function","function":{"name":"calculate","arguments":"{}"}}]}}]}\n\n', + ], + }); + }); + }); + + describe("Unit tests for thinking message pairing with tools", () => { + it("should correctly pair thinking messages in complex tool scenarios", () => { + const complexMessages: ChatMessage[] = [ + { role: "user", content: "Complex task" }, + { role: "thinking", content: "First thinking" } as ThinkingChatMessage, + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { name: "tool1", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "Result 1", toolCallId: "1" }, + { role: "thinking", content: "Second thinking" } as ThinkingChatMessage, + { role: "assistant", content: "Final response" }, + ]; + + const mockFetch = jest.fn(); + const result = (deepSeek as any)._pairLoneThinkingMessages( + complexMessages, + ); + + expect(result).toEqual([ + { role: "user", content: "Complex task" }, + { role: "thinking", content: "First thinking" }, + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { name: "tool1", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "Result 1", toolCallId: "1" }, + { role: "thinking", content: "Second thinking" }, + { role: 
"assistant", content: "Final response" }, // Already present, no insertion + ]); + }); + }); +}); diff --git a/core/llm/llms/DeepSeek.ts b/core/llm/llms/DeepSeek.ts new file mode 100644 index 00000000000..e9c03a8a5bc --- /dev/null +++ b/core/llm/llms/DeepSeek.ts @@ -0,0 +1,196 @@ +import { ChatCompletionCreateParams } from "openai/resources/index"; + +import { + ChatMessage, + LLMFullCompletionOptions, + LLMOptions, + MessageOption, + PromptLog, +} from "../../index.js"; +import { LlmApiRequestType } from "../openaiTypeConverters.js"; +import { osModelsEditPrompt } from "../templates/edit.js"; +import OpenAI from "./OpenAI.js"; + +/** + * DeepSeek LLM provider implementation. + * + * This provider extends the OpenAI adapter and adds DeepSeek-specific handling: + * - Supports thinking tool chains by pairing lone thinking messages with assistant messages + * - Converts model names (e.g., deepseek-fim-beta to deepseek-chat) + * - Modifies stream_options to request usage statistics when streaming + * - Provides FIM support for beta endpoint (autocomplete) and chat completion + */ +class DeepSeek extends OpenAI { + /** + * DeepSeek supports the `reasoning_content` field for reasoning models. + * This enables the model to output reasoning text separate from the final answer. + */ + protected supportsReasoningContentField = true; + static providerName = "deepseek"; + + static defaultOptions: Partial = { + apiBase: "https://api.deepseek.com/", + promptTemplates: { + edit: osModelsEditPrompt, // Use OpenAI‑style edit prompt (DeepSeek is OpenAI‑compatible) + }, + useLegacyCompletionsEndpoint: false, // DeepSeek does not support the legacy /completions endpoint + baseChatSystemMessage: + "You are DeepSeek running in the Continue environment. Focus on writing clean, well-structured code with concise, meaningful comments.", + }; + + /** + * Which request types should be handled by the OpenAI‑compatible adapter. 
+ * The adapter (DeepSeekApi) implements the actual API communication. + */ + protected useOpenAIAdapterFor: (LlmApiRequestType | "*")[] = [ + "chat", // Non‑streaming chat completions + "streamChat", // Streaming chat completions + "streamFim", // Streaming fill‑in‑middle (beta endpoint) + "list", // Listing available models + ]; + + constructor(options: LLMOptions) { + // No special initialization needed; the parent OpenAI class handles everything. + super(options); + } + + /** + * Stream chat completions with DeepSeek‑specific adaptations: + * 1. Pair lone thinking messages with an assistant message (DeepSeek requirement) + * 2. Convert model names (e.g., deepseek‑fim‑beta → deepseek‑chat) + * 3. Delegate to the parent OpenAI streamChat, which uses the DeepSeekApi adapter. + */ + async *streamChat( + messages: ChatMessage[], + signal: AbortSignal, + options: LLMFullCompletionOptions = {}, + messageOptions?: MessageOption, + ): AsyncGenerator { + const transformedMessages = this._pairLoneThinkingMessages(messages); + return yield* super.streamChat( + transformedMessages, + signal, + { + ...options, + model: this._convertModelName(options.model || this.model), + }, + messageOptions, + ); + } + + /** + * Ensures DeepSeek thinking tool chain validity. + * + * DeepSeek expects that every thinking message is immediately followed by an assistant message. + * This method inserts an empty assistant message after any thinking message that is not already + * followed by one. This preserves the correct message structure for the API. + * + * Example: + * [thinking, user] → [thinking, assistant, user] + * [thinking, assistant] → unchanged + * [thinking, tool] → [thinking, assistant, tool] + */ + private _pairLoneThinkingMessages(messages: ChatMessage[]): ChatMessage[] { + const result: ChatMessage[] = []; + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + result.push(msg); + + // If this is a thinking message, check the next message. 
+ if (msg.role === "thinking") { + const nextMsg = messages[i + 1]; + // Insert an empty assistant message only if the next message is not already an assistant. + if (!nextMsg || nextMsg.role !== "assistant") { + result.push({ + role: "assistant", + content: "", + }); + } + } + } + return result; + } + + /** + * Converts internal model names to the actual model names expected by the DeepSeek API. + * + * The artificial model name "deepseek‑fim‑beta" is used in the configuration to signal + * that the FIM (fill‑in‑middle) beta endpoint should be used. The actual API still uses + * "deepseek‑chat" as the model name, but the endpoint path differs (/beta/chat/completions). + * + * @param model The model name from the configuration + * @returns The model name to send to the API + */ + protected _convertModelName(model: string): string { + if (model === "deepseek-fim-beta") { + return "deepseek-chat"; + } + return model; + } + + /** + * Stream fill‑in‑middle (FIM) completions using DeepSeek's beta endpoint. + * The model name is converted (deepseek‑fim‑beta → deepseek‑chat) before delegation. + */ + async *streamFim( + prefix: string, + suffix: string, + signal: AbortSignal, + options: LLMFullCompletionOptions = {}, + ): AsyncGenerator { + return yield* super.streamFim(prefix, suffix, signal, { + ...options, + model: this._convertModelName(options.model || this.model), + }); + } + + /** + * Adds stream_options to request usage statistics in streaming responses. + * This ensures that token usage is reported at the end of the stream. + */ + protected modifyChatBody( + body: ChatCompletionCreateParams, + ): ChatCompletionCreateParams { + if (body.stream) { + const bodyWithStreamOptions = body as ChatCompletionCreateParams & { + stream_options?: { include_usage: boolean }; + }; + bodyWithStreamOptions.stream_options = { include_usage: true }; + } + return super.modifyChatBody(body); + } + + /** + * Determines whether FIM (fill‑in‑middle) is supported. 
+ * FIM is supported if the model is explicitly "deepseek-fim-beta" (the artificial FIM model) + */ + supportsFim(): boolean { + return this.model === "deepseek-fim-beta"; + } + + /** + * DeepSeek does not support the legacy /completions endpoint (text-only completions). + * All completions must go through the chat or FIM endpoints. + */ + supportsCompletions(): boolean { + return false; // FIM could be used as workaround + } + + /** + * DeepSeek supports prefill (prefix completion) via the beta chat completions endpoint. + * When the last message is from the assistant, the API treats it as a prefix completion. + */ + supportsPrefill(): boolean { + return true; + } + + /** + * DeepSeek provides a /models endpoint to list available models. + */ + supportsList(): boolean { + return true; + } +} + +export default DeepSeek; diff --git a/core/llm/llms/DeepSeek.unit.test.ts b/core/llm/llms/DeepSeek.unit.test.ts new file mode 100644 index 00000000000..655492e1135 --- /dev/null +++ b/core/llm/llms/DeepSeek.unit.test.ts @@ -0,0 +1,266 @@ +import { beforeEach, describe, expect, it } from "@jest/globals"; +import { ChatMessage, ThinkingChatMessage } from "../../index.js"; +import DeepSeek from "./DeepSeek.js"; + +describe("DeepSeek Unit Tests", () => { + let deepSeek: DeepSeek; + + beforeEach(() => { + deepSeek = new DeepSeek({ + model: "deepseek-chat", + apiKey: "test-key", + apiBase: "https://api.deepseek.com", + }); + }); + + describe("constructor and initialization", () => { + it("should initialize with correct default options", () => { + expect(deepSeek.model).toBe("deepseek-chat"); + expect(DeepSeek.providerName).toBe("deepseek"); + expect(DeepSeek.defaultOptions.useLegacyCompletionsEndpoint).toBe(false); + }); + + it("should support reasoning content field", () => { + expect((deepSeek as any).supportsReasoningContentField).toBe(true); + }); + }); + + describe("_convertModelName", () => { + it("should convert deepseek-fim-beta to deepseek-chat", () => { + const result =
(deepSeek as any)._convertModelName("deepseek-fim-beta"); + expect(result).toBe("deepseek-chat"); + }); + + it("should return other models unchanged", () => { + expect((deepSeek as any)._convertModelName("deepseek-chat")).toBe( + "deepseek-chat", + ); + expect((deepSeek as any)._convertModelName("deepseek-reasoner")).toBe( + "deepseek-reasoner", + ); + expect((deepSeek as any)._convertModelName("gpt-4")).toBe("gpt-4"); + }); + + it("should handle undefined model", () => { + const result = (deepSeek as any)._convertModelName(undefined); + expect(result).toBeUndefined(); + }); + + it("should handle empty string", () => { + const result = (deepSeek as any)._convertModelName(""); + expect(result).toBe(""); + }); + }); + + describe("modifyChatBody", () => { + it("should add stream_options for streaming requests", () => { + const body = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }; + + const result = (deepSeek as any).modifyChatBody(body); + + expect(result.stream_options).toEqual({ include_usage: true }); + }); + + it("should not add stream_options for non-streaming requests", () => { + const body = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }; + + const result = (deepSeek as any).modifyChatBody(body); + + expect(result.stream_options).toBeUndefined(); + }); + + it("should preserve existing properties", () => { + const body = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hello" }], + stream: true, + temperature: 0.7, + max_tokens: 1000, + }; + + const result = (deepSeek as any).modifyChatBody(body); + + expect(result.temperature).toBe(0.7); + expect(result.max_tokens).toBe(1000); + expect(result.stream_options).toEqual({ include_usage: true }); + }); + }); + + describe("supportsFim", () => { + it("should return true for deepseek-fim-beta model", () => { + const fimDeepSeek = new DeepSeek({ + model: "deepseek-fim-beta", + apiKey: "test-key", + 
}); + expect(fimDeepSeek.supportsFim()).toBe(true); + }); + + it("should return false for deepseek-chat without beta API", () => { + expect(deepSeek.supportsFim()).toBe(false); + }); + + it("should return false for other models", () => { + const otherDeepSeek = new DeepSeek({ + model: "deepseek-reasoner", + apiKey: "test-key", + }); + expect(otherDeepSeek.supportsFim()).toBe(false); + }); + }); + + describe("support methods", () => { + it("should not support completions", () => { + expect(deepSeek.supportsCompletions()).toBe(false); + }); + + it("should support prefill", () => { + expect(deepSeek.supportsPrefill()).toBe(true); + }); + + it("should support list", () => { + expect(deepSeek.supportsList()).toBe(true); + }); + }); + + describe("useOpenAIAdapterFor configuration", () => { + it("should include correct request types", () => { + const adapterTypes = (deepSeek as any).useOpenAIAdapterFor; + expect(adapterTypes).toContain("chat"); + expect(adapterTypes).toContain("streamChat"); + expect(adapterTypes).toContain("streamFim"); + expect(adapterTypes).toContain("list"); + }); + }); + + describe("_pairLoneThinkingMessages edge cases", () => { + it("should handle null content in thinking messages", () => { + const messages: ChatMessage[] = [ + { role: "thinking", content: null } as any, + { role: "user", content: "Hello" }, + ]; + const result = (deepSeek as any)._pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "thinking", content: null }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + ]); + }); + + it("should handle undefined content in thinking messages", () => { + const messages: ChatMessage[] = [ + { role: "thinking", content: undefined } as any, + { role: "user", content: "Hello" }, + ]; + const result = (deepSeek as any)._pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "thinking", content: undefined }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + 
]); + }); + + it("should handle last message being thinking", () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Hello" }, + { role: "thinking", content: "Processing" } as ThinkingChatMessage, + ]; + const result = (deepSeek as any)._pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "user", content: "Hello" }, + { role: "thinking", content: "Processing" }, + { role: "assistant", content: "" }, + ]); + }); + + it("should handle complex tool call scenarios", () => { + const complexMessages: ChatMessage[] = [ + { role: "user", content: "Complex task" }, + { role: "thinking", content: "First thinking" } as ThinkingChatMessage, + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { name: "tool1", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "Result 1", toolCallId: "1" }, + { role: "thinking", content: "Second thinking" } as ThinkingChatMessage, + { role: "assistant", content: "Final response" }, + ]; + + const result = (deepSeek as any)._pairLoneThinkingMessages( + complexMessages, + ); + + expect(result).toEqual([ + { role: "user", content: "Complex task" }, + { role: "thinking", content: "First thinking" }, + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { name: "tool1", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "Result 1", toolCallId: "1" }, + { role: "thinking", content: "Second thinking" }, + { role: "assistant", content: "Final response" }, // Already present, no insertion + ]); + }); + + it("should handle multiple consecutive thinking messages", () => { + const messages: ChatMessage[] = [ + { role: "thinking", content: "T1" } as ThinkingChatMessage, + { role: "thinking", content: "T2" } as ThinkingChatMessage, + { role: "thinking", content: "T3" } as ThinkingChatMessage, + { role: "user", content: "Hello" }, + ]; + + const result = (deepSeek as 
any)._pairLoneThinkingMessages(messages); + + expect(result).toEqual([ + { role: "thinking", content: "T1" }, + { role: "assistant", content: "" }, + { role: "thinking", content: "T2" }, + { role: "assistant", content: "" }, + { role: "thinking", content: "T3" }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + ]); + }); + }); + + describe("default options validation", () => { + it("should have correct base chat system message", () => { + expect(DeepSeek.defaultOptions.baseChatSystemMessage).toContain( + "DeepSeek", + ); + expect(DeepSeek.defaultOptions.baseChatSystemMessage).toContain( + "Continue", + ); + }); + + it("should have edit prompt template", () => { + expect(DeepSeek.defaultOptions.promptTemplates?.edit).toBeDefined(); + expect(typeof DeepSeek.defaultOptions.promptTemplates?.edit).toBe( + "function", + ); + }); + }); +}); diff --git a/core/llm/llms/DeepSeek.vitest.ts b/core/llm/llms/DeepSeek.vitest.ts new file mode 100644 index 00000000000..acf7ca79346 --- /dev/null +++ b/core/llm/llms/DeepSeek.vitest.ts @@ -0,0 +1,285 @@ +import { beforeEach, describe, expect, it } from "vitest"; +import { ChatMessage, ThinkingChatMessage } from "../../index.js"; +import DeepSeek from "./DeepSeek.js"; + +describe("DeepSeek", () => { + let deepSeek: DeepSeek; + + beforeEach(() => { + deepSeek = new DeepSeek({ + model: "deepseek-reasoner", + apiKey: "test-key", + }); + }); + + describe("_pairLoneThinkingMessages", () => { + // Helper method to access private method for testing + const pairLoneThinkingMessages = ( + messages: ChatMessage[], + ): ChatMessage[] => { + // Use type assertion to access private method + return (deepSeek as any)._pairLoneThinkingMessages(messages); + }; + + it("should return empty array for empty input", () => { + expect(pairLoneThinkingMessages([])).toEqual([]); + }); + + it("should not modify messages without thinking", () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Hello" }, + { role: 
"assistant", content: "Hi" }, + { role: "user", content: "How are you?" }, + { role: "assistant", content: "Fine" }, + ]; + expect(pairLoneThinkingMessages(messages)).toEqual(messages); + }); + + it("should insert empty assistant after lone thinking message", () => { + const messages: ChatMessage[] = [ + { + role: "thinking", + content: "I need to respond", + } as ThinkingChatMessage, + { role: "user", content: "Hello" }, + ]; + const result = pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "thinking", content: "I need to respond" }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + ]); + }); + + it("should not insert assistant if thinking followed by assistant", () => { + const messages: ChatMessage[] = [ + { + role: "thinking", + content: "I need to respond", + } as ThinkingChatMessage, + { role: "assistant", content: "Hello there" }, + ]; + expect(pairLoneThinkingMessages(messages)).toEqual(messages); + }); + + it("should handle multiple lone thinking messages", () => { + const messages: ChatMessage[] = [ + { role: "thinking", content: "T1" } as ThinkingChatMessage, + { role: "thinking", content: "T2" } as ThinkingChatMessage, + { role: "user", content: "Hello" }, + { role: "thinking", content: "T3" } as ThinkingChatMessage, + { role: "assistant", content: "Hi" }, + ]; + const result = pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "thinking", content: "T1" }, + { role: "assistant", content: "" }, + { role: "thinking", content: "T2" }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + { role: "thinking", content: "T3" }, + { role: "assistant", content: "Hi" }, + ]); + }); + + it("should handle thinking messages in tool call loops", () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Get weather" }, + { + role: "thinking", + content: "Need to call tool", + } as ThinkingChatMessage, + { + role: "assistant", + content: "", + toolCalls: [ + 
{ + id: "1", + type: "function", + function: { name: "get_weather", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "Sunny", toolCallId: "1" }, + { role: "thinking", content: "Now respond" } as ThinkingChatMessage, + { role: "assistant", content: "It's sunny" }, + ]; + const result = pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "user", content: "Get weather" }, + { role: "thinking", content: "Need to call tool" }, + { + role: "assistant", + content: "", + toolCalls: [ + { + id: "1", + type: "function", + function: { name: "get_weather", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "Sunny", toolCallId: "1" }, + { role: "thinking", content: "Now respond" }, + { role: "assistant", content: "It's sunny" }, + ]); + }); + + it("should handle system messages", () => { + const messages: ChatMessage[] = [ + { role: "system", content: "Be helpful" }, + { role: "thinking", content: "Processing" } as ThinkingChatMessage, + { role: "user", content: "Hello" }, + ]; + const result = pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "system", content: "Be helpful" }, + { role: "thinking", content: "Processing" }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + ]); + }); + + it("should handle tool messages (no insertion)", () => { + const messages: ChatMessage[] = [ + { role: "tool", content: "Result", toolCallId: "1" }, + { role: "thinking", content: "Next" } as ThinkingChatMessage, + { role: "assistant", content: "Ok" }, + ]; + const result = pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "tool", content: "Result", toolCallId: "1" }, + { role: "thinking", content: "Next" }, + { role: "assistant", content: "Ok" }, + ]); + }); + }); + + describe("provider configuration", () => { + it("should have correct provider name", () => { + expect(DeepSeek.providerName).toBe("deepseek"); + expect(deepSeek.providerName).toBe("deepseek"); + }); + + it("should 
support reasoning content field", () => { + expect((deepSeek as any).supportsReasoningContentField).toBe(true); + expect((deepSeek as any).supportsReasoningField).toBe(false); + expect((deepSeek as any).supportsReasoningDetailsField).toBe(false); + }); + + it("should support FIM for deepseek-fim-beta model", () => { + const fimDeepSeek = new DeepSeek({ + model: "deepseek-fim-beta", + apiKey: "test-key", + }); + expect(fimDeepSeek.supportsFim()).toBe(true); + }); + it("should not support completions", () => { + expect(deepSeek.supportsCompletions()).toBe(false); + }); + + it("should support prefill", () => { + expect(deepSeek.supportsPrefill()).toBe(true); + }); + + it("should support list", () => { + expect(deepSeek.supportsList()).toBe(true); + }); + }); + + describe("_convertModelName", () => { + it("should convert deepseek-fim-beta to deepseek-chat", () => { + const result = (deepSeek as any)._convertModelName("deepseek-fim-beta"); + expect(result).toBe("deepseek-chat"); + }); + + it("should return other models unchanged", () => { + expect((deepSeek as any)._convertModelName("deepseek-chat")).toBe( + "deepseek-chat", + ); + expect((deepSeek as any)._convertModelName("deepseek-reasoner")).toBe( + "deepseek-reasoner", + ); + expect((deepSeek as any)._convertModelName("gpt-4")).toBe("gpt-4"); + }); + + it("should handle undefined model", () => { + const result = (deepSeek as any)._convertModelName(undefined); + expect(result).toBeUndefined(); + }); + }); + + describe("modifyChatBody", () => { + it("should add stream_options for streaming requests", () => { + const body = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }; + + const result = (deepSeek as any).modifyChatBody(body); + + expect(result.stream_options).toEqual({ include_usage: true }); + }); + + it("should not add stream_options for non-streaming requests", () => { + const body = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hello" }], + 
stream: false, + }; + + const result = (deepSeek as any).modifyChatBody(body); + + expect(result.stream_options).toBeUndefined(); + }); + }); + + describe("useOpenAIAdapterFor configuration", () => { + it("should include correct request types", () => { + const adapterTypes = (deepSeek as any).useOpenAIAdapterFor; + expect(adapterTypes).toContain("chat"); + expect(adapterTypes).toContain("streamChat"); + expect(adapterTypes).toContain("streamFim"); + expect(adapterTypes).toContain("list"); + }); + }); + + describe("default options", () => { + it("should have correct default configuration", () => { + expect(DeepSeek.defaultOptions.useLegacyCompletionsEndpoint).toBe(false); + expect(DeepSeek.defaultOptions.baseChatSystemMessage).toContain( + "DeepSeek", + ); + expect(DeepSeek.defaultOptions.promptTemplates?.edit).toBeDefined(); + }); + }); + + describe("edge cases for _pairLoneThinkingMessages", () => { + it("should handle null/undefined content", () => { + const messages: ChatMessage[] = [ + { role: "thinking", content: null } as any, + { role: "user", content: "Hello" }, + ]; + const result = (deepSeek as any)._pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "thinking", content: null }, + { role: "assistant", content: "" }, + { role: "user", content: "Hello" }, + ]); + }); + + it("should handle last message being thinking", () => { + const messages: ChatMessage[] = [ + { role: "user", content: "Hello" }, + { role: "thinking", content: "Processing" } as ThinkingChatMessage, + ]; + const result = (deepSeek as any)._pairLoneThinkingMessages(messages); + expect(result).toEqual([ + { role: "user", content: "Hello" }, + { role: "thinking", content: "Processing" }, + { role: "assistant", content: "" }, + ]); + }); + }); +}); diff --git a/core/llm/llms/Deepseek.ts b/core/llm/llms/Deepseek.ts deleted file mode 100644 index f6b9304dfd1..00000000000 --- a/core/llm/llms/Deepseek.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { streamSse } from 
"@continuedev/fetch"; -import { CompletionOptions, LLMOptions } from "../../index.js"; -import { osModelsEditPrompt } from "../templates/edit.js"; - -import OpenAI from "./OpenAI.js"; - -class Deepseek extends OpenAI { - static providerName = "deepseek"; - protected supportsReasoningField = false; - protected supportsReasoningDetailsField = false; - protected supportsReasoningContentField = true; - static defaultOptions: Partial = { - apiBase: "https://api.deepseek.com/", - model: "deepseek-coder", - promptTemplates: { - edit: osModelsEditPrompt, - }, - useLegacyCompletionsEndpoint: false, - }; - maxStopWords: number | undefined = 16; - - supportsFim(): boolean { - return true; - } - - async *_streamFim( - prefix: string, - suffix: string, - signal: AbortSignal, - options: CompletionOptions, - ): AsyncGenerator { - const endpoint = new URL("beta/completions", this.apiBase); - const resp = await this.fetch(endpoint, { - method: "POST", - body: JSON.stringify({ - model: options.model, - prompt: prefix, - suffix, - max_tokens: options.maxTokens, - temperature: options.temperature, - top_p: options.topP, - frequency_penalty: options.frequencyPenalty, - presence_penalty: options.presencePenalty, - stop: options.stop, - stream: true, - }), - headers: { - "Content-Type": "application/json", - Accept: "application/json", - Authorization: `Bearer ${this.apiKey}`, - }, - signal, - }); - for await (const chunk of streamSse(resp)) { - yield chunk.choices[0].text; - } - } -} - -export default Deepseek; diff --git a/core/llm/llms/OpenAI-compatible-core.vitest.ts b/core/llm/llms/OpenAI-compatible-core.vitest.ts index 5d96d099099..db5e679814c 100644 --- a/core/llm/llms/OpenAI-compatible-core.vitest.ts +++ b/core/llm/llms/OpenAI-compatible-core.vitest.ts @@ -1,19 +1,19 @@ import { createOpenAISubclassTests } from "./test-utils/openai-test-utils.js"; // Import core OpenAI-compatible providers -import OpenAI from "./OpenAI.js"; -import Groq from "./Groq.js"; -import Fireworks from 
"./Fireworks.js"; -import Together from "./Together.js"; -import Deepseek from "./Deepseek.js"; -import OpenRouter from "./OpenRouter.js"; -import xAI from "./xAI.js"; -import Mistral from "./Mistral.js"; -import LMStudio from "./LMStudio.js"; import Cerebras from "./Cerebras.js"; +import CometAPI from "./CometAPI.js"; import DeepInfra from "./DeepInfra.js"; +import DeepSeek from "./DeepSeek.js"; +import Fireworks from "./Fireworks.js"; +import Groq from "./Groq.js"; +import LMStudio from "./LMStudio.js"; +import Mistral from "./Mistral.js"; import Nvidia from "./Nvidia.js"; -import CometAPI from "./CometAPI.js"; +import OpenAI from "./OpenAI.js"; +import OpenRouter from "./OpenRouter.js"; +import Together from "./Together.js"; +import xAI from "./xAI.js"; // Base OpenAI tests import { afterEach, describe, expect, test, vi } from "vitest"; @@ -221,7 +221,7 @@ createOpenAISubclassTests(Together, { customStreamCompleteEndpoint: "completions", }); -createOpenAISubclassTests(Deepseek, { +createOpenAISubclassTests(DeepSeek, { providerName: "deepseek", defaultApiBase: "https://api.deepseek.com/", }); diff --git a/core/llm/llms/OpenAI-compatible.vitest.ts b/core/llm/llms/OpenAI-compatible.vitest.ts index 402fb7e7585..8556b2e9066 100644 --- a/core/llm/llms/OpenAI-compatible.vitest.ts +++ b/core/llm/llms/OpenAI-compatible.vitest.ts @@ -1,36 +1,36 @@ import { createOpenAISubclassTests } from "./test-utils/openai-test-utils.js"; // Import all OpenAI-compatible providers -import OpenAI from "./OpenAI.js"; -import Groq from "./Groq.js"; -import Fireworks from "./Fireworks.js"; -import Together from "./Together.js"; -import Deepseek from "./Deepseek.js"; -import OpenRouter from "./OpenRouter.js"; -import xAI from "./xAI.js"; -import Mistral from "./Mistral.js"; -import Mimo from "./Mimo.js"; -import LMStudio from "./LMStudio.js"; +import Azure from "./Azure.js"; import Cerebras from "./Cerebras.js"; import DeepInfra from "./DeepInfra.js"; +import DeepSeek from "./DeepSeek.js"; 
+import Docker from "./Docker.js"; +import Fireworks from "./Fireworks.js"; +import Groq from "./Groq.js"; +import Inception from "./Inception.js"; +import Kindo from "./Kindo.js"; +import LMStudio from "./LMStudio.js"; +import Mimo from "./Mimo.js"; +import Mistral from "./Mistral.js"; +import Moonshot from "./Moonshot.js"; +import Novita from "./Novita.js"; import Nvidia from "./Nvidia.js"; +import OpenAI from "./OpenAI.js"; +import OpenRouter from "./OpenRouter.js"; import SambaNova from "./SambaNova.js"; import Scaleway from "./Scaleway.js"; -import Venice from "./Venice.js"; -import Moonshot from "./Moonshot.js"; -import Novita from "./Novita.js"; import SiliconFlow from "./SiliconFlow.js"; -import Kindo from "./Kindo.js"; -import Azure from "./Azure.js"; -import Inception from "./Inception.js"; -import Docker from "./Docker.js"; -import Voyage from "./Voyage.js"; -import Vllm from "./Vllm.js"; import TextGenWebUI from "./TextGenWebUI.js"; +import Together from "./Together.js"; +import Venice from "./Venice.js"; +import Vllm from "./Vllm.js"; +import Voyage from "./Voyage.js"; +import xAI from "./xAI.js"; // import Relace from "./Relace.js"; // Skip - causing import issues import FunctionNetwork from "./FunctionNetwork.js"; -import NCompass from "./NCompass.js"; import LlamaStack from "./LlamaStack.js"; +import NCompass from "./NCompass.js"; import Nebius from "./Nebius.js"; import OVHcloud from "./OVHcloud.js"; @@ -272,7 +272,7 @@ createOpenAISubclassTests(Together, { customStreamCompleteEndpoint: "completions", }); -createOpenAISubclassTests(Deepseek, { +createOpenAISubclassTests(DeepSeek, { providerName: "deepseek", defaultApiBase: "https://api.deepseek.com/", }); diff --git a/core/llm/llms/index.ts b/core/llm/llms/index.ts index 04f58e393de..e863ec7bc51 100644 --- a/core/llm/llms/index.ts +++ b/core/llm/llms/index.ts @@ -19,7 +19,7 @@ import Cloudflare from "./Cloudflare"; import Cohere from "./Cohere"; import CometAPI from "./CometAPI"; import DeepInfra 
from "./DeepInfra"; -import Deepseek from "./Deepseek"; +import DeepSeek from "./DeepSeek"; import Docker from "./Docker"; import Fireworks from "./Fireworks"; import Flowise from "./Flowise"; @@ -102,7 +102,7 @@ export const LLMClasses = [ NCompass, ContinueProxy, Cloudflare, - Deepseek, + DeepSeek, Docker, Msty, Azure, diff --git a/core/llm/llms/test/supportsFim.test.ts b/core/llm/llms/test/supportsFim.test.ts index 6cb7525aecf..a6416813e60 100644 --- a/core/llm/llms/test/supportsFim.test.ts +++ b/core/llm/llms/test/supportsFim.test.ts @@ -1,6 +1,6 @@ import { jest } from "@jest/globals"; import Anthropic from "../Anthropic.js"; -import Deepseek from "../Deepseek.js"; +import Deepseek from "../DeepSeek.js"; import FunctionNetwork from "../FunctionNetwork.js"; import Mistral from "../Mistral.js"; import OpenAI from "../OpenAI.js"; @@ -38,7 +38,9 @@ const testCases: [any, string, boolean, string][] = [ [FunctionNetwork, "any-model", false, "FunctionNetwork"], [OpenAI, "codestral", false, "OpenAI"], [Mistral, "codestral", true, "Mistral"], - [Deepseek, "deepseek-chat", true, "Deepseek"], + [Deepseek, "deepseek-fim-beta", true, "DeepSeek"], + [Deepseek, "deepseek-chat", false, "DeepSeek"], + [Deepseek, "deepseek-reasoner", false, "DeepSeek"], ]; testCases.forEach(([LLMClass, model, expectedResult, description]) => { diff --git a/core/llm/openaiTypeConverters.ts b/core/llm/openaiTypeConverters.ts index 9280a51c706..d8be11baaff 100644 --- a/core/llm/openaiTypeConverters.ts +++ b/core/llm/openaiTypeConverters.ts @@ -380,6 +380,13 @@ export function fromChatCompletionChunk( reasoning_details: delta?.reasoning_details as any[], }; return message; + } else if (chunk.usage) { + // Return a message with usage statistics + return { + role: "assistant", + content: "", + usage: chunk.usage, + } as ChatMessage & { usage?: any }; } return undefined; diff --git a/core/llm/toolSupport.test.ts b/core/llm/toolSupport.test.ts index 5be68bfa21c..b8418bb0bc8 100644 --- 
a/core/llm/toolSupport.test.ts +++ b/core/llm/toolSupport.test.ts @@ -440,8 +440,11 @@ describe("isRecommendedAgentModel", () => { expect(isRecommendedAgentModel("deepseek-reasoner")).toBe(true); }); - it("should return false for non-reasoner DeepSeek models", () => { - expect(isRecommendedAgentModel("deepseek-chat")).toBe(false); + it("should return true for DeepSeek Chat models", () => { + expect(isRecommendedAgentModel("deepseek-chat")).toBe(true); + }); + + it("should return false for non-reasoner, non-chat DeepSeek models", () => { expect(isRecommendedAgentModel("deepseek-coder")).toBe(false); }); }); diff --git a/core/llm/toolSupport.ts b/core/llm/toolSupport.ts index 099424c61ab..bbde8dd6244 100644 --- a/core/llm/toolSupport.ts +++ b/core/llm/toolSupport.ts @@ -438,7 +438,7 @@ export function isRecommendedAgentModel(modelName: string): boolean { // AND behavior const recs: RegExp[][] = [ [/o[134]/], - [/deepseek/, /r1|reasoner/], + [/deepseek/, /r1|reasoner|-chat/], [/gemini/, /2\.5/, /pro/], [/gemini/, /3-pro/], [/gpt/, /-5|5\.1|5\.2/], diff --git a/core/llm/utils/calculateRequestCost.ts b/core/llm/utils/calculateRequestCost.ts index 794524d448d..d473b5977b8 100644 --- a/core/llm/utils/calculateRequestCost.ts +++ b/core/llm/utils/calculateRequestCost.ts @@ -222,6 +222,75 @@ function calculateOpenAICost( }; } +function calculateDeepSeekCost( + model: string, + usage: Usage, +): CostBreakdown | null { + // Normalize model name. NOTE(review): normalizedModel is never read below; one flat price table is applied to every DeepSeek model. + const normalizedModel = model.toLowerCase(); + // Define pricing per million tokens (MTok) + // https://api-docs.deepseek.com/quick_start/pricing + // Input tokens: cache hit $0.028, cache miss $0.28 per million tokens + // Output tokens: $0.42 per million tokens + const pricing = { + cacheHitInput: 0.028, + cacheMissInput: 0.28, + output: 0.42, + }; + // Calculate cache tokens: cachedTokens is billed as cache-hit input, cacheWriteTokens as cache-miss input; missing details default to 0 + const cacheHitTokens = usage.promptTokensDetails?.cachedTokens ?? 0; + const cacheMissTokens = usage.promptTokensDetails?.cacheWriteTokens ??
0; + // If no cache details, assume all input tokens are cache miss + const totalInputTokens = usage.promptTokens; + let remainingInputTokens = + totalInputTokens - cacheHitTokens - cacheMissTokens; + if (remainingInputTokens < 0) { + // If cache tokens exceed total, clamp only the uncategorized remainder to 0 (the reported hit/miss counts are still billed as-is) + remainingInputTokens = 0; + } + // Distribute remaining tokens as cache miss (default) + const effectiveCacheMissTokens = cacheMissTokens + remainingInputTokens; + // Calculate costs + const inputCost = + (cacheHitTokens / 1_000_000) * pricing.cacheHitInput + + (effectiveCacheMissTokens / 1_000_000) * pricing.cacheMissInput; + const outputCost = (usage.completionTokens / 1_000_000) * pricing.output; + // Build breakdown components (zero-valued components are omitted) + const breakdownParts: string[] = []; + if (cacheHitTokens > 0) { + breakdownParts.push( + `Input Cache Hit: ${cacheHitTokens.toLocaleString()} tokens × $${pricing.cacheHitInput}/MTok = $${( + (cacheHitTokens / 1_000_000) * + pricing.cacheHitInput + ).toFixed(6)}`, + ); + } + if (effectiveCacheMissTokens > 0) { + breakdownParts.push( + `Input Cache Miss: ${effectiveCacheMissTokens.toLocaleString()} tokens × $${pricing.cacheMissInput}/MTok = $${( + (effectiveCacheMissTokens / 1_000_000) * + pricing.cacheMissInput + ).toFixed(6)}`, + ); + } + if (usage.completionTokens > 0) { + breakdownParts.push( + `Output: ${usage.completionTokens.toLocaleString()} tokens × $${pricing.output}/MTok = $${outputCost.toFixed(6)}`, + ); + } + const totalCost = inputCost + outputCost; + // Build final breakdown string; the Total line is appended only when more than one component is listed + let breakdown = `Model: ${model}\n`; + breakdown += breakdownParts.join("\n"); + if (breakdownParts.length > 1) { + breakdown += `\nTotal: $${totalCost.toFixed(6)}`; + } + return { + cost: totalCost, + breakdown, + }; +} + export function calculateRequestCost( provider: string, model: string, @@ -232,6 +301,8 @@ export function calculateRequestCost( return calculateAnthropicCost(model, usage); case "openai": return calculateOpenAICost(model, usage); + case "deepseek": +
return calculateDeepSeekCost(model, usage); default: return null; } diff --git a/core/llm/utils/calculateRequestCost.vitest.ts b/core/llm/utils/calculateRequestCost.vitest.ts index 32183ca1b49..be89ac42823 100644 --- a/core/llm/utils/calculateRequestCost.vitest.ts +++ b/core/llm/utils/calculateRequestCost.vitest.ts @@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest"; import type { Usage } from "../.."; -import { calculateRequestCost, CostBreakdown } from "./calculateRequestCost"; +import { calculateRequestCost } from "./calculateRequestCost"; interface TestCase { provider: string; @@ -163,6 +163,61 @@ describe("calculateRequestCost", () => { description: "GPT-3.5 Turbo", }, + // DeepSeek + { + provider: "deepseek", + model: "deepseek-chat", + promptTokens: 1000, + completionTokens: 500, + expectedCost: 0.00049, + description: "DeepSeek Chat basic usage (cache miss)", + }, + { + provider: "deepseek", + model: "deepseek-reasoner", + promptTokens: 1000, + completionTokens: 500, + cachedTokens: 200, + cacheWriteTokens: 800, + expectedCost: 0.0004396, + description: "DeepSeek Reasoner with cache details", + }, + { + provider: "deepseek", + model: "deepseek-chat", + promptTokens: 1000, + completionTokens: 500, + cachedTokens: 1000, + cacheWriteTokens: 0, + expectedCost: 0.000238, + description: "DeepSeek Chat all cache hit", + }, + { + provider: "deepseek", + model: "deepseek-chat", + promptTokens: 0, + completionTokens: 1000, + expectedCost: 0.00042, + description: "DeepSeek Chat only output tokens", + }, + { + provider: "deepseek", + model: "deepseek-chat", + promptTokens: 1000, + completionTokens: 0, + expectedCost: 0.00028, + description: "DeepSeek Chat only input tokens (cache miss)", + }, + { + provider: "deepseek", + model: "deepseek-chat", + promptTokens: 1000, + completionTokens: 0, + cachedTokens: 1000, + expectedCost: 0.000028, + description: "DeepSeek Chat only input tokens (cache hit)", + }, + // Edge cases { provider: "anthropic", diff --git 
a/docs/customize/deep-dives/model-capabilities.mdx b/docs/customize/deep-dives/model-capabilities.mdx index ab2adcd8346..3d5a14907b0 100644 --- a/docs/customize/deep-dives/model-capabilities.mdx +++ b/docs/customize/deep-dives/model-capabilities.mdx @@ -213,11 +213,10 @@ This matrix shows which models support tool use and image input capabilities. Co ### DeepSeek -| Model | Tool Use | Image Input | Context Window | -| :---------------- | -------- | ----------- | -------------- | -| DeepSeek V3 | Yes | No | 128k | -| DeepSeek Coder V2 | Yes | No | 128k | -| DeepSeek Chat | Yes | No | 64k | +| Model | Tool Use | Image Input | Context Window | Max Output | Reasoning | +| :---------------- | :------- | :---------- | :------------- | :--------- | :-------- | +| DeepSeek Chat | Yes | No | 128k | 8k | No | +| DeepSeek Reasoner | Yes | No | 128k | 64k | Yes | ### xAI @@ -260,6 +259,8 @@ This matrix shows which models support tool use and image input capabilities. Co - **Tool Use**: Function calling support (tools are required for Agent mode) - **Image Input**: Processing images - **Context Window**: Maximum number of tokens the model can process in a single request +- **Reasoning**: Chain-of-thought reasoning support (e.g., DeepSeek Reasoner) +- **FIM Support**: Fill-in-Middle completion support for code generation --- diff --git a/docs/customize/model-providers/more/deepseek.mdx b/docs/customize/model-providers/more/deepseek.mdx index b092a661f56..8e9748f3f91 100644 --- a/docs/customize/model-providers/more/deepseek.mdx +++ b/docs/customize/model-providers/more/deepseek.mdx @@ -1,44 +1,157 @@ --- -title: DeepSeek +title: "How to Configure DeepSeek Models with Continue" +description: "Configure DeepSeek's AI models with Continue, including setup for DeepSeek Chat and DeepSeek Reasoner models with strong code generation capabilities" slug: ../deepseek +sidebarTitle: "DeepSeek" --- + **Discover DeepSeek models [here](https://continue.dev/hub?q=DeepSeek)** + - You can get an 
API key from the [DeepSeek Console](https://www.deepseek.com/). + + Get an API key from the [DeepSeek Platform](https://platform.deepseek.com/). + DeepSeek offers both chat and reasoning models with strong code generation capabilities. + + + +## Configuration + +### Basic Configuration + + + + ```yaml title="config.yaml" + name: My Config + version: 0.0.1 + schema: v1 + + models: + - name: DeepSeek Chat + provider: deepseek + model: deepseek-chat + apiKey: + - name: DeepSeek Reasoner + provider: deepseek + model: deepseek-reasoner + apiKey: + ``` + + + ```json title="config.json" + { + "models": [ + { + "title": "DeepSeek Chat", + "provider": "deepseek", + "model": "deepseek-chat", + "apiKey": "" + }, + { + "title": "DeepSeek Reasoner", + "provider": "deepseek", + "model": "deepseek-reasoner", + "apiKey": "" + } + ] + } + ``` + + + +## DeepSeek Model Capabilities + +DeepSeek models support various capabilities that you should be aware of when configuring: + + + + | Model | Tool Use | Image Input | Context Window | Max Output | Reasoning | + | :---------------- | :------- | :---------- | :------------- | :--------- | :-------- | + | DeepSeek Chat | Yes | No | 128k | 8k | No | + | DeepSeek Reasoner | Yes | No | 128k | 64k | Yes | + + + + + + **FIM (Fill-in-Middle) Model**: DeepSeek supports FIM completion through the beta endpoint `https://api.deepseek.com/beta/completions`. Its high latency may be impractical for smooth autocompletion tasks. + + **Reasoning Model Limitations**: DeepSeek Reasoner models ignore `temperature`, `top_p`, `frequency_penalty`, and `presence_penalty` parameters. These parameters are accepted for compatibility but have no effect on the output. 
+ -## Confiugration +## Advanced Configurations - - ```yaml title="config.yaml" - name: My Config - version: 0.0.1 - schema: v1 - - models: - - name: - provider: deepseek - model: - apiKey: - ``` - - - ```json title="config.json" - { - "models": [ - { - "title": "", - "provider": "deepseek", - "model": "", - "apiKey": "" - } - ] - } - ``` - + + ```yaml title="config.yaml" + name: My Config + version: 0.0.1 + schema: v1 + + models: + - name: DeepSeek Chat + provider: deepseek + model: deepseek-chat + apiKey: + contextLength: 131072 + temperature: 0.5 + maxTokens: 8192 + roles: + - chat + - edit + - summarize + capabilities: + - tool_use + ``` + + + ```yaml title="config.yaml" + name: My Config + version: 0.0.1 + schema: v1 + + models: + - name: DeepSeek Reasoner + provider: deepseek + model: deepseek-reasoner + apiKey: + contextLength: 131072 + maxTokens: 65536 + roles: + - chat + - edit + - apply + - summarize + capabilities: + - tool_use + ``` + + + + + DeepSeek Reasoner's 64k token output length is especially well-suited for large file edits. + Thinking mode provides `reasoning_content` in responses, showing the model's thought process before delivering the final answer. + + + +### Tool Calling Support + +DeepSeek models support native tool/function calling: + + + + Tool calling is required for Agent mode in Continue. DeepSeek models support both single and parallel tool calls. 
+ + + +## Additional Resources + +- [DeepSeek Official Documentation](https://platform.deepseek.com/api-docs/) +- [DeepSeek Model Hub](https://continue.dev/hub?q=DeepSeek) +- [Continue Model Capabilities Guide](/customize/deep-dives/model-capabilities) + diff --git a/docs/customize/model-providers/overview.mdx b/docs/customize/model-providers/overview.mdx index 4bbf1c0dc60..e9ee9aaa45c 100644 --- a/docs/customize/model-providers/overview.mdx +++ b/docs/customize/model-providers/overview.mdx @@ -15,7 +15,7 @@ These are the most commonly used model providers that offer a wide range of capa | [Amazon Bedrock](/customize/model-providers/top-level/bedrock) | AWS service offering access to various foundation models | Chat, Edit, Apply, Embeddings | | [Ollama](/customize/model-providers/top-level/ollama) | Run open-source models locally with a simple interface | Chat, Edit, Apply, Embeddings, Autocomplete | | [Google Gemini](/customize/model-providers/top-level/gemini) | Google's multimodal AI models | Chat, Edit, Apply, Embeddings | -| [DeepSeek](/customize/model-providers/more/deepseek) | Specialized code models with strong performance | Chat, Edit, Apply | +| [DeepSeek](/customize/model-providers/more/deepseek) | DeepSeek's specialized models with strong reasoning and coding capabilities | Chat, Edit, Apply | | [Mistral](/customize/model-providers/more/mistral) | High-performance open models with commercial offerings | Chat, Edit, Apply, Embeddings | | [xAI](/customize/model-providers/more/xAI) | Grok models from xAI | Chat, Edit, Apply | | [Vertex AI](/customize/model-providers/top-level/vertexai) | Google Cloud's machine learning platform | Chat, Edit, Apply, Embeddings | diff --git a/extensions/vscode/config_schema.json b/extensions/vscode/config_schema.json index c7bd24175d5..086473e333d 100644 --- a/extensions/vscode/config_schema.json +++ b/extensions/vscode/config_schema.json @@ -262,7 +262,7 @@ "### Fireworks\nFireworks is a fast inference engine for 
open-source language models. To get started, obtain an API key from [their console](https://fireworks.ai/api-keys).", "### Ncompass\nnCompass is an extremely fast inference engine for open-source language models. To get started, obtain an API key from [their console](https://app.ncompass.tech/api-settings).", "### Cloudflare Workers AI\n\n[Reference](https://developers.cloudflare.com/workers-ai/)", - "### Deepseek\n Deepseek's API provides the best pricing for their state-of-the-art Deepseek Coder models. To get started, obtain an API key from [their console](https://platform.deepseek.com/api_keys)", + "### Deepseek\n Deepseek's API provides the best pricing for their state-of-the-art reasoning models. To get started, obtain an API key from [their console](https://platform.deepseek.com/api_keys)", "### Azure OpenAI\n Azure OpenAI lets you securely run OpenAI's models on Azure. To get started, follow the steps [here](https://docs.continue.dev/reference/Model%20Providers/azure)", "### Msty\nMsty is the simplest way to get started with online or local LLMs on all desktop platforms - Windows, Mac, and Linux. No fussing around, one-click and you are up and running. To get started, follow these steps:\n1. Download from [Msty.app](https://msty.app/), open the application, and click 'Setup Local AI'.\n2. Go to the Local AI Module page and download a model of your choice.\n3. 
Once the model has finished downloading, you can start asking questions through Continue.\n> [Reference](https://continue.dev/docs/reference/Model%20Providers/Msty)", "### IBM watsonx\nwatsonx, developed by IBM, offers a variety of pre-trained AI foundation models that can be used for natural language processing (NLP), computer vision, and speech recognition tasks.", @@ -304,6 +304,24 @@ "description": "The base URL of the LLM API.", "type": "string" }, + "roles": { + "title": "Roles", + "description": "An array specifying the roles this model can fulfill, such as chat, autocomplete, embed, rerank, edit, apply, summarize, subagent.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "chat", + "autocomplete", + "embed", + "rerank", + "edit", + "apply", + "summarize", + "subagent" + ] + } + }, "region": { "title": "Region", "description": "The region where the model is hosted", @@ -1278,24 +1296,16 @@ "enum": ["deepseek"] } }, - "required": ["provider"] + "required": ["provider", "apiKey"] }, "then": { "properties": { - "cacheBehavior": { - "title": "Caching Behavior", - "description": "Options for the prompt caching", - "properties": { - "cacheSystemMessage": { - "type": "boolean" - }, - "cacheConversation": { - "type": "boolean" - } - } - }, "model": { - "enum": ["deepseek-chat", "deepseek-coder"] + "enum": [ + "deepseek-reasoner", + "deepseek-chat", + "deepseek-fim-beta" + ] } } } diff --git a/extensions/vscode/src/webviewProtocol.ts b/extensions/vscode/src/webviewProtocol.ts index 765e7a8c7a4..3f70e64fe89 100644 --- a/extensions/vscode/src/webviewProtocol.ts +++ b/extensions/vscode/src/webviewProtocol.ts @@ -62,6 +62,11 @@ export class VsCodeWebviewProtocol this.listeners.get(msg.messageType as keyof FromWebviewProtocol) || []; for (const handler of handlers) { try { + if (typeof handler !== "function") { + throw new Error( + `Invalid webview handler for messageType "${msg.messageType}" (expected function, got ${typeof handler})`, + ); + } const 
response = await handler(msg); // For generator types e.g. llm/streamChat if ( diff --git a/gui/src/components/OnboardingCard/components/OnboardingProvidersTab.tsx b/gui/src/components/OnboardingCard/components/OnboardingProvidersTab.tsx index a4ddd380cb1..68fb7164986 100644 --- a/gui/src/components/OnboardingCard/components/OnboardingProvidersTab.tsx +++ b/gui/src/components/OnboardingCard/components/OnboardingProvidersTab.tsx @@ -26,6 +26,7 @@ export function OnboardingProvidersTab({ providers["openai"], providers["anthropic"], providers["gemini"], + providers["deepseek"], ]; const handleFormSubmit = () => { diff --git a/gui/src/pages/AddNewModel/configs/models.ts b/gui/src/pages/AddNewModel/configs/models.ts index d15db3f3d1d..6759638d9ea 100644 --- a/gui/src/pages/AddNewModel/configs/models.ts +++ b/gui/src/pages/AddNewModel/configs/models.ts @@ -383,39 +383,44 @@ export const models: { [key: string]: ModelPackage } = { providerOptions: ["ollama", "lmstudio", "llama.cpp"], isOpenSource: true, }, - deepseekChatApi: { + deepseekChat: { title: "DeepSeek Chat", - description: "DeepSeek's best model for general chat use cases.", + description: "DeepSeek's best model with excellent coding capabilities.", params: { title: "DeepSeek Chat", model: "deepseek-chat", - contextLength: 128_000, + contextLength: 131072, + maxTokens: 8192, + apiBase: "https://api.deepseek.com/", }, icon: "deepseek.png", providerOptions: ["deepseek"], - isOpenSource: false, + isOpenSource: true, }, - deepseekCoderApi: { - title: "DeepSeek Coder", + deepseekReasoner: { + title: "DeepSeek Reasoner", description: - "A model pre-trained on 2 trillion tokens including 80+ programming languages and a repo-level corpus.", + "DeepSeek's reasoning model with 64k max_token limit for complex thinking tasks.", params: { - title: "DeepSeek Coder", - model: "deepseek-coder", - contextLength: 128_000, + title: "DeepSeek Reasoner", + model: "deepseek-reasoner", + contextLength: 131072, + maxTokens: 32000, + 
apiBase: "https://api.deepseek.com/", }, icon: "deepseek.png", providerOptions: ["deepseek"], - isOpenSource: false, + isOpenSource: true, }, - deepseekReasonerApi: { - title: "DeepSeek Reasoner", - description: - "An open-source reasoning model which generates a chain of thought to enhance the accuracy of its responses.", + deepseekFimBeta: { + title: "DeepSeek FIM (Beta)", + description: "DeepSeek's Fill-in-the-Middle model beta feature", params: { - title: "DeepSeek Reasoner", - model: "deepseek-reasoner", - contextLength: 64_000, + title: "DeepSeek FIM Beta", + model: "deepseek-fim-beta", + contextLength: 131072, + maxTokens: 8192, + apiBase: "https://api.deepseek.com/", }, icon: "deepseek.png", providerOptions: ["deepseek"], diff --git a/gui/src/pages/AddNewModel/configs/providers.ts b/gui/src/pages/AddNewModel/configs/providers.ts index 5dfb7220b14..7f3420746b7 100644 --- a/gui/src/pages/AddNewModel/configs/providers.ts +++ b/gui/src/pages/AddNewModel/configs/providers.ts @@ -584,25 +584,32 @@ Select the \`GPT-4o\` model below to complete your provider configuration, but n provider: "deepseek", icon: "deepseek.png", description: - "DeepSeek provides cheap inference of its DeepSeek Coder v2 and other impressive open-source models.", - longDescription: - "To get started with DeepSeek, obtain an API key from their website [here](https://platform.deepseek.com/api_keys).", + "DeepSeek - Advanced language models for reasoning, coding, and chat", + longDescription: `DeepSeek provides an affordable API for state-of-the-art language models with exceptional performance for reasoning, code completion, and general conversation. Get your API key from [platform.deepseek.com](https://platform.deepseek.com). 
+ +**Important Notes:** +- 128k context length +- **DeepSeek Chat**: Standard model with 8k max output token limit, supports tools +- **DeepSeek Reasoner**: Reasoning mode with 64k max output token limit, supports tools +- **DeepSeek FIM (Beta)**: Fill-in-the-Middle for autocompletion without native tool support`, tags: [ModelProviderTags.RequiresApiKey, ModelProviderTags.OpenSource], collectInputFor: [ { inputType: "text", key: "apiKey", label: "API Key", - placeholder: "Enter your DeepSeek API key", + placeholder: "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", required: true, + description: "Your DeepSeek API key from platform.deepseek.com", }, + ...completionParamsInputsConfigs, ], packages: [ - models.deepseekCoderApi, - models.deepseekChatApi, - models.deepseekReasonerApi, + models.deepseekChat, + models.deepseekReasoner, + models.deepseekFimBeta, ], - apiKeyUrl: "https://platform.deepseek.com/api_keys", + apiKeyUrl: "https://platform.deepseek.com", }, together: { title: "TogetherAI", diff --git a/packages/llm-info/src/index.ts b/packages/llm-info/src/index.ts index 52ca9f211cb..abbf3ed8488 100644 --- a/packages/llm-info/src/index.ts +++ b/packages/llm-info/src/index.ts @@ -3,6 +3,7 @@ import { Azure } from "./providers/azure.js"; import { Bedrock } from "./providers/bedrock.js"; import { Cohere } from "./providers/cohere.js"; import { CometAPI } from "./providers/cometapi.js"; +import { DeepSeek } from "./providers/deepseek.js"; import { Gemini } from "./providers/gemini.js"; import { Mistral } from "./providers/mistral.js"; import { Ollama } from "./providers/ollama.js"; @@ -26,6 +27,7 @@ export const allModelProviders: ModelProvider[] = [ Cohere, CometAPI, xAI, + DeepSeek, zAI, ]; diff --git a/packages/llm-info/src/providers/deepseek.ts b/packages/llm-info/src/providers/deepseek.ts new file mode 100644 index 00000000000..ced431d8437 --- /dev/null +++ b/packages/llm-info/src/providers/deepseek.ts @@ -0,0 +1,29 @@ +import { ModelProvider } from "../types.js"; + 
+export const DeepSeek: ModelProvider = { + models: [ + { + model: "deepseek-chat", + displayName: "DeepSeek Chat", + contextLength: 131072, + maxCompletionTokens: 8192, + recommendedFor: ["chat"], + }, + { + model: "deepseek-reasoner", + displayName: "DeepSeek Reasoner", + contextLength: 131072, + maxCompletionTokens: 32000, + recommendedFor: ["chat"], + }, + { + model: "deepseek-fim-beta", + displayName: "DeepSeek FIM Beta", + contextLength: 131072, + maxCompletionTokens: 8192, + recommendedFor: ["autocomplete"], + }, + ], + id: "deepseek", + displayName: "DeepSeek", +}; diff --git a/packages/openai-adapters/src/apis/DeepSeek.ts b/packages/openai-adapters/src/apis/DeepSeek.ts index 0beba18fee1..f12364ccdf9 100644 --- a/packages/openai-adapters/src/apis/DeepSeek.ts +++ b/packages/openai-adapters/src/apis/DeepSeek.ts @@ -1,56 +1,412 @@ import { streamSse } from "@continuedev/fetch"; -import { ChatCompletionChunk, Model } from "openai/resources/index"; -import { DeepseekConfig } from "../types.js"; +import { + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionChunk, + ChatCompletionToolMessageParam, + CreateEmbeddingResponse, + EmbeddingCreateParams, + Model, +} from "openai/resources/index"; +import { z } from "zod"; + +import { OpenAIConfigSchema } from "../types.js"; import { chatChunk, customFetch } from "../util.js"; import { OpenAIApi } from "./OpenAI.js"; -import { FimCreateParamsStreaming } from "./base.js"; +import { + CreateRerankResponse, + FimCreateParamsStreaming, + RerankCreateParams, +} from "./base.js"; +// Import converter functions +import { + ChatCompletionCreateParamsExt, + convertToChatDeepSeekRequestBody, + convertToChatPrefixDeepSeekRequestBody, + convertToFimDeepSeekRequestBody, + isReasoningEnabled, +} from "../util/deepseek-converters.js"; +import type { ChatDeepSeekRequestBody } from "../util/deepseek-types.js"; export const DEEPSEEK_API_BASE = "https://api.deepseek.com/"; +// Default configuration values + +/** + * 
OAI to DeepSeek API adapter + * + * - Includes Prefix-/ Chat Completions (stream/non-stream), and Fill-in-Middle (FIM) + * - With proper type safety and validated conversion + * - Repairs rare cases of missing content in chat stream responses + * ! (beta-Endpoints may change in the future) + */ export class DeepSeekApi extends OpenAIApi { - constructor(config: DeepseekConfig) { + // Default configuration values + private readonly WARN_ON_UNSUPPORTED_FEATURES = true; + + constructor(config: z.infer) { + const apiBase = config.apiBase ?? DEEPSEEK_API_BASE; super({ ...config, - provider: "openai", - apiBase: config.apiBase ?? DEEPSEEK_API_BASE, + apiBase, + }); + if (!this.apiBase.endsWith("/")) { + this.apiBase += "/"; + } + } + + private async _throwDeepSeekError(resp: Response): Promise { + const errorText = await resp.text(); + throw new Error(`DeepSeek API error (${resp.status}): ${errorText}`); + } + + // checks for signs of native tools in the conversation + private hasToolsInConversation(body: ChatCompletionCreateParamsExt): boolean { + if (body.tools && body.tools.length > 0) { + return true; + } + // Check if tool_choice is specified (indicates intent to use tools) + if (body.tool_choice) { + return true; + } + // Check if any message contains tool_calls or tool_call_id + for (const message of body.messages ?? 
[]) { + if (message.role === "assistant") { + const assistantMsg = message as ChatCompletionAssistantMessageParam; + if ( + Array.isArray(assistantMsg.tool_calls) && + assistantMsg.tool_calls.length > 0 + ) { + return true; + } + } + if (message.role === "tool") { + const toolMsg = message as ChatCompletionToolMessageParam; + if (typeof toolMsg.tool_call_id === "string") { + return true; + } + } + } + + return false; + } + + /** + * Determines the appropriate endpoint and request body for chat completions + * based on the message structure + */ + private prepareChatCompletionRequest(body: ChatCompletionCreateParamsExt): { + endpoint: URL; + deepSeekBody: ChatDeepSeekRequestBody; + } { + const warnings: string[] = []; + + const lastMessage = + body.messages.length > 0 + ? body.messages[body.messages.length - 1] + : undefined; + const hasTools = this.hasToolsInConversation(body); + + // Prefix completion requires: + // 1. Last message must be from assistant + // 2. No tools involved in the conversation (as prefix completion doesn't support tools) + const isPrefixCompletion = lastMessage?.role === "assistant" && !hasTools; + + // Warn if tools are present but last message is from assistant + if (lastMessage?.role === "assistant" && hasTools) { + warnings.push( + "Prefix completion does not support tools. Using regular chat completion instead.", + ); + } + + const endpoint = new URL( + isPrefixCompletion ? "beta/chat/completions" : "chat/completions", + this.apiBase, + ); + + const deepSeekBody = isPrefixCompletion + ? convertToChatPrefixDeepSeekRequestBody(body, warnings) + : convertToChatDeepSeekRequestBody(body, warnings); + + this._processWarnings(warnings); + + return { endpoint, deepSeekBody }; + } + + /** + * Non‑streaming chat completion. + * + * Handles the same request conversion as the streaming endpoint but returns a complete + * ChatCompletion object. 
Also applies the same reasoning‑content repair logic as the + * streaming method, in case the API returns reasoning_content but no regular content. + */ + async chatCompletionNonStream( + body: ChatCompletionCreateParamsExt, + signal: AbortSignal, + ): Promise { + const { endpoint, deepSeekBody } = this.prepareChatCompletionRequest(body); + + // Execute the API request + const resp = await customFetch(this.config.requestOptions)(endpoint, { + method: "POST", + body: JSON.stringify({ + ...deepSeekBody, + stream: false, + }), + headers: this.getHeaders(), + signal, }); + + // Handle error responses + if (!resp.ok) { + await this._throwDeepSeekError(resp); + } + + // Parse the JSON response (may contain DeepSeek‑specific fields like reasoning_content) + const responseData: any = await resp.json(); + + // Repair logic for non‑streaming responses: if the API returned reasoning_content + // but no regular content (and reasoning is enabled), copy reasoning_content into content. + // This mirrors the repair done in the streaming method and may be removed with future DeepSeek API changes. 
+ if ( + isReasoningEnabled(body) && + responseData.choices && + Array.isArray(responseData.choices) && + responseData.choices.length > 0 + ) { + const choice = responseData.choices[0]; + if (choice && choice.message) { + const message = choice.message; + const reasoningContent = message.reasoning_content; + const hasContent = + message.content && + typeof message.content === "string" && + message.content.trim() !== ""; + const hasToolCalls = + message.tool_calls && + Array.isArray(message.tool_calls) && + message.tool_calls.length > 0; + + if ( + !hasContent && + !hasToolCalls && + reasoningContent && + typeof reasoningContent === "string" && + reasoningContent.trim() !== "" + ) { + // Copy reasoning_content into content to ensure the result is usable next turn + message.content = reasoningContent; + } + } + } + + return responseData as ChatCompletion; } + // streaming chat completion request with repair logic for missing content + async *chatCompletionStream( + body: ChatCompletionCreateParamsExt, + signal: AbortSignal, + ): AsyncGenerator { + const { endpoint, deepSeekBody } = this.prepareChatCompletionRequest(body); + const resp = await customFetch(this.config.requestOptions)(endpoint, { + method: "POST", + body: JSON.stringify({ + ...deepSeekBody, + stream: true, + }), + headers: this.getHeaders(), + signal, + }); + if (!resp.ok) { + await this._throwDeepSeekError(resp); + } + + /* Very rare edge case workarounds: + * - If the stream ends with a finish_reason "stop" + * and no content or tool_calls were ever sent, but reasoning_content was received, + * → inject a final chunk containing the reasoning as content to rescue results for next turn + * (remove when fixed with future model updates and no longer needed) + */ + + let reasoningBuffer = ""; + let finishReason: string | null = null; + let hasContent = false; + let hasToolCalls = false; + + for await (const chunk of streamSse(resp)) { + if (chunk.choices?.[0]?.delta?.reasoning_content) { + reasoningBuffer 
+= chunk.choices[0].delta.reasoning_content; + } + + // check if chunk contains content or tool_calls + hasContent = + hasContent || + (!!chunk.choices?.[0]?.delta?.content && + typeof chunk.choices[0].delta.content === "string" && + chunk.choices[0].delta.content !== ""); + hasToolCalls = + hasToolCalls || + (!!chunk.choices?.[0]?.delta?.tool_calls && + Array.isArray(chunk.choices[0].delta.tool_calls) && + chunk.choices[0].delta.tool_calls.length > 0); + + const chunkFinishReason = chunk.choices?.[0]?.finish_reason ?? null; + if (chunkFinishReason) { + finishReason = chunkFinishReason; + // Do not forward provider finish_reason early; emit it as a final chunk below. + const sanitized: ChatCompletionChunk = { + ...chunk, + choices: chunk.choices?.map((c: ChatCompletionChunk.Choice) => ({ + ...c, + finish_reason: null, + })), + } as ChatCompletionChunk; + yield sanitized; + } else { + yield chunk; + } + } + + // fix missing content on last message + if ( + !hasContent && + !hasToolCalls && + reasoningBuffer && + finishReason == "stop" && + isReasoningEnabled(body) + ) { + const repairChunk: ChatCompletionChunk = { + id: "repair", + object: "chat.completion.chunk", + created: Date.now(), + model: body.model, + choices: [ + { + index: 0, + delta: { content: reasoningBuffer }, + finish_reason: null, + }, + ] as ChatCompletionChunk.Choice[], + }; + yield repairChunk; + } + + // emit finish chunk + if (finishReason) { + const finishChunk: ChatCompletionChunk = { + id: "finish", + object: "chat.completion.chunk", + created: Date.now(), + model: body.model, + choices: [ + { + index: 0, + delta: { + content: "", + }, + finish_reason: finishReason, + }, + ] as ChatCompletionChunk.Choice[], + }; + yield finishChunk; + } + } + + // Performs a streaming Fill-in-Middle (FIM) completion request (Beta API) async *fimStream( - body: FimCreateParamsStreaming, + body: FimCreateParamsStreaming & { messages?: Array }, signal: AbortSignal, - ): AsyncGenerator { + ): AsyncGenerator { + 
const warnings: string[] = []; const endpoint = new URL("beta/completions", this.apiBase); + + const deepSeekBody = convertToFimDeepSeekRequestBody(body, warnings); + + // Log any warnings about unsupported features + this._processWarnings(warnings); + // Execute the streaming API request const resp = await customFetch(this.config.requestOptions)(endpoint, { method: "POST", body: JSON.stringify({ - model: body.model, - prompt: body.prompt, - suffix: body.suffix, - max_tokens: body.max_tokens, - temperature: body.temperature, - top_p: body.top_p, - frequency_penalty: body.frequency_penalty, - presence_penalty: body.presence_penalty, - stop: body.stop, + ...deepSeekBody, stream: true, }), - headers: { - "Content-Type": "application/json", - Accept: "application/json", - Authorization: `Bearer ${this.config.apiKey}`, - }, + headers: this.getHeaders(), signal, }); - for await (const chunk of streamSse(resp as any)) { - yield chatChunk({ - content: chunk.choices[0].text, - finish_reason: chunk.finish_reason, - model: body.model, - }); + + // Handle error responses + if (!resp.ok) { + await this._throwDeepSeekError(resp); + } + // Process the streaming response + for await (const chunk of streamSse(resp)) { + if (chunk.choices && chunk.choices.length > 0) { + yield chatChunk({ + content: chunk.choices[0].text || "", + finish_reason: chunk.choices[0].finish_reason, + model: deepSeekBody.model, + }); + if (chunk.choices[0].finish_reason) { + return; + } + } + } + } + + /** + * Creates embeddings for the input text + * + * @throws {Error} Always throws an error as DeepSeek does not support embeddings + */ + async embed(_body: EmbeddingCreateParams): Promise { + throw new Error("DeepSeek does not support embeddings API"); + } + + /** + * Reranks a list of documents based on their relevance to a query + * + * @throws {Error} Always throws an error as DeepSeek does not support reranking + */ + async rerank(_body: RerankCreateParams): Promise { + throw new Error("DeepSeek does 
not support reranking API"); + } + + /** + * Lists all available models from the DeepSeek API + * + * @returns Promise that resolves to an array of available models + */ + async list(): Promise { + const endpoint = new URL("models", this.apiBase); + + // Execute the API request + const resp = await customFetch(this.config.requestOptions)(endpoint, { + method: "GET", + headers: this.getHeaders(), + }); + + if (!resp.ok) { + await this._throwDeepSeekError(resp); } + + const data = await resp.json(); + return data.data || []; } - list(): Promise { - throw new Error("Method not implemented."); + // Generates the headers required for API requests + protected getHeaders() { + return { + "content-type": "application/json", + accept: "application/json", + authorization: `Bearer ${this.config.apiKey}`, + }; + } + + // Logs any warnings about unsupported features + protected _processWarnings(warnings: string[]) { + if (warnings.length > 0 && this.WARN_ON_UNSUPPORTED_FEATURES) { + warnings.forEach((warning) => console.warn(warning)); + } } } diff --git a/packages/openai-adapters/src/test/DeepSeekApi.test.ts b/packages/openai-adapters/src/test/DeepSeekApi.test.ts new file mode 100644 index 00000000000..e2531e24c35 --- /dev/null +++ b/packages/openai-adapters/src/test/DeepSeekApi.test.ts @@ -0,0 +1,160 @@ +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { DeepSeekApi } from "../apis/DeepSeek.js"; +import { OpenAIConfigSchema } from "../types.js"; + +describe("DeepSeekApi - Simple Unit Tests", () => { + const mockConfig = { + apiKey: "test-key", + apiBase: "https://api.deepseek.com/", + }; + + describe("constructor", () => { + it("should preserve API base URL exactly as provided", () => { + const api = new DeepSeekApi({ + ...mockConfig, + apiBase: "https://api.deepseek.com/", + } as z.infer); + + // @ts-ignore - accessing private property for test + expect(api.apiBase).toBe("https://api.deepseek.com/"); + }); + + it("should use default API base 
if not provided", () => { + const api = new DeepSeekApi({ + apiKey: "test-key", + } as z.infer); + + // @ts-ignore + expect(api.apiBase).toBe("https://api.deepseek.com/"); + }); + }); + + describe("hasToolsInConversation", () => { + it("should detect tools in body.tools", () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const body: any = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hi" }], + tools: [{ type: "function", function: { name: "test" } }], + }; + + // @ts-ignore - accessing private method + const result = api.hasToolsInConversation(body); + expect(result).toBe(true); + }); + + it("should detect tool_choice", () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const body: any = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hi" }], + tool_choice: "auto", + }; + + // @ts-ignore + const result = api.hasToolsInConversation(body); + expect(result).toBe(true); + }); + + it("should detect tool_calls in assistant messages", () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const body: any = { + model: "deepseek-chat", + messages: [ + { role: "user", content: "Hi" }, + { + role: "assistant", + content: "", + tool_calls: [ + { + id: "1", + type: "function", + function: { name: "test", arguments: "{}" }, + }, + ], + }, + ], + }; + + // @ts-ignore + const result = api.hasToolsInConversation(body); + expect(result).toBe(true); + }); + + it("should return false for no tools", () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const body: any = { + model: "deepseek-chat", + messages: [{ role: "user", content: "Hi" }], + }; + + // @ts-ignore + const result = api.hasToolsInConversation(body); + expect(result).toBe(false); + }); + }); + + describe("prepareChatCompletionRequest logic", () => { + it("should use prefix completion for assistant last message without tools", () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const body: 
any = { + model: "deepseek-chat", + messages: [ + { role: "user", content: "Complete this" }, + { role: "assistant", content: "I think" }, + ], + }; + + // @ts-ignore - accessing private method + const result = api.prepareChatCompletionRequest(body); + expect(result.endpoint.pathname).toContain("beta/chat/completions"); + }); + + it("should use regular chat completion with tools", () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const body: any = { + model: "deepseek-chat", + messages: [ + { role: "user", content: "Use tool" }, + { role: "assistant", content: "I'll use tool" }, + ], + tools: [{ type: "function", function: { name: "test" } }], + }; + + // @ts-ignore - accessing private method + const result = api.prepareChatCompletionRequest(body); + expect(result.endpoint.pathname).toContain("chat/completions"); + expect(result.endpoint.pathname).not.toContain("beta"); + }); + }); + + describe("error handling", () => { + it("_throwDeepSeekError should format error message", async () => { + const api = new DeepSeekApi( + mockConfig as z.infer, + ); + const mockResponse = { + status: 429, + text: async () => "Rate limit exceeded", + } as Response; + + // @ts-ignore - accessing private method + await expect(api._throwDeepSeekError(mockResponse)).rejects.toThrow( + "DeepSeek API error (429): Rate limit exceeded", + ); + }); + }); +}); diff --git a/packages/openai-adapters/src/test/deepseek-converters.test.ts b/packages/openai-adapters/src/test/deepseek-converters.test.ts new file mode 100644 index 00000000000..7bbf08d1f4d --- /dev/null +++ b/packages/openai-adapters/src/test/deepseek-converters.test.ts @@ -0,0 +1,443 @@ +import { + ChatCompletionTool, + ChatCompletionToolChoiceOption, +} from "openai/resources"; +import { describe, expect, it } from "vitest"; +import type { + ChatCompletionCreateParamsExt, + OpenAICompatibleMessage, +} from "../util/deepseek-converters.js"; +import { + convertToChatDeepSeekRequestBody, + prepareMessage, + 
validateAndFilterContent, + validateAndFilterTools, + validateAndPrepareMessages, + validateResponseFormat, + validateToolChoice, +} from "../util/deepseek-converters.js"; +import type { DeepSeekMessage, DeepSeekTool } from "../util/deepseek-types.js"; + +describe("DeepSeek Converters", () => { + describe("validateAndPrepareMessages", () => { + it("should ensure reasoning_content is defined for assistant messages after last user message in reasoning mode", () => { + const warnings: string[] = []; + const messages: OpenAICompatibleMessage[] = [ + { role: "user", content: "Hello" }, + { + role: "assistant", + content: "Hi", + reasoning_content: "I should greet back", + }, + { role: "user", content: "What's the weather?" }, + // Assistant with tool calls but no reasoning_content - should get empty string + { + role: "assistant", + content: "", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { name: "get_weather", arguments: '{"city":"Berlin"}' }, + }, + ], + }, + { role: "tool", content: '{"temp":20}', tool_call_id: "call_1" }, + // Another assistant with reasoning_content already defined + { + role: "assistant", + content: "It's 20 degrees", + reasoning_content: "I need to summarize the weather", + }, + ]; + + const result = validateAndPrepareMessages(messages, warnings, true); + + // Find assistant messages after last user message (index 2) + // The assistant with tool calls should have reasoning_content = "" + const assistantWithToolCalls = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.tool_calls?.length, + ); + expect(assistantWithToolCalls?.reasoning_content).toBe(""); + + // Assistant with existing reasoning_content should keep it + const assistantWithReasoning = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.content === "It's 20 degrees", + ); + expect(assistantWithReasoning?.reasoning_content).toBe( + "I need to summarize the weather", + ); + + // Assistant before last user message 
(first assistant) should NOT have reasoning_content + // because it's before the last user message boundary + const firstAssistant = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.content === "Hi", + ); + expect(firstAssistant?.reasoning_content).toBeUndefined(); + + expect(warnings).toEqual([]); + }); + + it("should handle tool call chain without reasoning_content", () => { + const warnings: string[] = []; + const messages: OpenAICompatibleMessage[] = [ + { role: "user", content: "Get data" }, + { + role: "assistant", + content: "", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { name: "api_call", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "data", tool_call_id: "call_1" }, + { + role: "assistant", + content: "", + tool_calls: [ + { + id: "call_2", + type: "function", + function: { name: "process", arguments: "{}" }, + }, + ], + }, + { role: "tool", content: "processed", tool_call_id: "call_2" }, + { role: "assistant", content: "Here is the result" }, + ]; + + const result = validateAndPrepareMessages(messages, warnings, true); + + // All assistant messages after last user message should have reasoning_content (empty string) + const allAssistants = result.filter( + (msg: DeepSeekMessage) => msg.role === "assistant", + ); + expect(allAssistants).toHaveLength(3); + allAssistants.forEach((assistant: DeepSeekMessage) => { + expect(assistant.reasoning_content).toBe(""); + }); + + expect(warnings).toEqual([]); + }); + + it("should not add reasoning_content to assistant messages before last user message", () => { + const warnings: string[] = []; + const messages: OpenAICompatibleMessage[] = [ + { role: "user", content: "First question" }, + { role: "assistant", content: "First answer" }, // No reasoning_content + { role: "user", content: "Second question" }, + { + role: "assistant", + content: "Second answer", + reasoning_content: "Thinking", + }, + ]; + + const result = 
validateAndPrepareMessages(messages, warnings, true); + + const firstAssistant = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.content === "First answer", + ); + // This assistant is before last user message (second user), so should NOT get reasoning_content + expect(firstAssistant?.reasoning_content).toBeUndefined(); + + const secondAssistant = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.content === "Second answer", + ); + // This assistant is after last user message, but already has reasoning_content + expect(secondAssistant?.reasoning_content).toBe("Thinking"); + + expect(warnings).toEqual([]); + }); + + it("should handle system messages as user boundary", () => { + const warnings: string[] = []; + const messages: OpenAICompatibleMessage[] = [ + { role: "system", content: "You are a helper" }, + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi" }, // No reasoning_content + { role: "system", content: "Now be concise" }, + { + role: "assistant", + content: "", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { name: "tool", arguments: "{}" }, + }, + ], + }, + ]; + + const result = validateAndPrepareMessages(messages, warnings, true); + + // Assistant after last system message (which resets boundary) should get reasoning_content for tool calls + const assistantWithToolCalls = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.tool_calls?.length, + ); + expect(assistantWithToolCalls?.reasoning_content).toBe(""); + + // Assistant before last system message should NOT get reasoning_content + const firstAssistant = result.find( + (msg: DeepSeekMessage) => + msg.role === "assistant" && msg.content === "Hi", + ); + expect(firstAssistant?.reasoning_content).toBeUndefined(); + + expect(warnings).toEqual([]); + }); + + it("should handle reasoning field (legacy) as reasoning_content", () => { + const warnings: string[] = []; + const messages: 
OpenAICompatibleMessage[] = [ + { role: "user", content: "Test" }, + { role: "assistant", content: "Answer", reasoning: "Legacy reasoning" }, + ]; + + const result = validateAndPrepareMessages(messages, warnings, true); + + const assistant = result.find( + (msg: DeepSeekMessage) => msg.role === "assistant", + ); + expect(assistant?.reasoning_content).toBe("Legacy reasoning"); + expect(warnings).toEqual([]); + }); + + it("should throw error for empty messages array", () => { + expect(() => validateAndPrepareMessages([], [], true)).toThrow( + "Messages array must contain at least one message", + ); + }); + + it("should filter out invalid roles with warning", () => { + const warnings: string[] = []; + const messages: any[] = [ + { role: "user", content: "Hi" }, + { role: "invalid", content: "Bad" }, + { role: "assistant", content: "Ok" }, + ]; + + const result = validateAndPrepareMessages(messages, warnings, false); + expect(result).toHaveLength(2); + expect(result.map((msg: DeepSeekMessage) => msg.role)).toEqual([ + "user", + "assistant", + ]); + expect(warnings).toContain( + "Invalid message role: invalid at index 1. 
(removed from request)", + ); + }); + }); + + describe("prepareMessage", () => { + it("should convert developer role to system", () => { + const warnings: string[] = []; + const message: OpenAICompatibleMessage = { + role: "developer", + content: "Instructions", + }; + + const result = prepareMessage(message, 0, warnings); + expect(result?.role).toBe("system"); + expect(result?.content).toBe("Instructions"); + expect(warnings).toEqual([]); + }); + + it("should handle tool messages with tool_call_id", () => { + const warnings: string[] = []; + const message: OpenAICompatibleMessage = { + role: "tool", + content: "result", + tool_call_id: "call_123", + }; + + const result = prepareMessage(message, 0, warnings); + expect(result?.role).toBe("tool"); + expect(result?.content).toBe("result"); + expect(result?.tool_call_id).toBe("call_123"); + expect(warnings).toEqual([]); + }); + + it("should filter non-text content with warning", () => { + const warnings: string[] = []; + const message: OpenAICompatibleMessage = { + role: "user", + content: [ + { type: "text", text: "Hello" }, + { type: "image_url", image_url: { url: "data:image/png" } }, + ], + }; + + const result = prepareMessage(message, 0, warnings); + expect(result?.content).toEqual([{ type: "text", text: "Hello" }]); + expect(warnings).toContain("Non-text content parts were filtered out"); + }); + }); + + describe("validateAndFilterContent", () => { + it("should return null for undefined content", () => { + expect(validateAndFilterContent(undefined)).toBe(null); + }); + + it("should return string content as is", () => { + expect(validateAndFilterContent("Hello")).toBe("Hello"); + }); + + it("should filter array content to text only", () => { + const warnings: string[] = []; + const content = [ + { type: "text", text: "Hello" }, + { type: "image_url", image_url: {} }, + { type: "text", text: "World" }, + ]; + const result = validateAndFilterContent(content, warnings); + expect(result).toEqual([ + { type: "text", 
text: "Hello" }, + { type: "text", text: "World" }, + ]); + expect(warnings).toContain("Non-text content parts were filtered out"); + }); + + it("should return empty string for array with no text parts", () => { + const warnings: string[] = []; + const content = [{ type: "image_url", image_url: {} }]; + const result = validateAndFilterContent(content, warnings); + expect(result).toBe(""); + expect(warnings).toContain("Non-text content parts were filtered out"); + }); + }); + + describe("validateResponseFormat", () => { + it("should return undefined for invalid type", () => { + const warnings: string[] = []; + const result = validateResponseFormat({ type: "invalid" }, warnings); + expect(result).toBeUndefined(); + expect(warnings).toContain( + "Invalid response_format.type: invalid. Must be 'text' or 'json_object'.", + ); + }); + + it("should accept text and json_object", () => { + const warnings: string[] = []; + expect(validateResponseFormat({ type: "text" }, warnings)).toEqual({ + type: "text", + }); + expect(validateResponseFormat({ type: "json_object" }, warnings)).toEqual( + { + type: "json_object", + }, + ); + expect(warnings).toEqual([]); + }); + }); + + describe("validateAndFilterTools", () => { + it("should filter out non-function tools", () => { + const warnings: string[] = []; + const tools: any[] = [ + { type: "function", function: { name: "func1" } }, + { type: "retrieval", function: { name: "ret" } }, + { type: "function", function: { name: "func2" } }, + ]; + const result = validateAndFilterTools( + tools as ChatCompletionTool[], + warnings, + ); + expect(result).toHaveLength(2); + expect(result?.map((t: DeepSeekTool) => t.function.name)).toEqual([ + "func1", + "func2", + ]); + expect(warnings).toContain( + "DeepSeek API supports only function tools. 
Ignoring 1 tools.", + ); + }); + + it("should limit to 128 tools", () => { + const warnings: string[] = []; + const tools = Array.from({ length: 130 }, (_, i) => ({ + type: "function", + function: { name: `func${i}` }, + })) as ChatCompletionTool[]; + const result = validateAndFilterTools(tools, warnings); + expect(result).toHaveLength(128); + expect(warnings).toContain( + "DeepSeek API supports maximum 128 tools. Using first 128 and ignoring 2 tools.", + ); + }); + }); + + describe("validateToolChoice", () => { + it("should accept string values none, auto, required", () => { + const warnings: string[] = []; + expect(validateToolChoice("none", warnings)).toBe("none"); + expect(validateToolChoice("auto", warnings)).toBe("auto"); + expect(validateToolChoice("required", warnings)).toBe("required"); + expect(warnings).toEqual([]); + }); + + it("should accept function object", () => { + const warnings: string[] = []; + const toolChoice = { + type: "function" as const, + function: { name: "specific" }, + }; + const result = validateToolChoice(toolChoice, warnings); + expect(result).toEqual(toolChoice); + expect(warnings).toEqual([]); + }); + + it("should warn for invalid string", () => { + const warnings: string[] = []; + const result = validateToolChoice( + "invalid" as ChatCompletionToolChoiceOption, + warnings, + ); + expect(result).toBeUndefined(); + expect(warnings).toContain( + "Unsupported tool_choice value: invalid. 
Must be one of: 'none', 'auto', 'required'", + ); + }); + }); + + describe("convertToChatDeepSeekRequestBody", () => { + it("should add thinking field for deepseek-reasoner model", () => { + const warnings: string[] = []; + const body = { + model: "deepseek-reasoner", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }; + const result = convertToChatDeepSeekRequestBody( + body as ChatCompletionCreateParamsExt, + warnings, + ); + expect(result.thinking).toEqual({ type: "enabled" }); + }); + + it("should add thinking field when thinking.enabled is set", () => { + const warnings: string[] = []; + const body = { + model: "deepseek-chat", + thinking: { type: "enabled" }, + messages: [{ role: "user", content: "Hello" }], + stream: false, + }; + const result = convertToChatDeepSeekRequestBody( + body as ChatCompletionCreateParamsExt, + warnings, + ); + expect(result.thinking).toEqual({ type: "enabled" }); + }); + }); +}); diff --git a/packages/openai-adapters/src/types.ts b/packages/openai-adapters/src/types.ts index 868a6e8dfe9..be93625e6b3 100644 --- a/packages/openai-adapters/src/types.ts +++ b/packages/openai-adapters/src/types.ts @@ -60,6 +60,7 @@ export const OpenAIConfigSchema = BasePlusConfig.extend({ z.literal("ncompass"), z.literal("relace"), z.literal("huggingface-inference-api"), + z.literal("deepseek"), ]), }); export type OpenAIConfig = z.infer; @@ -262,7 +263,6 @@ export const LLMConfigSchema = z.discriminatedUnion("provider", [ OpenAIConfigSchema, BedrockConfigSchema, MoonshotConfigSchema, - DeepseekConfigSchema, CohereConfigSchema, AzureConfigSchema, GeminiConfigSchema, diff --git a/packages/openai-adapters/src/util/deepseek-converters.ts b/packages/openai-adapters/src/util/deepseek-converters.ts new file mode 100644 index 00000000000..5d563cff141 --- /dev/null +++ b/packages/openai-adapters/src/util/deepseek-converters.ts @@ -0,0 +1,521 @@ +import { + ChatCompletionCreateParamsNonStreaming, + ChatCompletionCreateParamsStreaming, + 
ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionTool, + ChatCompletionToolChoiceOption, +} from "openai/resources/index"; + +import { FimCreateParamsStreaming } from "../apis/base.js"; +import { + BaseDeepSeekRequestBody, + ChatDeepSeekRequestBody, + DeepSeekMessage, + DeepSeekResponseFormat, + DeepSeekTool, + DeepSeekToolCall, + DeepSeekToolChoice, + FimDeepSeekRequestBody, +} from "./deepseek-types.js"; + +// Converts valid OpenAI request body to DeepSeek request body + +// Type utilities +/** + * Represents possible content types in OpenAI messages + */ +type OpenAIContent = + | string + | null + | undefined + | Array<{ type: string; text?: string; image_url?: any }>; + +/** + * Represents a message that could come from OpenAI API + */ +export type OpenAICompatibleMessage = ChatCompletionMessageParam & { + content?: OpenAIContent; + reasoning?: string; + reasoning_content?: string; + prefix?: boolean; + tool_calls?: ChatCompletionMessageToolCall[]; + tool_call_id?: string; + name?: string; +}; + +/** + * Validation utilities for DeepSeek API requests + */ + +/** + * Filters message content to only include text parts + */ +export function validateAndFilterContent( + content: OpenAIContent, + warnings: string[] = [], +): string | Array<{ type: "text"; text: string }> | null { + if (content === undefined) { + return null; + } + + if (Array.isArray(content)) { + const filtered = content.filter( + (part): part is { type: "text"; text: string } => { + return part.type === "text" && typeof part.text === "string"; + }, + ); + + if (filtered.length !== content.length) { + warnings.push("Non-text content parts were filtered out"); + } + + return filtered.length > 0 ? 
/**
 * Validates the `logprobs` / `top_logprobs` request parameters.
 *
 * Outside reasoning mode both values are passed through untouched.
 * In reasoning mode both values are dropped (returned as `undefined`) and a
 * warning is recorded for each one the caller actually requested. `null`,
 * `undefined` and `logprobs: false` all mean "not requested", so they are
 * dropped silently instead of producing a misleading "not supported" warning.
 *
 * @param logprobs - Whether token log probabilities were requested.
 * @param top_logprobs - Number of top alternatives requested per token.
 * @param isReasoning - True when the request runs in reasoning mode.
 * @param warnings - Collector for human-readable warnings; mutated in place.
 * @returns The pair of values to forward in the request body.
 */
export function validateLogprobs(
  logprobs: boolean | null | undefined,
  top_logprobs: number | null | undefined,
  isReasoning: boolean,
  warnings: string[],
): {
  logprobs: boolean | null | undefined;
  top_logprobs: number | null | undefined;
} {
  if (!isReasoning) {
    return { logprobs, top_logprobs };
  }

  // Only complain about values that would have had an effect on the request.
  if (logprobs) {
    warnings.push("logprobs is not supported for deepseek reasoner models.");
  }
  if (top_logprobs != null) {
    warnings.push(
      "top_logprobs is not supported for deepseek reasoner models.",
    );
  }

  return { logprobs: undefined, top_logprobs: undefined };
}
/**
 * Normalizes the `stop` parameter for a DeepSeek request.
 *
 * - Any falsy value (`null`, `undefined`, empty string) becomes `undefined`.
 * - A single string, or an array of at most 16 entries, is returned as is
 *   (same reference).
 * - A longer array is cut down to a copy of its first 16 entries and a
 *   warning describing the truncation is appended to `warnings`.
 *
 * @param stop - Stop sequence(s) from the incoming request.
 * @param warnings - Collector for human-readable warnings; mutated in place.
 * @returns The stop value to send, or `undefined` when none was given.
 */
export function validateStopSequences(
  stop: string | string[] | null | undefined,
  warnings: string[],
): string | string[] | undefined {
  if (!stop) {
    return undefined;
  }

  // Plain strings and short lists need no adjustment.
  if (!Array.isArray(stop) || stop.length <= 16) {
    return stop;
  }

  warnings.push(
    `DeepSeek API supports maximum 16 stop sequences. Got ${stop.length}. Using first 16.`,
  );
  return stop.slice(0, 16);
}
/**
 * Converts OpenAI chat completion parameters to a DeepSeek chat request body.
 *
 * The shared base fields come from `convertToBaseDeepSeekRequestBody`; every
 * chat-specific field is validated by its dedicated helper and is only added
 * to the result when the validated value is usable:
 *
 * - `thinking` is emitted only as `{ type: "enabled" }`, when
 *   `isReasoningEnabled(body)` is true.
 * - `tool_choice`, `tools` (non-empty), `response_format` and `logprobs` are
 *   added only when their validated value is truthy.
 * - `top_logprobs` is added only together with a truthy `logprobs`.
 * - `stream_options` is reduced to `{ include_usage: true }` when requested.
 *
 * @param body - Incoming OpenAI-style request, optionally carrying `thinking`.
 * @param warnings - Collector for validation warnings; mutated in place.
 * @returns The request body to send to the DeepSeek chat endpoint.
 * @throws Error (from message validation) when no valid message remains.
 */
export function convertToChatDeepSeekRequestBody(
  body: ChatCompletionCreateParamsExt,
  warnings: string[] = [],
): ChatDeepSeekRequestBody {
  // The flag depends only on `body`, so it is computed once up front.
  const reasoning = isReasoningEnabled(body);

  // Validation order is kept stable so warnings appear in a stable order.
  const base = convertToBaseDeepSeekRequestBody(body, warnings);
  const format = validateResponseFormat(body.response_format, warnings);
  const tools = validateAndFilterTools(body.tools, warnings);
  const messages = validateAndPrepareMessages(
    body.messages || [],
    warnings,
    reasoning,
  );
  const toolChoice = validateToolChoice(body.tool_choice, warnings);
  const probs = validateLogprobs(
    body.logprobs,
    body.top_logprobs,
    reasoning,
    warnings,
  );

  const request: ChatDeepSeekRequestBody = { ...base, messages };

  if (reasoning) {
    request.thinking = { type: "enabled" };
  }
  if (toolChoice) {
    request.tool_choice = toolChoice;
  }
  if (tools && tools.length > 0) {
    request.tools = tools;
  }
  if (format) {
    request.response_format = format;
  }
  if (probs.logprobs) {
    request.logprobs = probs.logprobs;
    // top_logprobs is meaningless unless logprobs itself is switched on.
    if (probs.top_logprobs) {
      request.top_logprobs = probs.top_logprobs;
    }
  }
  if (body.stream_options?.include_usage) {
    request.stream_options = { include_usage: true };
  }

  return request;
}
/**
 * Validates and converts a list of OpenAI-style messages to DeepSeek messages.
 *
 * Each message goes through `prepareMessage`; messages it rejects are dropped.
 * The list is walked from newest to oldest so that the "current turn" (the
 * assistant/tool messages that follow the most recent user or system message)
 * can be recognised. In reasoning mode every assistant message of that current
 * turn receives a `reasoning_content`: the value found by `getReasoning`, or
 * an empty string when the source message carries none. Assistant messages
 * from earlier turns are left without `reasoning_content`.
 *
 * @param messages - Incoming messages, oldest first.
 * @param warnings - Collector for validation warnings; mutated in place.
 * @param isReasoningMode - Whether reasoning content must be attached.
 * @returns The prepared messages in their original (oldest first) order.
 * @throws Error when `messages` is empty or no message survives preparation.
 */
export function validateAndPrepareMessages(
  messages: OpenAICompatibleMessage[],
  warnings: string[] = [],
  isReasoningMode: boolean = false,
): DeepSeekMessage[] {
  if (!messages?.length) {
    throw new Error("Messages array must contain at least one message");
  }

  // Collected newest-first, flipped back before returning.
  const newestFirst: DeepSeekMessage[] = [];
  // Stays true until the most recent user/system message has been reached.
  let inCurrentTurn = true;

  let index = messages.length;
  while (index > 0) {
    index -= 1;
    const source = messages[index];
    const converted = prepareMessage(source, index, warnings);
    if (!converted) {
      continue;
    }

    switch (converted.role) {
      case "user":
      case "system":
        inCurrentTurn = false;
        break;
      case "assistant":
        if (isReasoningMode && inCurrentTurn) {
          const reasoning = getReasoning(source);
          // An existing value (even an empty string) is preserved as is;
          // only a missing one is replaced by the empty-string placeholder.
          converted.reasoning_content = reasoning === undefined ? "" : reasoning;
        }
        break;
      default:
        break;
    }

    newestFirst.push(converted);
  }

  if (newestFirst.length === 0) {
    throw new Error("No valid messages found after preparation");
  }

  return newestFirst.reverse();
}
/**
 * Resolves the model name to use for a FIM (fill-in-middle) request.
 *
 * A missing or empty model falls back to "deepseek-chat". Any other
 * non-empty model is returned unchanged; if it differs from "deepseek-chat"
 * a warning is appended to `warnings`, but the requested name is still used.
 *
 * @param model - Model name from the incoming request, if any.
 * @param warnings - Collector for validation warnings; mutated in place.
 * @returns The model name to put into the FIM request body.
 */
function validateFIMModel(
  model: string | undefined,
  warnings: string[] = [],
): string {
  const fallbackModel = "deepseek-chat";

  if (!model) {
    return fallbackModel;
  }

  if (model !== fallbackModel) {
    warnings.push("FIM models other than deepseek-chat are not supported");
  }
  return model;
}
/**
 * Log-probability information for one candidate token.
 */
interface DeepSeekTokenLogprob {
  token: string;
  logprob: number;
  bytes?: number[] | null;
}

/**
 * Log-probability information for one generated token, optionally with the
 * alternative candidates considered at that position.
 */
interface DeepSeekTokenLogprobEntry extends DeepSeekTokenLogprob {
  top_logprobs?: DeepSeekTokenLogprob[];
}

/**
 * Logprobs interface for chat completions.
 *
 * `content` and `reasoning_content` use the same per-token entry shape, so
 * both refer to one shared definition instead of repeating it inline.
 */
export interface ChatLogprobsResponse {
  content: DeepSeekTokenLogprobEntry[] | null;
  reasoning_content?: DeepSeekTokenLogprobEntry[] | null;
}
ErrorDeepSeekResponse { + error: { + message: string; + type?: string; + param?: string | null; + code?: number | null; + details?: any; + }; +} + +/** + * Base interface for all DeepSeek API responses + */ +export interface BaseDeepSeekResponseBody { + id: string; + created: number; + model: string; + object: string; // static string for each response type + system_fingerprint?: string; // Required - Optional only for FIM stream chunks (official docs) +} + +/** + * DeepSeek API success response interface for chat completions + */ +export interface ChatDeepSeekResponseBody extends BaseDeepSeekResponseBody { + choices: Array<{ + index: number; + message: { + role: string; + content: string | null; + reasoning_content?: string | null; + tool_calls?: DeepSeekToolCall[]; + }; + finish_reason: string; + logprobs?: ChatLogprobsResponse | null; + }>; + usage?: UsageDeepSeekResponse; +} + +/** + * DeepSeek API success response interface for FIM completions + */ +export interface FimDeepSeekResponseBody extends BaseDeepSeekResponseBody { + choices: Array<{ + text: string; + index: number; + logprobs: { + text_offset?: number[]; + token_logprobs?: number[]; + tokens?: string[]; + top_logprobs?: Array>; + } | null; + finish_reason: string; + }>; + usage?: UsageDeepSeekResponse; +} + +/** + * DeepSeek streaming chunk interface for chat completions + */ +export interface ChatDeepSeekStreamChunk extends BaseDeepSeekResponseBody { + choices: Array<{ + index: number; + delta: { + role?: string; // possible values: "assistant" + content?: string | null; + tool_calls?: DeepSeekToolCall[]; + reasoning_content?: string | null; + }; + finish_reason: string | null; + logprobs?: ChatLogprobsResponse | null; + }>; +} + +/** + * Interface for prefix completion response body + * + * Note: This is a beta endpoint that shares parameters with chat completion, + * + */ +// export interface ChatPrefixDeepSeekResponseBody extends ChatDeepSeekResponseBody {} +// export interface 
ChatPrefixDeepSeekStreamChunk extends ChatDeepSeekStreamChunk {} + +/** + * DeepSeek streaming chunk interface for FIM completions + */ +export interface DeepSeekFimStreamChunk extends BaseDeepSeekResponseBody { + choices: Array<{ + text: string; + index: number; + finish_reason?: string | null; + }>; +} + +/** + * DeepSeek models list response interface + */ +export interface DeepSeekModelsResponse { + object: "list"; + data: Array<{ + id: string; + object: "model"; + owned_by: string; + }>; +}