diff --git a/core/config/load.ts b/core/config/load.ts index e4c7e13c4f0..a0752f67445 100644 --- a/core/config/load.ts +++ b/core/config/load.ts @@ -652,6 +652,8 @@ function llmToSerializedModelDescription(llm: ILLM): ModelDescription { sourceFile: llm.sourceFile, isFromAutoDetect: llm.isFromAutoDetect, toolOverrides: llm.toolOverrides, + customReasoningFields: + llm.customReasoningFields ?? (llm as any).options?.customReasoningFields, }; } diff --git a/core/config/types.ts b/core/config/types.ts index 8c64de1ab1a..d41f73d9055 100644 --- a/core/config/types.ts +++ b/core/config/types.ts @@ -572,6 +572,9 @@ declare global { // IBM watsonx Options deploymentId?: string; + + /** Custom fields to check for reasoning/thinking content in streaming chunks */ + customReasoningFields?: string[]; } type RequireAtLeastOne = Pick< @@ -960,6 +963,8 @@ declare global { promptTemplates?: { [key: string]: string }; capabilities?: ModelCapability; cacheBehavior?: CacheBehavior; + /** Custom fields to check for reasoning/thinking content in streaming chunks */ + customReasoningFields?: string[]; } export interface JSONEmbedOptions { diff --git a/core/config/util.ts b/core/config/util.ts index 28536a06d37..02dd61b10ad 100644 --- a/core/config/util.ts +++ b/core/config/util.ts @@ -83,6 +83,7 @@ export function addModel( contextLength: model.contextLength, maxStopWords: model.maxStopWords, defaultCompletionOptions: model.completionOptions, + customReasoningFields: model.customReasoningFields, ...(capabilities.length > 0 ? { capabilities } : {}), }; config.models.push(desc); diff --git a/core/config/yaml/models.vitest.ts b/core/config/yaml/models.vitest.ts index 0e99448d51d..72e0a5916b1 100644 --- a/core/config/yaml/models.vitest.ts +++ b/core/config/yaml/models.vitest.ts @@ -173,6 +173,27 @@ describe("llmsFromModelConfig requestOptions merging", () => { expect(llm.requestOptions).toEqual(model.requestOptions); }); + it("should preserve custom reasoning fields from model config", async () => { + const model: ModelConfig = { + name: "test-openai", + provider: "openai", + model: "gpt-4", + customReasoningFields: ["my_custom_thinking_key"], + }; + + const result = await llmsFromModelConfig({ + model, + uniqueId: "test-id", + llmLogger: mockLLMLogger, + config: mockConfig, + }); + + expect(result).toHaveLength(1); + expect((result[0] as any).customReasoningFields).toEqual([ + "my_custom_thinking_key", + ]); + }); + it("should handle empty headers correctly in merge", async () => { const model: ModelConfig = { name: "test-openai", diff --git a/core/control-plane/schema.ts b/core/control-plane/schema.ts index 02901bda2fb..a5b8c235a99 100644 --- a/core/control-plane/schema.ts +++ b/core/control-plane/schema.ts @@ -68,6 +68,7 @@ const modelDescriptionSchema = z.object({ stream: z.boolean().optional(), }) .optional(), + customReasoningFields: z.array(z.string()).optional(), systemMessage: z.string().optional(), requestOptions: z .object({ diff --git a/core/index.d.ts b/core/index.d.ts index 6192666503f..c4e958ec96b 100644 --- a/core/index.d.ts +++ b/core/index.d.ts @@ -96,7 +96,8 @@ type RequiredLLMOptions = | "completionOptions"; export interface ILLM - extends Omit, + extends + Omit, Required> { get providerName(): string; get underlyingProviderName(): string; @@ -714,6 +715,9 @@ export interface LLMOptions { /** Tool overrides for this model */ toolOverrides?: ToolOverride[]; + + /** Custom fields to check for reasoning/thinking content in streaming chunks */ + customReasoningFields?: string[]; } type RequireAtLeastOne = Pick< @@ -1259,6 +1263,9 @@ export interface ModelDescription { /** Tool overrides for this model */ toolOverrides?: ToolOverride[]; + + /** Custom fields to check for reasoning/thinking content in streaming chunks */ + customReasoningFields?: string[]; } export interface JSONEmbedOptions { @@ -1742,6 +1749,8 @@ export interface JSONModelDescription { useResponsesApi?: boolean; deploymentId?: string; isFromAutoDetect?: boolean; + /** Custom fields to check for reasoning/thinking content in streaming chunks */ + customReasoningFields?: string[]; } // config.json diff --git a/core/llm/index.ts b/core/llm/index.ts index f7d97b73e3c..9a615e7ba6d 100644 --- a/core/llm/index.ts +++ b/core/llm/index.ts @@ -40,6 +40,7 @@ import { isOllamaInstalled } from "../util/ollamaHelper.js"; import { TokensBatchingService } from "../util/TokensBatchingService.js"; import { withExponentialBackoff } from "../util/withExponentialBackoff.js"; +import { applyToolOverrides } from "../tools/applyToolOverrides.js"; import { autodetectPromptTemplates, autodetectTemplateFunction, @@ -67,7 +68,6 @@ import { toCompleteBody, toFimBody, } from "./openaiTypeConverters.js"; -import { applyToolOverrides } from "../tools/applyToolOverrides.js"; export class LLMError extends Error { constructor( @@ -210,6 +210,10 @@ export abstract class BaseLLM implements ILLM { protected openaiAdapter?: BaseLlmApi; + public get options(): LLMOptions { + return this._llmOptions; + } + constructor(_options: LLMOptions) { this._llmOptions = _options; this.lastRequestId = undefined; @@ -643,7 +647,13 @@ export abstract class BaseLLM implements ILLM { if (!this.lastRequestId && typeof (chunk as any).id === "string") { this.lastRequestId = (chunk as any).id; } - const result = fromChatCompletionChunk(chunk); + const result = fromChatCompletionChunk( + chunk, + this.options?.customReasoningFields, + ); + if (result && result.role === "thinking") { + continue; + } if (result) { const content = renderChatMessage(result); const formattedContent = this._formatChatMessage(result); @@ -1065,7 +1075,10 @@ export abstract class BaseLLM implements ILLM { if (!this.lastRequestId && typeof (chunk as any).id === "string") { this.lastRequestId = (chunk as any).id; } - const chatChunk = fromChatCompletionChunk(chunk as any); + const chatChunk = fromChatCompletionChunk( + chunk as any, + this.options?.customReasoningFields, + ); if (chatChunk) { yield chatChunk; } @@ -1084,7 +1097,10 @@ export abstract class BaseLLM implements ILLM { signal, ); this.lastRequestId = response.id ?? this.lastRequestId; - const messages = fromChatResponse(response as any); + const messages = fromChatResponse( + response as any, + this.options?.customReasoningFields, + ); for (const msg of messages) { yield msg; } diff --git a/core/llm/llms/OpenAI.ts b/core/llm/llms/OpenAI.ts index c65b55dc1a5..547b0ddc56c 100644 --- a/core/llm/llms/OpenAI.ts +++ b/core/llm/llms/OpenAI.ts @@ -560,7 +560,10 @@ class OpenAI extends BaseLLM { } for await (const value of streamSse(response)) { - const chunk = fromChatCompletionChunk(value); + const chunk = fromChatCompletionChunk( + value, + this.options?.customReasoningFields, + ); if (chunk) { yield chunk; } diff --git a/core/llm/llms/WatsonX.ts b/core/llm/llms/WatsonX.ts index 1d1d473dda9..5c09f93d662 100644 --- a/core/llm/llms/WatsonX.ts +++ b/core/llm/llms/WatsonX.ts @@ -313,7 +313,10 @@ class WatsonX extends BaseLLM { let accumulatedArgs = ""; for await (const value of streamSse(response)) { - const message = fromChatCompletionChunk(value); + const message = fromChatCompletionChunk( + value, + this.options?.customReasoningFields, + ); if (!!message) { if ( (message as AssistantChatMessage)?.toolCalls && diff --git a/core/llm/openaiTypeConverters.test.ts b/core/llm/openaiTypeConverters.test.ts index f597f002262..89de09c26f7 100644 --- a/core/llm/openaiTypeConverters.test.ts +++ b/core/llm/openaiTypeConverters.test.ts @@ -1,4 +1,9 @@ -import { toResponsesInput, isItemType } from "./openaiTypeConverters"; +import { + fromChatCompletionChunk, + fromChatResponse, + toResponsesInput, + isItemType, +} from "./openaiTypeConverters"; import { ChatMessage } from ".."; import type { EasyInputMessage, @@ -40,6 +45,61 @@ function getMessagesByRole(items: ResponseInputItem[], role: string) { } describe("openaiTypeConverters", () => { + describe("custom reasoning fields", () => { + it("should convert a custom streaming delta field to a thinking message", () => { + const chunk = { + choices: [ + { + delta: { + my_custom_thinking_key: "checking constraints", + content: "should not render as chat text", + }, + }, + ], + }; + + const result = fromChatCompletionChunk(chunk as any, [ + "my_custom_thinking_key", + ]); + + expect(result).toEqual({ + role: "thinking", + content: "checking constraints", + signature: undefined, + reasoning_details: undefined, + }); + }); + + it("should convert a custom non-streaming message field to a thinking message", () => { + const response = { + choices: [ + { + message: { + role: "assistant", + my_custom_thinking_key: "planning answer", + content: "final answer", + }, + }, + ], + }; + + const result = fromChatResponse(response as any, [ + "my_custom_thinking_key", + ]); + + expect(result).toEqual([ + { + role: "thinking", + content: "planning answer", + }, + { + role: "assistant", + content: "final answer", + }, + ]); + }); + }); + describe("toResponsesInput", () => { describe("tool calls handling - OpenAI Responses API", () => { it("should emit function_call items when fc_ ID is in metadata", () => { diff --git a/core/llm/openaiTypeConverters.ts b/core/llm/openaiTypeConverters.ts index fb4673e11be..06c4db17b91 100644 --- a/core/llm/openaiTypeConverters.ts +++ b/core/llm/openaiTypeConverters.ts @@ -287,7 +287,10 @@ export function toFimBody( } as any; } -export function fromChatResponse(response: ChatCompletion): ChatMessage[] { +export function fromChatResponse( + response: ChatCompletion, + customFields?: string[], +): ChatMessage[] { const messages: ChatMessage[] = []; const message = response.choices[0].message as ChatCompletionMessage & { reasoning?: string; @@ -298,11 +301,16 @@ export function fromChatResponse(response: ChatCompletion): ChatMessage[] { }[]; }; + const customContent = customFields + ?.map((f) => (message as any)?.[f]) + .find((v) => typeof v === "string" && v.length > 0); + // Check for reasoning content first (similar to fromChatCompletionChunk) - if (message.reasoning_content || message.reasoning) { + if (message.reasoning_content || message.reasoning || customContent) { const thinkingMessage: ChatMessage = { role: "thinking", - content: (message as any).reasoning_content || (message as any).reasoning, + content: + customContent || message.reasoning_content || message.reasoning || "", }; // Preserve reasoning_details if present @@ -346,6 +354,7 @@ export function fromChatResponse(response: ChatCompletion): ChatMessage[] { export function fromChatCompletionChunk( chunk: ChatCompletionChunk, + customFields?: string[], ): ChatMessage | undefined { const delta = chunk.choices?.[0]?.delta as | (ChatCompletionChunk.Choice.Delta & { @@ -357,7 +366,25 @@ export function fromChatCompletionChunk( }) | undefined; - if (delta?.content) { + const customContent = customFields + ?.map((f) => (delta as any)?.[f]) + .find((v) => typeof v === "string" && v.length > 0); + + if ( + delta?.reasoning_content || + delta?.reasoning || + delta?.reasoning_details?.length || + customContent + ) { + const message: ThinkingChatMessage = { + role: "thinking", + content: + customContent || delta?.reasoning_content || delta?.reasoning || "", + signature: delta?.reasoning_details?.[0]?.signature, + reasoning_details: delta?.reasoning_details as any[], + }; + return message; + } else if (delta?.content) { return { role: "assistant", content: delta.content, @@ -381,18 +408,6 @@ export function fromChatCompletionChunk( toolCalls, }; } - } else if ( - delta?.reasoning_content || - delta?.reasoning || - delta?.reasoning_details?.length - ) { - const message: ThinkingChatMessage = { - role: "thinking", - content: delta.reasoning_content || delta.reasoning || "", - signature: delta?.reasoning_details?.[0]?.signature, - reasoning_details: delta?.reasoning_details as any[], - }; - return message; } return undefined; diff --git a/packages/config-types/src/index.ts b/packages/config-types/src/index.ts index 8561500e662..e8adca44bfa 100644 --- a/packages/config-types/src/index.ts +++ b/packages/config-types/src/index.ts @@ -88,6 +88,7 @@ export const modelDescriptionSchema = z.object({ ]) .optional(), completionOptions: completionOptionsSchema.optional(), + customReasoningFields: z.array(z.string()).optional(), systemMessage: z.string().optional(), requestOptions: z .object({ diff --git a/packages/config-yaml/src/converter.ts b/packages/config-yaml/src/converter.ts index 4be3702ec49..7d7ceac5ca9 100644 --- a/packages/config-yaml/src/converter.ts +++ b/packages/config-yaml/src/converter.ts @@ -5,6 +5,8 @@ import { ModelRole } from "./schemas/models.js"; type ModelYaml = NonNullable[number]; type ContextYaml = NonNullable[number]; type PromptYaml = NonNullable[number]; +type ModelJson = ConfigJson["models"][number]; +type ContextProviderJson = NonNullable[number]; function convertModel( m: ConfigJson["models"][number], @@ -19,6 +21,7 @@ function convertModel( roles, requestOptions: m.requestOptions, defaultCompletionOptions: m.completionOptions, + customReasoningFields: m.customReasoningFields, }; } @@ -95,7 +98,7 @@ function withFromContextProvider( function convertContext(configJson: ConfigJson): ContextYaml[] { const context: ContextYaml[] = - configJson.contextProviders?.map((ctx) => { + configJson.contextProviders?.map((ctx: ContextProviderJson) => { // ctx providers that weren't given official blocks if ( ["web", "debugger", "issue", "database", "google", "http"].includes( @@ -151,14 +154,20 @@ function convertDoc( export function convertJsonToYamlConfig(configJson: ConfigJson): ConfigYaml { // models - const models = configJson.models.map((m) => convertModel(m, ["chat"])); - const autocompleteModels = Array.isArray(configJson.tabAutocompleteModel) + const models = configJson.models.map((m: ModelJson) => + convertModel(m, ["chat"]), + ); + const autocompleteModels: ModelJson[] = Array.isArray( + configJson.tabAutocompleteModel, + ) ? configJson.tabAutocompleteModel : configJson.tabAutocompleteModel ? [configJson.tabAutocompleteModel] : []; models.push( - ...autocompleteModels.map((m) => convertModel(m, ["autocomplete"])), + ...autocompleteModels.map((m: ModelJson) => + convertModel(m, ["autocomplete"]), + ), ); if (configJson.embeddingsProvider) { diff --git a/packages/config-yaml/src/schemas/models.ts b/packages/config-yaml/src/schemas/models.ts index ee20d5a0540..64f89db8305 100644 --- a/packages/config-yaml/src/schemas/models.ts +++ b/packages/config-yaml/src/schemas/models.ts @@ -192,6 +192,7 @@ const baseModelFields = { promptTemplates: promptTemplatesSchema.optional(), useLegacyCompletionsEndpoint: z.boolean().optional(), useResponsesApi: z.boolean().optional(), + customReasoningFields: z.array(z.string()).optional(), env: z .record(z.string(), z.union([z.string(), z.boolean(), z.number()])) .optional(),