From df105b59755fcb54855e405f57538c551fa22fb8 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sat, 9 May 2026 12:13:42 -0700 Subject: [PATCH 01/16] codec: expand additive OpenAI request extraction and multimodal parts Signed-off-by: Alex Fournier --- crates/core/src/codec/anthropic.rs | 19 +- crates/core/src/codec/openai_chat.rs | 48 +++++ crates/core/src/codec/openai_responses.rs | 77 +++++++ crates/core/src/codec/request.rs | 76 +++++-- crates/core/src/codec/response.rs | 11 +- crates/core/tests/integration/codec_tests.rs | 182 ++++++++++++++++ .../core/tests/integration/pipeline_tests.rs | 13 ++ .../core/tests/unit/codec/anthropic_tests.rs | 13 ++ .../tests/unit/codec/openai_chat_tests.rs | 204 +++++++++++++++++- .../unit/codec/openai_responses_tests.rs | 118 +++++++++- crates/core/tests/unit/codec/request_tests.rs | 145 ++++++++++++- crates/core/tests/unit/shared_tests.rs | 13 ++ 12 files changed, 889 insertions(+), 30 deletions(-) diff --git a/crates/core/src/codec/anthropic.rs b/crates/core/src/codec/anthropic.rs index 4380ec32..d71e925a 100644 --- a/crates/core/src/codec/anthropic.rs +++ b/crates/core/src/codec/anthropic.rs @@ -179,9 +179,9 @@ fn extract_system_text(msg: &Message) -> Option { } => { let texts: Vec<&str> = parts .iter() - .map(|p| { - let super::request::ContentPart::Text { text } = p; - text.as_str() + .filter_map(|p| match p { + super::request::ContentPart::Text { text } => Some(text.as_str()), + super::request::ContentPart::ImageUrl { .. } => None, }) .collect(); if texts.is_empty() { @@ -449,6 +449,19 @@ impl LlmCodec for AnthropicMessagesCodec { params, tools, tool_choice, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra, }) } diff --git a/crates/core/src/codec/openai_chat.rs b/crates/core/src/codec/openai_chat.rs index 26323e1d..62505d48 100644 --- a/crates/core/src/codec/openai_chat.rs +++ b/crates/core/src/codec/openai_chat.rs @@ -112,6 +112,13 @@ const MODELED_REQUEST_KEYS: &[&str] = &[ "stop", "tools", "tool_choice", + "store", + "user", + "metadata", + "service_tier", + "parallel_tool_calls", + "top_logprobs", + "stream", ]; // --------------------------------------------------------------------------- @@ -263,6 +270,22 @@ impl LlmCodec for OpenAIChatCodec { params, tools, tool_choice, + store: obj.get("store").and_then(|v| v.as_bool()), + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: obj.get("user").and_then(|v| v.as_str()).map(String::from), + metadata: obj.get("metadata").cloned(), + service_tier: obj + .get("service_tier") + .and_then(|v| v.as_str()) + .map(String::from), + parallel_tool_calls: obj.get("parallel_tool_calls").and_then(|v| v.as_bool()), + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: obj.get("top_logprobs").and_then(|v| v.as_u64()), + stream: obj.get("stream").and_then(|v| v.as_bool()), extra, }) } @@ -291,6 +314,31 @@ impl LlmCodec for OpenAIChatCodec { insert_serialized(obj, "tool_choice", tool_choice, "tool_choice")?; } + if let Some(store) = annotated.store { + obj.insert("store".into(), Json::Bool(store)); + } + if let Some(ref user) = annotated.user { + obj.insert("user".into(), Json::String(user.clone())); + } + if let Some(ref metadata) = annotated.metadata { + obj.insert("metadata".into(), metadata.clone()); + } + if let Some(ref service_tier) = annotated.service_tier { + obj.insert("service_tier".into(), Json::String(service_tier.clone())); + } + if let Some(parallel_tool_calls) = annotated.parallel_tool_calls { + obj.insert( + "parallel_tool_calls".into(), + Json::Bool(parallel_tool_calls), + ); + } + if let Some(top_logprobs) = annotated.top_logprobs { + obj.insert("top_logprobs".into(), Json::from(top_logprobs)); + } + if let Some(stream) = annotated.stream { + obj.insert("stream".into(), Json::Bool(stream)); + } + for (k, v) in &annotated.extra { obj.insert(k.clone(), v.clone()); } diff --git a/crates/core/src/codec/openai_responses.rs b/crates/core/src/codec/openai_responses.rs index 62691df0..5a1897cf 100644 --- a/crates/core/src/codec/openai_responses.rs +++ b/crates/core/src/codec/openai_responses.rs @@ -108,6 +108,18 @@ const MODELED_REQUEST_KEYS: &[&str] = &[ "top_p", "tools", "tool_choice", + "store", + "previous_response_id", + "truncation", + "reasoning", + "include", + "user", + "metadata", + "service_tier", + "parallel_tool_calls", + "max_tool_calls", + "top_logprobs", + "stream", ]; /// Helper to construct a [`Json`] number from an `f64`. @@ -381,6 +393,25 @@ impl LlmCodec for OpenAIResponsesCodec { params, tools, tool_choice, + store: obj.get("store").and_then(|v| v.as_bool()), + previous_response_id: obj + .get("previous_response_id") + .and_then(|v| v.as_str()) + .map(String::from), + truncation: obj.get("truncation").cloned(), + reasoning: obj.get("reasoning").cloned(), + include: obj.get("include").cloned(), + user: obj.get("user").and_then(|v| v.as_str()).map(String::from), + metadata: obj.get("metadata").cloned(), + service_tier: obj + .get("service_tier") + .and_then(|v| v.as_str()) + .map(String::from), + parallel_tool_calls: obj.get("parallel_tool_calls").and_then(|v| v.as_bool()), + max_output_tokens: obj.get("max_output_tokens").and_then(|v| v.as_u64()), + max_tool_calls: obj.get("max_tool_calls").and_then(|v| v.as_u64()), + top_logprobs: obj.get("top_logprobs").and_then(|v| v.as_u64()), + stream: obj.get("stream").and_then(|v| v.as_bool()), extra, }) } @@ -415,6 +446,52 @@ impl LlmCodec for OpenAIResponsesCodec { insert_serialized(obj, "tool_choice", tool_choice, "tool_choice")?; } + if let Some(store) = annotated.store { + obj.insert("store".into(), Json::Bool(store)); + } + if let Some(ref previous_response_id) = annotated.previous_response_id { + obj.insert( + "previous_response_id".into(), + Json::String(previous_response_id.clone()), + ); + } + if let Some(ref truncation) = annotated.truncation { + obj.insert("truncation".into(), truncation.clone()); + } + if let Some(ref reasoning) = annotated.reasoning { + obj.insert("reasoning".into(), reasoning.clone()); + } + if let Some(ref include) = annotated.include { + obj.insert("include".into(), include.clone()); + } + if let Some(ref user) = annotated.user { + obj.insert("user".into(), Json::String(user.clone())); + } + if let Some(ref metadata) = annotated.metadata { + obj.insert("metadata".into(), metadata.clone()); + } + if let Some(ref service_tier) = annotated.service_tier { + obj.insert("service_tier".into(), Json::String(service_tier.clone())); + } + if let Some(parallel_tool_calls) = annotated.parallel_tool_calls { + obj.insert( + "parallel_tool_calls".into(), + Json::Bool(parallel_tool_calls), + ); + } + if let Some(max_output_tokens) = annotated.max_output_tokens { + obj.insert("max_output_tokens".into(), Json::from(max_output_tokens)); + } + if let Some(max_tool_calls) = annotated.max_tool_calls { + obj.insert("max_tool_calls".into(), Json::from(max_tool_calls)); + } + if let Some(top_logprobs) = annotated.top_logprobs { + obj.insert("top_logprobs".into(), Json::from(top_logprobs)); + } + if let Some(stream) = annotated.stream { + obj.insert("stream".into(), Json::Bool(stream)); + } + // Merge extra fields back. for (k, v) in &annotated.extra { obj.insert(k.clone(), v.clone()); diff --git a/crates/core/src/codec/request.rs b/crates/core/src/codec/request.rs index e4dac14b..e1a499e0 100644 --- a/crates/core/src/codec/request.rs +++ b/crates/core/src/codec/request.rs @@ -37,6 +37,45 @@ pub struct AnnotatedLlmRequest { /// Tool choice control. #[serde(skip_serializing_if = "Option::is_none")] pub tool_choice: Option, + /// OpenAI Responses: whether to persist response state server-side. + #[serde(skip_serializing_if = "Option::is_none")] + pub store: Option, + /// OpenAI Responses: prior response to continue from. + #[serde(skip_serializing_if = "Option::is_none")] + pub previous_response_id: Option, + /// OpenAI Responses: context truncation behavior. + #[serde(skip_serializing_if = "Option::is_none")] + pub truncation: Option, + /// OpenAI Responses: reasoning configuration object. + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + /// OpenAI Responses: include filter for additional output/state items. + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option, + /// OpenAI user identifier. + #[serde(skip_serializing_if = "Option::is_none")] + pub user: Option, + /// OpenAI metadata map/object. + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + /// OpenAI service tier preference. + #[serde(skip_serializing_if = "Option::is_none")] + pub service_tier: Option, + /// OpenAI tool parallelism toggle. + #[serde(skip_serializing_if = "Option::is_none")] + pub parallel_tool_calls: Option, + /// OpenAI Responses max output token limit. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_output_tokens: Option, + /// OpenAI Responses max tool calls. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_tool_calls: Option, + /// OpenAI logprob fanout count. + #[serde(skip_serializing_if = "Option::is_none")] + pub top_logprobs: Option, + /// OpenAI streaming toggle. + #[serde(skip_serializing_if = "Option::is_none")] + pub stream: Option, /// Extensible key-value pairs for unmodeled provider-specific fields. /// Merged back into the request body during encode via `serde(flatten)`. #[serde(flatten)] @@ -105,6 +144,21 @@ pub enum ContentPart { /// The text content. text: String, }, + /// An image URL content part. + ImageUrl { + /// Image URL payload. + image_url: OpenAiImageUrl, + }, +} + +/// OpenAI image URL payload. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OpenAiImageUrl { + /// URL for the image. + pub url: String, + /// Optional provider-specific detail hint. + #[serde(skip_serializing_if = "Option::is_none")] + pub detail: Option, } /// A tool call requested by the assistant. @@ -214,13 +268,10 @@ impl AnnotatedLlmRequest { self.messages.iter().find_map(|m| match m { Message::System { content, .. } => match content { MessageContent::Text(s) => Some(s.as_str()), - MessageContent::Parts(parts) => parts - .iter() - .map(|p| { - let ContentPart::Text { text } = p; - text.as_str() - }) - .next(), + MessageContent::Parts(parts) => parts.iter().find_map(|p| match p { + ContentPart::Text { text } => Some(text.as_str()), + ContentPart::ImageUrl { .. } => None, + }), }, _ => None, }) @@ -235,13 +286,10 @@ impl AnnotatedLlmRequest { self.messages.iter().rev().find_map(|m| match m { Message::User { content, .. } => match content { MessageContent::Text(s) => Some(s.as_str()), - MessageContent::Parts(parts) => parts - .iter() - .map(|p| { - let ContentPart::Text { text } = p; - text.as_str() - }) - .next(), + MessageContent::Parts(parts) => parts.iter().find_map(|p| match p { + ContentPart::Text { text } => Some(text.as_str()), + ContentPart::ImageUrl { .. } => None, + }), }, _ => None, }) diff --git a/crates/core/src/codec/response.rs b/crates/core/src/codec/response.rs index 3d85bb51..8b718333 100644 --- a/crates/core/src/codec/response.rs +++ b/crates/core/src/codec/response.rs @@ -219,13 +219,10 @@ impl AnnotatedLlmResponse { pub fn response_text(&self) -> Option<&str> { match self.message.as_ref()? { MessageContent::Text(s) => Some(s.as_str()), - MessageContent::Parts(parts) => parts - .iter() - .map(|p| { - let super::request::ContentPart::Text { text } = p; - text.as_str() - }) - .next(), + MessageContent::Parts(parts) => parts.iter().find_map(|p| match p { + super::request::ContentPart::Text { text } => Some(text.as_str()), + super::request::ContentPart::ImageUrl { .. } => None, + }), } } diff --git a/crates/core/tests/integration/codec_tests.rs b/crates/core/tests/integration/codec_tests.rs index 916af3a0..fa42c0ae 100644 --- a/crates/core/tests/integration/codec_tests.rs +++ b/crates/core/tests/integration/codec_tests.rs @@ -31,6 +31,19 @@ impl LlmCodec for MockCodec { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }) } @@ -102,6 +115,19 @@ fn test_annotated_llm_request_full_roundtrip() { }, }]), tool_choice: Some(ToolChoice::Auto), + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: { let mut m = serde_json::Map::new(); m.insert("response_format".into(), json!({"type": "json_object"})); @@ -125,6 +151,19 @@ fn test_annotated_llm_request_minimal() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; @@ -368,6 +407,19 @@ fn test_extra_field_flatten() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra, }; @@ -396,6 +448,19 @@ fn test_clone_and_partial_eq() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; @@ -408,6 +473,19 @@ fn test_clone_and_partial_eq() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; @@ -441,6 +519,19 @@ fn test_system_prompt_text() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.system_prompt(), Some("Be helpful")); @@ -457,6 +548,19 @@ fn test_system_prompt_none() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.system_prompt(), None); @@ -475,6 +579,19 @@ fn test_system_prompt_parts() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.system_prompt(), Some("Be careful")); @@ -502,6 +619,19 @@ fn test_last_user_message_basic() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.last_user_message(), Some("last")); @@ -525,6 +655,19 @@ fn test_last_user_message_none() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.last_user_message(), None); @@ -549,6 +692,19 @@ fn test_has_tool_calls_true() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(req.has_tool_calls()); @@ -565,6 +721,19 @@ fn test_has_tool_calls_false_no_assistant() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(!req.has_tool_calls()); @@ -582,6 +751,19 @@ fn test_has_tool_calls_false_empty_calls() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(!req.has_tool_calls()); diff --git a/crates/core/tests/integration/pipeline_tests.rs b/crates/core/tests/integration/pipeline_tests.rs index d93c1fdb..71d2d400 100644 --- a/crates/core/tests/integration/pipeline_tests.rs +++ b/crates/core/tests/integration/pipeline_tests.rs @@ -98,6 +98,19 @@ impl LlmCodec for TrackingCodec { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra, }) } diff --git a/crates/core/tests/unit/codec/anthropic_tests.rs b/crates/core/tests/unit/codec/anthropic_tests.rs index 796bd96d..1ca5e676 100644 --- a/crates/core/tests/unit/codec/anthropic_tests.rs +++ b/crates/core/tests/unit/codec/anthropic_tests.rs @@ -692,6 +692,19 @@ fn test_helper_and_error_paths_cover_remaining_anthropic_branches() { }, }]), tool_choice: Some(ToolChoice::None), + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; diff --git a/crates/core/tests/unit/codec/openai_chat_tests.rs b/crates/core/tests/unit/codec/openai_chat_tests.rs index 78b66509..2f560025 100644 --- a/crates/core/tests/unit/codec/openai_chat_tests.rs +++ b/crates/core/tests/unit/codec/openai_chat_tests.rs @@ -6,7 +6,7 @@ use super::*; use serde_json::json; -use super::super::request::MessageContent; +use super::super::request::{ContentPart, MessageContent, OpenAiImageUrl}; use super::super::response::{ApiSpecificResponse, FinishReason}; // ------------------------------------------------------------------- @@ -538,7 +538,7 @@ fn test_decode_request_extra_fields() { "response_format": {"type": "json_object"} })); let annotated = codec.decode(&request).unwrap(); - assert_eq!(annotated.extra.get("stream"), Some(&json!(true))); + assert_eq!(annotated.stream, Some(true)); assert_eq!(annotated.extra.get("seed"), Some(&json!(42))); assert_eq!( annotated.extra.get("response_format"), @@ -546,6 +546,30 @@ fn test_decode_request_extra_fields() { ); } +#[test] +fn test_decode_request_openai_chat_typed_controls() { + let codec = OpenAIChatCodec; + let request = make_request(json!({ + "messages": [{"role": "user", "content": "Hi"}], + "model": "gpt-4o", + "store": true, + "user": "u1", + "metadata": {"k":"v"}, + "service_tier": "default", + "parallel_tool_calls": true, + "top_logprobs": 2, + "stream": true + })); + let annotated = codec.decode(&request).unwrap(); + assert_eq!(annotated.store, Some(true)); + assert_eq!(annotated.user.as_deref(), Some("u1")); + assert_eq!(annotated.metadata, Some(json!({"k":"v"}))); + assert_eq!(annotated.service_tier.as_deref(), Some("default")); + assert_eq!(annotated.parallel_tool_calls, Some(true)); + assert_eq!(annotated.top_logprobs, Some(2)); + assert_eq!(annotated.stream, Some(true)); +} + #[test] fn test_decode_request_no_messages_key() { let codec = OpenAIChatCodec; @@ -556,6 +580,44 @@ fn test_decode_request_no_messages_key() { assert!(annotated.messages.is_empty()); } +#[test] +fn test_decode_request_multimodal_image_url_parts() { + let codec = OpenAIChatCodec; + let request = make_request(json!({ + "messages": [{ + "role": "user", + "content": [ + {"type": "text", "text": "describe this"}, + {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "high"}} + ] + }], + "model": "gpt-4o" + })); + let annotated = codec.decode(&request).unwrap(); + match &annotated.messages[0] { + Message::User { content, .. } => match content { + MessageContent::Parts(parts) => { + assert_eq!( + parts, + &vec![ + ContentPart::Text { + text: "describe this".into() + }, + ContentPart::ImageUrl { + image_url: OpenAiImageUrl { + url: "https://example.com/cat.png".into(), + detail: Some("high".into()) + } + } + ] + ); + } + _ => panic!("expected parts content"), + }, + _ => panic!("expected user message"), + } +} + // =================================================================== // Request encode tests // =================================================================== @@ -595,6 +657,92 @@ fn test_encode_with_modified_model() { assert_eq!(obj.get("model"), Some(&json!("gpt-4o-mini"))); } +#[test] +fn test_encode_writes_openai_chat_typed_controls() { + let codec = OpenAIChatCodec; + let mut annotated = codec + .decode(&make_request(json!({ + "messages": [{"role":"user","content":"hi"}], + "model": "gpt-4o" + }))) + .unwrap(); + annotated.store = Some(false); + annotated.user = Some("u2".into()); + annotated.metadata = Some(json!({"m":1})); + annotated.service_tier = Some("default".into()); + annotated.parallel_tool_calls = Some(false); + annotated.top_logprobs = Some(1); + annotated.stream = Some(true); + let encoded = codec + .encode( + &annotated, + &make_request(json!({"messages":[{"role":"user","content":"hi"}],"model":"gpt-4o"})), + ) + .unwrap(); + let obj = encoded.content.as_object().unwrap(); + assert_eq!(obj.get("store"), Some(&json!(false))); + assert_eq!(obj.get("user"), Some(&json!("u2"))); + assert_eq!(obj.get("metadata"), Some(&json!({"m":1}))); + assert_eq!(obj.get("service_tier"), Some(&json!("default"))); + assert_eq!(obj.get("parallel_tool_calls"), Some(&json!(false))); + assert_eq!(obj.get("top_logprobs"), Some(&json!(1))); + assert_eq!(obj.get("stream"), Some(&json!(true))); +} + +#[test] +fn test_encode_chat_extra_overrides_typed_controls() { + let codec = OpenAIChatCodec; + let mut annotated = codec + .decode(&make_request(json!({ + "messages": [{"role":"user","content":"hi"}], + "model": "gpt-4o" + }))) + .unwrap(); + annotated.store = Some(false); + annotated.extra.insert("store".into(), json!(true)); + let encoded = codec + .encode( + &annotated, + &make_request(json!({"messages":[{"role":"user","content":"hi"}],"model":"gpt-4o"})), + ) + .unwrap(); + let obj = encoded.content.as_object().unwrap(); + assert_eq!(obj.get("store"), Some(&json!(true))); +} + +#[test] +fn test_encode_request_multimodal_image_url_parts() { + let codec = OpenAIChatCodec; + let original = make_request(json!({ + "messages": [{"role":"user","content":"hi"}], + "model": "gpt-4o" + })); + let mut annotated = codec.decode(&original).unwrap(); + annotated.messages = vec![Message::User { + content: MessageContent::Parts(vec![ + ContentPart::Text { + text: "describe this".into(), + }, + ContentPart::ImageUrl { + image_url: OpenAiImageUrl { + url: "https://example.com/cat.png".into(), + detail: Some("low".into()), + }, + }, + ]), + name: None, + }]; + let encoded = codec.encode(&annotated, &original).unwrap(); + assert_eq!( + encoded.content["messages"][0]["content"][1]["type"], + json!("image_url") + ); + assert_eq!( + encoded.content["messages"][0]["content"][1]["image_url"]["url"], + json!("https://example.com/cat.png") + ); +} + #[test] fn test_encode_restores_max_completion_tokens_key() { let codec = OpenAIChatCodec; @@ -673,6 +821,19 @@ fn test_helper_and_error_paths_cover_remaining_chat_branches() { }, }]), tool_choice: Some(ToolChoice::Required), + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let encoded = codec @@ -716,6 +877,19 @@ fn test_encode_injects_stream_options_on_streaming_request() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let encoded = codec @@ -748,6 +922,19 @@ fn test_encode_preserves_caller_stream_options() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let caller_set = json!({ @@ -778,6 +965,19 @@ fn test_encode_does_not_inject_stream_options_on_non_streaming() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index c10ed308..2035bb09 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -406,11 +406,47 @@ fn test_decode_request_extra_fields() { "tool_choice": "auto" })); let annotated = codec.decode(&request).unwrap(); - assert_eq!(annotated.extra.get("store"), Some(&json!(true))); + assert_eq!(annotated.store, Some(true)); + assert_eq!(annotated.metadata, Some(json!({"key": "value"}))); +} + +#[test] +fn test_decode_request_openai_controls_typed() { + let codec = OpenAIResponsesCodec; + let request = make_request(json!({ + "model": "gpt-4o", + "input": "Hi", + "store": true, + "previous_response_id": "resp_prev", + "truncation": "disabled", + "reasoning": { "effort": "high" }, + "include": ["reasoning.encrypted_content"], + "user": "u123", + "metadata": { "k": "v" }, + "service_tier": "default", + "parallel_tool_calls": true, + "max_output_tokens": 777, + "max_tool_calls": 3, + "top_logprobs": 2, + "stream": true + })); + let annotated = codec.decode(&request).unwrap(); + assert_eq!(annotated.store, Some(true)); + assert_eq!(annotated.previous_response_id.as_deref(), Some("resp_prev")); + assert_eq!(annotated.truncation, Some(json!("disabled"))); + assert_eq!(annotated.reasoning, Some(json!({"effort":"high"}))); assert_eq!( - annotated.extra.get("metadata"), - Some(&json!({"key": "value"})) + annotated.include, + Some(json!(["reasoning.encrypted_content"])) ); + assert_eq!(annotated.user.as_deref(), Some("u123")); + assert_eq!(annotated.metadata, Some(json!({"k":"v"}))); + assert_eq!(annotated.service_tier.as_deref(), Some("default")); + assert_eq!(annotated.parallel_tool_calls, Some(true)); + assert_eq!(annotated.max_output_tokens, Some(777)); + assert_eq!(annotated.max_tool_calls, Some(3)); + assert_eq!(annotated.top_logprobs, Some(2)); + assert_eq!(annotated.stream, Some(true)); } // =================================================================== @@ -476,6 +512,69 @@ fn test_encode_writes_max_output_tokens() { assert!(!obj.contains_key("max_tokens")); } +#[test] +fn test_encode_request_openai_controls_typed() { + let codec = OpenAIResponsesCodec; + let mut annotated = codec + .decode(&make_request(json!({"model":"gpt-4o","input":"hello"}))) + .unwrap(); + annotated.store = Some(false); + annotated.previous_response_id = Some("resp_1".into()); + annotated.truncation = Some(json!("auto")); + annotated.reasoning = Some(json!({"effort":"low"})); + annotated.include = Some(json!(["reasoning.encrypted_content"])); + annotated.user = Some("abc".into()); + annotated.metadata = Some(json!({"x":1})); + annotated.service_tier = Some("default".into()); + annotated.parallel_tool_calls = Some(false); + annotated.max_output_tokens = Some(222); + annotated.max_tool_calls = Some(5); + annotated.top_logprobs = Some(1); + annotated.stream = Some(true); + + let encoded = codec + .encode( + &annotated, + &make_request(json!({"model":"gpt-4o","input":"hello"})), + ) + .unwrap(); + let obj = encoded.content.as_object().unwrap(); + assert_eq!(obj.get("store"), Some(&json!(false))); + assert_eq!(obj.get("previous_response_id"), Some(&json!("resp_1"))); + assert_eq!(obj.get("truncation"), Some(&json!("auto"))); + assert_eq!(obj.get("reasoning"), Some(&json!({"effort":"low"}))); + assert_eq!( + obj.get("include"), + Some(&json!(["reasoning.encrypted_content"])) + ); + assert_eq!(obj.get("user"), Some(&json!("abc"))); + assert_eq!(obj.get("metadata"), Some(&json!({"x":1}))); + assert_eq!(obj.get("service_tier"), Some(&json!("default"))); + assert_eq!(obj.get("parallel_tool_calls"), Some(&json!(false))); + assert_eq!(obj.get("max_output_tokens"), Some(&json!(222))); + assert_eq!(obj.get("max_tool_calls"), Some(&json!(5))); + assert_eq!(obj.get("top_logprobs"), Some(&json!(1))); + assert_eq!(obj.get("stream"), Some(&json!(true))); +} + +#[test] +fn test_encode_extra_overrides_typed_controls() { + let codec = OpenAIResponsesCodec; + let mut annotated = codec + .decode(&make_request(json!({"model":"gpt-4o","input":"hello"}))) + .unwrap(); + annotated.store = Some(false); + annotated.extra.insert("store".into(), json!(true)); + let encoded = codec + .encode( + &annotated, + &make_request(json!({"model":"gpt-4o","input":"hello"})), + ) + .unwrap(); + let obj = encoded.content.as_object().unwrap(); + assert_eq!(obj.get("store"), Some(&json!(true))); +} + #[test] fn test_helper_and_error_paths_cover_remaining_responses_branches() { assert_eq!( @@ -534,6 +633,19 @@ fn test_helper_and_error_paths_cover_remaining_responses_branches() { }, }]), tool_choice: Some(ToolChoice::Auto), + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; diff --git a/crates/core/tests/unit/codec/request_tests.rs b/crates/core/tests/unit/codec/request_tests.rs index 19b57aaf..48b892a4 100644 --- a/crates/core/tests/unit/codec/request_tests.rs +++ b/crates/core/tests/unit/codec/request_tests.rs @@ -26,6 +26,19 @@ fn test_annotated_llm_request_round_trip() { }), tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let json_val = serde_json::to_value(&req).unwrap(); @@ -245,7 +258,7 @@ fn test_annotated_llm_request_extra_flatten() { "custom_field": "value" }); let req: AnnotatedLlmRequest = serde_json::from_value(json_val).unwrap(); - assert_eq!(req.extra.get("stream"), Some(&json!(true))); + assert_eq!(req.stream, Some(true)); assert_eq!(req.extra.get("custom_field"), Some(&json!("value"))); // Round-trip: extra fields should appear as top-level keys let serialized = serde_json::to_value(&req).unwrap(); @@ -288,6 +301,19 @@ fn test_all_types_clone() { }, }]), tool_choice: Some(ToolChoice::Auto), + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let cloned = req.clone(); @@ -343,6 +369,19 @@ fn test_system_prompt_returns_text() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.system_prompt(), Some("Be helpful")); @@ -359,6 +398,19 @@ fn test_system_prompt_returns_none_when_absent() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.system_prompt(), None); @@ -377,6 +429,19 @@ fn test_system_prompt_from_parts() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.system_prompt(), Some("Be concise")); @@ -408,6 +473,19 @@ fn test_last_user_message_returns_last() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.last_user_message(), Some("last")); @@ -424,6 +502,19 @@ fn test_last_user_message_returns_none_when_absent() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.last_user_message(), None); @@ -449,6 +540,19 @@ fn test_last_user_message_from_parts() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert_eq!(req.last_user_message(), Some("from parts")); @@ -477,6 +581,19 @@ fn test_has_tool_calls_true() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(req.has_tool_calls()); @@ -493,6 +610,19 @@ fn test_has_tool_calls_false_no_assistant() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(!req.has_tool_calls()); @@ -510,6 +640,19 @@ fn test_has_tool_calls_false_empty_vec() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(!req.has_tool_calls()); diff --git a/crates/core/tests/unit/shared_tests.rs b/crates/core/tests/unit/shared_tests.rs index 5cd03239..81690e3f 100644 --- a/crates/core/tests/unit/shared_tests.rs +++ b/crates/core/tests/unit/shared_tests.rs @@ -37,6 +37,19 @@ impl LlmCodec for SharedTestCodec { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), }) } From 15ae1863935be1caa92d176a6eb09292193a4c9c Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sat, 9 May 2026 12:42:30 -0700 Subject: [PATCH 02/16] codec: expand OpenAI Responses response-side extraction Signed-off-by: Alex Fournier --- crates/core/src/codec/openai_responses.rs | 37 +++++++++++--- crates/core/src/codec/response.rs | 21 ++++++++ .../unit/codec/openai_responses_tests.rs | 50 +++++++++++++++++++ .../core/tests/unit/codec/response_tests.rs | 7 +++ 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/crates/core/src/codec/openai_responses.rs b/crates/core/src/codec/openai_responses.rs index 5a1897cf..3105fd18 100644 --- a/crates/core/src/codec/openai_responses.rs +++ b/crates/core/src/codec/openai_responses.rs @@ -48,6 +48,11 @@ struct RawResponsesResponse { output: Option>, usage: Option, incomplete_details: Option, + previous_response_id: Option, + store: Option, + service_tier: Option, + truncation: Option, + reasoning: Option, #[serde(flatten)] extra: serde_json::Map, } @@ -57,12 +62,8 @@ struct RawResponsesUsage { input_tokens: Option, output_tokens: Option, total_tokens: Option, - input_tokens_details: Option, -} - -#[derive(Deserialize)] -struct RawInputTokensDetails { - cached_tokens: Option, + input_tokens_details: Option, + output_tokens_details: Option, } // --------------------------------------------------------------------------- @@ -98,6 +99,12 @@ fn parse_arguments(arguments: &str) -> Json { serde_json::from_str(arguments).unwrap_or_else(|_| Json::String(arguments.to_string())) } +fn cached_tokens_from_details(details: Option<&Json>) -> Option { + details + .and_then(|d| d.get("cached_tokens")) + .and_then(|v| v.as_u64()) +} + /// Keys that are modeled in [`AnnotatedLlmRequest`] and should NOT go into `extra`. const MODELED_REQUEST_KEYS: &[&str] = &[ "input", @@ -278,12 +285,21 @@ impl LlmResponseCodec for OpenAIResponsesCodec { let finish_reason = map_responses_finish_reason(raw.status.as_deref(), raw.incomplete_details.as_ref()); + let input_tokens_details = raw + .usage + .as_ref() + .and_then(|u| u.input_tokens_details.clone()); + let output_tokens_details = raw + .usage + .as_ref() + .and_then(|u| u.output_tokens_details.clone()); + // Map usage. let usage = raw.usage.map(|u| Usage { prompt_tokens: u.input_tokens, completion_tokens: u.output_tokens, total_tokens: u.total_tokens, - cache_read_tokens: u.input_tokens_details.and_then(|d| d.cached_tokens), + cache_read_tokens: cached_tokens_from_details(u.input_tokens_details.as_ref()), cache_write_tokens: None, }); @@ -292,6 +308,13 @@ impl LlmResponseCodec for OpenAIResponsesCodec { output_items: all_output_items, status: raw.status, incomplete_details: raw.incomplete_details, + previous_response_id: raw.previous_response_id, + store: raw.store, + service_tier: raw.service_tier, + truncation: raw.truncation, + reasoning: raw.reasoning, + input_tokens_details, + output_tokens_details, }); Ok(AnnotatedLlmResponse { diff --git a/crates/core/src/codec/response.rs b/crates/core/src/codec/response.rs index 8b718333..6b835662 100644 --- a/crates/core/src/codec/response.rs +++ b/crates/core/src/codec/response.rs @@ -181,6 +181,27 @@ pub enum ApiSpecificResponse { /// Details about why the response is incomplete. #[serde(skip_serializing_if = "Option::is_none")] incomplete_details: Option, + /// Echoed previous response ID for conversation continuation. + #[serde(skip_serializing_if = "Option::is_none")] + previous_response_id: Option, + /// Whether this response is marked for server-side storage. + #[serde(skip_serializing_if = "Option::is_none")] + store: Option, + /// Service tier used for the response. + #[serde(skip_serializing_if = "Option::is_none")] + service_tier: Option, + /// Truncation behavior metadata. + #[serde(skip_serializing_if = "Option::is_none")] + truncation: Option, + /// Reasoning configuration/result metadata. + #[serde(skip_serializing_if = "Option::is_none")] + reasoning: Option, + /// Raw input token details payload. + #[serde(skip_serializing_if = "Option::is_none")] + input_tokens_details: Option, + /// Raw output token details payload. + #[serde(skip_serializing_if = "Option::is_none")] + output_tokens_details: Option, }, /// Anthropic Messages API-specific fields. diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index 2035bb09..fd373c04 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -107,11 +107,61 @@ fn test_decode_full_response() { output_items, status, incomplete_details, + previous_response_id, + store, + service_tier, + truncation, + reasoning, + input_tokens_details, + output_tokens_details, } => { assert_eq!(status, Some("completed".into())); assert!(output_items.is_some()); assert_eq!(output_items.unwrap().len(), 3); assert!(incomplete_details.is_none()); + assert_eq!(previous_response_id, None); + assert_eq!(store, None); + assert_eq!(service_tier, None); + assert_eq!(truncation, None); + assert_eq!(reasoning, None); + assert_eq!(input_tokens_details, Some(json!({"cached_tokens": 10}))); + assert_eq!( + output_tokens_details, + Some(json!({"reasoning_tokens": 1024})) + ); + } + other => panic!("Expected OpenAIResponses, got {other:?}"), + } +} + +#[test] +fn test_decode_response_openai_responses_api_specific_top_level_fields() { + let codec = OpenAIResponsesCodec; + let response = json!({ + "id": "resp_abc123", + "status": "completed", + "output": [], + "previous_response_id": "resp_prev_1", + "store": true, + "service_tier": "default", + "truncation": "auto", + "reasoning": {"effort": "high"} + }); + let resp = codec.decode_response(&response).unwrap(); + match resp.api_specific.unwrap() { + ApiSpecificResponse::OpenAIResponses { + previous_response_id, + store, + service_tier, + truncation, + reasoning, + .. + } => { + assert_eq!(previous_response_id.as_deref(), Some("resp_prev_1")); + assert_eq!(store, Some(true)); + assert_eq!(service_tier.as_deref(), Some("default")); + assert_eq!(truncation, Some(json!("auto"))); + assert_eq!(reasoning, Some(json!({"effort":"high"}))); } other => panic!("Expected OpenAIResponses, got {other:?}"), } diff --git a/crates/core/tests/unit/codec/response_tests.rs b/crates/core/tests/unit/codec/response_tests.rs index 3335f586..4026e2c4 100644 --- a/crates/core/tests/unit/codec/response_tests.rs +++ b/crates/core/tests/unit/codec/response_tests.rs @@ -192,6 +192,13 @@ fn test_api_specific_openai_responses_round_trip() { output_items: Some(vec![json!({"type": "message", "content": []})]), status: Some("completed".into()), incomplete_details: None, + previous_response_id: None, + store: None, + service_tier: None, + truncation: None, + reasoning: None, + input_tokens_details: None, + output_tokens_details: None, }; let json_val = serde_json::to_value(&api).unwrap(); assert_eq!(json_val["api"], json!("openai_responses")); From a8e9b6641c9475a959cea8d52362a21c08b1169a Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 09:21:49 -0700 Subject: [PATCH 03/16] codec: expand Anthropic request extraction for metadata and tool parallelism Signed-off-by: Alex Fournier --- crates/core/src/codec/anthropic.rs | 41 ++++++++++++-- .../core/tests/unit/codec/anthropic_tests.rs | 55 +++++++++++++++++-- 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/crates/core/src/codec/anthropic.rs b/crates/core/src/codec/anthropic.rs index d71e925a..0920ccde 100644 --- a/crates/core/src/codec/anthropic.rs +++ b/crates/core/src/codec/anthropic.rs @@ -94,6 +94,8 @@ const MODELED_REQUEST_KEYS: &[&str] = &[ "stop_sequences", "tools", "tool_choice", + "metadata", + "service_tier", ]; /// Decode the Anthropic `tool_choice` JSON value into a normalized [`ToolChoice`]. @@ -119,6 +121,15 @@ fn decode_anthropic_tool_choice(val: &Json) -> Option { } } +/// Extract Anthropic `disable_parallel_tool_use` from tool_choice and map +/// to normalized `parallel_tool_calls` semantics. +fn decode_parallel_tool_calls(val: &Json) -> Option { + let obj = val.as_object()?; + obj.get("disable_parallel_tool_use") + .and_then(|v| v.as_bool()) + .map(|disabled| !disabled) +} + /// Encode a normalized [`ToolChoice`] back into Anthropic JSON format. fn encode_anthropic_tool_choice(tc: &ToolChoice) -> Json { match tc { @@ -131,6 +142,17 @@ fn encode_anthropic_tool_choice(tc: &ToolChoice) -> Json { } } +fn encode_tool_choice_with_parallel_hint( + tc: &ToolChoice, + parallel_tool_calls: Option, +) -> Json { + let mut value = encode_anthropic_tool_choice(tc); + if let (Some(parallel), Some(obj)) = (parallel_tool_calls, value.as_object_mut()) { + obj.insert("disable_parallel_tool_use".into(), Json::Bool(!parallel)); + } + value +} + /// Extract the system prompt from an Anthropic top-level `system` field. /// /// Handles both string and array-of-content-blocks formats. @@ -435,6 +457,7 @@ impl LlmCodec for AnthropicMessagesCodec { let tool_choice = obj .get("tool_choice") .and_then(decode_anthropic_tool_choice); + let parallel_tool_calls = obj.get("tool_choice").and_then(decode_parallel_tool_calls); // Collect extra fields (keys not in MODELED_REQUEST_KEYS). let extra: serde_json::Map = obj @@ -455,9 +478,12 @@ impl LlmCodec for AnthropicMessagesCodec { reasoning: None, include: None, user: None, - metadata: None, - service_tier: None, - parallel_tool_calls: None, + metadata: obj.get("metadata").cloned(), + service_tier: obj + .get("service_tier") + .and_then(|v| v.as_str()) + .map(String::from), + parallel_tool_calls, max_output_tokens: None, max_tool_calls: None, top_logprobs: None, @@ -506,10 +532,17 @@ impl LlmCodec for AnthropicMessagesCodec { if let Some(ref tool_choice) = annotated.tool_choice { obj.insert( "tool_choice".into(), - encode_anthropic_tool_choice(tool_choice), + encode_tool_choice_with_parallel_hint(tool_choice, annotated.parallel_tool_calls), ); } + if let Some(ref metadata) = annotated.metadata { + obj.insert("metadata".into(), metadata.clone()); + } + if let Some(ref service_tier) = annotated.service_tier { + obj.insert("service_tier".into(), Json::String(service_tier.clone())); + } + // Merge extra fields back. for (k, v) in &annotated.extra { obj.insert(k.clone(), v.clone()); diff --git a/crates/core/tests/unit/codec/anthropic_tests.rs b/crates/core/tests/unit/codec/anthropic_tests.rs index 1ca5e676..1491833a 100644 --- a/crates/core/tests/unit/codec/anthropic_tests.rs +++ b/crates/core/tests/unit/codec/anthropic_tests.rs @@ -495,13 +495,25 @@ fn test_decode_request_extra_fields() { "stream": true })); let annotated = codec.decode(&request).unwrap(); - assert_eq!( - annotated.extra.get("metadata"), - Some(&json!({"user_id": "abc"})) - ); + assert_eq!(annotated.metadata, Some(json!({"user_id": "abc"}))); assert_eq!(annotated.extra.get("stream"), Some(&json!(true))); } +#[test] +fn test_decode_request_service_tier_and_parallel_tool_calls() { + let codec = AnthropicMessagesCodec; + let request = make_request(json!({ + "messages": [{ "role": "user", "content": "Hi" }], + "model": "claude-sonnet-4-20250514", + "max_tokens": 100, + "service_tier": "default", + "tool_choice": { "type": "auto", "disable_parallel_tool_use": true } + })); + let annotated = codec.decode(&request).unwrap(); + assert_eq!(annotated.service_tier.as_deref(), Some("default")); + assert_eq!(annotated.parallel_tool_calls, Some(false)); +} + // =================================================================== // Request encode tests // =================================================================== @@ -525,6 +537,41 @@ fn test_encode_round_trip_preserves_unmodeled_fields() { assert_eq!(obj.get("stream"), Some(&json!(true))); } +#[test] +fn test_encode_writes_anthropic_modeled_controls() { + let codec = AnthropicMessagesCodec; + let mut annotated = codec + .decode(&make_request(json!({ + "messages": [{ "role": "user", "content": "Hi" }], + "model": "claude-sonnet-4-20250514", + "max_tokens": 100, + "tool_choice": { "type": "auto" } + }))) + .unwrap(); + annotated.metadata = Some(json!({"user_id":"abc"})); + annotated.service_tier = Some("default".into()); + annotated.parallel_tool_calls = Some(false); + let encoded = codec + .encode( + &annotated, + &make_request(json!({ + "messages": [{ "role": "user", "content": "Hi" }], + "model": "claude-sonnet-4-20250514", + "max_tokens": 100, + "tool_choice": { "type": "auto" } + })), + ) + .unwrap(); + let obj = encoded.content.as_object().unwrap(); + assert_eq!(obj.get("metadata"), Some(&json!({"user_id":"abc"}))); + assert_eq!(obj.get("service_tier"), Some(&json!("default"))); + assert_eq!( + obj.get("tool_choice") + .and_then(|v| v.get("disable_parallel_tool_use")), + Some(&json!(true)) + ); +} + #[test] fn test_encode_system_as_top_level() { let codec = AnthropicMessagesCodec; From 8330f0daf7d30a1aa110ae08968066acaecf9486 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 10:14:54 -0700 Subject: [PATCH 04/16] codec: expand Anthropic response api-specific extraction Signed-off-by: Alex Fournier --- crates/core/src/codec/anthropic.rs | 10 +++++ crates/core/src/codec/response.rs | 15 +++++++ .../core/tests/unit/codec/anthropic_tests.rs | 39 +++++++++++++++---- .../core/tests/unit/codec/response_tests.rs | 5 +++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/crates/core/src/codec/anthropic.rs b/crates/core/src/codec/anthropic.rs index 0920ccde..ef208068 100644 --- a/crates/core/src/codec/anthropic.rs +++ b/crates/core/src/codec/anthropic.rs @@ -45,10 +45,15 @@ pub struct AnthropicMessagesCodec; #[derive(Deserialize)] struct RawAnthropicResponse { id: Option, + #[serde(rename = "type")] + object_type: Option, + role: Option, model: Option, content: Option>, stop_reason: Option, stop_sequence: Option, + service_tier: Option, + container: Option, usage: Option, #[serde(flatten)] extra: serde_json::Map, @@ -361,7 +366,12 @@ impl LlmResponseCodec for AnthropicMessagesCodec { // Build API-specific fields: all content blocks + stop_sequence. let api_specific_content_blocks = raw.content.clone(); let api_specific = Some(ApiSpecificResponse::AnthropicMessages { + object_type: raw.object_type, + role: raw.role, + stop_reason: raw.stop_reason.clone(), stop_sequence: raw.stop_sequence, + service_tier: raw.service_tier, + container: raw.container, content_blocks: api_specific_content_blocks, }); diff --git a/crates/core/src/codec/response.rs b/crates/core/src/codec/response.rs index 6b835662..1ea83a89 100644 --- a/crates/core/src/codec/response.rs +++ b/crates/core/src/codec/response.rs @@ -207,9 +207,24 @@ pub enum ApiSpecificResponse { /// Anthropic Messages API-specific fields. #[serde(rename = "anthropic_messages")] AnthropicMessages { + /// Anthropic object type (typically `"message"`). + #[serde(skip_serializing_if = "Option::is_none")] + object_type: Option, + /// Anthropic response role (typically `"assistant"`). + #[serde(skip_serializing_if = "Option::is_none")] + role: Option, + /// Raw Anthropic stop_reason. + #[serde(skip_serializing_if = "Option::is_none")] + stop_reason: Option, /// Which stop sequence was matched (if any). #[serde(skip_serializing_if = "Option::is_none")] stop_sequence: Option, + /// Anthropic response service tier when present. + #[serde(skip_serializing_if = "Option::is_none")] + service_tier: Option, + /// Anthropic container payload when present. + #[serde(skip_serializing_if = "Option::is_none")] + container: Option, /// Full content blocks array for direct access. #[serde(skip_serializing_if = "Option::is_none")] content_blocks: Option>, diff --git a/crates/core/tests/unit/codec/anthropic_tests.rs b/crates/core/tests/unit/codec/anthropic_tests.rs index 1491833a..09b41aad 100644 --- a/crates/core/tests/unit/codec/anthropic_tests.rs +++ b/crates/core/tests/unit/codec/anthropic_tests.rs @@ -224,8 +224,13 @@ fn test_decode_response_thinking_blocks_in_api_specific() { let resp = codec.decode_response(&full_anthropic_response()).unwrap(); match resp.api_specific.unwrap() { ApiSpecificResponse::AnthropicMessages { + object_type, + role, + stop_reason, content_blocks, stop_sequence, + service_tier, + container, } => { let blocks = content_blocks.unwrap(); // Should contain ALL content blocks @@ -239,7 +244,12 @@ fn test_decode_response_thinking_blocks_in_api_specific() { assert!(types.contains(&"redacted_thinking")); assert!(types.contains(&"text")); assert!(types.contains(&"tool_use")); + assert_eq!(object_type.as_deref(), Some("message")); + assert_eq!(role.as_deref(), Some("assistant")); + assert_eq!(stop_reason.as_deref(), Some("end_turn")); assert_eq!(stop_sequence, None); + assert_eq!(service_tier, None); + assert_eq!(container, None); } other => panic!("Expected AnthropicMessages, got {other:?}"), } @@ -261,6 +271,11 @@ fn test_decode_response_stop_sequence_value() { ApiSpecificResponse::AnthropicMessages { stop_sequence, content_blocks: _, + object_type: _, + role: _, + stop_reason: _, + service_tier: _, + container: _, } => { assert_eq!(stop_sequence, Some("\n\nHuman:".into())); } @@ -282,13 +297,23 @@ fn test_decode_response_extra_fields_preserved() { "container": { "id": "container_abc123" } }); let resp = codec.decode_response(&response).unwrap(); - // type, role, container should be in extra - assert_eq!(resp.extra.get("type"), Some(&json!("message"))); - assert_eq!(resp.extra.get("role"), Some(&json!("assistant"))); - assert_eq!( - resp.extra.get("container"), - Some(&json!({"id": "container_abc123"})) - ); + // type/role/container are now modeled in api_specific. + assert!(resp.extra.get("type").is_none()); + assert!(resp.extra.get("role").is_none()); + assert!(resp.extra.get("container").is_none()); + match resp.api_specific.unwrap() { + ApiSpecificResponse::AnthropicMessages { + object_type, + role, + container, + .. + } => { + assert_eq!(object_type.as_deref(), Some("message")); + assert_eq!(role.as_deref(), Some("assistant")); + assert_eq!(container, Some(json!({"id":"container_abc123"}))); + } + other => panic!("Expected AnthropicMessages, got {other:?}"), + } } #[test] diff --git a/crates/core/tests/unit/codec/response_tests.rs b/crates/core/tests/unit/codec/response_tests.rs index 4026e2c4..36dd51c0 100644 --- a/crates/core/tests/unit/codec/response_tests.rs +++ b/crates/core/tests/unit/codec/response_tests.rs @@ -209,7 +209,12 @@ fn test_api_specific_openai_responses_round_trip() { #[test] fn test_api_specific_anthropic_messages_round_trip() { let api = ApiSpecificResponse::AnthropicMessages { + object_type: Some("message".into()), + role: Some("assistant".into()), + stop_reason: Some("end_turn".into()), stop_sequence: Some("\n\nHuman:".into()), + service_tier: Some("default".into()), + container: Some(json!({"id":"container_123"})), content_blocks: Some(vec![json!({"type": "text", "text": "Hello"})]), }; let json_val = serde_json::to_value(&api).unwrap(); From 9512ccc506de441a3d5174cebdf79ba02771ea0f Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 12:18:02 -0700 Subject: [PATCH 05/16] codec(openai-responses): adopt strict input fallback and add vLLM-style hybrid fixtures Signed-off-by: Alex Fournier --- crates/core/src/codec/openai_responses.rs | 64 +++++++++++++++---- .../anthropic_tool_choice_hint.json | 5 ++ ...mixed_input_with_function_call_output.json | 7 ++ .../strict_messages_array.json | 9 +++ .../unit/codec/openai_responses_tests.rs | 64 +++++++++++++------ 5 files changed, 119 insertions(+), 30 deletions(-) create mode 100644 crates/core/tests/fixtures/codec/openai_responses/anthropic_tool_choice_hint.json create mode 100644 crates/core/tests/fixtures/codec/openai_responses/mixed_input_with_function_call_output.json create mode 100644 crates/core/tests/fixtures/codec/openai_responses/strict_messages_array.json diff --git a/crates/core/src/codec/openai_responses.rs b/crates/core/src/codec/openai_responses.rs index 3105fd18..aee7c358 100644 --- a/crates/core/src/codec/openai_responses.rs +++ b/crates/core/src/codec/openai_responses.rs @@ -22,7 +22,8 @@ use crate::error::{FlowError, Result}; use crate::json::Json; use super::request::{ - AnnotatedLlmRequest, GenerationParams, Message, MessageContent, ToolChoice, ToolDefinition, + AnnotatedLlmRequest, GenerationParams, Message, MessageContent, ToolChoice, + ToolChoiceFunction, ToolChoiceFunctionName, ToolDefinition, }; use super::response::{ AnnotatedLlmResponse, ApiSpecificResponse, FinishReason, ResponseToolCall, Usage, @@ -128,6 +129,7 @@ const MODELED_REQUEST_KEYS: &[&str] = &[ "top_logprobs", "stream", ]; +const UNPARSED_INPUT_ITEMS_KEY: &str = "_openai_responses_unparsed_input_items"; /// Helper to construct a [`Json`] number from an `f64`. fn json_f64(v: f64) -> Json { @@ -267,6 +269,40 @@ fn overlay_generation_params(obj: &mut serde_json::Map, params: &G } } +fn decode_openai_or_anthropic_tool_choice(value: &Json) -> Option { + if let Ok(parsed) = serde_json::from_value::(value.clone()) { + return Some(parsed); + } + + let obj = value.as_object()?; + match obj.get("type").and_then(|v| v.as_str()) { + Some("auto") => Some(ToolChoice::Auto), + Some("any") => Some(ToolChoice::Required), + Some("tool") => { + let name = obj.get("name").and_then(|v| v.as_str())?.to_string(); + Some(ToolChoice::Specific(ToolChoiceFunction { + choice_type: "function".to_string(), + function: ToolChoiceFunctionName { name }, + })) + } + _ => None, + } +} + +fn decode_openai_or_anthropic_parallel_tool_calls( + obj: &serde_json::Map, +) -> Option { + if let Some(value) = obj.get("parallel_tool_calls").and_then(|v| v.as_bool()) { + return Some(value); + } + let tool_choice = obj.get("tool_choice")?.as_object()?; + tool_choice + .get("disable_parallel_tool_use") + .and_then(|v| v.as_bool()) + .map(|disabled| !disabled) +} + + // --------------------------------------------------------------------------- // LlmResponseCodec implementation // --------------------------------------------------------------------------- @@ -342,6 +378,7 @@ impl LlmCodec for OpenAIResponsesCodec { .ok_or_else(|| FlowError::Internal("request content is not an object".into()))?; let mut messages: Vec = Vec::new(); + let mut preserved_unparsed_input: Option = None; // Extract instructions -> system message (first). if let Some(instructions) = obj.get("instructions").and_then(|v| v.as_str()) { @@ -360,10 +397,14 @@ impl LlmCodec for OpenAIResponsesCodec { name: None, }); } else if input.is_array() { - // Input is an array of message items. - let input_messages: Vec = - serde_json::from_value(input.clone()).unwrap_or_default(); - messages.extend(input_messages); + // Strict-first parse to avoid partial normalized state. + match serde_json::from_value::>(input.clone()) { + Ok(input_messages) => messages.extend(input_messages), + Err(_) => { + // Preserve full original array for lossless handling. + preserved_unparsed_input = Some(input.clone()); + } + } } } @@ -397,18 +438,17 @@ impl LlmCodec for OpenAIResponsesCodec { // Extract tool_choice. let tool_choice: Option = obj .get("tool_choice") - .map(|v| serde_json::from_value(v.clone())) - .transpose() - .map_err(|e| { - FlowError::Internal(format!("OpenAI Responses tool_choice decode: {e}")) - })?; + .and_then(decode_openai_or_anthropic_tool_choice); // Collect extra fields (keys not in MODELED_REQUEST_KEYS). - let extra: serde_json::Map = obj + let mut extra: serde_json::Map = obj .iter() .filter(|(k, _)| !MODELED_REQUEST_KEYS.contains(&k.as_str())) .map(|(k, v)| (k.clone(), v.clone())) .collect(); + if let Some(input_items) = preserved_unparsed_input { + extra.insert(UNPARSED_INPUT_ITEMS_KEY.into(), input_items); + } Ok(AnnotatedLlmRequest { messages, @@ -430,7 +470,7 @@ impl LlmCodec for OpenAIResponsesCodec { .get("service_tier") .and_then(|v| v.as_str()) .map(String::from), - parallel_tool_calls: obj.get("parallel_tool_calls").and_then(|v| v.as_bool()), + parallel_tool_calls: decode_openai_or_anthropic_parallel_tool_calls(obj), max_output_tokens: obj.get("max_output_tokens").and_then(|v| v.as_u64()), max_tool_calls: obj.get("max_tool_calls").and_then(|v| v.as_u64()), top_logprobs: obj.get("top_logprobs").and_then(|v| v.as_u64()), diff --git a/crates/core/tests/fixtures/codec/openai_responses/anthropic_tool_choice_hint.json b/crates/core/tests/fixtures/codec/openai_responses/anthropic_tool_choice_hint.json new file mode 100644 index 00000000..7749a174 --- /dev/null +++ b/crates/core/tests/fixtures/codec/openai_responses/anthropic_tool_choice_hint.json @@ -0,0 +1,5 @@ +{ + "model": "gpt-4o", + "input": "Hi", + "tool_choice": { "type": "auto", "disable_parallel_tool_use": true } +} diff --git a/crates/core/tests/fixtures/codec/openai_responses/mixed_input_with_function_call_output.json b/crates/core/tests/fixtures/codec/openai_responses/mixed_input_with_function_call_output.json new file mode 100644 index 00000000..cbf468bd --- /dev/null +++ b/crates/core/tests/fixtures/codec/openai_responses/mixed_input_with_function_call_output.json @@ -0,0 +1,7 @@ +{ + "model": "gpt-4o", + "input": [ + { "role": "user", "content": "hello" }, + { "type": "function_call_output", "call_id": "call_1", "output": "ok" } + ] +} diff --git a/crates/core/tests/fixtures/codec/openai_responses/strict_messages_array.json b/crates/core/tests/fixtures/codec/openai_responses/strict_messages_array.json new file mode 100644 index 00000000..b2581c13 --- /dev/null +++ b/crates/core/tests/fixtures/codec/openai_responses/strict_messages_array.json @@ -0,0 +1,9 @@ +{ + "model": "gpt-4o", + "instructions": "Be helpful.", + "input": [ + { "role": "user", "content": "What is 2+2?" }, + { "role": "assistant", "content": "4" }, + { "role": "user", "content": "And 3+3?" } + ] +} diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index fd373c04..0db677a2 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -20,6 +20,10 @@ fn make_request(content: Json) -> LlmRequest { } } +fn fixture_json(path: &str) -> Json { + serde_json::from_str(path).expect("valid fixture json") +} + /// Full Responses API response with message, function_call, reasoning, and usage. fn full_responses_response() -> Json { json!({ @@ -386,29 +390,24 @@ fn test_decode_invalid_json() { #[test] fn test_decode_request_with_input_array() { let codec = OpenAIResponsesCodec; - let request = make_request(json!({ - "model": "gpt-4o", - "instructions": "Be helpful and concise.", - "input": [ - { "role": "user", "content": "What is 2+2?" }, - { "role": "assistant", "content": "4" }, - { "role": "user", "content": "And 3+3?" } - ], - "tools": [{ - "type": "function", - "function": { - "name": "calculate", - "description": "Calculate math", - "parameters": {"type": "object"} - } - }] - })); + let mut request_json = fixture_json(include_str!( + "../../fixtures/codec/openai_responses/strict_messages_array.json" + )); + request_json["tools"] = json!([{ + "type": "function", + "function": { + "name": "calculate", + "description": "Calculate math", + "parameters": {"type": "object"} + } + }]); + let request = make_request(request_json); let annotated = codec.decode(&request).unwrap(); assert_eq!(annotated.model, Some("gpt-4o".into())); // instructions becomes system message (first) assert!(annotated.messages.len() >= 2); - assert_eq!(annotated.system_prompt(), Some("Be helpful and concise.")); + assert_eq!(annotated.system_prompt(), Some("Be helpful.")); // input items become messages (after system) // System + 3 input items = 4 total messages @@ -499,6 +498,35 @@ fn test_decode_request_openai_controls_typed() { assert_eq!(annotated.stream, Some(true)); } +#[test] +fn test_decode_request_input_array_preserves_unparsed_items_in_extra() { + let codec = OpenAIResponsesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/openai_responses/mixed_input_with_function_call_output.json" + ))); + let annotated = codec.decode(&request).unwrap(); + // strict-first behavior: no partial message extraction on mixed arrays + assert!(annotated.messages.is_empty()); + assert_eq!( + annotated.extra.get("_openai_responses_unparsed_input_items"), + Some(&json!([ + { "role": "user", "content": "hello" }, + { "type": "function_call_output", "call_id": "call_1", "output": "ok" } + ])) + ); +} + +#[test] +fn test_decode_request_accepts_anthropic_hint_tool_choice() { + let codec = OpenAIResponsesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/openai_responses/anthropic_tool_choice_hint.json" + ))); + let annotated = codec.decode(&request).unwrap(); + assert_eq!(annotated.tool_choice, Some(ToolChoice::Auto)); + assert_eq!(annotated.parallel_tool_calls, Some(false)); +} + // =================================================================== // Request encode tests // =================================================================== From 49ccb05316ba77582f06144569ea207fb477fd03 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 12:33:20 -0700 Subject: [PATCH 06/16] codec(anthropic): add vLLM tool_choice none parity and hybrid fixtures Signed-off-by: Alex Fournier --- crates/core/src/codec/anthropic.rs | 4 +- .../vllm_system_block_with_non_text.json | 11 +++++ ...vllm_tool_choice_none_with_extensions.json | 10 +++++ .../core/tests/unit/codec/anthropic_tests.rs | 40 ++++++++++++++++++- 4 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 crates/core/tests/fixtures/codec/anthropic/vllm_system_block_with_non_text.json create mode 100644 crates/core/tests/fixtures/codec/anthropic/vllm_tool_choice_none_with_extensions.json diff --git a/crates/core/src/codec/anthropic.rs b/crates/core/src/codec/anthropic.rs index ef208068..175b7d94 100644 --- a/crates/core/src/codec/anthropic.rs +++ b/crates/core/src/codec/anthropic.rs @@ -108,6 +108,7 @@ const MODELED_REQUEST_KEYS: &[&str] = &[ /// Anthropic format: /// - `{"type": "auto"}` -> `ToolChoice::Auto` /// - `{"type": "any"}` -> `ToolChoice::Required` +/// - `{"type": "none"}` -> `ToolChoice::None` /// - `{"type": "tool", "name": "X"}` -> `ToolChoice::Specific` fn decode_anthropic_tool_choice(val: &Json) -> Option { let obj = val.as_object()?; @@ -115,6 +116,7 @@ fn decode_anthropic_tool_choice(val: &Json) -> Option { match tc_type { "auto" => Some(ToolChoice::Auto), "any" => Some(ToolChoice::Required), + "none" => Some(ToolChoice::None), "tool" => { let name = obj.get("name")?.as_str()?.to_string(); Some(ToolChoice::Specific(ToolChoiceFunction { @@ -140,7 +142,7 @@ fn encode_anthropic_tool_choice(tc: &ToolChoice) -> Json { match tc { ToolChoice::Auto => serde_json::json!({"type": "auto"}), ToolChoice::Required => serde_json::json!({"type": "any"}), - ToolChoice::None => serde_json::json!({"type": "auto"}), // Anthropic has no "none"; fall back to auto + ToolChoice::None => serde_json::json!({"type": "none"}), ToolChoice::Specific(func) => { serde_json::json!({"type": "tool", "name": func.function.name}) } diff --git a/crates/core/tests/fixtures/codec/anthropic/vllm_system_block_with_non_text.json b/crates/core/tests/fixtures/codec/anthropic/vllm_system_block_with_non_text.json new file mode 100644 index 00000000..93fc0936 --- /dev/null +++ b/crates/core/tests/fixtures/codec/anthropic/vllm_system_block_with_non_text.json @@ -0,0 +1,11 @@ +{ + "model": "claude-sonnet-4-20250514", + "messages": [ + { "role": "user", "content": "Describe this" } + ], + "max_tokens": 100, + "system": [ + { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "abcd" } }, + { "type": "text", "text": "Only answer in one sentence." } + ] +} diff --git a/crates/core/tests/fixtures/codec/anthropic/vllm_tool_choice_none_with_extensions.json b/crates/core/tests/fixtures/codec/anthropic/vllm_tool_choice_none_with_extensions.json new file mode 100644 index 00000000..997bbf58 --- /dev/null +++ b/crates/core/tests/fixtures/codec/anthropic/vllm_tool_choice_none_with_extensions.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-20250514", + "messages": [ + { "role": "user", "content": "Hi" } + ], + "max_tokens": 100, + "tool_choice": { "type": "none", "disable_parallel_tool_use": true }, + "kv_transfer_params": { "mode": "decode" }, + "chat_template_kwargs": { "include_system": true } +} diff --git a/crates/core/tests/unit/codec/anthropic_tests.rs b/crates/core/tests/unit/codec/anthropic_tests.rs index 09b41aad..378c4632 100644 --- a/crates/core/tests/unit/codec/anthropic_tests.rs +++ b/crates/core/tests/unit/codec/anthropic_tests.rs @@ -20,6 +20,10 @@ fn make_request(content: Json) -> LlmRequest { } } +fn fixture_json(path: &str) -> Json { + serde_json::from_str(path).expect("valid fixture json") +} + /// Full Anthropic Messages response with text, tool_use, thinking, usage, etc. fn full_anthropic_response() -> Json { json!({ @@ -539,6 +543,38 @@ fn test_decode_request_service_tier_and_parallel_tool_calls() { assert_eq!(annotated.parallel_tool_calls, Some(false)); } +#[test] +fn test_decode_request_vllm_tool_choice_none_and_extensions_preserved() { + let codec = AnthropicMessagesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/anthropic/vllm_tool_choice_none_with_extensions.json" + ))); + let annotated = codec.decode(&request).unwrap(); + assert_eq!(annotated.tool_choice, Some(ToolChoice::None)); + assert_eq!(annotated.parallel_tool_calls, Some(false)); + assert_eq!( + annotated.extra.get("kv_transfer_params"), + Some(&json!({"mode":"decode"})) + ); + assert_eq!( + annotated.extra.get("chat_template_kwargs"), + Some(&json!({"include_system":true})) + ); +} + +#[test] +fn test_decode_request_vllm_system_array_ignores_non_text_blocks() { + let codec = AnthropicMessagesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/anthropic/vllm_system_block_with_non_text.json" + ))); + let annotated = codec.decode(&request).unwrap(); + assert_eq!( + annotated.system_prompt(), + Some("Only answer in one sentence.") + ); +} + // =================================================================== // Request encode tests // =================================================================== @@ -672,7 +708,7 @@ fn test_encode_tool_choice_anthropic_format() { let annotated = codec.decode(&original).unwrap(); let encoded = codec.encode(&annotated, &original).unwrap(); let obj = encoded.content.as_object().unwrap(); - assert_eq!(obj.get("tool_choice"), Some(&json!({"type": "auto"}))); + assert_eq!(obj.get("tool_choice"), Some(&json!({"type": "none"}))); } #[test] @@ -793,7 +829,7 @@ fn test_helper_and_error_paths_cover_remaining_anthropic_branches() { assert_eq!(obj.get("temperature"), Some(&json!(0.3))); assert_eq!(obj.get("top_p"), Some(&json!(0.8))); assert_eq!(obj.get("stop_sequences"), Some(&json!(["END"]))); - assert_eq!(obj.get("tool_choice"), Some(&json!({"type": "auto"}))); + assert_eq!(obj.get("tool_choice"), Some(&json!({"type": "none"}))); assert_eq!(obj.get("system"), Some(&json!("First\nSecond"))); let tools = obj.get("tools").unwrap().as_array().unwrap(); From ea3203b9467e108fd429d2fe17189ea05e370283 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 17:52:58 -0700 Subject: [PATCH 07/16] tests(codec): add LiteLLM hybrid fixtures for anthropic and responses Signed-off-by: Alex Fournier --- .../litellm_cache_control_blocks.json | 15 ++++++++ ...inking_output_config_reasoning_effort.json | 11 ++++++ .../litellm_reasoning_input_item.json | 11 ++++++ .../core/tests/unit/codec/anthropic_tests.rs | 38 +++++++++++++++++++ .../unit/codec/openai_responses_tests.rs | 20 ++++++++++ 5 files changed, 95 insertions(+) create mode 100644 crates/core/tests/fixtures/codec/anthropic/litellm_cache_control_blocks.json create mode 100644 crates/core/tests/fixtures/codec/anthropic/litellm_thinking_output_config_reasoning_effort.json create mode 100644 crates/core/tests/fixtures/codec/openai_responses/litellm_reasoning_input_item.json diff --git a/crates/core/tests/fixtures/codec/anthropic/litellm_cache_control_blocks.json b/crates/core/tests/fixtures/codec/anthropic/litellm_cache_control_blocks.json new file mode 100644 index 00000000..79ac0e56 --- /dev/null +++ b/crates/core/tests/fixtures/codec/anthropic/litellm_cache_control_blocks.json @@ -0,0 +1,15 @@ +{ + "model": "claude-sonnet-4-20250514", + "max_tokens": 128, + "system": [ + { "type": "text", "text": "Be terse", "cache_control": { "type": "ephemeral" } } + ], + "messages": [ + { + "role": "user", + "content": [ + { "type": "text", "text": "Hello", "cache_control": { "type": "ephemeral", "scope": "global" } } + ] + } + ] +} diff --git a/crates/core/tests/fixtures/codec/anthropic/litellm_thinking_output_config_reasoning_effort.json b/crates/core/tests/fixtures/codec/anthropic/litellm_thinking_output_config_reasoning_effort.json new file mode 100644 index 00000000..a6412d33 --- /dev/null +++ b/crates/core/tests/fixtures/codec/anthropic/litellm_thinking_output_config_reasoning_effort.json @@ -0,0 +1,11 @@ +{ + "model": "claude-sonnet-4-20250514", + "messages": [ + { "role": "user", "content": "Hi" } + ], + "max_tokens": 128, + "thinking": { "type": "enabled", "budget_tokens": 2048 }, + "output_config": { "effort": "low" }, + "reasoning_effort": "minimal", + "tool_choice": { "type": "any", "disable_parallel_tool_use": false } +} diff --git a/crates/core/tests/fixtures/codec/openai_responses/litellm_reasoning_input_item.json b/crates/core/tests/fixtures/codec/openai_responses/litellm_reasoning_input_item.json new file mode 100644 index 00000000..4a4f3da8 --- /dev/null +++ b/crates/core/tests/fixtures/codec/openai_responses/litellm_reasoning_input_item.json @@ -0,0 +1,11 @@ +{ + "model": "gpt-5-mini", + "input": [ + { "type": "reasoning", "id": "rs_1", "summary": "work", "status": null }, + { "type": "message", "role": "user", "content": [ { "type": "input_text", "text": "What is 2+2?" } ] } + ], + "reasoning": { "effort": "minimal" }, + "truncation": "disabled", + "store": true, + "parallel_tool_calls": true +} diff --git a/crates/core/tests/unit/codec/anthropic_tests.rs b/crates/core/tests/unit/codec/anthropic_tests.rs index 378c4632..3be11d0d 100644 --- a/crates/core/tests/unit/codec/anthropic_tests.rs +++ b/crates/core/tests/unit/codec/anthropic_tests.rs @@ -575,6 +575,44 @@ fn test_decode_request_vllm_system_array_ignores_non_text_blocks() { ); } +#[test] +fn test_decode_request_litellm_bridge_thinking_output_config_preserved_in_extra() { + let codec = AnthropicMessagesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/anthropic/litellm_thinking_output_config_reasoning_effort.json" + ))); + let annotated = codec.decode(&request).unwrap(); + // stable extraction + assert_eq!(annotated.tool_choice, Some(ToolChoice::Required)); + assert_eq!(annotated.parallel_tool_calls, Some(true)); + // bridge-specific controls preserved losslessly + assert_eq!( + annotated.extra.get("thinking"), + Some(&json!({"type":"enabled","budget_tokens":2048})) + ); + assert_eq!( + annotated.extra.get("output_config"), + Some(&json!({"effort":"low"})) + ); + assert_eq!( + annotated.extra.get("reasoning_effort"), + Some(&json!("minimal")) + ); +} + +#[test] +fn test_decode_request_litellm_cache_control_blocks_preserved() { + let codec = AnthropicMessagesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/anthropic/litellm_cache_control_blocks.json" + ))); + let annotated = codec.decode(&request).unwrap(); + // System text should still extract. + assert_eq!(annotated.system_prompt(), Some("Be terse")); + // `system` is a modeled key in Anthropic decode and should not live in extra. + assert!(annotated.extra.get("system").is_none()); +} + // =================================================================== // Request encode tests // =================================================================== diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index 0db677a2..d817d879 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -527,6 +527,26 @@ fn test_decode_request_accepts_anthropic_hint_tool_choice() { assert_eq!(annotated.parallel_tool_calls, Some(false)); } +#[test] +fn test_decode_request_litellm_reasoning_input_item_preserved_and_controls_extracted() { + let codec = OpenAIResponsesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/openai_responses/litellm_reasoning_input_item.json" + ))); + let annotated = codec.decode(&request).unwrap(); + // strict-first parse: mixed input array preserved whole in extra + assert!(annotated.messages.is_empty()); + assert!(annotated + .extra + .get("_openai_responses_unparsed_input_items") + .is_some()); + // stable controls still extracted + assert_eq!(annotated.store, Some(true)); + assert_eq!(annotated.parallel_tool_calls, Some(true)); + assert_eq!(annotated.truncation, Some(json!("disabled"))); + assert_eq!(annotated.reasoning, Some(json!({"effort":"minimal"}))); +} + // =================================================================== // Request encode tests // =================================================================== From e77584abe06b337fd61cc11e73ce1318572d7f8b Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 19:26:20 -0700 Subject: [PATCH 08/16] tests(codec): add SGLang responses extension fixture coverage Signed-off-by: Alex Fournier --- ...ang_responses_request_with_extensions.json | 18 +++++++++++++++ .../unit/codec/openai_responses_tests.rs | 23 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 crates/core/tests/fixtures/codec/openai_responses/sglang_responses_request_with_extensions.json diff --git a/crates/core/tests/fixtures/codec/openai_responses/sglang_responses_request_with_extensions.json b/crates/core/tests/fixtures/codec/openai_responses/sglang_responses_request_with_extensions.json new file mode 100644 index 00000000..a2b2db14 --- /dev/null +++ b/crates/core/tests/fixtures/codec/openai_responses/sglang_responses_request_with_extensions.json @@ -0,0 +1,18 @@ +{ + "model": "gpt-oss-120b", + "input": "Summarize this", + "request_id": "resp_custom_1", + "priority": 3, + "extra_key": "tenant-a", + "cache_salt": "salt-123", + "frequency_penalty": 0.1, + "presence_penalty": 0.2, + "top_k": 40, + "min_p": 0.05, + "repetition_penalty": 1.02, + "store": true, + "truncation": "auto", + "reasoning": { "effort": "low" }, + "parallel_tool_calls": true, + "tool_choice": "none" +} diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index d817d879..0a585053 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -547,6 +547,29 @@ fn test_decode_request_litellm_reasoning_input_item_preserved_and_controls_extra assert_eq!(annotated.reasoning, Some(json!({"effort":"minimal"}))); } +#[test] +fn test_decode_request_sglang_extensions_preserved_in_extra() { + let codec = OpenAIResponsesCodec; + let request = make_request(fixture_json(include_str!( + "../../fixtures/codec/openai_responses/sglang_responses_request_with_extensions.json" + ))); + let annotated = codec.decode(&request).unwrap(); + // core controls extracted + assert_eq!(annotated.store, Some(true)); + assert_eq!(annotated.parallel_tool_calls, Some(true)); + assert_eq!(annotated.truncation, Some(json!("auto"))); + assert_eq!(annotated.reasoning, Some(json!({"effort":"low"}))); + assert_eq!(annotated.tool_choice, Some(ToolChoice::None)); + // sglang-specific extensions retained losslessly + assert_eq!(annotated.extra.get("request_id"), Some(&json!("resp_custom_1"))); + assert_eq!(annotated.extra.get("priority"), Some(&json!(3))); + assert_eq!(annotated.extra.get("extra_key"), Some(&json!("tenant-a"))); + assert_eq!(annotated.extra.get("cache_salt"), Some(&json!("salt-123"))); + assert_eq!(annotated.extra.get("top_k"), Some(&json!(40))); + assert_eq!(annotated.extra.get("min_p"), Some(&json!(0.05))); + assert_eq!(annotated.extra.get("repetition_penalty"), Some(&json!(1.02))); +} + // =================================================================== // Request encode tests // =================================================================== From 096a6be3301b02f529f746fef8843c8ba4b6afd7 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 19:47:00 -0700 Subject: [PATCH 09/16] adaptive: handle expanded request IR and multimodal content parts Signed-off-by: Alex Fournier --- crates/adaptive/src/acg/ir_builder.rs | 6 +-- crates/adaptive/src/acg_profile.rs | 5 ++- .../integration/acg_module_surface_tests.rs | 26 +++++++++++++ .../adaptive/tests/integration/redis_tests.rs | 13 +++++++ .../integration/runtime_integration_tests.rs | 13 +++++++ .../tests/unit/acg/ir_builder_tests.rs | 39 +++++++++++++++++++ .../tests/unit/acg_component_tests.rs | 13 +++++++ .../adaptive/tests/unit/acg_learner_tests.rs | 13 +++++++ .../adaptive/tests/unit/acg_profile_tests.rs | 13 +++++++ .../unit/adaptive_hints_intercept_tests.rs | 13 +++++++ .../tests/unit/cache_diagnostics_tests.rs | 13 +++++++ crates/adaptive/tests/unit/runtime_tests.rs | 39 +++++++++++++++++++ 12 files changed, 201 insertions(+), 5 deletions(-) diff --git a/crates/adaptive/src/acg/ir_builder.rs b/crates/adaptive/src/acg/ir_builder.rs index e1181ab6..3c20dfe2 100644 --- a/crates/adaptive/src/acg/ir_builder.rs +++ b/crates/adaptive/src/acg/ir_builder.rs @@ -147,9 +147,9 @@ fn extract_text(content: &MessageContent) -> String { MessageContent::Text(text) => text.clone(), MessageContent::Parts(parts) => parts .iter() - .map(|part| { - let ContentPart::Text { text } = part; - text.as_str() + .filter_map(|part| match part { + ContentPart::Text { text } => Some(text.as_str()), + ContentPart::ImageUrl { .. } => None, }) .collect::>() .join("\n"), diff --git a/crates/adaptive/src/acg_profile.rs b/crates/adaptive/src/acg_profile.rs index 28de023d..f753af9c 100644 --- a/crates/adaptive/src/acg_profile.rs +++ b/crates/adaptive/src/acg_profile.rs @@ -187,8 +187,9 @@ fn extract_text(content: &MessageContent) -> String { MessageContent::Text(text) => text.clone(), MessageContent::Parts(parts) => parts .iter() - .map(|part| match part { - ContentPart::Text { text } => text.as_str(), + .filter_map(|part| match part { + ContentPart::Text { text } => Some(text.as_str()), + ContentPart::ImageUrl { .. } => None, }) .collect::>() .join("\n"), diff --git a/crates/adaptive/tests/integration/acg_module_surface_tests.rs b/crates/adaptive/tests/integration/acg_module_surface_tests.rs index e03c8bfc..87433e79 100644 --- a/crates/adaptive/tests/integration/acg_module_surface_tests.rs +++ b/crates/adaptive/tests/integration/acg_module_surface_tests.rs @@ -143,6 +143,19 @@ fn acg_module_surface_policy_and_ir_builder_symbols_compile_from_canonical_names params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; @@ -188,6 +201,19 @@ fn acg_module_surface_build_prompt_ir_inserts_tool_schema_before_first_non_syste }, }]), tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; diff --git a/crates/adaptive/tests/integration/redis_tests.rs b/crates/adaptive/tests/integration/redis_tests.rs index 8b7745dc..94f3c16f 100644 --- a/crates/adaptive/tests/integration/redis_tests.rs +++ b/crates/adaptive/tests/integration/redis_tests.rs @@ -120,6 +120,19 @@ fn sample_annotated_request(model: &str) -> AnnotatedLlmRequest { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), } } diff --git a/crates/adaptive/tests/integration/runtime_integration_tests.rs b/crates/adaptive/tests/integration/runtime_integration_tests.rs index 10d0cd79..0cb3f7cb 100644 --- a/crates/adaptive/tests/integration/runtime_integration_tests.rs +++ b/crates/adaptive/tests/integration/runtime_integration_tests.rs @@ -79,6 +79,19 @@ fn sample_annotated_request(model: &str) -> AnnotatedLlmRequest { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), } } diff --git a/crates/adaptive/tests/unit/acg/ir_builder_tests.rs b/crates/adaptive/tests/unit/acg/ir_builder_tests.rs index 14e8c4a2..b435f809 100644 --- a/crates/adaptive/tests/unit/acg/ir_builder_tests.rs +++ b/crates/adaptive/tests/unit/acg/ir_builder_tests.rs @@ -71,6 +71,19 @@ fn build_prompt_ir_inserts_tools_before_first_non_system_message_and_preserves_a params: None, tools: Some(vec![sample_tool_definition("search")]), tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; @@ -110,6 +123,19 @@ fn build_prompt_ir_appends_tool_blocks_when_request_contains_only_system_message sample_tool_definition("lookup"), ]), tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; @@ -139,6 +165,19 @@ fn build_prompt_ir_omits_tool_schema_hashes_when_no_tools_are_present() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; diff --git a/crates/adaptive/tests/unit/acg_component_tests.rs b/crates/adaptive/tests/unit/acg_component_tests.rs index e20ebb87..cc3c30a9 100644 --- a/crates/adaptive/tests/unit/acg_component_tests.rs +++ b/crates/adaptive/tests/unit/acg_component_tests.rs @@ -97,6 +97,19 @@ fn sample_annotated_request(model: &str) -> AnnotatedLlmRequest { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), } } diff --git a/crates/adaptive/tests/unit/acg_learner_tests.rs b/crates/adaptive/tests/unit/acg_learner_tests.rs index 386094e0..84e61096 100644 --- a/crates/adaptive/tests/unit/acg_learner_tests.rs +++ b/crates/adaptive/tests/unit/acg_learner_tests.rs @@ -34,6 +34,19 @@ fn sample_request(model: &str, system: &str, user: &str) -> AnnotatedLlmRequest params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), } } diff --git a/crates/adaptive/tests/unit/acg_profile_tests.rs b/crates/adaptive/tests/unit/acg_profile_tests.rs index 763a653a..29bcab09 100644 --- a/crates/adaptive/tests/unit/acg_profile_tests.rs +++ b/crates/adaptive/tests/unit/acg_profile_tests.rs @@ -17,6 +17,19 @@ fn request(messages: Vec, tools: Option>) -> Annota params: None, tools, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), } } diff --git a/crates/adaptive/tests/unit/adaptive_hints_intercept_tests.rs b/crates/adaptive/tests/unit/adaptive_hints_intercept_tests.rs index 33598bdc..58a3d3b2 100644 --- a/crates/adaptive/tests/unit/adaptive_hints_intercept_tests.rs +++ b/crates/adaptive/tests/unit/adaptive_hints_intercept_tests.rs @@ -179,6 +179,19 @@ fn test_adaptive_hints_intercept_injects_prediction_hints_and_manual_override() params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let (request, returned_annotated) = req_fn( diff --git a/crates/adaptive/tests/unit/cache_diagnostics_tests.rs b/crates/adaptive/tests/unit/cache_diagnostics_tests.rs index 6cff2655..86aadef3 100644 --- a/crates/adaptive/tests/unit/cache_diagnostics_tests.rs +++ b/crates/adaptive/tests/unit/cache_diagnostics_tests.rs @@ -104,6 +104,19 @@ fn sample_request(model: Option<&str>) -> AnnotatedLlmRequest { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), } } diff --git a/crates/adaptive/tests/unit/runtime_tests.rs b/crates/adaptive/tests/unit/runtime_tests.rs index 33042cc9..0ddb6f52 100644 --- a/crates/adaptive/tests/unit/runtime_tests.rs +++ b/crates/adaptive/tests/unit/runtime_tests.rs @@ -51,6 +51,19 @@ fn sample_annotated_request(model: Option<&str>) -> AnnotatedLlmRequest { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), } } @@ -82,6 +95,19 @@ fn sample_layered_request(model: Option<&str>, language_guide: &str) -> Annotate params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), } } @@ -490,6 +516,19 @@ fn adaptive_acg_defaults_and_profile_key_behavior_stay_stable() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), }; let rust_bundle_variant_key = From 2a4dc728b5ab560df4e63e7df4d4988dbef93f72 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 20:04:52 -0700 Subject: [PATCH 10/16] tests(codec): fix anthropic auto tool_choice round-trip expectation Signed-off-by: Alex Fournier --- crates/core/tests/unit/codec/anthropic_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core/tests/unit/codec/anthropic_tests.rs b/crates/core/tests/unit/codec/anthropic_tests.rs index 3be11d0d..9355e066 100644 --- a/crates/core/tests/unit/codec/anthropic_tests.rs +++ b/crates/core/tests/unit/codec/anthropic_tests.rs @@ -746,7 +746,7 @@ fn test_encode_tool_choice_anthropic_format() { let annotated = codec.decode(&original).unwrap(); let encoded = codec.encode(&annotated, &original).unwrap(); let obj = encoded.content.as_object().unwrap(); - assert_eq!(obj.get("tool_choice"), Some(&json!({"type": "none"}))); + assert_eq!(obj.get("tool_choice"), Some(&json!({"type": "auto"}))); } #[test] From 1225bf84035f11b325645a94ffa49f8b777aba5f Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 20:08:13 -0700 Subject: [PATCH 11/16] ffi/tests: update AnnotatedLlmRequest literals for expanded request IR Signed-off-by: Alex Fournier --- .../ffi/tests/integration/callable_extra_tests.rs | 13 +++++++++++++ crates/ffi/tests/unit/callable_tests.rs | 13 +++++++++++++ crates/ffi/tests/unit/types_tests.rs | 13 +++++++++++++ 3 files changed, 39 insertions(+) diff --git a/crates/ffi/tests/integration/callable_extra_tests.rs b/crates/ffi/tests/integration/callable_extra_tests.rs index 5a916133..7541b81e 100644 --- a/crates/ffi/tests/integration/callable_extra_tests.rs +++ b/crates/ffi/tests/integration/callable_extra_tests.rs @@ -264,6 +264,19 @@ fn test_callable_extra_request_intercept_and_codec_paths() { params: None, tools: Some(vec![]), tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; diff --git a/crates/ffi/tests/unit/callable_tests.rs b/crates/ffi/tests/unit/callable_tests.rs index 471cd1b4..e6747fa4 100644 --- a/crates/ffi/tests/unit/callable_tests.rs +++ b/crates/ffi/tests/unit/callable_tests.rs @@ -323,6 +323,19 @@ fn test_wrap_llm_request_intercept_with_annotated_input() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::from_iter([("annotated".into(), json!(true))]), }; let (intercepted, annotated_out) = diff --git a/crates/ffi/tests/unit/types_tests.rs b/crates/ffi/tests/unit/types_tests.rs index 200a2368..dd404a73 100644 --- a/crates/ffi/tests/unit/types_tests.rs +++ b/crates/ffi/tests/unit/types_tests.rs @@ -521,6 +521,19 @@ fn test_annotated_event_accessors_and_codec_handles() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::from_iter([("provider".into(), json!("ffi"))]), }; let llm_start = make_scope_event(ScopeEventFixture { From 812ffa2fc3b28e64ed6f9f0026c61c8e36a4e10e Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 20:13:16 -0700 Subject: [PATCH 12/16] wasm/tests: update AnnotatedLlmRequest literals for expanded request IR Signed-off-by: Alex Fournier --- crates/wasm/tests/coverage/callable_tests.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/crates/wasm/tests/coverage/callable_tests.rs b/crates/wasm/tests/coverage/callable_tests.rs index 08daaee7..29757467 100644 --- a/crates/wasm/tests/coverage/callable_tests.rs +++ b/crates/wasm/tests/coverage/callable_tests.rs @@ -147,6 +147,19 @@ async fn native_intercept_and_codec_fallbacks_are_callable() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; assert!(codec.encode(&annotated, &request).is_err()); From e06ecfcc82e769a7ccb61bb5e7c78c26cfb7eae5 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 20:14:43 -0700 Subject: [PATCH 13/16] python: update AnnotatedLlmRequest constructors for expanded request IR Signed-off-by: Alex Fournier --- crates/python/src/py_types/codecs.rs | 13 ++++++++++ .../tests/coverage/py_types_coverage_tests.rs | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/crates/python/src/py_types/codecs.rs b/crates/python/src/py_types/codecs.rs index 3652de5e..31beed94 100644 --- a/crates/python/src/py_types/codecs.rs +++ b/crates/python/src/py_types/codecs.rs @@ -107,6 +107,19 @@ impl PyAnnotatedLLMRequest { params: gen_params, tools: tool_defs, tool_choice: tc, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: extra_map, }, }) diff --git a/crates/python/tests/coverage/py_types_coverage_tests.rs b/crates/python/tests/coverage/py_types_coverage_tests.rs index d0441e1b..7347c40e 100644 --- a/crates/python/tests/coverage/py_types_coverage_tests.rs +++ b/crates/python/tests/coverage/py_types_coverage_tests.rs @@ -586,6 +586,19 @@ fn test_stream_request_event_and_handle_wrappers_cover_remaining_methods() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }; let annotated_response = AnnotatedLLMResponse { @@ -1342,6 +1355,19 @@ fn test_forced_serialization_error_hooks_cover_unreachable_wrappers() { }, }]), tool_choice: Some(nemo_flow::codec::request::ToolChoice::Auto), + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: serde_json::Map::new(), }, }; From f6e79c8529098d822dd6c005a56f2a79cb8922d3 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Sun, 10 May 2026 20:26:46 -0700 Subject: [PATCH 14/16] style(codec): apply rustfmt after pre-commit Signed-off-by: Alex Fournier --- crates/core/src/codec/openai_responses.rs | 5 ++-- .../unit/codec/openai_responses_tests.rs | 24 +++++++++++++------ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/crates/core/src/codec/openai_responses.rs b/crates/core/src/codec/openai_responses.rs index aee7c358..19073154 100644 --- a/crates/core/src/codec/openai_responses.rs +++ b/crates/core/src/codec/openai_responses.rs @@ -22,8 +22,8 @@ use crate::error::{FlowError, Result}; use crate::json::Json; use super::request::{ - AnnotatedLlmRequest, GenerationParams, Message, MessageContent, ToolChoice, - ToolChoiceFunction, ToolChoiceFunctionName, ToolDefinition, + AnnotatedLlmRequest, GenerationParams, Message, MessageContent, ToolChoice, ToolChoiceFunction, + ToolChoiceFunctionName, ToolDefinition, }; use super::response::{ AnnotatedLlmResponse, ApiSpecificResponse, FinishReason, ResponseToolCall, Usage, @@ -302,7 +302,6 @@ fn decode_openai_or_anthropic_parallel_tool_calls( .map(|disabled| !disabled) } - // --------------------------------------------------------------------------- // LlmResponseCodec implementation // --------------------------------------------------------------------------- diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index 0a585053..f302851e 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -508,7 +508,9 @@ fn test_decode_request_input_array_preserves_unparsed_items_in_extra() { // strict-first behavior: no partial message extraction on mixed arrays assert!(annotated.messages.is_empty()); assert_eq!( - annotated.extra.get("_openai_responses_unparsed_input_items"), + annotated + .extra + .get("_openai_responses_unparsed_input_items"), Some(&json!([ { "role": "user", "content": "hello" }, { "type": "function_call_output", "call_id": "call_1", "output": "ok" } @@ -536,10 +538,12 @@ fn test_decode_request_litellm_reasoning_input_item_preserved_and_controls_extra let annotated = codec.decode(&request).unwrap(); // strict-first parse: mixed input array preserved whole in extra assert!(annotated.messages.is_empty()); - assert!(annotated - .extra - .get("_openai_responses_unparsed_input_items") - .is_some()); + assert!( + annotated + .extra + .get("_openai_responses_unparsed_input_items") + .is_some() + ); // stable controls still extracted assert_eq!(annotated.store, Some(true)); assert_eq!(annotated.parallel_tool_calls, Some(true)); @@ -561,13 +565,19 @@ fn test_decode_request_sglang_extensions_preserved_in_extra() { assert_eq!(annotated.reasoning, Some(json!({"effort":"low"}))); assert_eq!(annotated.tool_choice, Some(ToolChoice::None)); // sglang-specific extensions retained losslessly - assert_eq!(annotated.extra.get("request_id"), Some(&json!("resp_custom_1"))); + assert_eq!( + annotated.extra.get("request_id"), + Some(&json!("resp_custom_1")) + ); assert_eq!(annotated.extra.get("priority"), Some(&json!(3))); assert_eq!(annotated.extra.get("extra_key"), Some(&json!("tenant-a"))); assert_eq!(annotated.extra.get("cache_salt"), Some(&json!("salt-123"))); assert_eq!(annotated.extra.get("top_k"), Some(&json!(40))); assert_eq!(annotated.extra.get("min_p"), Some(&json!(0.05))); - assert_eq!(annotated.extra.get("repetition_penalty"), Some(&json!(1.02))); + assert_eq!( + annotated.extra.get("repetition_penalty"), + Some(&json!(1.02)) + ); } // =================================================================== From c9ea854e21790e604c3bef7cd2cc1588333ed442 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Tue, 12 May 2026 14:29:15 -0700 Subject: [PATCH 15/16] fix(codec): preserve typed usage details and raw responses input Signed-off-by: Alex Fournier --- crates/core/src/codec/anthropic.rs | 2 +- crates/core/src/codec/openai_responses.rs | 70 +++++++++++++++++------ 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/crates/core/src/codec/anthropic.rs b/crates/core/src/codec/anthropic.rs index 175b7d94..68795d45 100644 --- a/crates/core/src/codec/anthropic.rs +++ b/crates/core/src/codec/anthropic.rs @@ -370,7 +370,7 @@ impl LlmResponseCodec for AnthropicMessagesCodec { let api_specific = Some(ApiSpecificResponse::AnthropicMessages { object_type: raw.object_type, role: raw.role, - stop_reason: raw.stop_reason.clone(), + stop_reason: raw.stop_reason, stop_sequence: raw.stop_sequence, service_tier: raw.service_tier, container: raw.container, diff --git a/crates/core/src/codec/openai_responses.rs b/crates/core/src/codec/openai_responses.rs index 19073154..b6534ae7 100644 --- a/crates/core/src/codec/openai_responses.rs +++ b/crates/core/src/codec/openai_responses.rs @@ -63,8 +63,22 @@ struct RawResponsesUsage { input_tokens: Option, output_tokens: Option, total_tokens: Option, - input_tokens_details: Option, - output_tokens_details: Option, + input_tokens_details: Option, + output_tokens_details: Option, +} + +#[derive(Deserialize, Clone)] +struct RawInputTokensDetails { + cached_tokens: Option, + #[serde(flatten)] + extra: serde_json::Map, +} + +#[derive(Deserialize, Clone)] +struct RawOutputTokensDetails { + reasoning_tokens: Option, + #[serde(flatten)] + extra: serde_json::Map, } // --------------------------------------------------------------------------- @@ -100,10 +114,22 @@ fn parse_arguments(arguments: &str) -> Json { serde_json::from_str(arguments).unwrap_or_else(|_| Json::String(arguments.to_string())) } -fn cached_tokens_from_details(details: Option<&Json>) -> Option { - details - .and_then(|d| d.get("cached_tokens")) - .and_then(|v| v.as_u64()) +fn input_tokens_details_to_json(details: &RawInputTokensDetails) -> Json { + let mut obj = serde_json::Map::new(); + if let Some(cached_tokens) = details.cached_tokens { + obj.insert("cached_tokens".into(), Json::from(cached_tokens)); + } + obj.extend(details.extra.clone()); + Json::Object(obj) +} + +fn output_tokens_details_to_json(details: &RawOutputTokensDetails) -> Json { + let mut obj = serde_json::Map::new(); + if let Some(reasoning_tokens) = details.reasoning_tokens { + obj.insert("reasoning_tokens".into(), Json::from(reasoning_tokens)); + } + obj.extend(details.extra.clone()); + Json::Object(obj) } /// Keys that are modeled in [`AnnotatedLlmRequest`] and should NOT go into `extra`. @@ -320,21 +346,26 @@ impl LlmResponseCodec for OpenAIResponsesCodec { let finish_reason = map_responses_finish_reason(raw.status.as_deref(), raw.incomplete_details.as_ref()); - let input_tokens_details = raw - .usage - .as_ref() - .and_then(|u| u.input_tokens_details.clone()); - let output_tokens_details = raw - .usage - .as_ref() - .and_then(|u| u.output_tokens_details.clone()); + let input_tokens_details = raw.usage.as_ref().and_then(|u| { + u.input_tokens_details + .as_ref() + .map(input_tokens_details_to_json) + }); + let output_tokens_details = raw.usage.as_ref().and_then(|u| { + u.output_tokens_details + .as_ref() + .map(output_tokens_details_to_json) + }); // Map usage. let usage = raw.usage.map(|u| Usage { prompt_tokens: u.input_tokens, completion_tokens: u.output_tokens, total_tokens: u.total_tokens, - cache_read_tokens: cached_tokens_from_details(u.input_tokens_details.as_ref()), + cache_read_tokens: u + .input_tokens_details + .as_ref() + .and_then(|d| d.cached_tokens), cache_write_tokens: None, }); @@ -486,7 +517,11 @@ impl LlmCodec for OpenAIResponsesCodec { let (system_text, input_messages) = split_system_and_input_messages(&annotated.messages); set_or_remove_string(obj, "instructions", system_text); - insert_serialized(obj, "input", &input_messages, "input")?; + if let Some(raw_input_items) = annotated.extra.get(UNPARSED_INPUT_ITEMS_KEY) { + obj.insert("input".into(), raw_input_items.clone()); + } else { + insert_serialized(obj, "input", &input_messages, "input")?; + } // Overlay model if present. if let Some(ref model) = annotated.model { @@ -556,6 +591,9 @@ impl LlmCodec for OpenAIResponsesCodec { // Merge extra fields back. for (k, v) in &annotated.extra { + if k == UNPARSED_INPUT_ITEMS_KEY { + continue; + } obj.insert(k.clone(), v.clone()); } From 2e927e13cd90c40ce634bb2ea918aa6fcb5c7af7 Mon Sep 17 00:00:00 2001 From: Alex Fournier Date: Tue, 12 May 2026 19:13:52 -0700 Subject: [PATCH 16/16] fix(codec): address CodeRabbit review nits Signed-off-by: Alex Fournier --- crates/adaptive/src/acg_profile.rs | 10 +- .../integration/runtime_integration_tests.rs | 39 +++++ .../adaptive/tests/unit/acg_profile_tests.rs | 36 +++- crates/adaptive/tests/unit/runtime_tests.rs | 13 ++ crates/core/src/codec/openai_responses.rs | 1 + .../unit/codec/openai_responses_tests.rs | 12 ++ crates/python/src/py_types/codecs.rs | 155 ++++++++++++++++++ .../tests/coverage/py_types_coverage_tests.rs | 63 +++++++ 8 files changed, 325 insertions(+), 4 deletions(-) diff --git a/crates/adaptive/src/acg_profile.rs b/crates/adaptive/src/acg_profile.rs index f753af9c..89170270 100644 --- a/crates/adaptive/src/acg_profile.rs +++ b/crates/adaptive/src/acg_profile.rs @@ -187,9 +187,13 @@ fn extract_text(content: &MessageContent) -> String { MessageContent::Text(text) => text.clone(), MessageContent::Parts(parts) => parts .iter() - .filter_map(|part| match part { - ContentPart::Text { text } => Some(text.as_str()), - ContentPart::ImageUrl { .. } => None, + .map(|part| match part { + ContentPart::Text { text } => text.clone(), + ContentPart::ImageUrl { image_url } => format!( + "[image:{}:{}]", + image_url.detail.as_deref().unwrap_or("none"), + sha256_hex(&image_url.url) + ), }) .collect::>() .join("\n"), diff --git a/crates/adaptive/tests/integration/runtime_integration_tests.rs b/crates/adaptive/tests/integration/runtime_integration_tests.rs index 0cb3f7cb..22e59348 100644 --- a/crates/adaptive/tests/integration/runtime_integration_tests.rs +++ b/crates/adaptive/tests/integration/runtime_integration_tests.rs @@ -113,6 +113,19 @@ fn sample_growing_chat_requests(model: &str) -> Vec { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), }, AnnotatedLlmRequest { @@ -141,6 +154,19 @@ fn sample_growing_chat_requests(model: &str) -> Vec { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), }, AnnotatedLlmRequest { @@ -180,6 +206,19 @@ fn sample_growing_chat_requests(model: &str) -> Vec { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), }, ] diff --git a/crates/adaptive/tests/unit/acg_profile_tests.rs b/crates/adaptive/tests/unit/acg_profile_tests.rs index 29bcab09..60100bc3 100644 --- a/crates/adaptive/tests/unit/acg_profile_tests.rs +++ b/crates/adaptive/tests/unit/acg_profile_tests.rs @@ -4,7 +4,8 @@ //! Unit tests for acg profile in the NeMo Flow adaptive crate. use nemo_flow::codec::request::{ - AnnotatedLlmRequest, ContentPart, FunctionDefinition, Message, MessageContent, ToolDefinition, + AnnotatedLlmRequest, ContentPart, FunctionDefinition, Message, MessageContent, + OpenAiImageUrl, ToolDefinition, }; use serde_json::json; @@ -123,3 +124,36 @@ fn acg_profile_helpers_cover_none_paths_and_short_hash() { assert_eq!(short_hash("short"), "short"); assert_eq!(message_role_tag(&too_short.messages[0]), "user"); } + +#[test] +fn acg_profile_image_parts_contribute_stable_fingerprint_signal() { + let with_image_a = request( + vec![Message::User { + content: MessageContent::Parts(vec![ContentPart::ImageUrl { + image_url: OpenAiImageUrl { + url: "https://example.com/a.png".to_string(), + detail: Some("high".to_string()), + }, + }]), + name: None, + }], + None, + ); + let with_image_b = request( + vec![Message::User { + content: MessageContent::Parts(vec![ContentPart::ImageUrl { + image_url: OpenAiImageUrl { + url: "https://example.com/b.png".to_string(), + detail: Some("high".to_string()), + }, + }]), + name: None, + }], + None, + ); + + assert_ne!( + learning_seed_fingerprint(&with_image_a), + learning_seed_fingerprint(&with_image_b) + ); +} diff --git a/crates/adaptive/tests/unit/runtime_tests.rs b/crates/adaptive/tests/unit/runtime_tests.rs index 0ddb6f52..a25ce2ce 100644 --- a/crates/adaptive/tests/unit/runtime_tests.rs +++ b/crates/adaptive/tests/unit/runtime_tests.rs @@ -452,6 +452,19 @@ fn adaptive_acg_defaults_and_profile_key_behavior_stay_stable() { params: None, tools: None, tool_choice: None, + store: None, + previous_response_id: None, + truncation: None, + reasoning: None, + include: None, + user: None, + metadata: None, + service_tier: None, + parallel_tool_calls: None, + max_output_tokens: None, + max_tool_calls: None, + top_logprobs: None, + stream: None, extra: Map::new(), }; assert_eq!( diff --git a/crates/core/src/codec/openai_responses.rs b/crates/core/src/codec/openai_responses.rs index b6534ae7..be82e8db 100644 --- a/crates/core/src/codec/openai_responses.rs +++ b/crates/core/src/codec/openai_responses.rs @@ -304,6 +304,7 @@ fn decode_openai_or_anthropic_tool_choice(value: &Json) -> Option { match obj.get("type").and_then(|v| v.as_str()) { Some("auto") => Some(ToolChoice::Auto), Some("any") => Some(ToolChoice::Required), + Some("none") => Some(ToolChoice::None), Some("tool") => { let name = obj.get("name").and_then(|v| v.as_str())?.to_string(); Some(ToolChoice::Specific(ToolChoiceFunction { diff --git a/crates/core/tests/unit/codec/openai_responses_tests.rs b/crates/core/tests/unit/codec/openai_responses_tests.rs index f302851e..5e413362 100644 --- a/crates/core/tests/unit/codec/openai_responses_tests.rs +++ b/crates/core/tests/unit/codec/openai_responses_tests.rs @@ -529,6 +529,18 @@ fn test_decode_request_accepts_anthropic_hint_tool_choice() { assert_eq!(annotated.parallel_tool_calls, Some(false)); } +#[test] +fn test_decode_request_accepts_anthropic_none_tool_choice_object() { + let codec = OpenAIResponsesCodec; + let request = make_request(json!({ + "model": "gpt-4o", + "input": "hello", + "tool_choice": {"type": "none"} + })); + let annotated = codec.decode(&request).unwrap(); + assert_eq!(annotated.tool_choice, Some(ToolChoice::None)); +} + #[test] fn test_decode_request_litellm_reasoning_input_item_preserved_and_controls_extracted() { let codec = OpenAIResponsesCodec; diff --git a/crates/python/src/py_types/codecs.rs b/crates/python/src/py_types/codecs.rs index 31beed94..4c2c2ebb 100644 --- a/crates/python/src/py_types/codecs.rs +++ b/crates/python/src/py_types/codecs.rs @@ -48,6 +48,31 @@ pub struct PyAnnotatedLLMRequest { pub inner: AnnotatedLLMRequest, } +fn optional_json_getter( + py: Python<'_>, + value: &Option, +) -> PyResult> { + match value { + Some(value) => json_to_py(py, value), + None => Ok(py.None()), + } +} + +fn optional_json_setter( + target: &mut Option, + value: &Bound<'_, PyAny>, + field: &str, +) -> PyResult<()> { + if value.is_none() { + *target = None; + } else { + *target = Some(pythonize::depythonize(value).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("invalid {field}: {e}")) + })?); + } + Ok(()) +} + #[pymethods] impl PyAnnotatedLLMRequest { /// Create a new AnnotatedLLMRequest. @@ -240,6 +265,136 @@ impl PyAnnotatedLLMRequest { Ok(()) } + #[getter] + pub(crate) fn store(&self) -> Option { + self.inner.store + } + + #[setter] + pub(crate) fn set_store(&mut self, value: Option) { + self.inner.store = value; + } + + #[getter] + pub(crate) fn previous_response_id(&self) -> Option { + self.inner.previous_response_id.clone() + } + + #[setter] + pub(crate) fn set_previous_response_id(&mut self, value: Option) { + self.inner.previous_response_id = value; + } + + #[getter] + pub(crate) fn truncation(&self, py: Python<'_>) -> PyResult> { + optional_json_getter(py, &self.inner.truncation) + } + + #[setter] + pub(crate) fn set_truncation(&mut self, value: &Bound<'_, PyAny>) -> PyResult<()> { + optional_json_setter(&mut self.inner.truncation, value, "truncation") + } + + #[getter] + pub(crate) fn reasoning(&self, py: Python<'_>) -> PyResult> { + optional_json_getter(py, &self.inner.reasoning) + } + + #[setter] + pub(crate) fn set_reasoning(&mut self, value: &Bound<'_, PyAny>) -> PyResult<()> { + optional_json_setter(&mut self.inner.reasoning, value, "reasoning") + } + + #[getter] + pub(crate) fn include(&self, py: Python<'_>) -> PyResult> { + optional_json_getter(py, &self.inner.include) + } + + #[setter] + pub(crate) fn set_include(&mut self, value: &Bound<'_, PyAny>) -> PyResult<()> { + optional_json_setter(&mut self.inner.include, value, "include") + } + + #[getter] + pub(crate) fn user(&self) -> Option { + self.inner.user.clone() + } + + #[setter] + pub(crate) fn set_user(&mut self, value: Option) { + self.inner.user = value; + } + + #[getter] + pub(crate) fn metadata(&self, py: Python<'_>) -> PyResult> { + optional_json_getter(py, &self.inner.metadata) + } + + #[setter] + pub(crate) fn set_metadata(&mut self, value: &Bound<'_, PyAny>) -> PyResult<()> { + optional_json_setter(&mut self.inner.metadata, value, "metadata") + } + + #[getter] + pub(crate) fn service_tier(&self) -> Option { + self.inner.service_tier.clone() + } + + #[setter] + pub(crate) fn set_service_tier(&mut self, value: Option) { + self.inner.service_tier = value; + } + + #[getter] + pub(crate) fn parallel_tool_calls(&self) -> Option { + self.inner.parallel_tool_calls + } + + #[setter] + pub(crate) fn set_parallel_tool_calls(&mut self, value: Option) { + self.inner.parallel_tool_calls = value; + } + + #[getter] + pub(crate) fn max_output_tokens(&self) -> Option { + self.inner.max_output_tokens + } + + #[setter] + pub(crate) fn set_max_output_tokens(&mut self, value: Option) { + self.inner.max_output_tokens = value; + } + + #[getter] + pub(crate) fn max_tool_calls(&self) -> Option { + self.inner.max_tool_calls + } + + #[setter] + pub(crate) fn set_max_tool_calls(&mut self, value: Option) { + self.inner.max_tool_calls = value; + } + + #[getter] + pub(crate) fn top_logprobs(&self) -> Option { + self.inner.top_logprobs + } + + #[setter] + pub(crate) fn set_top_logprobs(&mut self, value: Option) { + self.inner.top_logprobs = value; + } + + #[getter] + pub(crate) fn stream(&self) -> Option { + self.inner.stream + } + + #[setter] + pub(crate) fn set_stream(&mut self, value: Option) { + self.inner.stream = value; + } + #[getter] pub(crate) fn extra(&self, py: Python<'_>) -> PyResult> { let value = serde_json::Value::Object(self.inner.extra.clone()); diff --git a/crates/python/tests/coverage/py_types_coverage_tests.rs b/crates/python/tests/coverage/py_types_coverage_tests.rs index 7347c40e..6a8da6a1 100644 --- a/crates/python/tests/coverage/py_types_coverage_tests.rs +++ b/crates/python/tests/coverage/py_types_coverage_tests.rs @@ -1068,6 +1068,19 @@ fn test_annotated_llm_types_and_builtin_codecs_cover_mutators_and_codecs() { py_to_json(annotated.extra(py).unwrap().bind(py)).unwrap()["provider"], json!("test") ); + assert_eq!(annotated.store(), None); + assert_eq!(annotated.previous_response_id(), None); + assert!(annotated.truncation(py).unwrap().bind(py).is_none()); + assert!(annotated.reasoning(py).unwrap().bind(py).is_none()); + assert!(annotated.include(py).unwrap().bind(py).is_none()); + assert_eq!(annotated.user(), None); + assert!(annotated.metadata(py).unwrap().bind(py).is_none()); + assert_eq!(annotated.service_tier(), None); + assert_eq!(annotated.parallel_tool_calls(), None); + assert_eq!(annotated.max_output_tokens(), None); + assert_eq!(annotated.max_tool_calls(), None); + assert_eq!(annotated.top_logprobs(), None); + assert_eq!(annotated.stream(), None); let updated_messages = json_to_py(py, &json!([{"role": "user", "content": "updated"}])).unwrap(); @@ -1086,10 +1099,52 @@ fn test_annotated_llm_types_and_builtin_codecs_cover_mutators_and_codecs() { annotated.set_tools(updated_tools.bind(py)).unwrap(); let updated_choice = json_to_py(py, &json!("auto")).unwrap(); annotated.set_tool_choice(updated_choice.bind(py)).unwrap(); + annotated.set_store(Some(true)); + annotated.set_previous_response_id(Some("resp_1".into())); + let updated_truncation = json_to_py(py, &json!("disabled")).unwrap(); + annotated.set_truncation(updated_truncation.bind(py)).unwrap(); + let updated_reasoning = json_to_py(py, &json!({"effort": "low"})).unwrap(); + annotated.set_reasoning(updated_reasoning.bind(py)).unwrap(); + let updated_include = json_to_py(py, &json!(["reasoning.encrypted_content"])).unwrap(); + annotated.set_include(updated_include.bind(py)).unwrap(); + annotated.set_user(Some("user-1".into())); + let updated_metadata = json_to_py(py, &json!({"tenant": "qa"})).unwrap(); + annotated.set_metadata(updated_metadata.bind(py)).unwrap(); + annotated.set_service_tier(Some("default".into())); + annotated.set_parallel_tool_calls(Some(false)); + annotated.set_max_output_tokens(Some(128)); + annotated.set_max_tool_calls(Some(3)); + annotated.set_top_logprobs(Some(2)); + annotated.set_stream(Some(true)); let updated_extra = json_to_py(py, &json!({"updated": true})).unwrap(); annotated.set_extra(updated_extra.bind(py)).unwrap(); assert_eq!(annotated.model(), Some("updated-model".into())); assert_eq!(annotated.last_user_message(), Some("updated".into())); + assert_eq!(annotated.store(), Some(true)); + assert_eq!(annotated.previous_response_id(), Some("resp_1".into())); + assert_eq!( + py_to_json(annotated.truncation(py).unwrap().bind(py)).unwrap(), + json!("disabled") + ); + assert_eq!( + py_to_json(annotated.reasoning(py).unwrap().bind(py)).unwrap(), + json!({"effort": "low"}) + ); + assert_eq!( + py_to_json(annotated.include(py).unwrap().bind(py)).unwrap(), + json!(["reasoning.encrypted_content"]) + ); + assert_eq!(annotated.user(), Some("user-1".into())); + assert_eq!( + py_to_json(annotated.metadata(py).unwrap().bind(py)).unwrap(), + json!({"tenant": "qa"}) + ); + assert_eq!(annotated.service_tier(), Some("default".into())); + assert_eq!(annotated.parallel_tool_calls(), Some(false)); + assert_eq!(annotated.max_output_tokens(), Some(128)); + assert_eq!(annotated.max_tool_calls(), Some(3)); + assert_eq!(annotated.top_logprobs(), Some(2)); + assert_eq!(annotated.stream(), Some(true)); assert_eq!( py_to_json(annotated.extra(py).unwrap().bind(py)).unwrap(), json!({"updated": true}) @@ -1098,9 +1153,17 @@ fn test_annotated_llm_types_and_builtin_codecs_cover_mutators_and_codecs() { annotated.set_params(py.None().bind(py)).unwrap(); annotated.set_tools(py.None().bind(py)).unwrap(); annotated.set_tool_choice(py.None().bind(py)).unwrap(); + annotated.set_truncation(py.None().bind(py)).unwrap(); + annotated.set_reasoning(py.None().bind(py)).unwrap(); + annotated.set_include(py.None().bind(py)).unwrap(); + annotated.set_metadata(py.None().bind(py)).unwrap(); assert!(annotated.params(py).unwrap().bind(py).is_none()); assert!(annotated.tools(py).unwrap().bind(py).is_none()); assert!(annotated.tool_choice(py).unwrap().bind(py).is_none()); + assert!(annotated.truncation(py).unwrap().bind(py).is_none()); + assert!(annotated.reasoning(py).unwrap().bind(py).is_none()); + assert!(annotated.include(py).unwrap().bind(py).is_none()); + assert!(annotated.metadata(py).unwrap().bind(py).is_none()); let bad_messages = json_to_py(py, &json!([{"content": "missing role"}])).unwrap(); let err = PyAnnotatedLLMRequest::new(bad_messages.bind(py), None, None, None, None, None)