From 0aa9a488671dd42978d47973fcfaef1287c6562c Mon Sep 17 00:00:00 2001
From: mkulakow
Date: Thu, 30 Apr 2026 14:28:53 +0200
Subject: [PATCH 1/3] Support functions in Responses API

---
 src/llm/apis/openai_responses.cpp       | 616 ++++++++++++++++++++----
 src/llm/py_jinja_template_processor.cpp |   2 +-
 src/llm/servable.cpp                    |   5 +-
 3 files changed, 525 insertions(+), 98 deletions(-)

diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index e5d63985e6..6c7f44557f 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -57,6 +57,498 @@ static std::string joinServerSideEvents(const std::vector<std::string>& events)
     return ss.str();
 }
 
+// Convert the Responses API tools array (flat function format) into the chat/completions
+// nested format ({type:"function", function:{name, description, parameters, ...}}) in place
+// on the request document. The chat template (e.g. gpt-oss) and the chat/completions tools
+// schema both expect the nested shape; doing this once up front lets every downstream
+// consumer (chat history path, processedJson builder for Python Jinja, parseToolsToJsonContainer)
+// share the same representation. Tools already in nested form, or non-function tools, are
+// left untouched.
+static void convertResponsesToolsInPlace(rapidjson::Value& toolsArray, rapidjson::Document::AllocatorType& alloc) {
+    if (!toolsArray.IsArray()) {
+        return;
+    }
+    for (auto& tool : toolsArray.GetArray()) {
+        if (!tool.IsObject()) {
+            continue;
+        }
+        auto toolObj = tool.GetObject();
+        if (toolObj.FindMember("function") != toolObj.MemberEnd()) {
+            continue;  // Already in nested chat/completions format.
+        }
+        auto typeIt = toolObj.FindMember("type");
+        const std::string toolType = (typeIt != toolObj.MemberEnd() && typeIt->value.IsString())
+                                         ? typeIt->value.GetString()
+                                         : "";
+        if (toolType != "function") {
+            continue;  // Preserve non-function tools as-is.
+        }
+        rapidjson::Value funcObj(rapidjson::kObjectType);
+        for (auto memberIt = toolObj.MemberBegin(); memberIt != toolObj.MemberEnd();) {
+            if (!memberIt->name.IsString()) {
+                ++memberIt;
+                continue;
+            }
+            const std::string fieldName = memberIt->name.GetString();
+            if (fieldName == "type" || fieldName == "response") {
+                ++memberIt;
+                continue;
+            }
+            rapidjson::Value keyCopy(memberIt->name, alloc);
+            rapidjson::Value valCopy(memberIt->value, alloc);
+            funcObj.AddMember(keyCopy, valCopy, alloc);
+            memberIt = tool.EraseMember(memberIt);
+        }
+        tool.AddMember("function", funcObj, alloc);
+    }
+}
+
+// Pull the reasoning text out of a Responses API "reasoning" item.
+// Prefers the newer content[].text shape over the legacy summary[].text shape.
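+// For example, both of these items yield the reasoning text "think first":
+//   {"type": "reasoning", "content": [{"type": "reasoning_text", "text": "think first"}]}
+//   {"type": "reasoning", "summary": [{"type": "summary_text", "text": "think first"}]}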
+static std::string extractReasoningText(const rapidjson::Value::ConstObject& itemObj) {
+    auto contentIt = itemObj.FindMember("content");
+    if (contentIt != itemObj.MemberEnd() && contentIt->value.IsArray()) {
+        for (const auto& ci : contentIt->value.GetArray()) {
+            if (!ci.IsObject())
+                continue;
+            auto textIt = ci.GetObject().FindMember("text");
+            if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) {
+                return textIt->value.GetString();
+            }
+        }
+    }
+    auto summaryIt = itemObj.FindMember("summary");
+    if (summaryIt != itemObj.MemberEnd() && summaryIt->value.IsArray()) {
+        for (const auto& si : summaryIt->value.GetArray()) {
+            if (!si.IsObject())
+                continue;
+            auto textIt = si.GetObject().FindMember("text");
+            if (textIt != si.GetObject().MemberEnd() && textIt->value.IsString()) {
+                return textIt->value.GetString();
+            }
+        }
+    }
+    return "";
+}
+
+// Extract a flat text string from a Responses API content field which may be
+// either a string or an array of {type,text} objects.
+static std::string extractTextContent(const rapidjson::Value& contentVal) {
+    if (contentVal.IsString()) {
+        return contentVal.GetString();
+    }
+    if (!contentVal.IsArray()) {
+        return "";
+    }
+    for (const auto& ci : contentVal.GetArray()) {
+        if (!ci.IsObject())
+            continue;
+        auto ctTypeIt = ci.GetObject().FindMember("type");
+        if (ctTypeIt == ci.GetObject().MemberEnd() || !ctTypeIt->value.IsString())
+            continue;
+        const std::string ctType = ctTypeIt->value.GetString();
+        if (ctType == "input_text" || ctType == "output_text") {
+            auto textIt = ci.GetObject().FindMember("text");
+            if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) {
+                return textIt->value.GetString();
+            }
+        }
+    }
+    return "";
+}
+
+// Read the three string fields (id, name, arguments) out of a function_call item.
+struct FunctionCallFields {
+    std::string id;
+    std::string name;
+    std::string arguments;
+};
+static FunctionCallFields readFunctionCallFields(const rapidjson::Value& item) {
+    FunctionCallFields out;
+    auto fcObj = item.GetObject();
+    auto idIt = fcObj.FindMember("id");
+    if (idIt != fcObj.MemberEnd() && idIt->value.IsString())
+        out.id = idIt->value.GetString();
+    auto nameIt = fcObj.FindMember("name");
+    if (nameIt != fcObj.MemberEnd() && nameIt->value.IsString())
+        out.name = nameIt->value.GetString();
+    auto argsIt = fcObj.FindMember("arguments");
+    if (argsIt != fcObj.MemberEnd() && argsIt->value.IsString())
+        out.arguments = argsIt->value.GetString();
+    return out;
+}
+
+// Classification of a Responses API input item used to dispatch to per-type
+// handlers in the builders below.
+enum class ResponsesInputItemKind {
+    REASONING,
+    FUNCTION_CALL,
+    FUNCTION_CALL_OUTPUT,
+    ROLE_ITEM,
+    MISSING_ROLE,
+};
+
+static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson::Value& item) {
+    if (!item.IsObject()) {
+        return absl::InvalidArgumentError("input array items must be objects");
+    }
+    auto itemObj = item.GetObject();
+    auto itemTypeIt = itemObj.FindMember("type");
+    const std::string itemType = (itemTypeIt != itemObj.MemberEnd() && itemTypeIt->value.IsString())
+                                     ? 
itemTypeIt->value.GetString()
+                                     : "";
+    if (itemType == "reasoning")
+        return ResponsesInputItemKind::REASONING;
+    if (itemType == "function_call")
+        return ResponsesInputItemKind::FUNCTION_CALL;
+    if (itemType == "function_call_output")
+        return ResponsesInputItemKind::FUNCTION_CALL_OUTPUT;
+    auto roleIt = itemObj.FindMember("role");
+    if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString())
+        return ResponsesInputItemKind::MISSING_ROLE;
+    return ResponsesInputItemKind::ROLE_ITEM;
+}
+
+// Builds chat/completions-shaped messages from a Responses API input array.
+//
+// Reasoning items are buffered and attached as `reasoning_content` on the next
+// assistant message (matching the gpt-oss template's expected field).
+//
+// Pending function_call items are merged into the next assistant message as a
+// chat/completions-shaped tool_calls[] array. Without this, the assistant turn
+// would have no tool_calls field, the chat template would treat it as a final
+// answer, and a subsequent tool message would fail (e.g. gpt-oss raises
+// "Message has tool role, but there was no previous assistant message with a
+// tool call!").
+//
+// The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a
+// rapidjson messages array) is provided by the Sink template parameter, which
+// must implement:
+//     absl::Status extractContent(itemObj, index, std::string& outText);
+//     void emitToolMessage(callId, output);
+//     void emitMessage(role, contentText, reasoning);  // reasoning empty -> skip
+//     void emitAssistantWithToolCalls(contentText, reasoning, toolCalls);
+//     absl::Status onMissingRole(itemObj);
+template <typename Sink>
+class ResponsesInputBuilder {
+public:
+    explicit ResponsesInputBuilder(Sink& sink) :
+        sink(sink) {}
+
+    absl::Status build(const rapidjson::Value& inputArray) {
+        if (!inputArray.IsArray()) {
+            return absl::InvalidArgumentError("input is not an array");
+        }
+        for (rapidjson::SizeType i = 0; i < inputArray.GetArray().Size(); ++i) {
+            const auto& item = inputArray.GetArray()[i];
+            auto kind = classifyInputItem(item);
+            if (!kind.ok())
+                return kind.status();
+            absl::Status status;
+            switch (kind.value()) {
+            case ResponsesInputItemKind::REASONING:
+                status = onReasoningItem(item.GetObject());
+                break;
+            case ResponsesInputItemKind::FUNCTION_CALL:
+                pendingFunctionCalls.push_back(&item);
+                break;
+            case ResponsesInputItemKind::FUNCTION_CALL_OUTPUT:
+                status = onFunctionCallOutputItem(item.GetObject());
+                break;
+            case ResponsesInputItemKind::ROLE_ITEM:
+                status = onRoleItem(item.GetObject(), i);
+                break;
+            case ResponsesInputItemKind::MISSING_ROLE:
+                status = sink.onMissingRole(item.GetObject());
+                break;
+            }
+            if (!status.ok())
+                return status;
+        }
+        // Flush any trailing buffered function_calls (e.g. input ends with a
+        // function_call item that has no corresponding output yet).
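+        // Such trailing calls are still emitted as an assistant turn carrying
+        // tool_calls, so they are not silently lost.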
+        flushPendingFunctionCalls("");
+        return absl::OkStatus();
+    }
+
+private:
+    absl::Status onReasoningItem(const rapidjson::Value::ConstObject& itemObj) {
+        std::string text = extractReasoningText(itemObj);
+        if (!text.empty()) {
+            if (!pendingReasoningContent.empty())
+                pendingReasoningContent += "\n";
+            pendingReasoningContent += text;
+        }
+        return absl::OkStatus();
+    }
+
+    absl::Status onFunctionCallOutputItem(const rapidjson::Value::ConstObject& itemObj) {
+        flushPendingFunctionCalls("");
+        std::string callId;
+        auto callIdIt = itemObj.FindMember("call_id");
+        if (callIdIt != itemObj.MemberEnd() && callIdIt->value.IsString())
+            callId = callIdIt->value.GetString();
+        std::string output;
+        auto outputIt = itemObj.FindMember("output");
+        if (outputIt != itemObj.MemberEnd() && outputIt->value.IsString())
+            output = outputIt->value.GetString();
+        sink.emitToolMessage(callId, output);
+        return absl::OkStatus();
+    }
+
+    absl::Status onRoleItem(const rapidjson::Value::ConstObject& itemObj, rapidjson::SizeType index) {
+        const std::string role = itemObj.FindMember("role")->value.GetString();
+        std::string contentText;
+        auto status = sink.extractContent(itemObj, index, contentText);
+        if (!status.ok())
+            return status;
+
+        // Assistant role with buffered function_calls: merge into one message
+        // (so the tool_calls field rides on the same assistant turn).
+        if (role == "assistant" && !pendingFunctionCalls.empty()) {
+            flushPendingFunctionCalls(contentText);
+            return absl::OkStatus();
+        }
+        // Non-assistant items must not absorb pending tool_calls; flush first.
+        // (flushPendingFunctionCalls also clears any orphan reasoning content.)
+        if (role != "assistant") {
+            flushPendingFunctionCalls("");
+        }
+
+        std::string reasoning;
+        if (role == "assistant" && !pendingReasoningContent.empty()) {
+            reasoning = std::move(pendingReasoningContent);
+            pendingReasoningContent.clear();
+        }
+        sink.emitMessage(role, contentText, reasoning);
+        return absl::OkStatus();
+    }
+
+    void flushPendingFunctionCalls(const std::string& assistantText) {
+        if (pendingFunctionCalls.empty()) {
+            pendingReasoningContent.clear();
+            return;
+        }
+        std::string reasoning = std::move(pendingReasoningContent);
+        pendingReasoningContent.clear();
+        sink.emitAssistantWithToolCalls(assistantText, reasoning, pendingFunctionCalls);
+        pendingFunctionCalls.clear();
+    }
+
+    Sink& sink;
+    std::vector<const rapidjson::Value*> pendingFunctionCalls;
+    std::string pendingReasoningContent;
+};
+
+// Sink that appends to ov::genai::ChatHistory (used when Python is disabled
+// or as the fallback C++ chat-history path). Owns a scratch rapidjson document
+// whose allocator backs the tool_calls Values until they are deep-copied into
+// a JsonContainer.
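+//
+// For example, the input items
+//   {"type": "function_call", "id": "call_1", "call_id": "call_1",
+//    "name": "get_weather", "arguments": "{...}"}
+//   {"type": "function_call_output", "call_id": "call_1", "output": "{...}"}
+// become the chat-history rows
+//   {role: "assistant", content: "", tool_calls: [{id: "call_1", type: "function", function: {...}}]}
+//   {role: "tool", tool_call_id: "call_1", content: "{...}"}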
+class ChatHistorySink {
+public:
+    ChatHistorySink(ov::genai::ChatHistory& chatHistory, ImageHistory& imageHistory,
+        const std::optional<std::string>& allowedLocalMediaPath,
+        const std::optional<std::vector<std::string>>& allowedMediaDomains) :
+        chatHistory(chatHistory),
+        imageHistory(imageHistory),
+        allowedLocalMediaPath(allowedLocalMediaPath),
+        allowedMediaDomains(allowedMediaDomains) {
+        scratchDoc.SetObject();
+    }
+
+    absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj,
+        rapidjson::SizeType index, std::string& outText) {
+        outText.clear();
+        auto contentIt = itemObj.FindMember("content");
+        if (contentIt == itemObj.MemberEnd())
+            return absl::OkStatus();
+        if (contentIt->value.IsString()) {
+            outText = contentIt->value.GetString();
+            return absl::OkStatus();
+        }
+        if (!contentIt->value.IsArray())
+            return absl::InvalidArgumentError("input item content must be a string or array");
+        for (const auto& contentItem : contentIt->value.GetArray()) {
+            if (!contentItem.IsObject())
+                return absl::InvalidArgumentError("input content items must be objects");
+            auto contentObj = contentItem.GetObject();
+            auto typeIt = contentObj.FindMember("type");
+            if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString())
+                return absl::InvalidArgumentError("input content item type is missing or invalid");
+            const std::string type = typeIt->value.GetString();
+            if (type == "input_text" || type == "output_text") {
+                auto textIt = contentObj.FindMember("text");
+                if (textIt == contentObj.MemberEnd() || !textIt->value.IsString())
+                    return absl::InvalidArgumentError(absl::StrCat(type, " requires a valid text field"));
+                // Last text-bearing item wins, matching pre-refactor behaviour.
+                outText = textIt->value.GetString();
+            } else if (type == "input_image") {
+                auto status = appendInputImage(contentObj, index);
+                if (!status.ok())
+                    return status;
+            } else {
+                // Skip unrecognised content item types for forward compatibility.
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping unsupported content type: {}", type);
+            }
+        }
+        return absl::OkStatus();
+    }
+
+    void emitToolMessage(const std::string& callId, const std::string& output) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "tool";
+        if (!callId.empty())
+            chatHistory.last()["tool_call_id"] = callId;
+        chatHistory.last()["content"] = output;
+    }
+
+    void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = role;
+        chatHistory.last()["content"] = contentText;
+        if (!reasoning.empty())
+            chatHistory.last()["reasoning_content"] = reasoning;
+    }
+
+    void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning,
+        const std::vector<const rapidjson::Value*>& toolCalls) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "assistant";
+        chatHistory.last()["content"] = contentText;
+        if (!reasoning.empty())
+            chatHistory.last()["reasoning_content"] = reasoning;
+        auto& alloc = scratchDoc.GetAllocator();
+        rapidjson::Value toolCallsArray(rapidjson::kArrayType);
+        buildToolCallsArray(toolCalls, toolCallsArray, alloc);
+        // rapidJsonValueToJsonContainer deep-copies, so scratchDoc can be reused.
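+        // (rapidjson's pool allocator does not free individual values, so this
+        // scratch memory is only reclaimed when the sink itself goes away.)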
+        chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray);
+    }
+
+    absl::Status onMissingRole(const rapidjson::Value::ConstObject&) {
+        return absl::InvalidArgumentError("input item role is missing or invalid");
+    }
+
+private:
+    absl::Status appendInputImage(const rapidjson::Value::ConstObject& contentObj, rapidjson::SizeType index) {
+        auto imageUrlIt = contentObj.FindMember("image_url");
+        if (imageUrlIt == contentObj.MemberEnd())
+            return absl::InvalidArgumentError("input_image requires image_url field");
+
+        std::string imageUrl;
+        if (imageUrlIt->value.IsString()) {
+            imageUrl = imageUrlIt->value.GetString();
+        } else if (imageUrlIt->value.IsObject()) {
+            auto imageUrlObj = imageUrlIt->value.GetObject();
+            auto urlIt = imageUrlObj.FindMember("url");
+            if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString())
+                return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid");
+            imageUrl = urlIt->value.GetString();
+        } else {
+            return absl::InvalidArgumentError("input_image.image_url must be a string or object");
+        }
+
+        auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains);
+        if (!tensorResult.ok())
+            return tensorResult.status();
+        imageHistory.push_back({index, tensorResult.value()});
+        return absl::OkStatus();
+    }
+
+    // Build a chat/completions tool_calls[] array into outArr using the given allocator.
+    static void buildToolCallsArray(const std::vector<const rapidjson::Value*>& toolCalls,
+        rapidjson::Value& outArr, rapidjson::Document::AllocatorType& alloc) {
+        for (const auto* fc : toolCalls) {
+            const FunctionCallFields fields = readFunctionCallFields(*fc);
+            rapidjson::Value funcObj(rapidjson::kObjectType);
+            funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc);
+            funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc);
+            rapidjson::Value tcObj(rapidjson::kObjectType);
+            tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc);
+            tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc);
+            tcObj.AddMember("function", funcObj, alloc);
+            outArr.PushBack(tcObj, alloc);
+        }
+    }
+
+    ov::genai::ChatHistory& chatHistory;
+    ImageHistory& imageHistory;
+    const std::optional<std::string>& allowedLocalMediaPath;
+    const std::optional<std::vector<std::string>>& allowedMediaDomains;
+    rapidjson::Document scratchDoc;
+};
+
+#if (PYTHON_DISABLE == 0)
+// Sink that appends to a rapidjson messages array, used to feed the Python
+// Jinja chat template path. Image content items are silently dropped (the
+// Python path receives only text).
+class ProcessedJsonSink {
+public:
+    ProcessedJsonSink(rapidjson::Value& messagesArray, rapidjson::Document::AllocatorType& alloc) :
+        messagesArray(messagesArray),
+        alloc(alloc) {}
+
+    absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj,
+        rapidjson::SizeType /*index*/, std::string& outText) {
+        auto contentIt = itemObj.FindMember("content");
+        outText = (contentIt != itemObj.MemberEnd()) ? 
extractTextContent(contentIt->value) : "";
+        return absl::OkStatus();
+    }
+
+    void emitToolMessage(const std::string& callId, const std::string& output) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("tool", alloc), alloc);
+        if (!callId.empty())
+            msgObj.AddMember("tool_call_id", rapidjson::Value(callId.c_str(), alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(output.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value(role.c_str(), alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc);
+        if (!reasoning.empty())
+            msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning,
+        const std::vector<const rapidjson::Value*>& toolCalls) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc);
+        if (!reasoning.empty())
+            msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        rapidjson::Value toolCallsArray(rapidjson::kArrayType);
+        for (const auto* fc : toolCalls) {
+            const FunctionCallFields fields = readFunctionCallFields(*fc);
+            rapidjson::Value funcObj(rapidjson::kObjectType);
+            funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc);
+            funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc);
+            rapidjson::Value tcObj(rapidjson::kObjectType);
+            tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc);
+            tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc);
+            tcObj.AddMember("function", funcObj, alloc);
+            toolCallsArray.PushBack(tcObj, alloc);
+        }
+        msgObj.AddMember("tool_calls", toolCallsArray, alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    absl::Status onMissingRole(const rapidjson::Value::ConstObject&) {
+        // Silently skip unknown items without a role in the processed JSON path.
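+        // ChatHistorySink rejects these instead; parseInput has already
+        // validated the same input array by the time this sink runs, so the
+        // skip cannot hide a malformed request.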
+ return absl::OkStatus(); + } + +private: + rapidjson::Value& messagesArray; + rapidjson::Document::AllocatorType& alloc; +}; +#endif // PYTHON_DISABLE == 0 + // --- Request parsing --- absl::Status OpenAIResponsesHandler::parseRequest(std::optional maxTokensLimit, uint32_t bestOfLimit, std::optional maxModelLength, @@ -87,87 +579,12 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow if (inputIt->value.GetArray().Size() == 0) { return absl::InvalidArgumentError("Messages array cannot be empty"); } - - for (size_t i = 0; i < inputIt->value.GetArray().Size(); ++i) { - auto& item = inputIt->value.GetArray()[i]; - if (!item.IsObject()) { - return absl::InvalidArgumentError("input array items must be objects"); - } - - auto itemObj = item.GetObject(); - auto roleIt = itemObj.FindMember("role"); - if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) { - return absl::InvalidArgumentError("input item role is missing or invalid"); - } - - request.chatHistory.push_back({}); - request.chatHistory.last()["role"] = roleIt->value.GetString(); - - auto contentIt = itemObj.FindMember("content"); - if (contentIt == itemObj.MemberEnd()) { - return absl::InvalidArgumentError("input item content is missing"); - } - - if (contentIt->value.IsString()) { - request.chatHistory.last()["content"] = contentIt->value.GetString(); - continue; - } - - if (!contentIt->value.IsArray()) { - return absl::InvalidArgumentError("input item content must be a string or array"); - } - if (contentIt->value.GetArray().Size() == 0) { - return absl::InvalidArgumentError("Invalid message structure - content array is empty"); - } - - std::string contentText = ""; - for (auto& contentItem : contentIt->value.GetArray()) { - if (!contentItem.IsObject()) { - return absl::InvalidArgumentError("input content items must be objects"); - } - auto contentObj = contentItem.GetObject(); - auto typeIt = contentObj.FindMember("type"); - if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) { - return absl::InvalidArgumentError("input content item type is missing or invalid"); - } - - const std::string type = typeIt->value.GetString(); - if (type == "input_text") { - auto textIt = contentObj.FindMember("text"); - if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) { - return absl::InvalidArgumentError("input_text requires a valid text field"); - } - contentText = textIt->value.GetString(); - } else if (type == "input_image") { - std::string imageUrl; - auto imageUrlIt = contentObj.FindMember("image_url"); - if (imageUrlIt == contentObj.MemberEnd()) { - return absl::InvalidArgumentError("input_image requires image_url field"); - } - if (imageUrlIt->value.IsString()) { - imageUrl = imageUrlIt->value.GetString(); - } else if (imageUrlIt->value.IsObject()) { - auto imageUrlObj = imageUrlIt->value.GetObject(); - auto urlIt = imageUrlObj.FindMember("url"); - if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) { - return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid"); - } - imageUrl = urlIt->value.GetString(); - } else { - return absl::InvalidArgumentError("input_image.image_url must be a string or object"); - } - - auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); - if (!tensorResult.ok()) { - return tensorResult.status(); - } - request.imageHistory.push_back({i, tensorResult.value()}); - } else { - return absl::InvalidArgumentError("Unsupported content type. 
Supported types are input_text and input_image.");
-            }
-        }
-
-        request.chatHistory.last()["content"] = contentText;
+        ChatHistorySink sink(request.chatHistory, request.imageHistory,
+            allowedLocalMediaPath, allowedMediaDomains);
+        ResponsesInputBuilder<ChatHistorySink> builder(sink);
+        auto status = builder.build(inputIt->value);
+        if (!status.ok()) {
+            return status;
         }
     } else {
         return absl::InvalidArgumentError("input is not a string or array");
     }
@@ -189,6 +606,14 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional
         return absl::InvalidArgumentError("input missing in request");
     }
 
+    // Convert tools array (Responses-flat -> chat/completions-nested) once, in place,
+    // before any consumer reads it. parseInput, parseToolsToJsonContainer and the
+    // processedJson builder all rely on the nested shape.
+    auto toolsIt = doc.FindMember("tools");
+    if (toolsIt != doc.MemberEnd() && toolsIt->value.IsArray()) {
+        convertResponsesToolsInPlace(toolsIt->value, doc.GetAllocator());
+    }
+
     auto messagesStatus = parseInput(allowedLocalMediaPath, allowedMediaDomains);
     if (!messagesStatus.ok()) {
         return messagesStatus;
     }
@@ -228,30 +653,31 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional
     }
 
 #if (PYTHON_DISABLE == 0)
-    // Build processedJson with "messages" array from chatHistory so that
-    // the Python chat template path (which reads request_json["messages"])
-    // can consume Responses API input without a separate code path.
+    // Build processedJson with a "messages" array in chat/completions format so that
+    // the Python Jinja template path can consume Responses API input without a separate code path.
+    // Handles reasoning, function_call (merged into assistant tool_calls), and
+    // function_call_output (converted to role:tool messages).
     {
         Document processedDoc;
         processedDoc.SetObject();
         auto& alloc = processedDoc.GetAllocator();
         Value messagesArray(kArrayType);
-        for (size_t i = 0; i < request.chatHistory.size(); ++i) {
-            Value msgObj(kObjectType);
-            auto role = request.chatHistory[i]["role"].as_string();
-            if (role.has_value()) {
-                msgObj.AddMember("role", Value(role.value().c_str(), alloc), alloc);
-            }
-            auto content = request.chatHistory[i]["content"].as_string();
-            if (content.has_value()) {
-                msgObj.AddMember("content", Value(content.value().c_str(), alloc), alloc);
+
+        auto inputArrIt = doc.FindMember("input");
+        if (inputArrIt != doc.MemberEnd() && inputArrIt->value.IsArray()) {
+            ProcessedJsonSink sink(messagesArray, alloc);
+            ResponsesInputBuilder<ProcessedJsonSink> builder(sink);
+            auto processedStatus = builder.build(inputArrIt->value);
+            if (!processedStatus.ok()) {
+                return processedStatus;
             }
-            messagesArray.PushBack(msgObj, alloc);
         }
+
         processedDoc.AddMember("messages", messagesArray, alloc);
 
-        // Copy tools from original doc if present
+        // Tools were already normalised to chat/completions nested format by
+        // convertResponsesToolsInPlace earlier in parseResponsesPart; just copy verbatim.
auto toolsIt = doc.FindMember("tools"); if (toolsIt != doc.MemberEnd() && !toolsIt->value.IsNull()) { Value toolsCopy(toolsIt->value, alloc); diff --git a/src/llm/py_jinja_template_processor.cpp b/src/llm/py_jinja_template_processor.cpp index 432aa8e722..61116d3c5d 100644 --- a/src/llm/py_jinja_template_processor.cpp +++ b/src/llm/py_jinja_template_processor.cpp @@ -40,7 +40,7 @@ bool PyJinjaTemplateProcessor::applyChatTemplate(PyJinjaTemplateProcessor& templ output = "Error: Chat template not loaded correctly, so it cannot be applied"; return false; } - + SPDLOG_DEBUG("Before chat template: \n {}", requestBody); py::gil_scoped_acquire acquire; try { auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = templateProcessor.chatTemplate->getObject(), diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index e2ccd06e78..1e2486835b 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -22,6 +22,7 @@ #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246 6313) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "absl/strings/str_cat.h" #include "mediapipe/framework/calculator_graph.h" #include #include @@ -208,7 +209,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. The model either does not have chat template or has an invalid one."); + return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what())); } #endif if (inputText.size() == 0) { @@ -240,7 +241,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. 
The model either does not have chat template or has an invalid one.");
+        return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what()));
     }
 #endif
     if (inputText.size() == 0) {

From 99b08c3e4bedca3caef4a515380e079954de695a Mon Sep 17 00:00:00 2001
From: mkulakow
Date: Wed, 13 May 2026 13:21:49 +0200
Subject: [PATCH 2/3] Add unit tests and emit standalone reasoning turns

---
 .../continuous_batching/agentic_ai/README.md |   2 +-
 src/llm/apis/openai_responses.cpp            |  39 +-
 src/test/http_openai_handler_test.cpp        | 761 ++++++++++++++++++
 3 files changed, 799 insertions(+), 3 deletions(-)

diff --git a/demos/continuous_batching/agentic_ai/README.md b/demos/continuous_batching/agentic_ai/README.md
index b630158a9d..2ba22afb8c 100644
--- a/demos/continuous_batching/agentic_ai/README.md
+++ b/demos/continuous_batching/agentic_ai/README.md
@@ -330,7 +330,7 @@ Pull and start OVMS:
 ```bash
 mkdir -p ${HOME}/models
 docker run -d --user $(id -u):$(id -g) --rm -p 8000:8000 -v ${HOME}/models:/models --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) openvino/model_server:weekly \
---rest_port 8000 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com
+--rest_port 8000 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_name ovms-model --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com
 ```
 
 Use MCP server, with additional image of Gdańsk old town. VLM model deduces location and calls `get_weather` tool to summarize the weather conditions in the city.
diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index 6c7f44557f..2e68e6324e 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -224,6 +224,12 @@ static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson:
 // "Message has tool role, but there was no previous assistant message with a
 // tool call!").
 //
+// Reasoning that is not followed by an assistant or function_call item is
+// emitted as a standalone assistant turn with empty content and the buffered
+// reasoning attached as `reasoning_content`. This preserves the model's
+// chain-of-thought across turns even when the prior turn produced no visible
+// output.
+//
 // The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a
 // rapidjson messages array) is provided by the Sink template parameter, which
 // must implement:
@@ -231,6 +237,7 @@ static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson:
 //     absl::Status extractContent(itemObj, index, std::string& outText);
 //     void emitToolMessage(callId, output);
 //     void emitMessage(role, contentText, reasoning);  // reasoning empty -> skip
 //     void emitAssistantWithToolCalls(contentText, reasoning, toolCalls);
+//     void emitStandaloneReasoning(reasoning);  // assistant turn carrying only reasoning_content
 //     absl::Status onMissingRole(itemObj);
 template <typename Sink>
 class ResponsesInputBuilder {
@@ -313,7 +320,8 @@ class ResponsesInputBuilder {
             return absl::OkStatus();
         }
         // Non-assistant items must not absorb pending tool_calls; flush first.
-        // (flushPendingFunctionCalls also clears any orphan reasoning content.)
+        // (flushPendingFunctionCalls also flushes any buffered reasoning content
+        // as a standalone assistant turn.)
if (role != "assistant") { flushPendingFunctionCalls(""); } @@ -329,7 +337,16 @@ class ResponsesInputBuilder { void flushPendingFunctionCalls(const std::string& assistantText) { if (pendingFunctionCalls.empty()) { - pendingReasoningContent.clear(); + // No tool calls, but possibly buffered reasoning to flush as a + // standalone assistant turn carrying only reasoning_content (no + // `content` field at all, so templates that gate on `message.content` + // skip the content branch and templates that gate on + // `message.reasoning_content` still see the buffered text). + if (!pendingReasoningContent.empty()) { + std::string reasoning = std::move(pendingReasoningContent); + pendingReasoningContent.clear(); + sink.emitStandaloneReasoning(reasoning); + } return; } std::string reasoning = std::move(pendingReasoningContent); @@ -427,6 +444,15 @@ class ChatHistorySink { chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray); } + // Emit an assistant turn that carries only reasoning_content (no content, + // no tool_calls). Used when reasoning is not followed by an assistant or + // function_call item. + void emitStandaloneReasoning(const std::string& reasoning) { + chatHistory.push_back({}); + chatHistory.last()["role"] = "assistant"; + chatHistory.last()["reasoning_content"] = reasoning; + } + absl::Status onMissingRole(const rapidjson::Value::ConstObject&) { return absl::InvalidArgumentError("input item role is missing or invalid"); } @@ -515,6 +541,15 @@ class ProcessedJsonSink { messagesArray.PushBack(msgObj, alloc); } + // Emit an assistant turn that carries only reasoning_content (no content, + // no tool_calls). See ChatHistorySink::emitStandaloneReasoning for rationale. + void emitStandaloneReasoning(const std::string& reasoning) { + rapidjson::Value msgObj(rapidjson::kObjectType); + msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc); + msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); + messagesArray.PushBack(msgObj, alloc); + } + void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning, const std::vector& toolCalls) { rapidjson::Value msgObj(rapidjson::kObjectType); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index a4e6585af0..58488bb411 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -3610,3 +3610,764 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesRegularMessageHasNoToolFields) EXPECT_FALSE(history[1].contains("tool_call_id")); EXPECT_FALSE(history[1].contains("name")); } + +namespace { +std::shared_ptr parseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { + doc.Parse(json.c_str()); + EXPECT_FALSE(doc.HasParseError()) << json; + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + auto apiHandler = std::make_shared( + doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); + EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()) << json; + return apiHandler; +} + +// Variant for negative tests: returns the parseRequest status without asserting +// it is OK, so the caller can verify the failure mode. 
+absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) {
+    doc.Parse(json.c_str());
+    EXPECT_FALSE(doc.HasParseError()) << json;
+    std::optional<uint32_t> maxTokensLimit;
+    uint32_t bestOfLimit = 0;
+    std::optional<uint32_t> maxModelLength;
+    auto apiHandler = std::make_shared<ovms::OpenAIResponsesHandler>(
+        doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer);
+    return apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength);
+}
+}  // namespace
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormalisedToNestedInDoc) {
+    // The chat template (e.g. gpt-oss) iterates tools looking up tool.function.name /
+    // tool.function.parameters. The Responses-flat shape ({type, name, parameters})
+    // must be rewritten in-place to the chat/completions nested shape before it is
+    // forwarded to the template.
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get current weather",
+            "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+        }]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    EXPECT_TRUE(apiHandler->areToolsAvailable());
+
+    // Inspect the (now normalised) tools array on the request document directly.
+    ASSERT_TRUE(doc.HasMember("tools"));
+    ASSERT_TRUE(doc["tools"].IsArray());
+    ASSERT_EQ(doc["tools"].Size(), 1u);
+    const auto& tool = doc["tools"][0];
+    ASSERT_TRUE(tool.HasMember("function"));
+    ASSERT_TRUE(tool["function"].IsObject());
+    EXPECT_STREQ(tool["function"]["name"].GetString(), "get_weather");
+    EXPECT_STREQ(tool["function"]["description"].GetString(), "Get current weather");
+    ASSERT_TRUE(tool["function"].HasMember("parameters"));
+    EXPECT_TRUE(tool["function"]["parameters"].IsObject());
+    // The flat fields should have been moved under `function`, leaving only `type` + `function`.
+    EXPECT_FALSE(tool.HasMember("name"));
+    EXPECT_FALSE(tool.HasMember("parameters"));
+    EXPECT_FALSE(tool.HasMember("description"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) {
+    // Tools that are already in the chat/completions nested shape must pass through
+    // untouched (no double-wrapping).
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+            }
+        }]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    EXPECT_TRUE(apiHandler->areToolsAvailable());
+    ASSERT_TRUE(doc["tools"][0].HasMember("function"));
+    EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather");
+    // No spurious nested wrap.
+    EXPECT_FALSE(doc["tools"][0]["function"].HasMember("function"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) {
+    // A bare reasoning item, then an assistant message: the reasoning text should
+    // ride on the next assistant message as reasoning_content (matching the
+    // gpt-oss template's expected field). It must NOT produce its own message.
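+    // Expected history: user, then assistant{content:"hello", reasoning_content:"think first"}.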
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[0]["role"].get_string(), "user"); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), "hello"); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "think first"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) { + // Reasoning followed directly by a user message (no assistant/function_call + // in between) is emitted as a standalone assistant turn with empty content + // and the buffered text attached as reasoning_content. This preserves the + // model's chain-of-thought across turns even when the prior turn produced + // no visible output. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_FALSE(history[1].contains("content")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "orphan"); + EXPECT_FALSE(history[1].contains("tool_calls")); + + EXPECT_EQ(history[2]["role"].get_string(), "user"); + EXPECT_FALSE(history[2].contains("reasoning_content")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) { + // Input ending with a reasoning item (no following assistant/function_call) + // — the buffered reasoning is flushed as a standalone trailing assistant + // turn rather than silently lost. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_FALSE(history[1].contains("content")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "trailing"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) { + // function_call followed by function_call_output should produce: + // user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...) + // The assistant message MUST own a tool_calls field; otherwise gpt-oss + // raises "Message has tool role, but there was no previous assistant + // message with a tool call!". 
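+    // Note: tool_calls[].id on the assistant message is read from the
+    // function_call item's `id` field, while the tool message's tool_call_id
+    // comes from the output item's `call_id`; this test sets both to "call_1".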
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", + "output": "{\"temp_c\":17}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), ""); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_TRUE(history[1]["tool_calls"].is_array()); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"city\":\"Paris\"}"); + + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + EXPECT_EQ(history[2]["content"].get_string(), "{\"temp_c\":17}"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) { + // reasoning + function_call should both attach to the synthesised assistant + // turn that owns the tool_calls. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) { + // Two function_calls back-to-back must produce a single assistant message + // with two entries in tool_calls, not two assistant turns. 
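+    // This mirrors chat/completions parallel tool calling: a single assistant
+    // message carries every parallel call in its tool_calls[] array.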
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + // user, assistant(2 tool_calls), tool + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 2); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][1]["id"].get_string(), "call_2"); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) { + // Input ending with a function_call (no matching output) — the trailing + // function_call must still be flushed as an assistant message rather than + // silently lost. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) { + // If an assistant role item follows a function_call, its text content should + // ride on the same merged message (assistant-with-tool_calls), not produce + // a second assistant turn. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), "calling tool"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) { + // The newer reasoning shape: content[].text instead of summary[].text. + // OVMS accepts both. 
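+    // When an item carries both shapes, content[].text wins: extractReasoningText
+    // checks `content` before falling back to `summary`.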
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, + {"role": "assistant", "content": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "new shape"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) { + // function_call_output without call_id: should still emit a tool message + // (with no tool_call_id field) rather than failing parsing. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_FALSE(history[2].contains("tool_call_id")); + EXPECT_EQ(history[2]["content"].get_string(), "ok"); +} + +#if (PYTHON_DISABLE == 0) +// processedJson (the chat/completions-shaped messages array fed to the Python +// Jinja chat template) must mirror the chat history layout for the same input. +// These tests assert the same buffering invariants on that path. + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMirrorsFunctionCallMerge) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("messages")); + const auto& messages = processedDoc["messages"]; + ASSERT_TRUE(messages.IsArray()); + ASSERT_EQ(messages.Size(), 3u); + + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1]["tool_calls"].IsArray()); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["type"].GetString(), "function"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); + EXPECT_STREQ(messages[2]["content"].GetString(), "ok"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonContainsNormalisedTools) { + // The tools forwarded to the template via processedJson must be in the + // chat/completions nested shape (because convertResponsesToolsInPlace + // normalised the doc before processedJson is built). 
+ std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{ + "type": "function", + "name": "get_weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("tools")); + ASSERT_TRUE(processedDoc["tools"].IsArray()); + ASSERT_EQ(processedDoc["tools"].Size(), 1u); + ASSERT_TRUE(processedDoc["tools"][0].HasMember("function")); + EXPECT_STREQ(processedDoc["tools"][0]["function"]["name"].GetString(), "get_weather"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAttachesReasoningOnAssistant) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"answer"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[1]["content"].GetString(), "answer"); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "think"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonStandaloneReasoningOmitsContent) { + // Mirror of ResponsesStandaloneReasoningWithoutAssistantIsEmitted on the + // processedJson path: an assistant turn carrying only reasoning_content + // (no `content`, no `tool_calls`). 
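+    // Omitting `content` entirely (rather than emitting "") matters for chat
+    // templates that branch on whether `message.content` is defined.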
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_FALSE(messages[1].HasMember("content")); + EXPECT_FALSE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "orphan"); + EXPECT_STREQ(messages[2]["role"].GetString(), "user"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingStandaloneReasoningOmitsContent) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"trailing"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_FALSE(messages[1].HasMember("content")); + EXPECT_FALSE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "trailing"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonReasoningPlusFunctionCallRidesOnAssistant) { + // Mirror of ResponsesReasoningPlusFunctionCallRidesOnAssistant: reasoning + // and tool_calls must land on the same JSON object. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultipleFunctionCallsMergedInOneAssistant) { + // Mirror of ResponsesMultipleFunctionCallsMergedInOneAssistant: validates + // the rapidjson tool_calls array growth across PushBack calls. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 2u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); + EXPECT_STREQ(messages[1]["tool_calls"][1]["id"].GetString(), "call_2"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + EXPECT_STREQ(messages[1]["tool_calls"][1]["function"]["arguments"].GetString(), "{\"city\":\"London\"}"); + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingFunctionCallFlushedAsAssistant) { + // Mirror of ResponsesTrailingFunctionCallFlushedAsAssistant: trailing + // function_call without output produces an assistant turn with tool_calls + // and no following tool message. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAssistantMessageAbsorbsBufferedFunctionCall) { + // Mirror of ResponsesAssistantMessageAbsorbsBufferedFunctionCall: assistant + // text content and tool_calls coexist on a single JSON object. 
+    std::string json = R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+            {"type": "function_call", "id": "call_1", "call_id": "call_1",
+             "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+            {"role": "assistant", "content": "calling tool"}
+        ]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    rapidjson::Document processedDoc;
+    processedDoc.Parse(apiHandler->getProcessedJson().c_str());
+    ASSERT_FALSE(processedDoc.HasParseError());
+    const auto& messages = processedDoc["messages"];
+    ASSERT_EQ(messages.Size(), 2u);
+    EXPECT_STREQ(messages[1]["role"].GetString(), "assistant");
+    ASSERT_TRUE(messages[1].HasMember("content"));
+    EXPECT_STREQ(messages[1]["content"].GetString(), "calling tool");
+    ASSERT_TRUE(messages[1].HasMember("tool_calls"));
+    ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u);
+    EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1");
+}
+#endif // PYTHON_DISABLE == 0
+
+// --- Tools normalisation edge cases ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) {
+    // Flat Responses tools may omit `parameters` for zero-arg functions. The
+    // nested form should still be produced (with no `parameters` key under
+    // function), not fail or fabricate one.
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{"type": "function", "name": "ping", "description": "no args"}]
+    })";
+    parseResponses(doc, *tokenizer, json);
+    ASSERT_TRUE(doc.HasMember("tools"));
+    ASSERT_TRUE(doc["tools"].IsArray());
+    ASSERT_EQ(doc["tools"].Size(), 1u);
+    const auto& tool = doc["tools"][0];
+    ASSERT_TRUE(tool.HasMember("function"));
+    EXPECT_STREQ(tool["function"]["name"].GetString(), "ping");
+    EXPECT_STREQ(tool["function"]["description"].GetString(), "no args");
+    EXPECT_FALSE(tool["function"].HasMember("parameters"));
+    // The flat-shape `name` and `description` fields must have been moved off
+    // the top level, not copied.
+    EXPECT_FALSE(tool.HasMember("name"));
+    EXPECT_FALSE(tool.HasMember("description"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesNonFunctionToolLeftIntact) {
+    // Tools with an unrecognised `type` (e.g. a future built-in tool) must be
+    // passed through verbatim rather than being incorrectly rewrapped.
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{"type": "web_search", "name": "search"}]
+    })";
+    parseResponses(doc, *tokenizer, json);
+    ASSERT_TRUE(doc["tools"].IsArray());
+    ASSERT_EQ(doc["tools"].Size(), 1u);
+    const auto& tool = doc["tools"][0];
+    EXPECT_STREQ(tool["type"].GetString(), "web_search");
+    EXPECT_STREQ(tool["name"].GetString(), "search");
+    EXPECT_FALSE(tool.HasMember("function"));
+}
+
+// --- Error paths ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputItemMissingRoleIsRejected) {
+    // An input item with no recognised `type` and no `role` cannot be
+    // classified — the chat-history sink must surface this as an
+    // InvalidArgumentError rather than silently dropping the turn.
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"content": [{"type":"output_text","text":"orphaned"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("role")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentNotStringOrArrayIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": 42} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("content")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentItemMissingTypeIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"text":"no type field"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("type")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputTextMissingTextFieldIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("text")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputArrayItemNotObjectIsRejected) { + std::string json = R"({ + "model": "llama", + "input": ["not an object"] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("must be objects")); +} + +// --- Multi-turn composite --- + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndFollowupAssistant) { + // End-to-end: user -> reasoning + function_call (merged on synthesised + // assistant) -> function_call_output -> reasoning + assistant final answer. + // Validates that buffering state is correctly reset between turns. 
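+    // Expected history shape (illustrative):
+    //   user
+    //   -> assistant{content:"", reasoning_content:"need to call get_weather", tool_calls:[call_1]}
+    //   -> tool{tool_call_id:"call_1", content:"sunny, 22C"}
+    //   -> assistant{content:final answer, reasoning_content:"format the answer"}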
+    std::string json = R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]},
+            {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]},
+            {"type": "function_call", "id": "call_1", "call_id": "call_1",
+             "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+            {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"},
+            {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]},
+            {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]}
+        ]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    auto& history = apiHandler->getChatHistory();
+    ASSERT_EQ(history.size(), 4);
+
+    // user
+    EXPECT_EQ(history[0]["role"].get_string(), "user");
+
+    // synthesised assistant: empty content + reasoning + tool_calls
+    EXPECT_EQ(history[1]["role"].get_string(), "assistant");
+    EXPECT_EQ(history[1]["content"].get_string(), "");
+    ASSERT_TRUE(history[1].contains("reasoning_content"));
+    EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather");
+    ASSERT_TRUE(history[1].contains("tool_calls"));
+    ASSERT_EQ(history[1]["tool_calls"].size(), 1);
+    EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1");
+    EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather");
+
+    // tool result
+    EXPECT_EQ(history[2]["role"].get_string(), "tool");
+    EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1");
+    EXPECT_EQ(history[2]["content"].get_string(), "sunny, 22C");
+    EXPECT_FALSE(history[2].contains("reasoning_content"));
+    EXPECT_FALSE(history[2].contains("tool_calls"));
+
+    // final assistant turn: its reasoning_content must come from the second
+    // reasoning item; the first turn's buffer must not leak into it or be
+    // carried over.
+    EXPECT_EQ(history[3]["role"].get_string(), "assistant");
+    EXPECT_EQ(history[3]["content"].get_string(), "It is sunny and 22C in Paris.");
+    ASSERT_TRUE(history[3].contains("reasoning_content"));
+    EXPECT_EQ(history[3]["reasoning_content"].get_string(), "format the answer");
+    EXPECT_FALSE(history[3].contains("tool_calls"));
+}
+
+#if (PYTHON_DISABLE == 0)
+// Re-open the PYTHON_DISABLE block to keep the processedJson companion next to
+// the chat-history multi-turn test above.
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultiTurnMirrorsChatHistory) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 4u); + + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[1]["content"].GetString(), ""); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); + EXPECT_STREQ(messages[2]["content"].GetString(), "sunny, 22C"); + + EXPECT_STREQ(messages[3]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[3]["content"].GetString(), "It is sunny and 22C in Paris."); + ASSERT_TRUE(messages[3].HasMember("reasoning_content")); + EXPECT_STREQ(messages[3]["reasoning_content"].GetString(), "format the answer"); + EXPECT_FALSE(messages[3].HasMember("tool_calls")); +} +#endif // PYTHON_DISABLE == 0 From ed876b5efe7c5546a007502be7446db6fddfb72c Mon Sep 17 00:00:00 2001 From: mkulakow Date: Wed, 13 May 2026 14:46:30 +0200 Subject: [PATCH 3/3] Update tests --- src/test/http_openai_handler_test.cpp | 926 +++++++++----------------- 1 file changed, 331 insertions(+), 595 deletions(-) diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 58488bb411..25ca17ac2d 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -3636,46 +3636,112 @@ absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& t doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); return apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength); } -} // namespace -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormalisedToNestedInDoc) { - // The chat template (e.g. gpt-oss) iterates tools looking up tool.function.name / - // tool.function.parameters. The Responses-flat shape ({type, name, parameters}) - // must be rewritten in-place to chat/completions nested shape before it is - // forwarded to the template. 
- std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{ - "type": "function", - "name": "get_weather", - "description": "Get current weather", - "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} - }] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - EXPECT_TRUE(apiHandler->areToolsAvailable()); +// Assert that parsing the given Responses API request produces a chat history +// (and processedJson, when Python is enabled) equivalent to the expected +// chat/completions request. +// +// The expected JSON is a chat/completions REQUEST body — an object with a +// "messages" array and optionally a "tools" array. This makes each test read as +// "given this Responses input, OVMS should produce this chat/completions +// request" — which is exactly the contract of the Responses-to-chat/completions +// translator. +// +// Comparison is structural via rapidjson Value::operator== (member order inside +// objects is irrelevant). +// +// Both the chat-history path (used in the C++/non-Python build) and the +// processedJson path (used by the Python Jinja template) are checked, so a +// single test pins both downstream consumers. +void expectResponsesEquivalentToChatCompletions(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, + const std::string& responsesRequest, const std::string& expectedChatCompletions) { + auto handler = parseResponses(doc, tokenizer, responsesRequest); - // Inspect the (now normalised) tools array on the request document directly. - ASSERT_TRUE(doc.HasMember("tools")); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - ASSERT_TRUE(tool.HasMember("function")); - ASSERT_TRUE(tool["function"].IsObject()); - EXPECT_STREQ(tool["function"]["name"].GetString(), "get_weather"); - EXPECT_STREQ(tool["function"]["description"].GetString(), "Get current weather"); - ASSERT_TRUE(tool["function"].HasMember("parameters")); - EXPECT_TRUE(tool["function"]["parameters"].IsObject()); - // The flat fields should have been moved under `function`, leaving only `type` + `function`. - EXPECT_FALSE(tool.HasMember("name")); - EXPECT_FALSE(tool.HasMember("parameters")); - EXPECT_FALSE(tool.HasMember("description")); + rapidjson::Document expectedDoc; + expectedDoc.Parse(expectedChatCompletions.c_str()); + ASSERT_FALSE(expectedDoc.HasParseError()) + << "could not parse expected chat/completions: " << expectedChatCompletions; + ASSERT_TRUE(expectedDoc.HasMember("messages")) + << "expected chat/completions JSON must contain a 'messages' array"; + + // --- ChatHistory path (C++ / non-Python build) --- + const std::string actualHistoryJson = handler->getChatHistory().get_messages().to_json_string(); + rapidjson::Document actualHistoryDoc; + actualHistoryDoc.Parse(actualHistoryJson.c_str()); + ASSERT_FALSE(actualHistoryDoc.HasParseError()) << actualHistoryJson; + EXPECT_TRUE(actualHistoryDoc == expectedDoc["messages"]) + << "ChatHistory messages mismatch.\n actual: " << actualHistoryJson + << "\n expected: " << expectedChatCompletions; + // Tools on the C++ path are exposed via parseToolsToJsonContainer() — that + // is exactly what the non-Python servable forwards to GenAI. Compare its + // serialised JSON against the expected chat/completions tools. 
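+    // When the expected request carries no "tools" member, the tools check is
+    // skipped entirely (rather than asserting absence), keeping message-only
+    // tests agnostic to how the handler treats an absent tools array.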
+ if (expectedDoc.HasMember("tools")) { + auto toolsStatus = handler->parseToolsToJsonContainer(); + ASSERT_TRUE(toolsStatus.ok()) << "parseToolsToJsonContainer failed: " << toolsStatus.status().message(); + ASSERT_TRUE(toolsStatus.value().has_value()) << "parseToolsToJsonContainer returned nullopt"; + const std::string actualToolsJson = toolsStatus.value()->to_json_string(); + rapidjson::Document actualToolsDoc; + actualToolsDoc.Parse(actualToolsJson.c_str()); + ASSERT_FALSE(actualToolsDoc.HasParseError()) << actualToolsJson; + EXPECT_TRUE(actualToolsDoc == expectedDoc["tools"]) + << "parseToolsToJsonContainer mismatch.\n actual: " << actualToolsJson + << "\n expected: " << expectedChatCompletions; + } + +#if (PYTHON_DISABLE == 0) + // --- processedJson path (Python Jinja chat template) --- + const std::string actualProcessedJson = handler->getProcessedJson(); + rapidjson::Document actualProcessedDoc; + actualProcessedDoc.Parse(actualProcessedJson.c_str()); + ASSERT_FALSE(actualProcessedDoc.HasParseError()) << actualProcessedJson; + ASSERT_TRUE(actualProcessedDoc.HasMember("messages")) << actualProcessedJson; + EXPECT_TRUE(actualProcessedDoc["messages"] == expectedDoc["messages"]) + << "processedJson messages mismatch.\n actual: " << actualProcessedJson + << "\n expected: " << expectedChatCompletions; + if (expectedDoc.HasMember("tools")) { + ASSERT_TRUE(actualProcessedDoc.HasMember("tools")) << actualProcessedJson; + EXPECT_TRUE(actualProcessedDoc["tools"] == expectedDoc["tools"]) + << "processedJson tools mismatch.\n actual: " << actualProcessedJson + << "\n expected: " << expectedChatCompletions; + } +#endif +} +} // namespace + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormaliseToChatCompletions) { + // Responses-flat tools shape ({type, name, parameters}) must be rewritten + // to chat/completions nested shape ({type, function:{...}}) before the + // request is forwarded to the chat template. Input is given as an array so + // both ChatHistory and processedJson sinks populate the messages array. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}], + "tools": [{ + "type": "function", + "name": "get_weather", + "description": "Get current weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })", + R"({ + "messages": [{"role":"user","content":"hello"}], + "tools": [{ + "type":"function", + "function":{ + "name":"get_weather", + "description":"Get current weather", + "parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + } + }] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { - // Tools that are already in chat/completions nested shape must pass through - // untouched (no double-wrapping). + // Tools already in chat/completions nested shape must pass through without + // double-wrapping. This is asserted directly on the (in-place mutated) + // request document because the equivalence helper would not detect a + // spurious unwrap+rewrap that nets to the same shape. 
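+    // A buggy second normalisation pass would wrap again, producing
+    //   {"type":"function","function":{"function":{"name":...}}};
+    // the final EXPECT_FALSE below pins against exactly that shape.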
std::string json = R"({ "model": "llama", "input": "hello", @@ -3691,527 +3757,260 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { EXPECT_TRUE(apiHandler->areToolsAvailable()); ASSERT_TRUE(doc["tools"][0].HasMember("function")); EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather"); - // No spurious nested wrap. EXPECT_FALSE(doc["tools"][0]["function"].HasMember("function")); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) { - // A bare reasoning item, then an assistant message: the reasoning text should - // ride on the next assistant message as reasoning_content (matching the - // gpt-oss template's expected field). It must NOT produce its own message. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[0]["role"].get_string(), "user"); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), "hello"); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "think first"); + // A bare reasoning item, then an assistant message: the reasoning text + // rides on the next assistant message as reasoning_content and does NOT + // produce its own message. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","content":"hello","reasoning_content":"think first"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) { - // Reasoning followed directly by a user message (no assistant/function_call - // in between) is emitted as a standalone assistant turn with empty content - // and the buffered text attached as reasoning_content. This preserves the - // model's chain-of-thought across turns even when the prior turn produced - // no visible output. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, - {"role": "user", "content": [{"type":"input_text","text":"again"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_FALSE(history[1].contains("content")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "orphan"); - EXPECT_FALSE(history[1].contains("tool_calls")); - - EXPECT_EQ(history[2]["role"].get_string(), "user"); - EXPECT_FALSE(history[2].contains("reasoning_content")); + // Reasoning followed by a non-assistant/non-function_call item is flushed + // as a standalone assistant turn carrying ONLY reasoning_content (no + // `content`, no `tool_calls`). This preserves the chain-of-thought across + // turns even when the prior turn produced no visible output. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","reasoning_content":"orphan"}, + {"role":"user","content":"again"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) { - // Input ending with a reasoning item (no following assistant/function_call) - // — the buffered reasoning is flushed as a standalone trailing assistant - // turn rather than silently lost. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_FALSE(history[1].contains("content")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "trailing"); + // Input ending with a reasoning item — the buffered reasoning is flushed + // as a trailing standalone assistant turn rather than silently lost. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","reasoning_content":"trailing"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) { // function_call followed by function_call_output should produce: // user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...) - // The assistant message MUST own a tool_calls field; otherwise gpt-oss - // raises "Message has tool role, but there was no previous assistant - // message with a tool call!". 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", - "output": "{\"temp_c\":17}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), ""); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_TRUE(history[1]["tool_calls"].is_array()); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"city\":\"Paris\"}"); - - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); - EXPECT_EQ(history[2]["content"].get_string(), "{\"temp_c\":17}"); + // The synthesised assistant message MUST own a tool_calls field; otherwise + // gpt-oss raises "Message has tool role, but there was no previous + // assistant message with a tool call!". + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", + "output": "{\"temp_c\":17}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"{\"temp_c\":17}"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) { // reasoning + function_call should both attach to the synthesised assistant // turn that owns the tool_calls. 
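+    // i.e. a single object {"role":"assistant","content":"","reasoning_content":...,
+    // "tool_calls":[...]} rather than one assistant turn per input item.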
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"ok"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) { // Two function_calls back-to-back must produce a single assistant message // with two entries in tool_calls, not two assistant turns. 
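+    // This is the chat/completions encoding of parallel tool calls: every call
+    // issued in the same turn shares one assistant message's tool_calls array.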
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call", "id": "call_2", "call_id": "call_2", - "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "15C"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - // user, assistant(2 tool_calls), tool - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 2); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][1]["id"].get_string(), "call_2"); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}, + {"id":"call_2","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"London\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"15C"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) { // Input ending with a function_call (no matching output) — the trailing - // function_call must still be flushed as an assistant message rather than + // function_call must still be flushed as an assistant turn rather than // silently lost. 
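+    // This can arise, for instance, when a client replays the conversation
+    // before the tool result exists; the pending call must survive the replay.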
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) { - // If an assistant role item follows a function_call, its text content should - // ride on the same merged message (assistant-with-tool_calls), not produce - // a second assistant turn. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"role": "assistant", "content": "calling tool"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), "calling tool"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); + // If an assistant role item follows a function_call, its text content + // should ride on the same merged message (assistant-with-tool_calls), not + // produce a second assistant turn. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"calling tool","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) { // The newer reasoning shape: content[].text instead of summary[].text. - // OVMS accepts both. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, - {"role": "assistant", "content": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "new shape"); + // OVMS accepts both and produces the same chat/completions output. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, + {"role": "assistant", "content": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","content":"ok","reasoning_content":"new shape"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) { - // function_call_output without call_id: should still emit a tool message - // (with no tool_call_id field) rather than failing parsing. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{}"}, - {"type": "function_call_output", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_FALSE(history[2].contains("tool_call_id")); - EXPECT_EQ(history[2]["content"].get_string(), "ok"); + // function_call_output without call_id: the resulting tool message has no + // tool_call_id field rather than failing parsing or carrying an empty id. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{}"}} + ]}, + {"role":"tool","content":"ok"} + ] + })"); } -#if (PYTHON_DISABLE == 0) -// processedJson (the chat/completions-shaped messages array fed to the Python -// Jinja chat template) must mirror the chat history layout for the same input. -// These tests assert the same buffering invariants on that path. 
- -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMirrorsFunctionCallMerge) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - ASSERT_TRUE(processedDoc.HasMember("messages")); - const auto& messages = processedDoc["messages"]; - ASSERT_TRUE(messages.IsArray()); - ASSERT_EQ(messages.Size(), 3u); - - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1]["tool_calls"].IsArray()); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["type"].GetString(), "function"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); - EXPECT_STREQ(messages[2]["content"].GetString(), "ok"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonContainsNormalisedTools) { - // The tools forwarded to the template via processedJson must be in the - // chat/completions nested shape (because convertResponsesToolsInPlace - // normalised the doc before processedJson is built). - std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{ - "type": "function", - "name": "get_weather", - "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} - }] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - ASSERT_TRUE(processedDoc.HasMember("tools")); - ASSERT_TRUE(processedDoc["tools"].IsArray()); - ASSERT_EQ(processedDoc["tools"].Size(), 1u); - ASSERT_TRUE(processedDoc["tools"][0].HasMember("function")); - EXPECT_STREQ(processedDoc["tools"][0]["function"]["name"].GetString(), "get_weather"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAttachesReasoningOnAssistant) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"think"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"answer"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[1]["content"].GetString(), "answer"); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "think"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonStandaloneReasoningOmitsContent) { - // Mirror of 
ResponsesStandaloneReasoningWithoutAssistantIsEmitted on the - // processedJson path: an assistant turn carrying only reasoning_content - // (no `content`, no `tool_calls`). - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, - {"role": "user", "content": [{"type":"input_text","text":"again"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_FALSE(messages[1].HasMember("content")); - EXPECT_FALSE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "orphan"); - EXPECT_STREQ(messages[2]["role"].GetString(), "user"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingStandaloneReasoningOmitsContent) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "content": [{"type":"reasoning_text","text":"trailing"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_FALSE(messages[1].HasMember("content")); - EXPECT_FALSE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "trailing"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonReasoningPlusFunctionCallRidesOnAssistant) { - // Mirror of ResponsesReasoningPlusFunctionCallRidesOnAssistant: reasoning - // and tool_calls must land on the same JSON object. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultipleFunctionCallsMergedInOneAssistant) { - // Mirror of ResponsesMultipleFunctionCallsMergedInOneAssistant: validates - // the rapidjson tool_calls array growth across PushBack calls. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call", "id": "call_2", "call_id": "call_2", - "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "15C"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 2u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); - EXPECT_STREQ(messages[1]["tool_calls"][1]["id"].GetString(), "call_2"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - EXPECT_STREQ(messages[1]["tool_calls"][1]["function"]["arguments"].GetString(), "{\"city\":\"London\"}"); - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingFunctionCallFlushedAsAssistant) { - // Mirror of ResponsesTrailingFunctionCallFlushedAsAssistant: trailing - // function_call without output produces an assistant turn with tool_calls - // and no following tool message. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAssistantMessageAbsorbsBufferedFunctionCall) { - // Mirror of ResponsesAssistantMessageAbsorbsBufferedFunctionCall: assistant - // text content and tool_calls coexist on a single JSON object. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"role": "assistant", "content": "calling tool"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("content")); - EXPECT_STREQ(messages[1]["content"].GetString(), "calling tool"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); -} -#endif // PYTHON_DISABLE == 0 - // --- Tools normalisation edge cases --- TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) { // Flat Responses tools may omit `parameters` for zero-arg functions. The // nested form should still be produced (with no `parameters` key under - // function), not fail or fabricate one. - std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{"type": "function", "name": "ping", "description": "no args"}] - })"; - parseResponses(doc, *tokenizer, json); - ASSERT_TRUE(doc.HasMember("tools")); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - ASSERT_TRUE(tool.HasMember("function")); - EXPECT_STREQ(tool["function"]["name"].GetString(), "ping"); - EXPECT_STREQ(tool["function"]["description"].GetString(), "no args"); - EXPECT_FALSE(tool["function"].HasMember("parameters")); - // The flat-shape `name` field at top level must have been removed. - EXPECT_FALSE(tool.HasMember("name")); - EXPECT_FALSE(tool.HasMember("description")); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesNonFunctionToolLeftIntact) { - // Tools with an unrecognised `type` (e.g. a future built-in tool) must be - // passed through verbatim rather than being incorrectly rewrapped. 
- std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{"type": "web_search", "name": "search"}] - })"; - parseResponses(doc, *tokenizer, json); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - EXPECT_STREQ(tool["type"].GetString(), "web_search"); - EXPECT_STREQ(tool["name"].GetString(), "search"); - EXPECT_FALSE(tool.HasMember("function")); + // function), not fail or fabricate one. Input is given as an array so + // both ChatHistory and processedJson sinks populate the messages array. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}], + "tools": [{"type": "function", "name": "ping", "description": "no args"}] + })", + R"({ + "messages": [{"role":"user","content":"hello"}], + "tools": [{"type":"function","function":{"name":"ping","description":"no args"}}] + })"); } // --- Error paths --- @@ -4284,90 +4083,27 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndF // End-to-end: user -> reasoning + function_call (merged on synthesised // assistant) -> function_call_output -> reasoning + assistant final answer. // Validates that buffering state is correctly reset between turns. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 4); - - // user - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - // synthesised assistant: empty content + reasoning + tool_calls - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), ""); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); - - // tool result - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); - EXPECT_EQ(history[2]["content"].get_string(), "sunny, 22C"); - EXPECT_FALSE(history[2].contains("reasoning_content")); - EXPECT_FALSE(history[2].contains("tool_calls")); - - // final assistant turn: second reasoning buffer must have been used here, - // not leaked from the first turn or carried over. 
- EXPECT_EQ(history[3]["role"].get_string(), "assistant"); - EXPECT_EQ(history[3]["content"].get_string(), "It is sunny and 22C in Paris."); - ASSERT_TRUE(history[3].contains("reasoning_content")); - EXPECT_EQ(history[3]["reasoning_content"].get_string(), "format the answer"); - EXPECT_FALSE(history[3].contains("tool_calls")); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather in Paris?"}, + {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"sunny, 22C"}, + {"role":"assistant","content":"It is sunny and 22C in Paris.","reasoning_content":"format the answer"} + ] + })"); } - -#if (PYTHON_DISABLE == 0) -// Re-open the PYTHON_DISABLE block to keep the processedJson companion next to -// the chat-history multi-turn test above. -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultiTurnMirrorsChatHistory) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 4u); - - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[1]["content"].GetString(), ""); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); - EXPECT_STREQ(messages[2]["content"].GetString(), "sunny, 22C"); - - EXPECT_STREQ(messages[3]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[3]["content"].GetString(), "It is sunny and 22C in Paris."); - ASSERT_TRUE(messages[3].HasMember("reasoning_content")); - 
EXPECT_STREQ(messages[3]["reasoning_content"].GetString(), "format the answer"); - EXPECT_FALSE(messages[3].HasMember("tool_calls")); -} -#endif // PYTHON_DISABLE == 0