From 0aa9a488671dd42978d47973fcfaef1287c6562c Mon Sep 17 00:00:00 2001
From: mkulakow
Date: Thu, 30 Apr 2026 14:28:53 +0200
Subject: [PATCH 1/3] Support functions in Responses API

---
 src/llm/apis/openai_responses.cpp       | 616 ++++++++++++++++++++----
 src/llm/py_jinja_template_processor.cpp |   2 +-
 src/llm/servable.cpp                    |   5 +-
 3 files changed, 525 insertions(+), 98 deletions(-)

diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index e5d63985e6..6c7f44557f 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -57,6 +57,498 @@ static std::string joinServerSideEvents(const std::vector<std::string>& events)
     return ss.str();
 }
 
+// Convert the Responses API tools array (flat function format) into the chat/completions
+// nested format ({type:"function", function:{name, description, parameters, ...}}) in place
+// on the request document. The chat template (e.g. gpt-oss) and the chat/completions tools
+// schema both expect the nested shape; doing this once up front lets every downstream
+// consumer (chat history path, processedJson builder for Python Jinja, parseToolsToJsonContainer)
+// share the same representation. Tools already in nested form, or non-function tools, are
+// left untouched.
+static void convertResponsesToolsInPlace(rapidjson::Value& toolsArray, rapidjson::Document::AllocatorType& alloc) {
+    if (!toolsArray.IsArray()) {
+        return;
+    }
+    for (auto& tool : toolsArray.GetArray()) {
+        if (!tool.IsObject()) {
+            continue;
+        }
+        auto toolObj = tool.GetObject();
+        if (toolObj.FindMember("function") != toolObj.MemberEnd()) {
+            continue;  // Already in nested chat/completions format.
+        }
+        auto typeIt = toolObj.FindMember("type");
+        const std::string toolType = (typeIt != toolObj.MemberEnd() && typeIt->value.IsString())
+                                         ? typeIt->value.GetString()
+                                         : "";
+        if (toolType != "function") {
+            continue;  // Preserve non-function tools as-is.
+        }
+        rapidjson::Value funcObj(rapidjson::kObjectType);
+        for (auto memberIt = toolObj.MemberBegin(); memberIt != toolObj.MemberEnd();) {
+            if (!memberIt->name.IsString()) {
+                ++memberIt;
+                continue;
+            }
+            const std::string fieldName = memberIt->name.GetString();
+            if (fieldName == "type" || fieldName == "response") {
+                ++memberIt;
+                continue;
+            }
+            rapidjson::Value keyCopy(memberIt->name, alloc);
+            rapidjson::Value valCopy(memberIt->value, alloc);
+            funcObj.AddMember(keyCopy, valCopy, alloc);
+            memberIt = tool.EraseMember(memberIt);
+        }
+        tool.AddMember("function", funcObj, alloc);
+    }
+}
+
+// Pull the reasoning text out of a Responses API "reasoning" item.
+// Prefers the newer content[].text shape over the legacy summary[].text shape.
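+// For example, both of these items yield the reasoning text "think first":
+//   {"type": "reasoning", "content": [{"type": "reasoning_text", "text": "think first"}]}
+//   {"type": "reasoning", "summary": [{"type": "summary_text", "text": "think first"}]}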
+static std::string extractReasoningText(const rapidjson::Value::ConstObject& itemObj) {
+    auto contentIt = itemObj.FindMember("content");
+    if (contentIt != itemObj.MemberEnd() && contentIt->value.IsArray()) {
+        for (const auto& ci : contentIt->value.GetArray()) {
+            if (!ci.IsObject())
+                continue;
+            auto textIt = ci.GetObject().FindMember("text");
+            if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) {
+                return textIt->value.GetString();
+            }
+        }
+    }
+    auto summaryIt = itemObj.FindMember("summary");
+    if (summaryIt != itemObj.MemberEnd() && summaryIt->value.IsArray()) {
+        for (const auto& si : summaryIt->value.GetArray()) {
+            if (!si.IsObject())
+                continue;
+            auto textIt = si.GetObject().FindMember("text");
+            if (textIt != si.GetObject().MemberEnd() && textIt->value.IsString()) {
+                return textIt->value.GetString();
+            }
+        }
+    }
+    return "";
+}
+
+// Extract a flat text string from a Responses API content field which may be
+// either a string or an array of {type,text} objects.
+static std::string extractTextContent(const rapidjson::Value& contentVal) {
+    if (contentVal.IsString()) {
+        return contentVal.GetString();
+    }
+    if (!contentVal.IsArray()) {
+        return "";
+    }
+    for (const auto& ci : contentVal.GetArray()) {
+        if (!ci.IsObject())
+            continue;
+        auto ctTypeIt = ci.GetObject().FindMember("type");
+        if (ctTypeIt == ci.GetObject().MemberEnd() || !ctTypeIt->value.IsString())
+            continue;
+        const std::string ctType = ctTypeIt->value.GetString();
+        if (ctType == "input_text" || ctType == "output_text") {
+            auto textIt = ci.GetObject().FindMember("text");
+            if (textIt != ci.GetObject().MemberEnd() && textIt->value.IsString()) {
+                return textIt->value.GetString();
+            }
+        }
+    }
+    return "";
+}
+
+// Read the three string fields (id, name, arguments) out of a function_call item.
+struct FunctionCallFields {
+    std::string id;
+    std::string name;
+    std::string arguments;
+};
+static FunctionCallFields readFunctionCallFields(const rapidjson::Value& item) {
+    FunctionCallFields out;
+    auto fcObj = item.GetObject();
+    auto idIt = fcObj.FindMember("id");
+    if (idIt != fcObj.MemberEnd() && idIt->value.IsString())
+        out.id = idIt->value.GetString();
+    auto nameIt = fcObj.FindMember("name");
+    if (nameIt != fcObj.MemberEnd() && nameIt->value.IsString())
+        out.name = nameIt->value.GetString();
+    auto argsIt = fcObj.FindMember("arguments");
+    if (argsIt != fcObj.MemberEnd() && argsIt->value.IsString())
+        out.arguments = argsIt->value.GetString();
+    return out;
+}
+
+// Classification of a Responses API input item used to dispatch to per-type
+// handlers in the builders below.
+enum class ResponsesInputItemKind {
+    REASONING,
+    FUNCTION_CALL,
+    FUNCTION_CALL_OUTPUT,
+    ROLE_ITEM,
+    MISSING_ROLE,
+};
+
+static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson::Value& item) {
+    if (!item.IsObject()) {
+        return absl::InvalidArgumentError("input array items must be objects");
+    }
+    auto itemObj = item.GetObject();
+    auto itemTypeIt = itemObj.FindMember("type");
+    const std::string itemType = (itemTypeIt != itemObj.MemberEnd() && itemTypeIt->value.IsString())
+                                     ? 
itemTypeIt->value.GetString()
+                                     : "";
+    if (itemType == "reasoning")
+        return ResponsesInputItemKind::REASONING;
+    if (itemType == "function_call")
+        return ResponsesInputItemKind::FUNCTION_CALL;
+    if (itemType == "function_call_output")
+        return ResponsesInputItemKind::FUNCTION_CALL_OUTPUT;
+    auto roleIt = itemObj.FindMember("role");
+    if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString())
+        return ResponsesInputItemKind::MISSING_ROLE;
+    return ResponsesInputItemKind::ROLE_ITEM;
+}
+
+// Builds chat/completions-shaped messages from a Responses API input array.
+//
+// Reasoning items are buffered and attached as `reasoning_content` on the next
+// assistant message (matching the gpt-oss template's expected field).
+//
+// Pending function_call items are merged into the next assistant message as a
+// chat/completions-shaped tool_calls[] array. Without this, the assistant turn
+// would have no tool_calls field, the chat template would treat it as a final
+// answer, and a subsequent tool message would fail (e.g. gpt-oss raises
+// "Message has tool role, but there was no previous assistant message with a
+// tool call!").
+//
+// The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a
+// rapidjson messages array) is provided by the Sink template parameter, which
+// must implement:
+//     absl::Status extractContent(itemObj, index, std::string& outText);
+//     void emitToolMessage(callId, output);
+//     void emitMessage(role, contentText, reasoning);  // reasoning empty -> skip
+//     void emitAssistantWithToolCalls(contentText, reasoning, toolCalls);
+//     absl::Status onMissingRole(itemObj);
+template <typename Sink>
+class ResponsesInputBuilder {
+public:
+    explicit ResponsesInputBuilder(Sink& sink) :
+        sink(sink) {}
+
+    absl::Status build(const rapidjson::Value& inputArray) {
+        if (!inputArray.IsArray()) {
+            return absl::InvalidArgumentError("input is not an array");
+        }
+        for (rapidjson::SizeType i = 0; i < inputArray.GetArray().Size(); ++i) {
+            const auto& item = inputArray.GetArray()[i];
+            auto kind = classifyInputItem(item);
+            if (!kind.ok())
+                return kind.status();
+            absl::Status status;
+            switch (kind.value()) {
+            case ResponsesInputItemKind::REASONING:
+                status = onReasoningItem(item.GetObject());
+                break;
+            case ResponsesInputItemKind::FUNCTION_CALL:
+                pendingFunctionCalls.push_back(&item);
+                break;
+            case ResponsesInputItemKind::FUNCTION_CALL_OUTPUT:
+                status = onFunctionCallOutputItem(item.GetObject());
+                break;
+            case ResponsesInputItemKind::ROLE_ITEM:
+                status = onRoleItem(item.GetObject(), i);
+                break;
+            case ResponsesInputItemKind::MISSING_ROLE:
+                status = sink.onMissingRole(item.GetObject());
+                break;
+            }
+            if (!status.ok())
+                return status;
+        }
+        // Flush any trailing buffered function_calls (e.g. input ends with a
+        // function_call item that has no corresponding output yet).
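+        // Such trailing calls are still emitted as an assistant turn carrying
+        // tool_calls, so they are not silently lost.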
+        flushPendingFunctionCalls("");
+        return absl::OkStatus();
+    }
+
+private:
+    absl::Status onReasoningItem(const rapidjson::Value::ConstObject& itemObj) {
+        std::string text = extractReasoningText(itemObj);
+        if (!text.empty()) {
+            if (!pendingReasoningContent.empty())
+                pendingReasoningContent += "\n";
+            pendingReasoningContent += text;
+        }
+        return absl::OkStatus();
+    }
+
+    absl::Status onFunctionCallOutputItem(const rapidjson::Value::ConstObject& itemObj) {
+        flushPendingFunctionCalls("");
+        std::string callId;
+        auto callIdIt = itemObj.FindMember("call_id");
+        if (callIdIt != itemObj.MemberEnd() && callIdIt->value.IsString())
+            callId = callIdIt->value.GetString();
+        std::string output;
+        auto outputIt = itemObj.FindMember("output");
+        if (outputIt != itemObj.MemberEnd() && outputIt->value.IsString())
+            output = outputIt->value.GetString();
+        sink.emitToolMessage(callId, output);
+        return absl::OkStatus();
+    }
+
+    absl::Status onRoleItem(const rapidjson::Value::ConstObject& itemObj, rapidjson::SizeType index) {
+        const std::string role = itemObj.FindMember("role")->value.GetString();
+        std::string contentText;
+        auto status = sink.extractContent(itemObj, index, contentText);
+        if (!status.ok())
+            return status;
+
+        // Assistant role with buffered function_calls: merge into one message
+        // (so the tool_calls field rides on the same assistant turn).
+        if (role == "assistant" && !pendingFunctionCalls.empty()) {
+            flushPendingFunctionCalls(contentText);
+            return absl::OkStatus();
+        }
+        // Non-assistant items must not absorb pending tool_calls; flush first.
+        // (flushPendingFunctionCalls also clears any orphan reasoning content.)
+        if (role != "assistant") {
+            flushPendingFunctionCalls("");
+        }
+
+        std::string reasoning;
+        if (role == "assistant" && !pendingReasoningContent.empty()) {
+            reasoning = std::move(pendingReasoningContent);
+            pendingReasoningContent.clear();
+        }
+        sink.emitMessage(role, contentText, reasoning);
+        return absl::OkStatus();
+    }
+
+    void flushPendingFunctionCalls(const std::string& assistantText) {
+        if (pendingFunctionCalls.empty()) {
+            pendingReasoningContent.clear();
+            return;
+        }
+        std::string reasoning = std::move(pendingReasoningContent);
+        pendingReasoningContent.clear();
+        sink.emitAssistantWithToolCalls(assistantText, reasoning, pendingFunctionCalls);
+        pendingFunctionCalls.clear();
+    }
+
+    Sink& sink;
+    std::vector<const rapidjson::Value*> pendingFunctionCalls;
+    std::string pendingReasoningContent;
+};
+
+// Sink that appends to ov::genai::ChatHistory (used when Python is disabled
+// or as the fallback C++ chat-history path). Owns a scratch rapidjson document
+// whose allocator backs the tool_calls Values until they are deep-copied into
+// a JsonContainer.
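+//
+// For example, the input items
+//   {"type": "function_call", "id": "call_1", "call_id": "call_1",
+//    "name": "get_weather", "arguments": "{...}"}
+//   {"type": "function_call_output", "call_id": "call_1", "output": "{...}"}
+// become the chat-history rows
+//   {role: "assistant", content: "", tool_calls: [{id: "call_1", type: "function", function: {...}}]}
+//   {role: "tool", tool_call_id: "call_1", content: "{...}"}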
+class ChatHistorySink {
+public:
+    ChatHistorySink(ov::genai::ChatHistory& chatHistory, ImageHistory& imageHistory,
+        const std::optional<std::string>& allowedLocalMediaPath,
+        const std::optional<std::vector<std::string>>& allowedMediaDomains) :
+        chatHistory(chatHistory),
+        imageHistory(imageHistory),
+        allowedLocalMediaPath(allowedLocalMediaPath),
+        allowedMediaDomains(allowedMediaDomains) {
+        scratchDoc.SetObject();
+    }
+
+    absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj,
+        rapidjson::SizeType index, std::string& outText) {
+        outText.clear();
+        auto contentIt = itemObj.FindMember("content");
+        if (contentIt == itemObj.MemberEnd())
+            return absl::OkStatus();
+        if (contentIt->value.IsString()) {
+            outText = contentIt->value.GetString();
+            return absl::OkStatus();
+        }
+        if (!contentIt->value.IsArray())
+            return absl::InvalidArgumentError("input item content must be a string or array");
+        for (const auto& contentItem : contentIt->value.GetArray()) {
+            if (!contentItem.IsObject())
+                return absl::InvalidArgumentError("input content items must be objects");
+            auto contentObj = contentItem.GetObject();
+            auto typeIt = contentObj.FindMember("type");
+            if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString())
+                return absl::InvalidArgumentError("input content item type is missing or invalid");
+            const std::string type = typeIt->value.GetString();
+            if (type == "input_text" || type == "output_text") {
+                auto textIt = contentObj.FindMember("text");
+                if (textIt == contentObj.MemberEnd() || !textIt->value.IsString())
+                    return absl::InvalidArgumentError(absl::StrCat(type, " requires a valid text field"));
+                // Last text-bearing item wins, matching pre-refactor behaviour.
+                outText = textIt->value.GetString();
+            } else if (type == "input_image") {
+                auto status = appendInputImage(contentObj, index);
+                if (!status.ok())
+                    return status;
+            } else {
+                // Skip unrecognised content item types for forward compatibility.
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping unsupported content type: {}", type);
+            }
+        }
+        return absl::OkStatus();
+    }
+
+    void emitToolMessage(const std::string& callId, const std::string& output) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "tool";
+        if (!callId.empty())
+            chatHistory.last()["tool_call_id"] = callId;
+        chatHistory.last()["content"] = output;
+    }
+
+    void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = role;
+        chatHistory.last()["content"] = contentText;
+        if (!reasoning.empty())
+            chatHistory.last()["reasoning_content"] = reasoning;
+    }
+
+    void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning,
+        const std::vector<const rapidjson::Value*>& toolCalls) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "assistant";
+        chatHistory.last()["content"] = contentText;
+        if (!reasoning.empty())
+            chatHistory.last()["reasoning_content"] = reasoning;
+        auto& alloc = scratchDoc.GetAllocator();
+        rapidjson::Value toolCallsArray(rapidjson::kArrayType);
+        buildToolCallsArray(toolCalls, toolCallsArray, alloc);
+        // rapidJsonValueToJsonContainer deep-copies, so scratchDoc can be reused.
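+        // (rapidjson's pool allocator does not free individual values, so this
+        // scratch memory is only reclaimed when the sink itself goes away.)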
+        chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray);
+    }
+
+    absl::Status onMissingRole(const rapidjson::Value::ConstObject&) {
+        return absl::InvalidArgumentError("input item role is missing or invalid");
+    }
+
+private:
+    absl::Status appendInputImage(const rapidjson::Value::ConstObject& contentObj, rapidjson::SizeType index) {
+        auto imageUrlIt = contentObj.FindMember("image_url");
+        if (imageUrlIt == contentObj.MemberEnd())
+            return absl::InvalidArgumentError("input_image requires image_url field");
+
+        std::string imageUrl;
+        if (imageUrlIt->value.IsString()) {
+            imageUrl = imageUrlIt->value.GetString();
+        } else if (imageUrlIt->value.IsObject()) {
+            auto imageUrlObj = imageUrlIt->value.GetObject();
+            auto urlIt = imageUrlObj.FindMember("url");
+            if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString())
+                return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid");
+            imageUrl = urlIt->value.GetString();
+        } else {
+            return absl::InvalidArgumentError("input_image.image_url must be a string or object");
+        }
+
+        auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains);
+        if (!tensorResult.ok())
+            return tensorResult.status();
+        imageHistory.push_back({index, tensorResult.value()});
+        return absl::OkStatus();
+    }
+
+    // Build a chat/completions tool_calls[] array into outArr using the given allocator.
+    static void buildToolCallsArray(const std::vector<const rapidjson::Value*>& toolCalls,
+        rapidjson::Value& outArr, rapidjson::Document::AllocatorType& alloc) {
+        for (const auto* fc : toolCalls) {
+            const FunctionCallFields fields = readFunctionCallFields(*fc);
+            rapidjson::Value funcObj(rapidjson::kObjectType);
+            funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc);
+            funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc);
+            rapidjson::Value tcObj(rapidjson::kObjectType);
+            tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc);
+            tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc);
+            tcObj.AddMember("function", funcObj, alloc);
+            outArr.PushBack(tcObj, alloc);
+        }
+    }
+
+    ov::genai::ChatHistory& chatHistory;
+    ImageHistory& imageHistory;
+    const std::optional<std::string>& allowedLocalMediaPath;
+    const std::optional<std::vector<std::string>>& allowedMediaDomains;
+    rapidjson::Document scratchDoc;
+};
+
+#if (PYTHON_DISABLE == 0)
+// Sink that appends to a rapidjson messages array, used to feed the Python
+// Jinja chat template path. Image content items are silently dropped (the
+// Python path receives only text).
+class ProcessedJsonSink {
+public:
+    ProcessedJsonSink(rapidjson::Value& messagesArray, rapidjson::Document::AllocatorType& alloc) :
+        messagesArray(messagesArray),
+        alloc(alloc) {}
+
+    absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj,
+        rapidjson::SizeType /*index*/, std::string& outText) {
+        auto contentIt = itemObj.FindMember("content");
+        outText = (contentIt != itemObj.MemberEnd()) ? 
extractTextContent(contentIt->value) : "";
+        return absl::OkStatus();
+    }
+
+    void emitToolMessage(const std::string& callId, const std::string& output) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("tool", alloc), alloc);
+        if (!callId.empty())
+            msgObj.AddMember("tool_call_id", rapidjson::Value(callId.c_str(), alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(output.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value(role.c_str(), alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc);
+        if (!reasoning.empty())
+            msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning,
+        const std::vector<const rapidjson::Value*>& toolCalls) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc);
+        if (!reasoning.empty())
+            msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        rapidjson::Value toolCallsArray(rapidjson::kArrayType);
+        for (const auto* fc : toolCalls) {
+            const FunctionCallFields fields = readFunctionCallFields(*fc);
+            rapidjson::Value funcObj(rapidjson::kObjectType);
+            funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc);
+            funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc);
+            rapidjson::Value tcObj(rapidjson::kObjectType);
+            tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc);
+            tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc);
+            tcObj.AddMember("function", funcObj, alloc);
+            toolCallsArray.PushBack(tcObj, alloc);
+        }
+        msgObj.AddMember("tool_calls", toolCallsArray, alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    absl::Status onMissingRole(const rapidjson::Value::ConstObject&) {
+        // Silently skip unknown items without a role in the processed JSON path.
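+        // ChatHistorySink rejects these instead; parseInput has already
+        // validated the same input array by the time this sink runs, so the
+        // skip cannot hide a malformed request.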
+ return absl::OkStatus(); + } + +private: + rapidjson::Value& messagesArray; + rapidjson::Document::AllocatorType& alloc; +}; +#endif // PYTHON_DISABLE == 0 + // --- Request parsing --- absl::Status OpenAIResponsesHandler::parseRequest(std::optional maxTokensLimit, uint32_t bestOfLimit, std::optional maxModelLength, @@ -87,87 +579,12 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional allow if (inputIt->value.GetArray().Size() == 0) { return absl::InvalidArgumentError("Messages array cannot be empty"); } - - for (size_t i = 0; i < inputIt->value.GetArray().Size(); ++i) { - auto& item = inputIt->value.GetArray()[i]; - if (!item.IsObject()) { - return absl::InvalidArgumentError("input array items must be objects"); - } - - auto itemObj = item.GetObject(); - auto roleIt = itemObj.FindMember("role"); - if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) { - return absl::InvalidArgumentError("input item role is missing or invalid"); - } - - request.chatHistory.push_back({}); - request.chatHistory.last()["role"] = roleIt->value.GetString(); - - auto contentIt = itemObj.FindMember("content"); - if (contentIt == itemObj.MemberEnd()) { - return absl::InvalidArgumentError("input item content is missing"); - } - - if (contentIt->value.IsString()) { - request.chatHistory.last()["content"] = contentIt->value.GetString(); - continue; - } - - if (!contentIt->value.IsArray()) { - return absl::InvalidArgumentError("input item content must be a string or array"); - } - if (contentIt->value.GetArray().Size() == 0) { - return absl::InvalidArgumentError("Invalid message structure - content array is empty"); - } - - std::string contentText = ""; - for (auto& contentItem : contentIt->value.GetArray()) { - if (!contentItem.IsObject()) { - return absl::InvalidArgumentError("input content items must be objects"); - } - auto contentObj = contentItem.GetObject(); - auto typeIt = contentObj.FindMember("type"); - if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) { - return absl::InvalidArgumentError("input content item type is missing or invalid"); - } - - const std::string type = typeIt->value.GetString(); - if (type == "input_text") { - auto textIt = contentObj.FindMember("text"); - if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) { - return absl::InvalidArgumentError("input_text requires a valid text field"); - } - contentText = textIt->value.GetString(); - } else if (type == "input_image") { - std::string imageUrl; - auto imageUrlIt = contentObj.FindMember("image_url"); - if (imageUrlIt == contentObj.MemberEnd()) { - return absl::InvalidArgumentError("input_image requires image_url field"); - } - if (imageUrlIt->value.IsString()) { - imageUrl = imageUrlIt->value.GetString(); - } else if (imageUrlIt->value.IsObject()) { - auto imageUrlObj = imageUrlIt->value.GetObject(); - auto urlIt = imageUrlObj.FindMember("url"); - if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) { - return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid"); - } - imageUrl = urlIt->value.GetString(); - } else { - return absl::InvalidArgumentError("input_image.image_url must be a string or object"); - } - - auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains); - if (!tensorResult.ok()) { - return tensorResult.status(); - } - request.imageHistory.push_back({i, tensorResult.value()}); - } else { - return absl::InvalidArgumentError("Unsupported content type. 
Supported types are input_text and input_image.");
-            }
-        }
-
-        request.chatHistory.last()["content"] = contentText;
+        ChatHistorySink sink(request.chatHistory, request.imageHistory,
+            allowedLocalMediaPath, allowedMediaDomains);
+        ResponsesInputBuilder<ChatHistorySink> builder(sink);
+        auto status = builder.build(inputIt->value);
+        if (!status.ok()) {
+            return status;
         }
     } else {
         return absl::InvalidArgumentError("input is not a string or array");
     }
@@ -189,6 +606,14 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional
         return absl::InvalidArgumentError("input missing in request");
     }
 
+    // Convert tools array (Responses-flat -> chat/completions-nested) once, in place,
+    // before any consumer reads it. parseInput, parseToolsToJsonContainer and the
+    // processedJson builder all rely on the nested shape.
+    auto toolsIt = doc.FindMember("tools");
+    if (toolsIt != doc.MemberEnd() && toolsIt->value.IsArray()) {
+        convertResponsesToolsInPlace(toolsIt->value, doc.GetAllocator());
+    }
+
     auto messagesStatus = parseInput(allowedLocalMediaPath, allowedMediaDomains);
     if (!messagesStatus.ok()) {
         return messagesStatus;
     }
@@ -228,30 +653,31 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional
     }
 
 #if (PYTHON_DISABLE == 0)
-    // Build processedJson with "messages" array from chatHistory so that
-    // the Python chat template path (which reads request_json["messages"])
-    // can consume Responses API input without a separate code path.
+    // Build processedJson with a "messages" array in chat/completions format so that
+    // the Python Jinja template path can consume Responses API input without a separate code path.
+    // Handles reasoning, function_call (merged into assistant tool_calls), and
+    // function_call_output (converted to role:tool messages).
     {
         Document processedDoc;
         processedDoc.SetObject();
         auto& alloc = processedDoc.GetAllocator();
         Value messagesArray(kArrayType);
-        for (size_t i = 0; i < request.chatHistory.size(); ++i) {
-            Value msgObj(kObjectType);
-            auto role = request.chatHistory[i]["role"].as_string();
-            if (role.has_value()) {
-                msgObj.AddMember("role", Value(role.value().c_str(), alloc), alloc);
-            }
-            auto content = request.chatHistory[i]["content"].as_string();
-            if (content.has_value()) {
-                msgObj.AddMember("content", Value(content.value().c_str(), alloc), alloc);
+
+        auto inputArrIt = doc.FindMember("input");
+        if (inputArrIt != doc.MemberEnd() && inputArrIt->value.IsArray()) {
+            ProcessedJsonSink sink(messagesArray, alloc);
+            ResponsesInputBuilder<ProcessedJsonSink> builder(sink);
+            auto processedStatus = builder.build(inputArrIt->value);
+            if (!processedStatus.ok()) {
+                return processedStatus;
             }
-            messagesArray.PushBack(msgObj, alloc);
         }
+
         processedDoc.AddMember("messages", messagesArray, alloc);
 
-        // Copy tools from original doc if present
+        // Tools were already normalised to chat/completions nested format by
+        // convertResponsesToolsInPlace earlier in parseResponsesPart; just copy verbatim.
auto toolsIt = doc.FindMember("tools"); if (toolsIt != doc.MemberEnd() && !toolsIt->value.IsNull()) { Value toolsCopy(toolsIt->value, alloc); diff --git a/src/llm/py_jinja_template_processor.cpp b/src/llm/py_jinja_template_processor.cpp index 432aa8e722..61116d3c5d 100644 --- a/src/llm/py_jinja_template_processor.cpp +++ b/src/llm/py_jinja_template_processor.cpp @@ -40,7 +40,7 @@ bool PyJinjaTemplateProcessor::applyChatTemplate(PyJinjaTemplateProcessor& templ output = "Error: Chat template not loaded correctly, so it cannot be applied"; return false; } - + SPDLOG_DEBUG("Before chat template: \n {}", requestBody); py::gil_scoped_acquire acquire; try { auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = templateProcessor.chatTemplate->getObject(), diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index e2ccd06e78..1e2486835b 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -22,6 +22,7 @@ #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246 6313) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "absl/strings/str_cat.h" #include "mediapipe/framework/calculator_graph.h" #include #include @@ -208,7 +209,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. The model either does not have chat template or has an invalid one."); + return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what())); } #endif if (inputText.size() == 0) { @@ -240,7 +241,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrtokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs); } catch (const std::exception& e) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what()); - return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. 
The model either does not have chat template or has an invalid one.");
+        return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what()));
     }
 #endif
     if (inputText.size() == 0) {

From 99b08c3e4bedca3caef4a515380e079954de695a Mon Sep 17 00:00:00 2001
From: mkulakow
Date: Wed, 13 May 2026 13:21:49 +0200
Subject: [PATCH 2/3] Add unit tests and emit standalone reasoning turns

---
 .../continuous_batching/agentic_ai/README.md |   2 +-
 src/llm/apis/openai_responses.cpp            |  39 +-
 src/test/http_openai_handler_test.cpp        | 761 ++++++++++++++++++
 3 files changed, 799 insertions(+), 3 deletions(-)

diff --git a/demos/continuous_batching/agentic_ai/README.md b/demos/continuous_batching/agentic_ai/README.md
index b630158a9d..2ba22afb8c 100644
--- a/demos/continuous_batching/agentic_ai/README.md
+++ b/demos/continuous_batching/agentic_ai/README.md
@@ -330,7 +330,7 @@ Pull and start OVMS:
 ```bash
 mkdir -p ${HOME}/models
 docker run -d --user $(id -u):$(id -g) --rm -p 8000:8000 -v ${HOME}/models:/models --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) openvino/model_server:weekly \
---rest_port 8000 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com
+--rest_port 8000 --model_repository_path /models --source_model Junrui2021/Qwen3-VL-8B-Instruct-int4 --model_name ovms-model --tool_parser hermes3 --target_device GPU --task text_generation --pipeline_type VLM_CB --allowed_media_domains raw.githubusercontent.com
 ```
 
 Use MCP server, with additional image of Gdańsk old town. VLM model deduces location and calls `get_weather` tool to summarize the weather conditions in the city.
diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index 6c7f44557f..2e68e6324e 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -224,6 +224,12 @@ static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson:
 // "Message has tool role, but there was no previous assistant message with a
 // tool call!").
 //
+// Reasoning that is not followed by an assistant or function_call item is
+// emitted as a standalone assistant turn with empty content and the buffered
+// reasoning attached as `reasoning_content`. This preserves the model's
+// chain-of-thought across turns even when the prior turn produced no visible
+// output.
+//
 // The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a
 // rapidjson messages array) is provided by the Sink template parameter, which
 // must implement:
@@ -231,6 +237,7 @@ static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson:
 //     absl::Status extractContent(itemObj, index, std::string& outText);
 //     void emitToolMessage(callId, output);
 //     void emitMessage(role, contentText, reasoning);  // reasoning empty -> skip
 //     void emitAssistantWithToolCalls(contentText, reasoning, toolCalls);
+//     void emitStandaloneReasoning(reasoning);  // assistant turn carrying only reasoning_content
 //     absl::Status onMissingRole(itemObj);
 template <typename Sink>
 class ResponsesInputBuilder {
@@ -313,7 +320,8 @@ class ResponsesInputBuilder {
             return absl::OkStatus();
         }
         // Non-assistant items must not absorb pending tool_calls; flush first.
-        // (flushPendingFunctionCalls also clears any orphan reasoning content.)
+        // (flushPendingFunctionCalls also flushes any buffered reasoning content
+        // as a standalone assistant turn.)
if (role != "assistant") { flushPendingFunctionCalls(""); } @@ -329,7 +337,16 @@ class ResponsesInputBuilder { void flushPendingFunctionCalls(const std::string& assistantText) { if (pendingFunctionCalls.empty()) { - pendingReasoningContent.clear(); + // No tool calls, but possibly buffered reasoning to flush as a + // standalone assistant turn carrying only reasoning_content (no + // `content` field at all, so templates that gate on `message.content` + // skip the content branch and templates that gate on + // `message.reasoning_content` still see the buffered text). + if (!pendingReasoningContent.empty()) { + std::string reasoning = std::move(pendingReasoningContent); + pendingReasoningContent.clear(); + sink.emitStandaloneReasoning(reasoning); + } return; } std::string reasoning = std::move(pendingReasoningContent); @@ -427,6 +444,15 @@ class ChatHistorySink { chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray); } + // Emit an assistant turn that carries only reasoning_content (no content, + // no tool_calls). Used when reasoning is not followed by an assistant or + // function_call item. + void emitStandaloneReasoning(const std::string& reasoning) { + chatHistory.push_back({}); + chatHistory.last()["role"] = "assistant"; + chatHistory.last()["reasoning_content"] = reasoning; + } + absl::Status onMissingRole(const rapidjson::Value::ConstObject&) { return absl::InvalidArgumentError("input item role is missing or invalid"); } @@ -515,6 +541,15 @@ class ProcessedJsonSink { messagesArray.PushBack(msgObj, alloc); } + // Emit an assistant turn that carries only reasoning_content (no content, + // no tool_calls). See ChatHistorySink::emitStandaloneReasoning for rationale. + void emitStandaloneReasoning(const std::string& reasoning) { + rapidjson::Value msgObj(rapidjson::kObjectType); + msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc); + msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc); + messagesArray.PushBack(msgObj, alloc); + } + void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning, const std::vector& toolCalls) { rapidjson::Value msgObj(rapidjson::kObjectType); diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index a4e6585af0..58488bb411 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -3610,3 +3610,764 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesRegularMessageHasNoToolFields) EXPECT_FALSE(history[1].contains("tool_call_id")); EXPECT_FALSE(history[1].contains("name")); } + +namespace { +std::shared_ptr parseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) { + doc.Parse(json.c_str()); + EXPECT_FALSE(doc.HasParseError()) << json; + std::optional maxTokensLimit; + uint32_t bestOfLimit = 0; + std::optional maxModelLength; + auto apiHandler = std::make_shared( + doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); + EXPECT_EQ(apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus()) << json; + return apiHandler; +} + +// Variant for negative tests: returns the parseRequest status without asserting +// it is OK, so the caller can verify the failure mode. 
+absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) {
+    doc.Parse(json.c_str());
+    EXPECT_FALSE(doc.HasParseError()) << json;
+    std::optional<uint32_t> maxTokensLimit;
+    uint32_t bestOfLimit = 0;
+    std::optional<uint32_t> maxModelLength;
+    auto apiHandler = std::make_shared<ovms::OpenAIResponsesHandler>(
+        doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer);
+    return apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength);
+}
+}  // namespace
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormalisedToNestedInDoc) {
+    // The chat template (e.g. gpt-oss) iterates tools looking up tool.function.name /
+    // tool.function.parameters. The Responses-flat shape ({type, name, parameters})
+    // must be rewritten in-place to the chat/completions nested shape before it is
+    // forwarded to the template.
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get current weather",
+            "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+        }]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    EXPECT_TRUE(apiHandler->areToolsAvailable());
+
+    // Inspect the (now normalised) tools array on the request document directly.
+    ASSERT_TRUE(doc.HasMember("tools"));
+    ASSERT_TRUE(doc["tools"].IsArray());
+    ASSERT_EQ(doc["tools"].Size(), 1u);
+    const auto& tool = doc["tools"][0];
+    ASSERT_TRUE(tool.HasMember("function"));
+    ASSERT_TRUE(tool["function"].IsObject());
+    EXPECT_STREQ(tool["function"]["name"].GetString(), "get_weather");
+    EXPECT_STREQ(tool["function"]["description"].GetString(), "Get current weather");
+    ASSERT_TRUE(tool["function"].HasMember("parameters"));
+    EXPECT_TRUE(tool["function"]["parameters"].IsObject());
+    // The flat fields should have been moved under `function`, leaving only `type` + `function`.
+    EXPECT_FALSE(tool.HasMember("name"));
+    EXPECT_FALSE(tool.HasMember("parameters"));
+    EXPECT_FALSE(tool.HasMember("description"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) {
+    // Tools that are already in the chat/completions nested shape must pass through
+    // untouched (no double-wrapping).
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+            }
+        }]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    EXPECT_TRUE(apiHandler->areToolsAvailable());
+    ASSERT_TRUE(doc["tools"][0].HasMember("function"));
+    EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather");
+    // No spurious nested wrap.
+    EXPECT_FALSE(doc["tools"][0]["function"].HasMember("function"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) {
+    // A bare reasoning item, then an assistant message: the reasoning text should
+    // ride on the next assistant message as reasoning_content (matching the
+    // gpt-oss template's expected field). It must NOT produce its own message.
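+    // Expected history: user, then assistant{content:"hello", reasoning_content:"think first"}.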
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[0]["role"].get_string(), "user"); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), "hello"); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "think first"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) { + // Reasoning followed directly by a user message (no assistant/function_call + // in between) is emitted as a standalone assistant turn with empty content + // and the buffered text attached as reasoning_content. This preserves the + // model's chain-of-thought across turns even when the prior turn produced + // no visible output. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_FALSE(history[1].contains("content")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "orphan"); + EXPECT_FALSE(history[1].contains("tool_calls")); + + EXPECT_EQ(history[2]["role"].get_string(), "user"); + EXPECT_FALSE(history[2].contains("reasoning_content")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) { + // Input ending with a reasoning item (no following assistant/function_call) + // — the buffered reasoning is flushed as a standalone trailing assistant + // turn rather than silently lost. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_FALSE(history[1].contains("content")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "trailing"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) { + // function_call followed by function_call_output should produce: + // user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...) + // The assistant message MUST own a tool_calls field; otherwise gpt-oss + // raises "Message has tool role, but there was no previous assistant + // message with a tool call!". 
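+    // Note: tool_calls[].id on the assistant message is read from the
+    // function_call item's `id` field, while the tool message's tool_call_id
+    // comes from the output item's `call_id`; this test sets both to "call_1".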
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", + "output": "{\"temp_c\":17}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + + EXPECT_EQ(history[0]["role"].get_string(), "user"); + + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), ""); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_TRUE(history[1]["tool_calls"].is_array()); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); + EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"city\":\"Paris\"}"); + + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + EXPECT_EQ(history[2]["content"].get_string(), "{\"temp_c\":17}"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) { + // reasoning + function_call should both attach to the synthesised assistant + // turn that owns the tool_calls. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) { + // Two function_calls back-to-back must produce a single assistant message + // with two entries in tool_calls, not two assistant turns. 
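+    // This mirrors chat/completions parallel tool calling: a single assistant
+    // message carries every parallel call in its tool_calls[] array.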
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + // user, assistant(2 tool_calls), tool + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 2); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + EXPECT_EQ(history[1]["tool_calls"][1]["id"].get_string(), "call_2"); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) { + // Input ending with a function_call (no matching output) — the trailing + // function_call must still be flushed as an assistant message rather than + // silently lost. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); + EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) { + // If an assistant role item follows a function_call, its text content should + // ride on the same merged message (assistant-with-tool_calls), not produce + // a second assistant turn. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + EXPECT_EQ(history[1]["role"].get_string(), "assistant"); + EXPECT_EQ(history[1]["content"].get_string(), "calling tool"); + ASSERT_TRUE(history[1].contains("tool_calls")); + ASSERT_EQ(history[1]["tool_calls"].size(), 1); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) { + // The newer reasoning shape: content[].text instead of summary[].text. + // OVMS accepts both. 
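+    // When an item carries both shapes, content[].text wins: extractReasoningText
+    // checks `content` before falling back to `summary`.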
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, + {"role": "assistant", "content": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 2); + ASSERT_TRUE(history[1].contains("reasoning_content")); + EXPECT_EQ(history[1]["reasoning_content"].get_string(), "new shape"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) { + // function_call_output without call_id: should still emit a tool message + // (with no tool_call_id field) rather than failing parsing. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + auto& history = apiHandler->getChatHistory(); + ASSERT_EQ(history.size(), 3); + EXPECT_EQ(history[2]["role"].get_string(), "tool"); + EXPECT_FALSE(history[2].contains("tool_call_id")); + EXPECT_EQ(history[2]["content"].get_string(), "ok"); +} + +#if (PYTHON_DISABLE == 0) +// processedJson (the chat/completions-shaped messages array fed to the Python +// Jinja chat template) must mirror the chat history layout for the same input. +// These tests assert the same buffering invariants on that path. + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMirrorsFunctionCallMerge) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("messages")); + const auto& messages = processedDoc["messages"]; + ASSERT_TRUE(messages.IsArray()); + ASSERT_EQ(messages.Size(), 3u); + + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1]["tool_calls"].IsArray()); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["type"].GetString(), "function"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); + EXPECT_STREQ(messages[2]["content"].GetString(), "ok"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonContainsNormalisedTools) { + // The tools forwarded to the template via processedJson must be in the + // chat/completions nested shape (because convertResponsesToolsInPlace + // normalised the doc before processedJson is built). 
+ std::string json = R"({ + "model": "llama", + "input": "hello", + "tools": [{ + "type": "function", + "name": "get_weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + ASSERT_TRUE(processedDoc.HasMember("tools")); + ASSERT_TRUE(processedDoc["tools"].IsArray()); + ASSERT_EQ(processedDoc["tools"].Size(), 1u); + ASSERT_TRUE(processedDoc["tools"][0].HasMember("function")); + EXPECT_STREQ(processedDoc["tools"][0]["function"]["name"].GetString(), "get_weather"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAttachesReasoningOnAssistant) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"answer"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[1]["content"].GetString(), "answer"); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "think"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonStandaloneReasoningOmitsContent) { + // Mirror of ResponsesStandaloneReasoningWithoutAssistantIsEmitted on the + // processedJson path: an assistant turn carrying only reasoning_content + // (no `content`, no `tool_calls`). 
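+    // Omitting `content` entirely (rather than emitting "") matters for chat
+    // templates that branch on whether `message.content` is defined.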
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_FALSE(messages[1].HasMember("content")); + EXPECT_FALSE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "orphan"); + EXPECT_STREQ(messages[2]["role"].GetString(), "user"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingStandaloneReasoningOmitsContent) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"trailing"}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_FALSE(messages[1].HasMember("content")); + EXPECT_FALSE(messages[1].HasMember("tool_calls")); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "trailing"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonReasoningPlusFunctionCallRidesOnAssistant) { + // Mirror of ResponsesReasoningPlusFunctionCallRidesOnAssistant: reasoning + // and tool_calls must land on the same JSON object. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultipleFunctionCallsMergedInOneAssistant) { + // Mirror of ResponsesMultipleFunctionCallsMergedInOneAssistant: validates + // the rapidjson tool_calls array growth across PushBack calls. 
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 3u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 2u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); + EXPECT_STREQ(messages[1]["tool_calls"][1]["id"].GetString(), "call_2"); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + EXPECT_STREQ(messages[1]["tool_calls"][1]["function"]["arguments"].GetString(), "{\"city\":\"London\"}"); + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingFunctionCallFlushedAsAssistant) { + // Mirror of ResponsesTrailingFunctionCallFlushedAsAssistant: trailing + // function_call without output produces an assistant turn with tool_calls + // and no following tool message. + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 2u); + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAssistantMessageAbsorbsBufferedFunctionCall) { + // Mirror of ResponsesAssistantMessageAbsorbsBufferedFunctionCall: assistant + // text content and tool_calls coexist on a single JSON object. 
+    std::string json = R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+            {"type": "function_call", "id": "call_1", "call_id": "call_1",
+             "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+            {"role": "assistant", "content": "calling tool"}
+        ]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    rapidjson::Document processedDoc;
+    processedDoc.Parse(apiHandler->getProcessedJson().c_str());
+    ASSERT_FALSE(processedDoc.HasParseError());
+    const auto& messages = processedDoc["messages"];
+    ASSERT_EQ(messages.Size(), 2u);
+    EXPECT_STREQ(messages[1]["role"].GetString(), "assistant");
+    ASSERT_TRUE(messages[1].HasMember("content"));
+    EXPECT_STREQ(messages[1]["content"].GetString(), "calling tool");
+    ASSERT_TRUE(messages[1].HasMember("tool_calls"));
+    ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u);
+    EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1");
+}
+#endif // PYTHON_DISABLE == 0
+
+// --- Tools normalisation edge cases ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) {
+    // Flat Responses tools may omit `parameters` for zero-arg functions. The
+    // nested form should still be produced (with no `parameters` key under
+    // function), not fail or fabricate one.
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{"type": "function", "name": "ping", "description": "no args"}]
+    })";
+    parseResponses(doc, *tokenizer, json);
+    ASSERT_TRUE(doc.HasMember("tools"));
+    ASSERT_TRUE(doc["tools"].IsArray());
+    ASSERT_EQ(doc["tools"].Size(), 1u);
+    const auto& tool = doc["tools"][0];
+    ASSERT_TRUE(tool.HasMember("function"));
+    EXPECT_STREQ(tool["function"]["name"].GetString(), "ping");
+    EXPECT_STREQ(tool["function"]["description"].GetString(), "no args");
+    EXPECT_FALSE(tool["function"].HasMember("parameters"));
+    // The flat-shape `name` and `description` fields must have been moved off
+    // the top level, not copied.
+    EXPECT_FALSE(tool.HasMember("name"));
+    EXPECT_FALSE(tool.HasMember("description"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesNonFunctionToolLeftIntact) {
+    // Tools with an unrecognised `type` (e.g. a future built-in tool) must be
+    // passed through verbatim rather than being incorrectly rewrapped.
+    std::string json = R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{"type": "web_search", "name": "search"}]
+    })";
+    parseResponses(doc, *tokenizer, json);
+    ASSERT_TRUE(doc["tools"].IsArray());
+    ASSERT_EQ(doc["tools"].Size(), 1u);
+    const auto& tool = doc["tools"][0];
+    EXPECT_STREQ(tool["type"].GetString(), "web_search");
+    EXPECT_STREQ(tool["name"].GetString(), "search");
+    EXPECT_FALSE(tool.HasMember("function"));
+}
+
+// --- Error paths ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputItemMissingRoleIsRejected) {
+    // An input item with no recognised `type` and no `role` cannot be
+    // classified — the chat-history sink must surface this as an
+    // InvalidArgumentError rather than silently dropping the turn.
+ std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"content": [{"type":"output_text","text":"orphaned"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("role")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentNotStringOrArrayIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": 42} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("content")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentItemMissingTypeIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"text":"no type field"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("type")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputTextMissingTextFieldIsRejected) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text"}]} + ] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("text")); +} + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputArrayItemNotObjectIsRejected) { + std::string json = R"({ + "model": "llama", + "input": ["not an object"] + })"; + auto status = tryParseResponses(doc, *tokenizer, json); + EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::string(status.message()), ::testing::HasSubstr("must be objects")); +} + +// --- Multi-turn composite --- + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndFollowupAssistant) { + // End-to-end: user -> reasoning + function_call (merged on synthesised + // assistant) -> function_call_output -> reasoning + assistant final answer. + // Validates that buffering state is correctly reset between turns. 
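+    // Expected history shape (illustrative):
+    //   user
+    //   -> assistant{content:"", reasoning_content:"need to call get_weather", tool_calls:[call_1]}
+    //   -> tool{tool_call_id:"call_1", content:"sunny, 22C"}
+    //   -> assistant{content:final answer, reasoning_content:"format the answer"}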
+    std::string json = R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]},
+            {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]},
+            {"type": "function_call", "id": "call_1", "call_id": "call_1",
+             "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+            {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"},
+            {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]},
+            {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]}
+        ]
+    })";
+    auto apiHandler = parseResponses(doc, *tokenizer, json);
+    auto& history = apiHandler->getChatHistory();
+    ASSERT_EQ(history.size(), 4);
+
+    // user
+    EXPECT_EQ(history[0]["role"].get_string(), "user");
+
+    // synthesised assistant: empty content + reasoning + tool_calls
+    EXPECT_EQ(history[1]["role"].get_string(), "assistant");
+    EXPECT_EQ(history[1]["content"].get_string(), "");
+    ASSERT_TRUE(history[1].contains("reasoning_content"));
+    EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather");
+    ASSERT_TRUE(history[1].contains("tool_calls"));
+    ASSERT_EQ(history[1]["tool_calls"].size(), 1);
+    EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1");
+    EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather");
+
+    // tool result
+    EXPECT_EQ(history[2]["role"].get_string(), "tool");
+    EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1");
+    EXPECT_EQ(history[2]["content"].get_string(), "sunny, 22C");
+    EXPECT_FALSE(history[2].contains("reasoning_content"));
+    EXPECT_FALSE(history[2].contains("tool_calls"));
+
+    // final assistant turn: its reasoning_content must come from the second
+    // reasoning item; the first turn's buffer must not leak into it or be
+    // carried over.
+    EXPECT_EQ(history[3]["role"].get_string(), "assistant");
+    EXPECT_EQ(history[3]["content"].get_string(), "It is sunny and 22C in Paris.");
+    ASSERT_TRUE(history[3].contains("reasoning_content"));
+    EXPECT_EQ(history[3]["reasoning_content"].get_string(), "format the answer");
+    EXPECT_FALSE(history[3].contains("tool_calls"));
+}
+
+#if (PYTHON_DISABLE == 0)
+// Re-open the PYTHON_DISABLE block to keep the processedJson companion next to
+// the chat-history multi-turn test above.
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultiTurnMirrorsChatHistory) { + std::string json = R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })"; + auto apiHandler = parseResponses(doc, *tokenizer, json); + rapidjson::Document processedDoc; + processedDoc.Parse(apiHandler->getProcessedJson().c_str()); + ASSERT_FALSE(processedDoc.HasParseError()); + const auto& messages = processedDoc["messages"]; + ASSERT_EQ(messages.Size(), 4u); + + EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[1]["content"].GetString(), ""); + ASSERT_TRUE(messages[1].HasMember("reasoning_content")); + EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); + ASSERT_TRUE(messages[1].HasMember("tool_calls")); + ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); + EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); + + EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); + EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); + EXPECT_STREQ(messages[2]["content"].GetString(), "sunny, 22C"); + + EXPECT_STREQ(messages[3]["role"].GetString(), "assistant"); + EXPECT_STREQ(messages[3]["content"].GetString(), "It is sunny and 22C in Paris."); + ASSERT_TRUE(messages[3].HasMember("reasoning_content")); + EXPECT_STREQ(messages[3]["reasoning_content"].GetString(), "format the answer"); + EXPECT_FALSE(messages[3].HasMember("tool_calls")); +} +#endif // PYTHON_DISABLE == 0 From ed876b5efe7c5546a007502be7446db6fddfb72c Mon Sep 17 00:00:00 2001 From: mkulakow Date: Wed, 13 May 2026 14:46:30 +0200 Subject: [PATCH 3/3] Update tests --- src/test/http_openai_handler_test.cpp | 926 +++++++++----------------- 1 file changed, 331 insertions(+), 595 deletions(-) diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 58488bb411..25ca17ac2d 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -3636,46 +3636,112 @@ absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& t doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer); return apiHandler->parseRequest(maxTokensLimit, bestOfLimit, maxModelLength); } -} // namespace -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormalisedToNestedInDoc) { - // The chat template (e.g. gpt-oss) iterates tools looking up tool.function.name / - // tool.function.parameters. The Responses-flat shape ({type, name, parameters}) - // must be rewritten in-place to chat/completions nested shape before it is - // forwarded to the template. 
- std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{ - "type": "function", - "name": "get_weather", - "description": "Get current weather", - "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} - }] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - EXPECT_TRUE(apiHandler->areToolsAvailable()); +// Assert that parsing the given Responses API request produces a chat history +// (and processedJson, when Python is enabled) equivalent to the expected +// chat/completions request. +// +// The expected JSON is a chat/completions REQUEST body — an object with a +// "messages" array and optionally a "tools" array. This makes each test read as +// "given this Responses input, OVMS should produce this chat/completions +// request" — which is exactly the contract of the Responses-to-chat/completions +// translator. +// +// Comparison is structural via rapidjson Value::operator== (member order inside +// objects is irrelevant). +// +// Both the chat-history path (used in the C++/non-Python build) and the +// processedJson path (used by the Python Jinja template) are checked, so a +// single test pins both downstream consumers. +void expectResponsesEquivalentToChatCompletions(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, + const std::string& responsesRequest, const std::string& expectedChatCompletions) { + auto handler = parseResponses(doc, tokenizer, responsesRequest); - // Inspect the (now normalised) tools array on the request document directly. - ASSERT_TRUE(doc.HasMember("tools")); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - ASSERT_TRUE(tool.HasMember("function")); - ASSERT_TRUE(tool["function"].IsObject()); - EXPECT_STREQ(tool["function"]["name"].GetString(), "get_weather"); - EXPECT_STREQ(tool["function"]["description"].GetString(), "Get current weather"); - ASSERT_TRUE(tool["function"].HasMember("parameters")); - EXPECT_TRUE(tool["function"]["parameters"].IsObject()); - // The flat fields should have been moved under `function`, leaving only `type` + `function`. - EXPECT_FALSE(tool.HasMember("name")); - EXPECT_FALSE(tool.HasMember("parameters")); - EXPECT_FALSE(tool.HasMember("description")); + rapidjson::Document expectedDoc; + expectedDoc.Parse(expectedChatCompletions.c_str()); + ASSERT_FALSE(expectedDoc.HasParseError()) + << "could not parse expected chat/completions: " << expectedChatCompletions; + ASSERT_TRUE(expectedDoc.HasMember("messages")) + << "expected chat/completions JSON must contain a 'messages' array"; + + // --- ChatHistory path (C++ / non-Python build) --- + const std::string actualHistoryJson = handler->getChatHistory().get_messages().to_json_string(); + rapidjson::Document actualHistoryDoc; + actualHistoryDoc.Parse(actualHistoryJson.c_str()); + ASSERT_FALSE(actualHistoryDoc.HasParseError()) << actualHistoryJson; + EXPECT_TRUE(actualHistoryDoc == expectedDoc["messages"]) + << "ChatHistory messages mismatch.\n actual: " << actualHistoryJson + << "\n expected: " << expectedChatCompletions; + // Tools on the C++ path are exposed via parseToolsToJsonContainer() — that + // is exactly what the non-Python servable forwards to GenAI. Compare its + // serialised JSON against the expected chat/completions tools. 
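+    // When the expected request carries no "tools" member, the tools check is
+    // skipped entirely (rather than asserting absence), keeping message-only
+    // tests agnostic to how the handler treats an absent tools array.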
+ if (expectedDoc.HasMember("tools")) { + auto toolsStatus = handler->parseToolsToJsonContainer(); + ASSERT_TRUE(toolsStatus.ok()) << "parseToolsToJsonContainer failed: " << toolsStatus.status().message(); + ASSERT_TRUE(toolsStatus.value().has_value()) << "parseToolsToJsonContainer returned nullopt"; + const std::string actualToolsJson = toolsStatus.value()->to_json_string(); + rapidjson::Document actualToolsDoc; + actualToolsDoc.Parse(actualToolsJson.c_str()); + ASSERT_FALSE(actualToolsDoc.HasParseError()) << actualToolsJson; + EXPECT_TRUE(actualToolsDoc == expectedDoc["tools"]) + << "parseToolsToJsonContainer mismatch.\n actual: " << actualToolsJson + << "\n expected: " << expectedChatCompletions; + } + +#if (PYTHON_DISABLE == 0) + // --- processedJson path (Python Jinja chat template) --- + const std::string actualProcessedJson = handler->getProcessedJson(); + rapidjson::Document actualProcessedDoc; + actualProcessedDoc.Parse(actualProcessedJson.c_str()); + ASSERT_FALSE(actualProcessedDoc.HasParseError()) << actualProcessedJson; + ASSERT_TRUE(actualProcessedDoc.HasMember("messages")) << actualProcessedJson; + EXPECT_TRUE(actualProcessedDoc["messages"] == expectedDoc["messages"]) + << "processedJson messages mismatch.\n actual: " << actualProcessedJson + << "\n expected: " << expectedChatCompletions; + if (expectedDoc.HasMember("tools")) { + ASSERT_TRUE(actualProcessedDoc.HasMember("tools")) << actualProcessedJson; + EXPECT_TRUE(actualProcessedDoc["tools"] == expectedDoc["tools"]) + << "processedJson tools mismatch.\n actual: " << actualProcessedJson + << "\n expected: " << expectedChatCompletions; + } +#endif +} +} // namespace + +TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormaliseToChatCompletions) { + // Responses-flat tools shape ({type, name, parameters}) must be rewritten + // to chat/completions nested shape ({type, function:{...}}) before the + // request is forwarded to the chat template. Input is given as an array so + // both ChatHistory and processedJson sinks populate the messages array. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}], + "tools": [{ + "type": "function", + "name": "get_weather", + "description": "Get current weather", + "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + }] + })", + R"({ + "messages": [{"role":"user","content":"hello"}], + "tools": [{ + "type":"function", + "function":{ + "name":"get_weather", + "description":"Get current weather", + "parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} + } + }] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { - // Tools that are already in chat/completions nested shape must pass through - // untouched (no double-wrapping). + // Tools already in chat/completions nested shape must pass through without + // double-wrapping. This is asserted directly on the (in-place mutated) + // request document because the equivalence helper would not detect a + // spurious unwrap+rewrap that nets to the same shape. 
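+    // A buggy second normalisation pass would wrap again, producing
+    //   {"type":"function","function":{"function":{"name":...}}};
+    // the final EXPECT_FALSE below pins against exactly that shape.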
std::string json = R"({ "model": "llama", "input": "hello", @@ -3691,527 +3757,260 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) { EXPECT_TRUE(apiHandler->areToolsAvailable()); ASSERT_TRUE(doc["tools"][0].HasMember("function")); EXPECT_STREQ(doc["tools"][0]["function"]["name"].GetString(), "get_weather"); - // No spurious nested wrap. EXPECT_FALSE(doc["tools"][0]["function"].HasMember("function")); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) { - // A bare reasoning item, then an assistant message: the reasoning text should - // ride on the next assistant message as reasoning_content (matching the - // gpt-oss template's expected field). It must NOT produce its own message. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[0]["role"].get_string(), "user"); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), "hello"); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "think first"); + // A bare reasoning item, then an assistant message: the reasoning text + // rides on the next assistant message as reasoning_content and does NOT + // produce its own message. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","content":"hello","reasoning_content":"think first"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) { - // Reasoning followed directly by a user message (no assistant/function_call - // in between) is emitted as a standalone assistant turn with empty content - // and the buffered text attached as reasoning_content. This preserves the - // model's chain-of-thought across turns even when the prior turn produced - // no visible output. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, - {"role": "user", "content": [{"type":"input_text","text":"again"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_FALSE(history[1].contains("content")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "orphan"); - EXPECT_FALSE(history[1].contains("tool_calls")); - - EXPECT_EQ(history[2]["role"].get_string(), "user"); - EXPECT_FALSE(history[2].contains("reasoning_content")); + // Reasoning followed by a non-assistant/non-function_call item is flushed + // as a standalone assistant turn carrying ONLY reasoning_content (no + // `content`, no `tool_calls`). This preserves the chain-of-thought across + // turns even when the prior turn produced no visible output. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, + {"role": "user", "content": [{"type":"input_text","text":"again"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","reasoning_content":"orphan"}, + {"role":"user","content":"again"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) { - // Input ending with a reasoning item (no following assistant/function_call) - // — the buffered reasoning is flushed as a standalone trailing assistant - // turn rather than silently lost. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_FALSE(history[1].contains("content")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "trailing"); + // Input ending with a reasoning item — the buffered reasoning is flushed + // as a trailing standalone assistant turn rather than silently lost. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","reasoning_content":"trailing"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) { // function_call followed by function_call_output should produce: // user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...) - // The assistant message MUST own a tool_calls field; otherwise gpt-oss - // raises "Message has tool role, but there was no previous assistant - // message with a tool call!". 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", - "output": "{\"temp_c\":17}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), ""); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_TRUE(history[1]["tool_calls"].is_array()); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][0]["type"].get_string(), "function"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["arguments"].get_string(), "{\"city\":\"Paris\"}"); - - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); - EXPECT_EQ(history[2]["content"].get_string(), "{\"temp_c\":17}"); + // The synthesised assistant message MUST own a tool_calls field; otherwise + // gpt-oss raises "Message has tool role, but there was no previous + // assistant message with a tool call!". + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", + "output": "{\"temp_c\":17}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"{\"temp_c\":17}"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) { // reasoning + function_call should both attach to the synthesised assistant // turn that owns the tool_calls. 
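+    // i.e. a single object {"role":"assistant","content":"","reasoning_content":...,
+    // "tool_calls":[...]} rather than one assistant turn per input item.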
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"ok"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) { // Two function_calls back-to-back must produce a single assistant message // with two entries in tool_calls, not two assistant turns. 
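+    // This is the chat/completions encoding of parallel tool calls: every call
+    // issued in the same turn shares one assistant message's tool_calls array.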
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call", "id": "call_2", "call_id": "call_2", - "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "15C"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - // user, assistant(2 tool_calls), tool - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 2); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][1]["id"].get_string(), "call_2"); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "id": "call_2", "call_id": "call_2", + "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "15C"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}, + {"id":"call_2","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"London\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"15C"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) { // Input ending with a function_call (no matching output) — the trailing - // function_call must still be flushed as an assistant message rather than + // function_call must still be flushed as an assistant turn rather than // silently lost. 
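+    // This can arise, for instance, when a client replays the conversation
+    // before the tool result exists; the pending call must survive the replay.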
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) { - // If an assistant role item follows a function_call, its text content should - // ride on the same merged message (assistant-with-tool_calls), not produce - // a second assistant turn. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"role": "assistant", "content": "calling tool"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), "calling tool"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); + // If an assistant role item follows a function_call, its text content + // should ride on the same merged message (assistant-with-tool_calls), not + // produce a second assistant turn. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"role": "assistant", "content": "calling tool"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"calling tool","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) { // The newer reasoning shape: content[].text instead of summary[].text. - // OVMS accepts both. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, - {"role": "assistant", "content": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 2); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "new shape"); + // OVMS accepts both and produces the same chat/completions output. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, + {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]}, + {"role": "assistant", "content": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"hi"}, + {"role":"assistant","content":"ok","reasoning_content":"new shape"} + ] + })"); } TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) { - // function_call_output without call_id: should still emit a tool message - // (with no tool_call_id field) rather than failing parsing. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{}"}, - {"type": "function_call_output", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 3); - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_FALSE(history[2].contains("tool_call_id")); - EXPECT_EQ(history[2]["content"].get_string(), "ok"); + // function_call_output without call_id: the resulting tool message has no + // tool_call_id field rather than failing parsing or carrying an empty id. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{}"}, + {"type": "function_call_output", "output": "ok"} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather?"}, + {"role":"assistant","content":"","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{}"}} + ]}, + {"role":"tool","content":"ok"} + ] + })"); } -#if (PYTHON_DISABLE == 0) -// processedJson (the chat/completions-shaped messages array fed to the Python -// Jinja chat template) must mirror the chat history layout for the same input. -// These tests assert the same buffering invariants on that path. 
- -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMirrorsFunctionCallMerge) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - ASSERT_TRUE(processedDoc.HasMember("messages")); - const auto& messages = processedDoc["messages"]; - ASSERT_TRUE(messages.IsArray()); - ASSERT_EQ(messages.Size(), 3u); - - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1]["tool_calls"].IsArray()); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["type"].GetString(), "function"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); - EXPECT_STREQ(messages[2]["content"].GetString(), "ok"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonContainsNormalisedTools) { - // The tools forwarded to the template via processedJson must be in the - // chat/completions nested shape (because convertResponsesToolsInPlace - // normalised the doc before processedJson is built). - std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{ - "type": "function", - "name": "get_weather", - "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]} - }] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - ASSERT_TRUE(processedDoc.HasMember("tools")); - ASSERT_TRUE(processedDoc["tools"].IsArray()); - ASSERT_EQ(processedDoc["tools"].Size(), 1u); - ASSERT_TRUE(processedDoc["tools"][0].HasMember("function")); - EXPECT_STREQ(processedDoc["tools"][0]["function"]["name"].GetString(), "get_weather"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAttachesReasoningOnAssistant) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"think"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"answer"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[1]["content"].GetString(), "answer"); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "think"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonStandaloneReasoningOmitsContent) { - // Mirror of 
ResponsesStandaloneReasoningWithoutAssistantIsEmitted on the - // processedJson path: an assistant turn carrying only reasoning_content - // (no `content`, no `tool_calls`). - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]}, - {"role": "user", "content": [{"type":"input_text","text":"again"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_FALSE(messages[1].HasMember("content")); - EXPECT_FALSE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "orphan"); - EXPECT_STREQ(messages[2]["role"].GetString(), "user"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingStandaloneReasoningOmitsContent) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"hi"}]}, - {"type": "reasoning", "content": [{"type":"reasoning_text","text":"trailing"}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_FALSE(messages[1].HasMember("content")); - EXPECT_FALSE(messages[1].HasMember("tool_calls")); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "trailing"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonReasoningPlusFunctionCallRidesOnAssistant) { - // Mirror of ResponsesReasoningPlusFunctionCallRidesOnAssistant: reasoning - // and tool_calls must land on the same JSON object. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "ok"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultipleFunctionCallsMergedInOneAssistant) { - // Mirror of ResponsesMultipleFunctionCallsMergedInOneAssistant: validates - // the rapidjson tool_calls array growth across PushBack calls. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call", "id": "call_2", "call_id": "call_2", - "name": "get_weather", "arguments": "{\"city\":\"London\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "15C"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 3u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 2u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); - EXPECT_STREQ(messages[1]["tool_calls"][1]["id"].GetString(), "call_2"); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - EXPECT_STREQ(messages[1]["tool_calls"][1]["function"]["arguments"].GetString(), "{\"city\":\"London\"}"); - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonTrailingFunctionCallFlushedAsAssistant) { - // Mirror of ResponsesTrailingFunctionCallFlushedAsAssistant: trailing - // function_call without output produces an assistant turn with tool_calls - // and no following tool message. 
- std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonAssistantMessageAbsorbsBufferedFunctionCall) { - // Mirror of ResponsesAssistantMessageAbsorbsBufferedFunctionCall: assistant - // text content and tool_calls coexist on a single JSON object. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather?"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"role": "assistant", "content": "calling tool"} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 2u); - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - ASSERT_TRUE(messages[1].HasMember("content")); - EXPECT_STREQ(messages[1]["content"].GetString(), "calling tool"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["id"].GetString(), "call_1"); -} -#endif // PYTHON_DISABLE == 0 - // --- Tools normalisation edge cases --- TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) { // Flat Responses tools may omit `parameters` for zero-arg functions. The // nested form should still be produced (with no `parameters` key under - // function), not fail or fabricate one. - std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{"type": "function", "name": "ping", "description": "no args"}] - })"; - parseResponses(doc, *tokenizer, json); - ASSERT_TRUE(doc.HasMember("tools")); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - ASSERT_TRUE(tool.HasMember("function")); - EXPECT_STREQ(tool["function"]["name"].GetString(), "ping"); - EXPECT_STREQ(tool["function"]["description"].GetString(), "no args"); - EXPECT_FALSE(tool["function"].HasMember("parameters")); - // The flat-shape `name` field at top level must have been removed. - EXPECT_FALSE(tool.HasMember("name")); - EXPECT_FALSE(tool.HasMember("description")); -} - -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesNonFunctionToolLeftIntact) { - // Tools with an unrecognised `type` (e.g. a future built-in tool) must be - // passed through verbatim rather than being incorrectly rewrapped. 
- std::string json = R"({ - "model": "llama", - "input": "hello", - "tools": [{"type": "web_search", "name": "search"}] - })"; - parseResponses(doc, *tokenizer, json); - ASSERT_TRUE(doc["tools"].IsArray()); - ASSERT_EQ(doc["tools"].Size(), 1u); - const auto& tool = doc["tools"][0]; - EXPECT_STREQ(tool["type"].GetString(), "web_search"); - EXPECT_STREQ(tool["name"].GetString(), "search"); - EXPECT_FALSE(tool.HasMember("function")); + // function), not fail or fabricate one. Input is given as an array so + // both ChatHistory and processedJson sinks populate the messages array. + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}], + "tools": [{"type": "function", "name": "ping", "description": "no args"}] + })", + R"({ + "messages": [{"role":"user","content":"hello"}], + "tools": [{"type":"function","function":{"name":"ping","description":"no args"}}] + })"); } // --- Error paths --- @@ -4284,90 +4083,27 @@ TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndF // End-to-end: user -> reasoning + function_call (merged on synthesised // assistant) -> function_call_output -> reasoning + assistant final answer. // Validates that buffering state is correctly reset between turns. - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - auto& history = apiHandler->getChatHistory(); - ASSERT_EQ(history.size(), 4); - - // user - EXPECT_EQ(history[0]["role"].get_string(), "user"); - - // synthesised assistant: empty content + reasoning + tool_calls - EXPECT_EQ(history[1]["role"].get_string(), "assistant"); - EXPECT_EQ(history[1]["content"].get_string(), ""); - ASSERT_TRUE(history[1].contains("reasoning_content")); - EXPECT_EQ(history[1]["reasoning_content"].get_string(), "need to call get_weather"); - ASSERT_TRUE(history[1].contains("tool_calls")); - ASSERT_EQ(history[1]["tool_calls"].size(), 1); - EXPECT_EQ(history[1]["tool_calls"][0]["id"].get_string(), "call_1"); - EXPECT_EQ(history[1]["tool_calls"][0]["function"]["name"].get_string(), "get_weather"); - - // tool result - EXPECT_EQ(history[2]["role"].get_string(), "tool"); - EXPECT_EQ(history[2]["tool_call_id"].get_string(), "call_1"); - EXPECT_EQ(history[2]["content"].get_string(), "sunny, 22C"); - EXPECT_FALSE(history[2].contains("reasoning_content")); - EXPECT_FALSE(history[2].contains("tool_calls")); - - // final assistant turn: second reasoning buffer must have been used here, - // not leaked from the first turn or carried over. 
- EXPECT_EQ(history[3]["role"].get_string(), "assistant"); - EXPECT_EQ(history[3]["content"].get_string(), "It is sunny and 22C in Paris."); - ASSERT_TRUE(history[3].contains("reasoning_content")); - EXPECT_EQ(history[3]["reasoning_content"].get_string(), "format the answer"); - EXPECT_FALSE(history[3].contains("tool_calls")); + expectResponsesEquivalentToChatCompletions(doc, *tokenizer, + R"({ + "model": "llama", + "input": [ + {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, + {"type": "function_call", "id": "call_1", "call_id": "call_1", + "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, + {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, + {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} + ] + })", + R"({ + "messages": [ + {"role":"user","content":"weather in Paris?"}, + {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[ + {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}} + ]}, + {"role":"tool","tool_call_id":"call_1","content":"sunny, 22C"}, + {"role":"assistant","content":"It is sunny and 22C in Paris.","reasoning_content":"format the answer"} + ] + })"); } - -#if (PYTHON_DISABLE == 0) -// Re-open the PYTHON_DISABLE block to keep the processedJson companion next to -// the chat-history multi-turn test above. -TEST_F(HttpOpenAIHandlerParsingTest, ResponsesProcessedJsonMultiTurnMirrorsChatHistory) { - std::string json = R"({ - "model": "llama", - "input": [ - {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]}, - {"type": "function_call", "id": "call_1", "call_id": "call_1", - "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, - {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"}, - {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]}, - {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]} - ] - })"; - auto apiHandler = parseResponses(doc, *tokenizer, json); - rapidjson::Document processedDoc; - processedDoc.Parse(apiHandler->getProcessedJson().c_str()); - ASSERT_FALSE(processedDoc.HasParseError()); - const auto& messages = processedDoc["messages"]; - ASSERT_EQ(messages.Size(), 4u); - - EXPECT_STREQ(messages[1]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[1]["content"].GetString(), ""); - ASSERT_TRUE(messages[1].HasMember("reasoning_content")); - EXPECT_STREQ(messages[1]["reasoning_content"].GetString(), "need to call get_weather"); - ASSERT_TRUE(messages[1].HasMember("tool_calls")); - ASSERT_EQ(messages[1]["tool_calls"].Size(), 1u); - EXPECT_STREQ(messages[1]["tool_calls"][0]["function"]["name"].GetString(), "get_weather"); - - EXPECT_STREQ(messages[2]["role"].GetString(), "tool"); - EXPECT_STREQ(messages[2]["tool_call_id"].GetString(), "call_1"); - EXPECT_STREQ(messages[2]["content"].GetString(), "sunny, 22C"); - - EXPECT_STREQ(messages[3]["role"].GetString(), "assistant"); - EXPECT_STREQ(messages[3]["content"].GetString(), "It is sunny and 22C in Paris."); - ASSERT_TRUE(messages[3].HasMember("reasoning_content")); - 
EXPECT_STREQ(messages[3]["reasoning_content"].GetString(), "format the answer"); - EXPECT_FALSE(messages[3].HasMember("tool_calls")); -} -#endif // PYTHON_DISABLE == 0