diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index 60ec1c4f08..32d1fddc31 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -57,6 +57,537 @@ static std::string joinServerSideEvents(const std::vector<std::string>& events)
     return ss.str();
 }
 
+// Convert the Responses API tools array (flat function format) into the chat/completions
+// nested format ({type:"function", function:{name, description, parameters, ...}}) in place
+// on the request document. The chat template (e.g. gpt-oss) and the chat/completions tools
+// schema both expect the nested shape; doing this once up front lets every downstream
+// consumer (chat history path, processedJson builder for Python Jinja, parseToolsToJsonContainer)
+// share the same representation. Tools already in nested form, non-function tools and
+// non-object entries are left untouched. A non-array toolsArray is a no-op.
+static void convertResponsesToolsInPlace(rapidjson::Value& toolsArray, rapidjson::Document::AllocatorType& alloc) {
+    if (!toolsArray.IsArray()) {
+        return;
+    }
+    for (auto& tool : toolsArray.GetArray()) {
+        if (!tool.IsObject()) {
+            continue;  // Non-object entries are not validated here, only skipped.
+        }
+        auto toolObj = tool.GetObject();
+        if (toolObj.FindMember("function") != toolObj.MemberEnd()) {
+            continue;  // Already in nested chat/completions format.
+        }
+        auto typeIt = toolObj.FindMember("type");
+        const std::string toolType = (typeIt != toolObj.MemberEnd() && typeIt->value.IsString())
+                                         ? typeIt->value.GetString()
+                                         : "";  // Missing or non-string type counts as non-function.
+        if (toolType != "function") {
+            continue;  // Preserve non-function tools as-is.
+        }
+        rapidjson::Value funcObj(rapidjson::kObjectType);  // Becomes the nested "function" object.
+        for (auto memberIt = toolObj.MemberBegin(); memberIt != toolObj.MemberEnd();) {  // Advanced in the body.
+            if (!memberIt->name.IsString()) {
+                ++memberIt;
+                continue;
+            }
+            const std::string fieldName = memberIt->name.GetString();
+            if (fieldName == "type") {
+                ++memberIt;  // "type" stays on the outer tool object.
+                continue;
+            }
+            if (fieldName == "response") {
+                memberIt = tool.EraseMember(memberIt);  // Dropped, not nested. NOTE(review): confirm "response" is the intended key.
+                continue;
+            }
+            rapidjson::Value keyCopy(memberIt->name, alloc);  // Copies of key and value are made with alloc ...
+            rapidjson::Value valCopy(memberIt->value, alloc);
+            funcObj.AddMember(keyCopy, valCopy, alloc);
+            memberIt = tool.EraseMember(memberIt);  // ... then the flat original is removed from the tool.
+        }
+        tool.AddMember("function", funcObj, alloc);  // The tool now carries "type" plus the nested "function".
+    }
+}
+
+// Returns the reasoning text carried by a Responses API "reasoning" item: the first
+// string "text" found in an object element of "content", else of "summary", else "".
+static std::string extractReasoningText(const rapidjson::Value::ConstObject& itemObj) {
+    std::string found;
+    // Scans the array member `key`; stores the first text into `found` and returns true.
+    const auto firstTextIn = [&itemObj, &found](const char* key) -> bool {
+        const auto arrayIt = itemObj.FindMember(key);
+        if (arrayIt == itemObj.MemberEnd() || !arrayIt->value.IsArray())
+            return false;
+        for (const auto& entry : arrayIt->value.GetArray()) {
+            if (!entry.IsObject())
+                continue;
+            const auto entryObj = entry.GetObject();
+            const auto textIt = entryObj.FindMember("text");
+            if (textIt != entryObj.MemberEnd() && textIt->value.IsString()) {
+                found = textIt->value.GetString();
+                return true;
+            }
+        }
+        return false;
+    };
+    // "content" takes precedence; "summary" is consulted only when it yields nothing.
+    if (!firstTextIn("content")) {
+        firstTextIn("summary");
+    }
+    return found;
+}
+
+// Extract a flat text string from a Responses API content field (a string or an
+// array of {type,text} objects). For arrays the LAST input_text/output_text item
+// wins, matching ChatHistorySink::extractContent; returns "" when none is found.
+static std::string extractTextContent(const rapidjson::Value& contentVal) {
+    if (contentVal.IsString())
+        return contentVal.GetString();
+    if (!contentVal.IsArray())
+        return "";
+    std::string lastText;
+    for (const auto& ci : contentVal.GetArray()) {
+        if (!ci.IsObject())
+            continue;
+        const auto ciObj = ci.GetObject();
+        const auto ctTypeIt = ciObj.FindMember("type");
+        if (ctTypeIt == ciObj.MemberEnd() || !ctTypeIt->value.IsString())
+            continue;
+        const std::string ctType = ctTypeIt->value.GetString();
+        if (ctType != "input_text" && ctType != "output_text")
+            continue;
+        const auto textIt = ciObj.FindMember("text");
+        if (textIt != ciObj.MemberEnd() && textIt->value.IsString())
+            lastText = textIt->value.GetString();  // Overwrite: the last text item wins.
+    }
+    return lastText;
+}
+
+// String fields of a Responses API function_call item, reshaped for a
+// chat/completions tool_calls[] entry. Any field that is absent or not a string
+// is left empty.
+struct FunctionCallFields {
+    std::string id;  // call_id when present (links to function_call_output), else the item id
+    std::string name;
+    std::string arguments;
+};
+// Precondition: `item` is a JSON object (GetObject() is called unconditionally).
+static FunctionCallFields readFunctionCallFields(const rapidjson::Value& item) {
+    const auto fcObj = item.GetObject();
+    const auto readString = [&fcObj](const char* key) -> std::string {
+        const auto it = fcObj.FindMember(key);
+        return (it != fcObj.MemberEnd() && it->value.IsString()) ? it->value.GetString() : "";
+    };
+    // function_call_output items reference `call_id`, not the item-level `id`, so
+    // prefer it; otherwise tool_calls[].id would not match the tool message's id.
+    const std::string callId = readString("call_id");
+    return FunctionCallFields{callId.empty() ? readString("id") : callId, readString("name"), readString("arguments")};
+}
+
+// Classification of a Responses API input item used to dispatch to per-type
+// handlers in the builders below.
+enum class ResponsesInputItemKind {
+    REASONING,  // "type" is the string "reasoning"
+    FUNCTION_CALL,  // "type" is the string "function_call"
+    FUNCTION_CALL_OUTPUT,  // "type" is the string "function_call_output"
+    ROLE_ITEM,  // any other item that has a string "role" member
+    MISSING_ROLE,  // any other item whose "role" is absent or not a string
+};
+
+static absl::StatusOr<ResponsesInputItemKind> classifyInputItem(const rapidjson::Value& item) {
+    if (!item.IsObject())
+        return absl::InvalidArgumentError("input array items must be objects");
+    const auto obj = item.GetObject();
+    // A recognised string "type" decides the kind before "role" is even looked at.
+    const auto typeMember = obj.FindMember("type");
+    if (typeMember != obj.MemberEnd() && typeMember->value.IsString()) {
+        const std::string typeName = typeMember->value.GetString();
+        if (typeName == "reasoning")
+            return ResponsesInputItemKind::REASONING;
+        if (typeName == "function_call")
+            return ResponsesInputItemKind::FUNCTION_CALL;
+        if (typeName == "function_call_output")
+            return ResponsesInputItemKind::FUNCTION_CALL_OUTPUT;
+    }
+    // Any other (or missing) type is classified purely by whether a string "role" exists.
+    const auto roleMember = obj.FindMember("role");
+    const bool hasRole = roleMember != obj.MemberEnd() && roleMember->value.IsString();
+    return hasRole ? ResponsesInputItemKind::ROLE_ITEM : ResponsesInputItemKind::MISSING_ROLE;
+}
+
+// Builds chat/completions-shaped messages from a Responses API input array.
+//
+// Reasoning items are buffered and attached as `reasoning_content` on the next
+// assistant message (matching the gpt-oss template's expected field).
+// Consecutive reasoning items are joined with '\n'. Reasoning with no following
+// assistant/function_call item is NOT dropped: it is flushed as a standalone
+// assistant turn (see the "Reasoning that is not followed by" paragraph below).
+//
+// Pending function_call items are merged into the next assistant message as a
+// chat/completions-shaped tool_calls[] array. Without this, the assistant turn
+// would have no tool_calls field, the chat template would treat it as a final
+// answer, and a subsequent tool message would fail (e.g. gpt-oss raises
+// "Message has tool role, but there was no previous assistant message with a
+// tool call!").
+//
+// Reasoning that is not followed by an assistant or function_call item is
+// emitted as a standalone assistant turn with empty content and the buffered
+// reasoning attached as `reasoning_content`. This preserves the model's
+// chain-of-thought across turns even when the prior turn produced no visible
+// output.
+//
+// The algorithm is sink-agnostic; concrete output (ov::genai::ChatHistory vs a
+// rapidjson messages array) is provided by the Sink template parameter, which
+// must implement:
+//   absl::Status extractContent(itemObj, index, std::string& outText);
+//   void emitToolMessage(callId, output);
+//   void emitMessage(role, contentText, reasoning);  // empty reasoning -> omit reasoning_content
+//   void emitAssistantWithToolCalls(contentText, reasoning, toolCalls);
+//   void emitStandaloneReasoning(reasoning);  // assistant turn carrying only reasoning_content
+//   absl::Status onMissingRole(itemObj);
+template <typename Sink>
+class ResponsesInputBuilder {
+public:
+    explicit ResponsesInputBuilder(Sink& sink) :
+        sink(sink) {}  // Only a reference is stored; the sink must outlive the builder.
+    // Walks inputArray once, dispatching each item by kind. Stops at the first non-OK status.
+    absl::Status build(const rapidjson::Value& inputArray) {
+        if (!inputArray.IsArray()) {
+            return absl::InvalidArgumentError("input is not an array");
+        }
+        for (rapidjson::SizeType i = 0; i < inputArray.GetArray().Size(); ++i) {
+            const auto& item = inputArray.GetArray()[i];
+            auto kind = classifyInputItem(item);  // Errors only when the item is not an object.
+            if (!kind.ok())
+                return kind.status();
+            absl::Status status;  // Stays OK for FUNCTION_CALL, which only buffers.
+            switch (kind.value()) {
+            case ResponsesInputItemKind::REASONING:
+                status = onReasoningItem(item.GetObject());
+                break;
+            case ResponsesInputItemKind::FUNCTION_CALL:
+                pendingFunctionCalls.push_back(&item);  // Buffered by pointer into inputArray until flushed.
+                break;
+            case ResponsesInputItemKind::FUNCTION_CALL_OUTPUT:
+                status = onFunctionCallOutputItem(item.GetObject());
+                break;
+            case ResponsesInputItemKind::ROLE_ITEM:
+                status = onRoleItem(item.GetObject(), i);
+                break;
+            case ResponsesInputItemKind::MISSING_ROLE:
+                status = sink.onMissingRole(item.GetObject());  // The sink decides: reject or skip.
+                break;
+            }
+            if (!status.ok())
+                return status;
+        }
+        // Flush any trailing buffered function_calls (e.g. input ends with a
+        // function_call item that has no corresponding output yet).
+        flushPendingFunctionCalls("");  // Also emits trailing buffered reasoning, if any.
+        return absl::OkStatus();
+    }
+
+private:
+    absl::Status onReasoningItem(const rapidjson::Value::ConstObject& itemObj) {  // Buffers only; emits nothing.
+        std::string text = extractReasoningText(itemObj);
+        if (!text.empty()) {
+            if (!pendingReasoningContent.empty())
+                pendingReasoningContent += "\n";  // Consecutive reasoning items are newline-joined.
+            pendingReasoningContent += text;
+        }
+        return absl::OkStatus();
+    }
+    // Flushes whatever is buffered, then emits a tool message for this function_call_output item.
+    absl::Status onFunctionCallOutputItem(const rapidjson::Value::ConstObject& itemObj) {
+        flushPendingFunctionCalls("");  // The assistant tool_calls turn must precede the tool message.
+        std::string callId;
+        auto callIdIt = itemObj.FindMember("call_id");
+        if (callIdIt != itemObj.MemberEnd() && callIdIt->value.IsString())
+            callId = callIdIt->value.GetString();  // Otherwise stays empty.
+        std::string output;
+        auto outputIt = itemObj.FindMember("output");
+        if (outputIt != itemObj.MemberEnd() && outputIt->value.IsString())
+            output = outputIt->value.GetString();  // A non-string output is passed on as "".
+        sink.emitToolMessage(callId, output);
+        return absl::OkStatus();
+    }
+    // Handles items classified as ROLE_ITEM, so "role" is known to be a string member here.
+    absl::Status onRoleItem(const rapidjson::Value::ConstObject& itemObj, rapidjson::SizeType index) {
+        const std::string role = itemObj.FindMember("role")->value.GetString();
+        std::string contentText;
+        auto status = sink.extractContent(itemObj, index, contentText);
+        if (!status.ok())
+            return status;
+
+        // Assistant role with buffered function_calls: merge into one message
+        // (so the tool_calls field rides on the same assistant turn).
+        if (role == "assistant" && !pendingFunctionCalls.empty()) {
+            flushPendingFunctionCalls(contentText);  // Buffered reasoning rides along on the same message.
+            return absl::OkStatus();
+        }
+        // Non-assistant items must not absorb pending tool_calls; flush first.
+        // (flushPendingFunctionCalls also emits any standalone reasoning content
+        // as a standalone assistant turn.)
+        if (role != "assistant") {
+            flushPendingFunctionCalls("");
+        }
+
+        std::string reasoning;
+        if (role == "assistant" && !pendingReasoningContent.empty()) {
+            reasoning = std::move(pendingReasoningContent);
+            pendingReasoningContent.clear();  // Reset the moved-from buffer to a known-empty state.
+        }
+        sink.emitMessage(role, contentText, reasoning);
+        return absl::OkStatus();
+    }
+    // Emits buffered tool calls as one assistant message; with none buffered, emits buffered reasoning alone.
+    void flushPendingFunctionCalls(const std::string& assistantText) {
+        if (pendingFunctionCalls.empty()) {
+            // No tool calls, but possibly buffered reasoning to flush as a
+            // standalone assistant turn carrying only reasoning_content (no
+            // `content` field at all, so templates that gate on `message.content`
+            // skip the content branch and templates that gate on
+            // `message.reasoning_content` still see the buffered text).
+            if (!pendingReasoningContent.empty()) {
+                std::string reasoning = std::move(pendingReasoningContent);
+                pendingReasoningContent.clear();
+                sink.emitStandaloneReasoning(reasoning);
+            }
+            return;  // assistantText is unused on this path.
+        }
+        std::string reasoning = std::move(pendingReasoningContent);  // May be empty; sinks then omit the field.
+        pendingReasoningContent.clear();
+        sink.emitAssistantWithToolCalls(assistantText, reasoning, pendingFunctionCalls);
+        pendingFunctionCalls.clear();
+    }
+
+    Sink& sink;
+    std::vector<const rapidjson::Value*> pendingFunctionCalls;  // function_call items awaiting an assistant turn
+    std::string pendingReasoningContent;  // reasoning text awaiting the next assistant turn or flush
+};
+
+// Sink that appends to ov::genai::ChatHistory (used when Python is disabled
+// or as the fallback C++ chat-history path). Owns a scratch rapidjson document
+// whose allocator backs the tool_calls Values until they are deep-copied into
+// a JsonContainer.
+class ChatHistorySink {
+public:
+    ChatHistorySink(ov::genai::ChatHistory& chatHistory, ImageHistory& imageHistory,
+        const std::optional<std::string>& allowedLocalMediaPath,
+        const std::optional<std::vector<std::string>>& allowedMediaDomains) :
+        chatHistory(chatHistory),
+        imageHistory(imageHistory),
+        allowedLocalMediaPath(allowedLocalMediaPath),
+        allowedMediaDomains(allowedMediaDomains) {  // All four arguments are held by reference (see members).
+        scratchDoc.SetObject();
+    }
+    // Flattens an item's "content" into outText and loads any input_image parts into imageHistory.
+    absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj,
+        rapidjson::SizeType index, std::string& outText) {
+        outText.clear();
+        auto contentIt = itemObj.FindMember("content");
+        if (contentIt == itemObj.MemberEnd())
+            return absl::OkStatus();  // Absent content is accepted and yields empty text.
+        if (contentIt->value.IsString()) {
+            outText = contentIt->value.GetString();
+            return absl::OkStatus();
+        }
+        if (!contentIt->value.IsArray())
+            return absl::InvalidArgumentError("input item content must be a string or array");
+        for (const auto& contentItem : contentIt->value.GetArray()) {
+            if (!contentItem.IsObject())
+                return absl::InvalidArgumentError("input content items must be objects");
+            auto contentObj = contentItem.GetObject();
+            auto typeIt = contentObj.FindMember("type");
+            if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString())
+                return absl::InvalidArgumentError("input content item type is missing or invalid");
+            const std::string type = typeIt->value.GetString();
+            if (type == "input_text" || type == "output_text") {
+                auto textIt = contentObj.FindMember("text");
+                if (textIt == contentObj.MemberEnd() || !textIt->value.IsString())
+                    return absl::InvalidArgumentError(absl::StrCat(type, " requires a valid text field"));
+                // Last text-bearing item wins, matching pre-refactor behaviour.
+                outText = textIt->value.GetString();
+            } else if (type == "input_image") {
+                auto status = appendInputImage(contentObj, index);
+                if (!status.ok())
+                    return status;
+            } else {
+                // Skip unrecognised content item types for forward compatibility.
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping unsupported content type: {}", type);
+            }
+        }
+        return absl::OkStatus();
+    }
+    // Appends a "tool" message with content = output; tool_call_id is set only for a non-empty callId.
+    void emitToolMessage(const std::string& callId, const std::string& output) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "tool";
+        if (!callId.empty())
+            chatHistory.last()["tool_call_id"] = callId;
+        chatHistory.last()["content"] = output;
+    }
+    // Appends a message with role and content; reasoning_content is added only when reasoning is non-empty.
+    void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = role;
+        chatHistory.last()["content"] = contentText;
+        if (!reasoning.empty())
+            chatHistory.last()["reasoning_content"] = reasoning;
+    }
+    // Appends an assistant message carrying contentText plus a tool_calls array built from toolCalls.
+    void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning,
+        const std::vector<const rapidjson::Value*>& toolCalls) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "assistant";
+        chatHistory.last()["content"] = contentText;  // Set even when contentText is empty.
+        if (!reasoning.empty())
+            chatHistory.last()["reasoning_content"] = reasoning;
+        auto& alloc = scratchDoc.GetAllocator();
+        rapidjson::Value toolCallsArray(rapidjson::kArrayType);
+        buildToolCallsArray(toolCalls, toolCallsArray, alloc);
+        // rapidJsonValueToJsonContainer deep-copies, so scratchDoc can be reused.
+        chatHistory.last()["tool_calls"] = rapidJsonValueToJsonContainer(toolCallsArray);
+    }
+
+    // Emit an assistant turn that carries only reasoning_content (no content,
+    // no tool_calls). Used when reasoning is not followed by an assistant or
+    // function_call item.
+    void emitStandaloneReasoning(const std::string& reasoning) {
+        chatHistory.push_back({});
+        chatHistory.last()["role"] = "assistant";
+        chatHistory.last()["reasoning_content"] = reasoning;
+    }
+    // On this path an input item without a usable role is rejected.
+    absl::Status onMissingRole(const rapidjson::Value::ConstObject&) {
+        return absl::InvalidArgumentError("input item role is missing or invalid");
+    }
+
+private:
+    absl::Status appendInputImage(const rapidjson::Value::ConstObject& contentObj, rapidjson::SizeType index) {
+        auto imageUrlIt = contentObj.FindMember("image_url");
+        if (imageUrlIt == contentObj.MemberEnd())
+            return absl::InvalidArgumentError("input_image requires image_url field");
+        // image_url may be a plain string or an object with a string "url" member.
+        std::string imageUrl;
+        if (imageUrlIt->value.IsString()) {
+            imageUrl = imageUrlIt->value.GetString();
+        } else if (imageUrlIt->value.IsObject()) {
+            auto imageUrlObj = imageUrlIt->value.GetObject();
+            auto urlIt = imageUrlObj.FindMember("url");
+            if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString())
+                return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid");
+            imageUrl = urlIt->value.GetString();
+        } else {
+            return absl::InvalidArgumentError("input_image.image_url must be a string or object");
+        }
+        // loadImage is defined elsewhere; a failure status is propagated unchanged.
+        auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains);
+        if (!tensorResult.ok())
+            return tensorResult.status();
+        imageHistory.push_back({index, tensorResult.value()});  // NOTE(review): index is the input-array position, not the chatHistory position - confirm consumers expect that.
+        return absl::OkStatus();
+    }
+
+    // Build a chat/completions tool_calls[] array into outArr using the given allocator.
+    static void buildToolCallsArray(const std::vector<const rapidjson::Value*>& toolCalls,
+        rapidjson::Value& outArr, rapidjson::Document::AllocatorType& alloc) {
+        for (const auto* fc : toolCalls) {
+            const FunctionCallFields fields = readFunctionCallFields(*fc);
+            rapidjson::Value funcObj(rapidjson::kObjectType);
+            funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc);
+            funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc);  // Kept as a string.
+            rapidjson::Value tcObj(rapidjson::kObjectType);
+            tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc);
+            tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc);
+            tcObj.AddMember("function", funcObj, alloc);
+            outArr.PushBack(tcObj, alloc);
+        }
+    }
+
+    ov::genai::ChatHistory& chatHistory;  // destination for emitted messages
+    ImageHistory& imageHistory;  // destination for loaded input_image entries
+    const std::optional<std::string>& allowedLocalMediaPath;  // forwarded to loadImage
+    const std::optional<std::vector<std::string>>& allowedMediaDomains;  // forwarded to loadImage
+    rapidjson::Document scratchDoc;  // its allocator backs temporary tool_calls values
+};
+
+#if (PYTHON_DISABLE == 0)
+// Sink that appends to a rapidjson messages array, used to feed the Python
+// Jinja chat template path. Image content items are silently dropped (the
+// Python path receives only text).
+class ProcessedJsonSink {
+public:
+    ProcessedJsonSink(rapidjson::Value& messagesArray, rapidjson::Document::AllocatorType& alloc) :
+        messagesArray(messagesArray),
+        alloc(alloc) {}  // Both are held by reference and must outlive the sink.
+    // Text-only extraction through extractTextContent(); always OK, absent content yields "".
+    absl::Status extractContent(const rapidjson::Value::ConstObject& itemObj,
+        rapidjson::SizeType /*index*/, std::string& outText) {
+        auto contentIt = itemObj.FindMember("content");
+        outText = (contentIt != itemObj.MemberEnd()) ? extractTextContent(contentIt->value) : "";
+        return absl::OkStatus();
+    }
+    // Appends a "tool" message with content = output; tool_call_id is added only for a non-empty callId.
+    void emitToolMessage(const std::string& callId, const std::string& output) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("tool", alloc), alloc);
+        if (!callId.empty())
+            msgObj.AddMember("tool_call_id", rapidjson::Value(callId.c_str(), alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(output.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+    // Appends a message with role and content; reasoning_content is added only when reasoning is non-empty.
+    void emitMessage(const std::string& role, const std::string& contentText, const std::string& reasoning) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value(role.c_str(), alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc);
+        if (!reasoning.empty())
+            msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    // Emit an assistant turn that carries only reasoning_content (no content,
+    // no tool_calls). See ChatHistorySink::emitStandaloneReasoning for rationale.
+    void emitStandaloneReasoning(const std::string& reasoning) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc);
+        msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+    // Appends an assistant message carrying contentText plus a chat/completions tool_calls array.
+    void emitAssistantWithToolCalls(const std::string& contentText, const std::string& reasoning,
+        const std::vector<const rapidjson::Value*>& toolCalls) {
+        rapidjson::Value msgObj(rapidjson::kObjectType);
+        msgObj.AddMember("role", rapidjson::Value("assistant", alloc), alloc);
+        msgObj.AddMember("content", rapidjson::Value(contentText.c_str(), alloc), alloc);  // Added even when empty.
+        if (!reasoning.empty())
+            msgObj.AddMember("reasoning_content", rapidjson::Value(reasoning.c_str(), alloc), alloc);
+        rapidjson::Value toolCallsArray(rapidjson::kArrayType);
+        for (const auto* fc : toolCalls) {  // Same entry shape as ChatHistorySink::buildToolCallsArray.
+            const FunctionCallFields fields = readFunctionCallFields(*fc);
+            rapidjson::Value funcObj(rapidjson::kObjectType);
+            funcObj.AddMember("name", rapidjson::Value(fields.name.c_str(), alloc), alloc);
+            funcObj.AddMember("arguments", rapidjson::Value(fields.arguments.c_str(), alloc), alloc);  // Kept as a string.
+            rapidjson::Value tcObj(rapidjson::kObjectType);
+            tcObj.AddMember("id", rapidjson::Value(fields.id.c_str(), alloc), alloc);
+            tcObj.AddMember("type", rapidjson::Value("function", alloc), alloc);
+            tcObj.AddMember("function", funcObj, alloc);
+            toolCallsArray.PushBack(tcObj, alloc);
+        }
+        msgObj.AddMember("tool_calls", toolCallsArray, alloc);
+        messagesArray.PushBack(msgObj, alloc);
+    }
+
+    absl::Status onMissingRole(const rapidjson::Value::ConstObject&) {
+        // Silently skip unknown items without a role in the processed JSON path.
+        return absl::OkStatus();
+    }
+
+private:
+    rapidjson::Value& messagesArray;  // output array that receives the message objects
+    rapidjson::Document::AllocatorType& alloc;  // allocator used for every value added to messagesArray
+};
+#endif  // PYTHON_DISABLE == 0
+
 // --- Request parsing ---
 
 absl::Status OpenAIResponsesHandler::parseRequest(std::optional<uint32_t> maxTokensLimit, uint32_t bestOfLimit, std::optional<uint32_t> maxModelLength,
@@ -87,87 +618,12 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional<std::string> allow
         if (inputIt->value.GetArray().Size() == 0) {
             return absl::InvalidArgumentError("Messages array cannot be empty");
         }
-
-        for (size_t i = 0; i < inputIt->value.GetArray().Size(); ++i) {
-            auto& item = inputIt->value.GetArray()[i];
-            if (!item.IsObject()) {
-                return absl::InvalidArgumentError("input array items must be objects");
-            }
-
-            auto itemObj = item.GetObject();
-            auto roleIt = itemObj.FindMember("role");
-            if (roleIt == itemObj.MemberEnd() || !roleIt->value.IsString()) {
-                return absl::InvalidArgumentError("input item role is missing or invalid");
-            }
-
-            request.chatHistory.push_back({});
-            request.chatHistory.last()["role"] = roleIt->value.GetString();
-
-            auto contentIt = itemObj.FindMember("content");
-            if (contentIt == itemObj.MemberEnd()) {
-                return absl::InvalidArgumentError("input item content is missing");
-            }
-
-            if (contentIt->value.IsString()) {
-                request.chatHistory.last()["content"] = contentIt->value.GetString();
-                continue;
-            }
-
-            if (!contentIt->value.IsArray()) {
-                return absl::InvalidArgumentError("input item content must be a string or array");
-            }
-            if (contentIt->value.GetArray().Size() == 0) {
-                return absl::InvalidArgumentError("Invalid message structure - content array is empty");
-            }
-
-            std::string contentText = "";
-            for (auto& contentItem : contentIt->value.GetArray()) {
-                if (!contentItem.IsObject()) {
-                    return absl::InvalidArgumentError("input content items must be objects");
-                }
-                auto contentObj = contentItem.GetObject();
-                auto typeIt = contentObj.FindMember("type");
-                if (typeIt == contentObj.MemberEnd() || !typeIt->value.IsString()) {
-                    return absl::InvalidArgumentError("input content item type is missing or invalid");
-                }
-
-                const std::string type = typeIt->value.GetString();
-                if (type == "input_text") {
-                    auto textIt = contentObj.FindMember("text");
-                    if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) {
-                        return absl::InvalidArgumentError("input_text requires a valid text field");
-                    }
-                    contentText = textIt->value.GetString();
-                } else if (type == "input_image") {
-                    std::string imageUrl;
-                    auto imageUrlIt = contentObj.FindMember("image_url");
-                    if (imageUrlIt == contentObj.MemberEnd()) {
-                        return absl::InvalidArgumentError("input_image requires image_url field");
-                    }
-                    if (imageUrlIt->value.IsString()) {
-                        imageUrl = imageUrlIt->value.GetString();
-                    } else if (imageUrlIt->value.IsObject()) {
-                        auto imageUrlObj = imageUrlIt->value.GetObject();
-                        auto urlIt = imageUrlObj.FindMember("url");
-                        if (urlIt == imageUrlObj.MemberEnd() || !urlIt->value.IsString()) {
-                            return absl::InvalidArgumentError("input_image.image_url.url is missing or invalid");
-                        }
-                        imageUrl = urlIt->value.GetString();
-                    } else {
-                        return absl::InvalidArgumentError("input_image.image_url must be a string or object");
-                    }
-
-                    auto tensorResult = loadImage(imageUrl, allowedLocalMediaPath, allowedMediaDomains);
-                    if (!tensorResult.ok()) {
-                        return tensorResult.status();
-                    }
-                    request.imageHistory.push_back({i, tensorResult.value()});
-                } else {
-                    return absl::InvalidArgumentError("Unsupported content type. Supported types are input_text and input_image.");
-                }
-            }
-
-            request.chatHistory.last()["content"] = contentText;
+        ChatHistorySink sink(request.chatHistory, request.imageHistory,
+            allowedLocalMediaPath, allowedMediaDomains);
+        ResponsesInputBuilder<ChatHistorySink> builder(sink);
+        auto status = builder.build(inputIt->value);
+        if (!status.ok()) {
+            return status;
         }
     } else {
         return absl::InvalidArgumentError("input is not a string or array");
@@ -189,6 +645,14 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional<uint32_t>
         return absl::InvalidArgumentError("input missing in request");
     }
 
+    // Convert tools array (Responses-flat -> chat/completions-nested) once, in place,
+    // before any consumer reads it. parseInput, parseToolsToJsonContainer and the
+    // processedJson builder all rely on the nested shape.
+    auto toolsIt = doc.FindMember("tools");
+    if (toolsIt != doc.MemberEnd() && toolsIt->value.IsArray()) {
+        convertResponsesToolsInPlace(toolsIt->value, doc.GetAllocator());
+    }
+
     auto messagesStatus = parseInput(allowedLocalMediaPath, allowedMediaDomains);
     if (!messagesStatus.ok()) {
         return messagesStatus;
@@ -228,33 +692,41 @@ absl::Status OpenAIResponsesHandler::parseResponsesPart(std::optional<uint32_t>
     }
 
 #if (PYTHON_DISABLE == 0)
-    // Build processedJson with "messages" array from chatHistory so that
-    // the Python chat template path (which reads request_json["messages"])
-    // can consume Responses API input without a separate code path.
+    // Build processedJson with a "messages" array in chat/completions format so that
+    // the Python Jinja template path can consume Responses API input without a separate code path.
+    // Handles reasoning, function_call (merged into assistant tool_calls), and
+    // function_call_output (converted to role:tool messages).
     {
         Document processedDoc;
         processedDoc.SetObject();
         auto& alloc = processedDoc.GetAllocator();
 
         Value messagesArray(kArrayType);
-        for (size_t i = 0; i < request.chatHistory.size(); ++i) {
-            Value msgObj(kObjectType);
-            auto role = request.chatHistory[i]["role"].as_string();
-            if (role.has_value()) {
-                msgObj.AddMember("role", Value(role.value().c_str(), alloc), alloc);
-            }
-            auto content = request.chatHistory[i]["content"].as_string();
-            if (content.has_value()) {
-                msgObj.AddMember("content", Value(content.value().c_str(), alloc), alloc);
+
+        auto inputArrIt = doc.FindMember("input");
+        if (inputArrIt != doc.MemberEnd() && inputArrIt->value.IsArray()) {
+            ProcessedJsonSink sink(messagesArray, alloc);
+            ResponsesInputBuilder<ProcessedJsonSink> builder(sink);
+            auto processedStatus = builder.build(inputArrIt->value);
+            if (!processedStatus.ok()) {
+                return processedStatus;
             }
+        } else if (inputArrIt != doc.MemberEnd() && inputArrIt->value.IsString()) {
+            // String input: emit a single user message so the Python Jinja path
+            // sees the same content the C++ chatHistory path does.
+            Value msgObj(kObjectType);
+            msgObj.AddMember("role", Value("user", alloc), alloc);
+            msgObj.AddMember("content", Value(inputArrIt->value.GetString(), alloc), alloc);
             messagesArray.PushBack(msgObj, alloc);
         }
+
         processedDoc.AddMember("messages", messagesArray, alloc);
 
-        // Copy tools from original doc if present
-        auto toolsIt = doc.FindMember("tools");
-        if (toolsIt != doc.MemberEnd() && !toolsIt->value.IsNull()) {
-            Value toolsCopy(toolsIt->value, alloc);
+        // Tools were already normalised to chat/completions nested format by
+        // convertResponsesToolsInPlace earlier in parseResponsesPart — just copy verbatim.
+        auto processedToolsIt = doc.FindMember("tools");
+        if (processedToolsIt != doc.MemberEnd() && !processedToolsIt->value.IsNull()) {
+            Value toolsCopy(processedToolsIt->value, alloc);
             processedDoc.AddMember("tools", toolsCopy, alloc);
         }
 
diff --git a/src/llm/py_jinja_template_processor.cpp b/src/llm/py_jinja_template_processor.cpp
index 432aa8e722..188a3c0daa 100644
--- a/src/llm/py_jinja_template_processor.cpp
+++ b/src/llm/py_jinja_template_processor.cpp
@@ -40,7 +40,6 @@ bool PyJinjaTemplateProcessor::applyChatTemplate(PyJinjaTemplateProcessor& templ
         output = "Error: Chat template not loaded correctly, so it cannot be applied";
         return false;
     }
-
     py::gil_scoped_acquire acquire;
     try {
         auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = templateProcessor.chatTemplate->getObject(),
diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp
index 5a0955b4f5..d35db8d3b2 100644
--- a/src/llm/servable.cpp
+++ b/src/llm/servable.cpp
@@ -22,6 +22,7 @@
 #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246 6313)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include "absl/strings/str_cat.h"
 #include "mediapipe/framework/calculator_graph.h"
 #include <rapidjson/document.h>
 #include <rapidjson/prettywriter.h>
@@ -209,7 +210,7 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptr<GenAiServableExecution
             inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs);
         } catch (const std::exception& e) {
             SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to apply chat template: {}", e.what());
-            return absl::Status(absl::StatusCode::kInvalidArgument, "Failed to apply chat template. The model either does not have chat template or has an invalid one.");
+            return absl::Status(absl::StatusCode::kInvalidArgument, absl::StrCat("Failed to apply chat template: ", e.what()));
         }
 #endif
         if (inputText.size() == 0) {
diff --git a/src/llm/visual_language_model/continuous_batching/servable.cpp b/src/llm/visual_language_model/continuous_batching/servable.cpp
index 0ef06d22df..defa1af281 100644
--- a/src/llm/visual_language_model/continuous_batching/servable.cpp
+++ b/src/llm/visual_language_model/continuous_batching/servable.cpp
@@ -105,6 +105,12 @@ absl::Status VisualLanguageModelServable::prepareInputs(std::shared_ptr<GenAiSer
             return chatTemplateKwargsStatus.status();
         }
         const auto& chatTemplateKwargs = chatTemplateKwargsStatus.value();
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatHistory messages: {}", chatHistory.get_messages().to_json_string());
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatHistory.get_tools(): {}", chatHistory.get_tools().to_json_string());
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatHistory.get_extra_context(): {}", chatHistory.get_extra_context().to_json_string());
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM tools: {}", tools.has_value() ? tools->to_json_string() : std::string("<none>"));
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM chatTemplateKwargs: {}", chatTemplateKwargs.has_value() ? chatTemplateKwargs->to_json_string() : std::string("<none>"));
+        SPDLOG_LOGGER_TRACE(llm_calculator_logger, "VLM addGenerationPrompt: {}", addGenerationPrompt);
         vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs);
     } else {
         return absl::InvalidArgumentError("Unsupported endpoint");
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index c3a40cba3c..2a0ad01005 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -3834,3 +3834,546 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesRegularMessageHasNoToolFields)
     EXPECT_FALSE(history[1].contains("tool_call_id"));
     EXPECT_FALSE(history[1].contains("name"));
 }
+
+namespace {
+// Parses `json` into `doc` and builds a Responses-endpoint handler over it. A JSON
+// parse error is recorded as a non-fatal test failure; the handler is still returned.
+std::shared_ptr<ovms::OpenAIResponsesHandler> makeResponsesHandler(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) {
+    doc.Parse(json.c_str());
+    EXPECT_FALSE(doc.HasParseError()) << json;
+    return std::make_shared<ovms::OpenAIResponsesHandler>(
+        doc, ovms::Endpoint::RESPONSES, std::chrono::system_clock::now(), tokenizer);
+}
+
+// Calls parseRequest with the limits shared by both helpers below: no max-tokens
+// limit, bestOfLimit of 0 and no max-model-length.
+absl::Status parseRequestWithoutLimits(ovms::OpenAIResponsesHandler& handler) {
+    std::optional<uint32_t> maxTokensLimit;
+    uint32_t bestOfLimit = 0;
+    std::optional<uint32_t> maxModelLength;
+    return handler.parseRequest(maxTokensLimit, bestOfLimit, maxModelLength);
+}
+
+// Positive-path helper: expects parseRequest to succeed (non-fatal check) and
+// returns the handler so the caller can inspect the parsed state.
+std::shared_ptr<ovms::OpenAIResponsesHandler> parseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) {
+    auto apiHandler = makeResponsesHandler(doc, tokenizer, json);
+    EXPECT_EQ(parseRequestWithoutLimits(*apiHandler), absl::OkStatus()) << json;
+    return apiHandler;
+}
+
+// Variant for negative tests: returns the parseRequest status without asserting
+// it is OK, so the caller can verify the failure mode.
+absl::Status tryParseResponses(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer, const std::string& json) {
+    return parseRequestWithoutLimits(*makeResponsesHandler(doc, tokenizer, json));
+}
+
+// Asserts that parsing `responsesRequest` (a Responses API body) yields the same
+// messages — and tools, when `expectedChatCompletions` has a "tools" member — as
+// the chat/completions REQUEST body given in `expectedChatCompletions`, which must
+// be a JSON object with a "messages" member.
+//
+// Checked outputs: getChatHistory() messages and parseToolsToJsonContainer() and,
+// when PYTHON_DISABLE == 0, the "messages"/"tools" members of getProcessedJson().
+// Comparison is structural via rapidjson Value::operator== (member order inside
+// objects is irrelevant).
+void expectResponsesEquivalentToChatCompletions(rapidjson::Document& doc, ov::genai::Tokenizer& tokenizer,
+    const std::string& responsesRequest, const std::string& expectedChatCompletions) {
+    auto handler = parseResponses(doc, tokenizer, responsesRequest);
+
+    rapidjson::Document expectedDoc;
+    expectedDoc.Parse(expectedChatCompletions.c_str());
+    ASSERT_FALSE(expectedDoc.HasParseError())
+        << "could not parse expected chat/completions: " << expectedChatCompletions;
+    ASSERT_TRUE(expectedDoc.IsObject() && expectedDoc.HasMember("messages"))
+        << "expected chat/completions JSON must be an object containing a 'messages' array";
+
+    // --- ChatHistory path (C++ / non-Python build) ---
+    const std::string actualHistoryJson = handler->getChatHistory().get_messages().to_json_string();
+    rapidjson::Document actualHistoryDoc;
+    actualHistoryDoc.Parse(actualHistoryJson.c_str());
+    ASSERT_FALSE(actualHistoryDoc.HasParseError()) << actualHistoryJson;
+    EXPECT_TRUE(actualHistoryDoc == expectedDoc["messages"])
+        << "ChatHistory messages mismatch.\n  actual:   " << actualHistoryJson
+        << "\n  expected: " << expectedChatCompletions;
+    // Tools on the C++ path are exposed via parseToolsToJsonContainer() — that
+    // is exactly what the non-Python servable forwards to GenAI. Compare its
+    // serialised JSON against the expected chat/completions tools.
+    if (expectedDoc.HasMember("tools")) {
+        auto toolsStatus = handler->parseToolsToJsonContainer();
+        ASSERT_TRUE(toolsStatus.ok()) << "parseToolsToJsonContainer failed: " << toolsStatus.status().message();
+        ASSERT_TRUE(toolsStatus.value().has_value()) << "parseToolsToJsonContainer returned nullopt";
+        const std::string actualToolsJson = toolsStatus.value()->to_json_string();
+        rapidjson::Document actualToolsDoc;
+        actualToolsDoc.Parse(actualToolsJson.c_str());
+        ASSERT_FALSE(actualToolsDoc.HasParseError()) << actualToolsJson;
+        EXPECT_TRUE(actualToolsDoc == expectedDoc["tools"])
+            << "parseToolsToJsonContainer mismatch.\n  actual:   " << actualToolsJson
+            << "\n  expected: " << expectedChatCompletions;
+    }
+
+#if (PYTHON_DISABLE == 0)
+    // --- processedJson path (Python Jinja chat template) ---
+    const std::string actualProcessedJson = handler->getProcessedJson();
+    rapidjson::Document actualProcessedDoc;
+    actualProcessedDoc.Parse(actualProcessedJson.c_str());
+    ASSERT_FALSE(actualProcessedDoc.HasParseError()) << actualProcessedJson;
+    ASSERT_TRUE(actualProcessedDoc.IsObject() && actualProcessedDoc.HasMember("messages")) << actualProcessedJson;
+    EXPECT_TRUE(actualProcessedDoc["messages"] == expectedDoc["messages"])
+        << "processedJson messages mismatch.\n  actual:   " << actualProcessedJson
+        << "\n  expected: " << expectedChatCompletions;
+    if (expectedDoc.HasMember("tools")) {
+        ASSERT_TRUE(actualProcessedDoc.HasMember("tools")) << actualProcessedJson;
+        EXPECT_TRUE(actualProcessedDoc["tools"] == expectedDoc["tools"])
+            << "processedJson tools mismatch.\n  actual:   " << actualProcessedJson
+            << "\n  expected: " << expectedChatCompletions;
+    }
+#endif
+}
+}  // namespace
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolsNormaliseToChatCompletions) {
+    // A flat Responses tool ({type, name, description, parameters}) is expected to
+    // come out in the nested chat/completions shape ({type, function:{...}}).
+    // The input is an array rather than a plain string so that both the
+    // ChatHistory and processedJson sinks populate the messages array.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}],
+            "tools": [{
+                "type": "function",
+                "name": "get_weather",
+                "description": "Get current weather",
+                "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+            }]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [{"role":"user","content":"hello"}],
+            "tools": [{
+                "type":"function",
+                "function":{
+                    "name":"get_weather",
+                    "description":"Get current weather",
+                    "parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+                }
+            }]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAlreadyNestedToolsAreLeftIntact) {
+    // Tools that already use the nested chat/completions shape are expected to
+    // stay as they are, i.e. not be wrapped a second time. The check is made
+    // directly on the request document (mutated in place during parsing) since
+    // the equivalence helper cannot see an unwrap+rewrap that nets to the same shape.
+    auto handler = parseResponses(doc, *tokenizer, R"({
+        "model": "llama",
+        "input": "hello",
+        "tools": [{
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "parameters": {"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}
+            }
+        }]
+    })");
+    EXPECT_TRUE(handler->areToolsAvailable());
+    const auto& firstTool = doc["tools"][0];
+    ASSERT_TRUE(firstTool.HasMember("function"));
+    EXPECT_STREQ(firstTool["function"]["name"].GetString(), "get_weather");
+    EXPECT_FALSE(firstTool["function"].HasMember("function"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningBufferedOntoNextAssistantMessage) {
+    // user -> reasoning -> assistant: the reasoning summary text is expected to
+    // be attached to the following assistant message as reasoning_content, with
+    // no separate message emitted for the reasoning item itself.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"hi"}]},
+                {"type": "reasoning", "summary": [{"type":"summary_text","text":"think first"}]},
+                {"role": "assistant", "content": [{"type":"output_text","text":"hello"}]}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"hi"},
+                {"role":"assistant","content":"hello","reasoning_content":"think first"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesStandaloneReasoningWithoutAssistantIsEmitted) {
+    // When the item after a reasoning entry is neither an assistant message nor
+    // a function_call (here: another user turn), the expected output contains a
+    // standalone assistant message holding ONLY reasoning_content — no `content`
+    // and no `tool_calls` — so the reasoning text is carried across turns.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"hi"}]},
+                {"type": "reasoning", "summary": [{"type":"summary_text","text":"orphan"}]},
+                {"role": "user", "content": [{"type":"input_text","text":"again"}]}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"hi"},
+                {"role":"assistant","reasoning_content":"orphan"},
+                {"role":"user","content":"again"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingStandaloneReasoningIsEmitted) {
+    // The input ends on a reasoning item: the expected output still carries it, as
+    // a final assistant message with only reasoning_content, instead of dropping it.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"hi"}]},
+                {"type": "reasoning", "summary": [{"type":"summary_text","text":"trailing"}]}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"hi"},
+                {"role":"assistant","reasoning_content":"trailing"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallMergedIntoAssistantToolCalls) {
+    // A function_call followed by its function_call_output is expected to map to
+    //   user -> assistant(content="", tool_calls=[...]) -> tool(tool_call_id=...)
+    // The synthesised assistant message has to carry tool_calls; without it
+    // gpt-oss raises "Message has tool role, but there was no previous
+    // assistant message with a tool call!".
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+                {"type": "function_call_output", "call_id": "call_1",
+                 "output": "{\"temp_c\":17}"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather?"},
+                {"role":"assistant","content":"","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}
+                ]},
+                {"role":"tool","tool_call_id":"call_1","content":"{\"temp_c\":17}"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningPlusFunctionCallRidesOnAssistant) {
+    // A reasoning item directly followed by a function_call: both are expected on
+    // the same synthesised assistant message (reasoning_content plus tool_calls).
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+                {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+                {"type": "function_call_output", "call_id": "call_1", "output": "ok"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather?"},
+                {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}
+                ]},
+                {"role":"tool","tool_call_id":"call_1","content":"ok"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultipleFunctionCallsMergedInOneAssistant) {
+    // Consecutive function_call items are expected to collapse into ONE assistant
+    // message whose tool_calls has two entries, rather than two assistant turns.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+                {"type": "function_call", "id": "call_2", "call_id": "call_2",
+                 "name": "get_weather", "arguments": "{\"city\":\"London\"}"},
+                {"type": "function_call_output", "call_id": "call_1", "output": "15C"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather?"},
+                {"role":"assistant","content":"","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}},
+                    {"id":"call_2","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"London\"}"}}
+                ]},
+                {"role":"tool","tool_call_id":"call_1","content":"15C"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesTrailingFunctionCallFlushedAsAssistant) {
+    // The input ends on a function_call that has no function_call_output. The
+    // expected output still contains the call, flushed as a final assistant
+    // message with tool_calls, instead of dropping it.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather?"},
+                {"role":"assistant","content":"","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}
+                ]}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesAssistantMessageAbsorbsBufferedFunctionCall) {
+    // An assistant-role item arriving right after a function_call is expected to
+    // be merged with it: a single assistant message carrying both the text
+    // content and the tool_calls, not two separate assistant turns.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+                {"role": "assistant", "content": "calling tool"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather?"},
+                {"role":"assistant","content":"calling tool","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}
+                ]}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesReasoningContentArrayShapeAccepted) {
+    // Newer reasoning shape: the text arrives as content[].text rather than
+    // summary[].text. It is expected to land in reasoning_content all the same.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"hi"}]},
+                {"type": "reasoning", "content": [{"type":"reasoning_text","text":"new shape"}]},
+                {"role": "assistant", "content": "ok"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"hi"},
+                {"role":"assistant","content":"ok","reasoning_content":"new shape"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFunctionCallOutputWithoutCallIdAccepted) {
+    // A function_call_output lacking call_id is expected to parse successfully and
+    // yield a tool message that simply has no tool_call_id field (not an empty one).
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather?"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{}"},
+                {"type": "function_call_output", "output": "ok"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather?"},
+                {"role":"assistant","content":"","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{}"}}
+                ]},
+                {"role":"tool","content":"ok"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+// --- Tools normalisation edge cases ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesFlatToolWithoutParametersIsNormalised) {
+    // A flat Responses tool without `parameters` (e.g. a zero-argument function)
+    // is expected to be nested under `function` with no `parameters` key added.
+    // The input is an array rather than a plain string so that both the
+    // ChatHistory and processedJson sinks populate the messages array.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [{"role":"user","content":[{"type":"input_text","text":"hello"}]}],
+            "tools": [{"type": "function", "name": "ping", "description": "no args"}]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [{"role":"user","content":"hello"}],
+            "tools": [{"type":"function","function":{"name":"ping","description":"no args"}}]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+// --- Error paths ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputItemMissingRoleIsRejected) {
+    // The second input item has neither a recognised `type` nor a `role`, so it
+    // cannot be classified. Parsing is expected to fail with an
+    // InvalidArgument status whose message mentions "role", rather than the
+    // item being silently dropped.
+    const absl::Status parseStatus = tryParseResponses(doc, *tokenizer, R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"type":"input_text","text":"hi"}]},
+            {"content": [{"type":"output_text","text":"orphaned"}]}
+        ]
+    })");
+    EXPECT_EQ(parseStatus.code(), absl::StatusCode::kInvalidArgument);
+    EXPECT_THAT(std::string(parseStatus.message()), ::testing::HasSubstr("role"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentNotStringOrArrayIsRejected) {
+    // A numeric `content` is neither a string nor an array and is expected to be rejected.
+    const absl::Status parseStatus = tryParseResponses(doc, *tokenizer, R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": 42}
+        ]
+    })");
+    EXPECT_EQ(parseStatus.code(), absl::StatusCode::kInvalidArgument);
+    EXPECT_THAT(std::string(parseStatus.message()), ::testing::HasSubstr("content"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputContentItemMissingTypeIsRejected) {
+    // A content part that has no `type` field is expected to be rejected.
+    const absl::Status parseStatus = tryParseResponses(doc, *tokenizer, R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"text":"no type field"}]}
+        ]
+    })");
+    EXPECT_EQ(parseStatus.code(), absl::StatusCode::kInvalidArgument);
+    EXPECT_THAT(std::string(parseStatus.message()), ::testing::HasSubstr("type"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputTextMissingTextFieldIsRejected) {
+    // An input_text content part that has no `text` field is expected to be rejected.
+    const absl::Status parseStatus = tryParseResponses(doc, *tokenizer, R"({
+        "model": "llama",
+        "input": [
+            {"role": "user", "content": [{"type":"input_text"}]}
+        ]
+    })");
+    EXPECT_EQ(parseStatus.code(), absl::StatusCode::kInvalidArgument);
+    EXPECT_THAT(std::string(parseStatus.message()), ::testing::HasSubstr("text"));
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesInputArrayItemNotObjectIsRejected) {
+    // A non-object element (a bare string) in the input array is expected to be rejected.
+    const absl::Status parseStatus = tryParseResponses(doc, *tokenizer, R"({
+        "model": "llama",
+        "input": ["not an object"]
+    })");
+    EXPECT_EQ(parseStatus.code(), absl::StatusCode::kInvalidArgument);
+    EXPECT_THAT(std::string(parseStatus.message()), ::testing::HasSubstr("must be objects"));
+}
+
+// --- Multi-turn composite ---
+
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesMultiTurnReasoningFunctionCallAndFollowupAssistant) {
+    // Full multi-turn sequence: user, reasoning + function_call (one synthesised
+    // assistant message), function_call_output, then reasoning + final assistant
+    // answer. Checks that buffered state does not leak from one turn into the next.
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": [{"type":"input_text","text":"weather in Paris?"}]},
+                {"type": "reasoning", "summary": [{"type":"summary_text","text":"need to call get_weather"}]},
+                {"type": "function_call", "id": "call_1", "call_id": "call_1",
+                 "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+                {"type": "function_call_output", "call_id": "call_1", "output": "sunny, 22C"},
+                {"type": "reasoning", "summary": [{"type":"summary_text","text":"format the answer"}]},
+                {"role": "assistant", "content": [{"type":"output_text","text":"It is sunny and 22C in Paris."}]}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"weather in Paris?"},
+                {"role":"assistant","content":"","reasoning_content":"need to call get_weather","tool_calls":[
+                    {"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}
+                ]},
+                {"role":"tool","tool_call_id":"call_1","content":"sunny, 22C"},
+                {"role":"assistant","content":"It is sunny and 22C in Paris.","reasoning_content":"format the answer"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}
+
+// Real BFCL replay shape: between every function_call and its function_call_output
+// the OpenAI SDK echoes back the empty assistant message that OVMS returned in
+// `output[]`. Across several turns the input therefore looks like:
+//   user -> fc1 -> {id:msg-0,role:assistant,type:message,content:[{type:output_text,text:""}]}
+//        -> fco1 -> fc2 -> msg-0 -> fco2 -> ...
+// For the 4th request of BFCL multi_turn_base_0, /responses was observed to report
+// 128 MORE input_tokens than the equivalent /chat/completions call even though
+// the message lists are structurally equivalent. This test reproduces that exact
+// shape so the resulting messages can be compared head-to-head.
+TEST_F(HttpOpenAIHandlerParsingTest, ResponsesBfclReplayShapeWithEchoedAssistantMessages) {
+    const std::string responsesRequest = R"({
+            "model": "llama",
+            "input": [
+                {"role": "user", "content": "do work"},
+                {"type": "function_call", "id": "fc1", "call_id": "fc1",
+                 "name": "mkdir", "arguments": "{\"dir_name\":\"temp\"}",
+                 "namespace": null, "status": "completed"},
+                {"id": "msg-0", "type": "message", "role": "assistant", "status": "completed",
+                 "content": [{"type": "output_text", "text": "", "annotations": [], "logprobs": null}],
+                 "phase": null},
+                {"type": "function_call_output", "call_id": "fc1", "output": "None"},
+                {"type": "function_call", "id": "fc2", "call_id": "fc2",
+                 "name": "mv", "arguments": "{\"source\":\"a\",\"destination\":\"temp\"}",
+                 "namespace": null, "status": "completed"},
+                {"id": "msg-0", "type": "message", "role": "assistant", "status": "completed",
+                 "content": [{"type": "output_text", "text": "", "annotations": [], "logprobs": null}],
+                 "phase": null},
+                {"type": "function_call_output", "call_id": "fc2", "output": "{\"error\":\"no\"}"}
+            ]
+        })";
+    const std::string expectedChatCompletions = R"({
+            "messages": [
+                {"role":"user","content":"do work"},
+                {"role":"assistant","content":"","tool_calls":[
+                    {"id":"fc1","type":"function","function":{"name":"mkdir","arguments":"{\"dir_name\":\"temp\"}"}}
+                ]},
+                {"role":"tool","tool_call_id":"fc1","content":"None"},
+                {"role":"assistant","content":"","tool_calls":[
+                    {"id":"fc2","type":"function","function":{"name":"mv","arguments":"{\"source\":\"a\",\"destination\":\"temp\"}"}}
+                ]},
+                {"role":"tool","tool_call_id":"fc2","content":"{\"error\":\"no\"}"}
+            ]
+        })";
+    expectResponsesEquivalentToChatCompletions(doc, *tokenizer, responsesRequest, expectedChatCompletions);
+}