Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common_settings.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ WINDOWS_COMMON_STATIC_LIBS_COPTS = [
"/wd6240",
"/wd6326",
"/wd6385",
"/wd6386",

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this warning?

"/wd6294",
"/guard:cf",
"/utf-8",
Expand Down
15 changes: 15 additions & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2510,6 +2510,7 @@ cc_test(
"//src/llm:genai_servables",
"//src/llm:output_parsers",
":test_llm_output_parser_tests",
":test_llm_input_processing_tests",
"//src/test/mediapipe/calculators:mediapipe_test_calculators",
"//src/test/mediapipe/calculators:dependency_free_http_test_calculators",
"@mediapipe//mediapipe/calculators/ovms:ovms_calculator",
Expand Down Expand Up @@ -3035,6 +3036,20 @@ cc_library(
local_defines = COMMON_LOCAL_DEFINES,
)

# Library that collects every *_test.cpp found under test/llm/input_processing/.
# alwayslink = True tells Bazel to link all object files of this library into
# dependents even when none of their symbols is referenced directly.
# NOTE(review): presumably required so that gtest's self-registering test cases
# are not discarded by the linker - confirm.
cc_library(
name = "test_llm_input_processing_tests",
linkstatic = 1,
alwayslink = True,
srcs = glob(["test/llm/input_processing/*_test.cpp"]),
deps = [
"@com_google_googletest//:gtest",
":test_platform_utils",
"//src/llm:io_processing_input_processors",
],
copts = COPTS_TESTS,
local_defines = COMMON_LOCAL_DEFINES,
)

ovms_cc_library(
name = "capimodule",
hdrs = ["capi_frontend/capimodule.hpp"],
Expand Down
79 changes: 77 additions & 2 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ovms_cc_library(
"@stb//:image",
":openai_request",
":output_parsers",
":generation_config_builders",
"//third_party:genai",],
visibility = ["//visibility:public"],
)
Expand All @@ -81,6 +82,7 @@ ovms_cc_library(
":openai_api_handler",
":openai_request",
":output_parsers",
":io_processing_input_processor",
"//third_party:genai",],
visibility = ["//visibility:public"],
)
Expand All @@ -104,13 +106,85 @@ ovms_cc_library(
visibility = ["//visibility:public"],
)

# Header-only target (srcs is empty) that exposes the input request, the input
# processing config and the base input processor headers.
ovms_cc_library(
name = "io_processing_input_request",
hdrs = ["io_processing/input_request.hpp",
"io_processing/input_processing_config.hpp",
"io_processing/base_input_processor.hpp"],
srcs = [],
deps = [
"@mediapipe//mediapipe/framework:calculator_framework",
"//third_party:genai",
],
visibility = ["//visibility:public"],
)

# Header-only target (srcs is empty) for io_processing/input_processor_context.hpp.
# The Python-related deps (:py_jinja_template_processor and PYBIND_DEPS) are added
# only when the //:not_disable_python condition is selected; under
# //:disable_python nothing extra is added.
ovms_cc_library(
name = "io_processing_input_processor_context",
hdrs = ["io_processing/input_processor_context.hpp"],
srcs = [],
deps = [
":io_processing_input_request",
"//third_party:genai",
] + select({
"//:disable_python": [],
"//:not_disable_python": [":py_jinja_template_processor"] + PYBIND_DEPS,
}),
visibility = ["//visibility:public"],
)

# Individual input processors: image decoding, chat template, text content
# normalization and tokenization, plus the raw prompt extractor.
# raw_prompt_extractor.hpp is listed in hdrs only; there is no matching .cpp in srcs.
# The Python-related deps (:py_jinja_template_processor and PYBIND_DEPS) are added
# only when the //:not_disable_python condition is selected.
ovms_cc_library(
name = "io_processing_input_processors",
hdrs = ["io_processing/input_processors/image_decoding_processor.hpp",
"io_processing/input_processors/chat_template_processor.hpp",
"io_processing/input_processors/raw_prompt_extractor.hpp",
"io_processing/input_processors/text_content_normalization_processor.hpp",
"io_processing/input_processors/tokenization_processor.hpp"],
srcs = ["io_processing/input_processors/image_decoding_processor.cpp",
"io_processing/input_processors/chat_template_processor.cpp",
"io_processing/input_processors/text_content_normalization_processor.cpp",
"io_processing/input_processors/tokenization_processor.cpp"],
deps = [
"@mediapipe//mediapipe/framework:calculator_framework",
"//src:libovmsprofiler",
"//third_party:curl",
"//src:image_conversion",
"//src/filesystem:libovmsfilesystem",
"@stb//:image",
":io_processing_input_request",
":openai_api_handler",
"//third_party:genai",
"//src:libovmslogging",
] + select({
"//:disable_python": [],
"//:not_disable_python": [":py_jinja_template_processor"] + PYBIND_DEPS,
}),
visibility = ["//visibility:public"],
)

# Target for io_processing/input_processor.hpp/.cpp. It depends on the request
# types, the individual processors and the processor context targets defined
# in this package.
ovms_cc_library(
name = "io_processing_input_processor",
hdrs = ["io_processing/input_processor.hpp"],
srcs = ["io_processing/input_processor.cpp"],
deps = [
"@mediapipe//mediapipe/framework:calculator_framework",
":io_processing_input_request",
":io_processing_input_processors",
":io_processing_input_processor_context",
"//src:libovms_config",
"//third_party:genai",
],
visibility = ["//visibility:public"],
)

ovms_cc_library(
name = "openai_request",
hdrs = ["apis/openai_request.hpp"],
srcs = [],
deps = ["//third_party:genai",
deps = ["//third_party:genai",
"//src/port:rapidjson_document",
":apis_tool_schema_wrapper",],
":apis_tool_schema_wrapper",
":io_processing_input_request",],
visibility = ["//visibility:public"],
)

Expand Down Expand Up @@ -344,6 +418,7 @@ ovms_cc_library(
":openai_completions_api_handler",
":openai_responses_handler",
":generation_config_builders",
":io_processing_input_processor_context",
"//src:httppayload",
"//src:libhttpclientconnection",
"//src:sse_utils",
Expand Down
59 changes: 40 additions & 19 deletions src/llm/apis/openai_api_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../../logging.hpp"
#include "../../profiler.hpp"
#include "../../filesystem/filesystem.hpp"
#include "../io_processing/generation_config_builder.hpp"
#pragma warning(push)
#pragma warning(disable : 6001 4324 6385 6386)
#include "absl/strings/escaping.h"
Expand Down Expand Up @@ -286,7 +287,7 @@ absl::Status OpenAIApiHandler::parseResponseFormat() {

// --- Shared parsing methods ---

absl::Status OpenAIApiHandler::ensureArgumentsInToolCalls(Value& messageObj, bool& jsonChanged) {
absl::Status OpenAIApiHandler::ensureArgumentsInToolCalls(Value& messageObj) {
auto& allocator = doc.GetAllocator();
auto toolCallsIt = messageObj.FindMember("tool_calls");
if (toolCallsIt != messageObj.MemberEnd() && toolCallsIt->value.IsArray()) {
Expand All @@ -307,7 +308,6 @@ absl::Status OpenAIApiHandler::ensureArgumentsInToolCalls(Value& messageObj, boo
rapidjson::Value argumentsValue;
argumentsValue.SetString("{}", allocator);
functionIt->value.GetObject().AddMember(argumentsKey, argumentsValue, allocator);
jsonChanged = true;
}
}
}
Expand Down Expand Up @@ -348,11 +348,9 @@ absl::Status OpenAIApiHandler::parseTools() {
return absl::InvalidArgumentError("tool_choice is not a valid JSON object or string");
}
}
bool jsonChanged = false;
if (toolChoice == "none") {
// remove tools from the request
doc.RemoveMember("tools");
jsonChanged = true;
}
auto it = doc.FindMember("tools");
if (it != doc.MemberEnd() && !it->value.IsNull()) {
Expand Down Expand Up @@ -405,7 +403,6 @@ absl::Status OpenAIApiHandler::parseTools() {
// If toolChoice is set to a specific function name, we keep only that tool
if (toolChoice != "auto" && toolChoice != "required" && toolChoice != functionName) {
it->value.Erase(&obj);
jsonChanged = true;
continue;
}

Expand All @@ -430,12 +427,6 @@ absl::Status OpenAIApiHandler::parseTools() {
}

request.toolChoice = toolChoice;
if (jsonChanged) {
StringBuffer buffer;
Writer<StringBuffer> writer(buffer);
doc.Accept(writer);
request.processedJson = buffer.GetString();
}
return absl::OkStatus();
}

Expand Down Expand Up @@ -492,18 +483,48 @@ const OpenAIRequest& OpenAIApiHandler::getRequest() const {
return request;
}

const std::string& OpenAIApiHandler::getProcessedJson() const {
return request.processedJson;
}

const ImageHistory& OpenAIApiHandler::getImageHistory() const {
return request.imageHistory;
}

// Returns a mutable reference to the chat history stored in the parsed request,
// so changes made by the caller are visible to this handler.
ov::genai::ChatHistory& OpenAIApiHandler::getChatHistory() {
return request.chatHistory;
}

// Builds an InputRequest from the request held by this handler.
//
// Runs the generation config pipeline on configBuilder (parse -> adjust ->
// validate structured output), stores the resulting config in the returned
// request, and then fills its input:
//   - COMPLETIONS: the prompt, or an empty string when no prompt is set;
//   - any other endpoint: a copy of the chat history with tools and chat
//     template kwargs attached when they are present.
//
// Returns the non-OK status of parseToolsToJsonContainer() or
// parseChatTemplateKwargsToJsonContainer() if either of them fails.
absl::StatusOr<InputRequest> OpenAIApiHandler::extractInputRequest(GenerationConfigBuilder& configBuilder) {
configBuilder.parseConfigFromRequest(request);
configBuilder.adjustConfigForDecodingMethod();
// A structured output validation failure is not fatal: it is logged at debug
// level and the structured output config is dropped before continuing.
try {
configBuilder.validateStructuredOutputConfig(tokenizer);
} catch (const std::exception& e) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool guided generation will not be applied due to JSON schema validation failure: {}", e.what());
configBuilder.unsetStructuredOutputConfig();
}
InputRequest req;
req.generationConfig = configBuilder.getConfig();
if (endpoint == Endpoint::COMPLETIONS) {
// A missing prompt is passed on as an empty string.
req.input = request.prompt.value_or("");
} else {
// CHAT_COMPLETIONS and RESPONSES both use ChatHistory.
// Copied (not moved) so the handler retains its own copy for response serialization.
req.input = request.chatHistory;
// Populate tools and chat_template_kwargs on the copied ChatHistory so
// ChatTemplateProcessor can access them via get_tools()/get_extra_context().
// NOTE(review): req.input was assigned from request.chatHistory just above, so
// this std::get is expected to find the ChatHistory alternative - assumes
// InputRequest::input is a std::variant that includes ov::genai::ChatHistory; confirm.
auto& chatHistory = std::get<ov::genai::ChatHistory>(req.input);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it can throw an exception if req.input is not of type ChatHistory. can we try catch that and return error if it happens?

auto toolsResult = parseToolsToJsonContainer();
if (!toolsResult.ok()) {
return toolsResult.status();
}
// Tools are optional: set them only when the parser produced a value.
if (toolsResult.value().has_value()) {
chatHistory.set_tools(toolsResult.value().value());
}
auto kwargsResult = parseChatTemplateKwargsToJsonContainer();
if (!kwargsResult.ok()) {
return kwargsResult.status();
}
// Chat template kwargs are optional as well.
if (kwargsResult.value().has_value()) {
chatHistory.set_extra_context(kwargsResult.value().value());
}
}
Comment thread
mzegla marked this conversation as resolved.
return req;
}

// Returns the maxTokens value stored in the parsed request; the optional is
// returned as-is, so it may be empty.
std::optional<int> OpenAIApiHandler::getMaxTokens() const {
return request.maxTokens;
}
Expand Down
11 changes: 8 additions & 3 deletions src/llm/apis/openai_api_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "absl/status/statusor.h"
#pragma warning(pop)
#include "../io_processing/output_parser.hpp"
#include "../io_processing/input_request.hpp"
#include "openai_request.hpp"

// Forward declarations for types only used by reference in virtual method signatures
Expand All @@ -50,6 +51,8 @@ using namespace rapidjson;

namespace ovms {

class GenerationConfigBuilder;

ov::genai::JsonContainer rapidJsonValueToJsonContainer(const rapidjson::Value& value);

enum class Endpoint {
Expand Down Expand Up @@ -113,7 +116,7 @@ class OpenAIApiHandler {
// Shared parsing helpers
absl::Status parseCommonPart(std::optional<uint32_t> maxTokensLimit, uint32_t bestOfLimit, std::optional<uint32_t> maxModelLength);
absl::Status parseResponseFormat();
absl::Status ensureArgumentsInToolCalls(Value& messageObj, bool& jsonChanged);
absl::Status ensureArgumentsInToolCalls(Value& messageObj);
ParsedOutput parseOutputIfNeeded(const std::vector<int64_t>& generatedIds);

// Shared VLM workaround: encode text to tokens using tokenizer, validates shape
Expand Down Expand Up @@ -156,8 +159,6 @@ class OpenAIApiHandler {
std::optional<std::string> getPrompt() const;
std::optional<int> getNumReturnSequences() const;
StreamOptions getStreamOptions() const;
const std::string& getProcessedJson() const;
const ImageHistory& getImageHistory() const;
ov::genai::ChatHistory& getChatHistory();
std::optional<int> getMaxTokens() const;
std::optional<std::string> getResponseFormat() const;
Expand All @@ -166,6 +167,10 @@ class OpenAIApiHandler {
std::string getModel() const;
std::string getToolChoice() const;
const std::shared_ptr<OutputParser>& getOutputParser() const;
// Builds a complete InputRequest: runs the full generation config pipeline
// (parse → adjust → validate) on the provided builder using this handler's
// request and tokenizer, then populates input from the parsed request.
absl::StatusOr<InputRequest> extractInputRequest(GenerationConfigBuilder& configBuilder);

// Verbose response configuration
void enableVerboseResponse(const std::string& promptAfterTemplate) {
Expand Down
Loading