Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
35412e1
save
dkalinowski May 22, 2026
f69b420
save
dkalinowski May 22, 2026
e4fbd04
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski May 22, 2026
db9ce20
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski May 26, 2026
40dcbf7
debug
dkalinowski May 26, 2026
b2c15a8
Some VLM chat templates dont have bos token
dkalinowski Jun 5, 2026
6d1c4fe
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski Jun 5, 2026
208c617
save
dkalinowski Jun 5, 2026
bbdfd4f
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski Jun 8, 2026
d95be3e
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski Jun 10, 2026
1e45176
save
dkalinowski Jun 10, 2026
5cbb9b7
safve
dkalinowski Jun 10, 2026
c097ab4
save
dkalinowski Jun 10, 2026
8d2a466
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski Jun 17, 2026
f35d39c
jinja/minja switch
dkalinowski Jun 17, 2026
c10776a
save
dkalinowski Jun 17, 2026
24c2889
style
dkalinowski Jun 18, 2026
4df55fd
save
dkalinowski Jun 22, 2026
f72d658
initial implementation for python's jinja path
dkalinowski Jun 22, 2026
8b9e1bf
support for GenAI"s chat application
dkalinowski Jun 22, 2026
7a19120
Merge remote-tracking branch 'origin/main' into vlm-python-jinja
dkalinowski Jun 22, 2026
b15ab9c
Merge branch 'vlm-python-jinja' into detect
dkalinowski Jun 22, 2026
8b45940
Merge remote-tracking branch 'origin/main' into detect
dkalinowski Jun 23, 2026
e702ec6
save
dkalinowski Jun 24, 2026
e501232
save
dkalinowski Jun 24, 2026
c4ad48d
parallel
dkalinowski Jun 25, 2026
c33c4b8
module and test
dkalinowski Jun 25, 2026
eff90c9
save
dkalinowski Jun 25, 2026
2891798
Merge remote-tracking branch 'origin/main' into detect
dkalinowski Jun 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
351 changes: 351 additions & 0 deletions docs/plan_chat_template_input_workarounds.md

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2510,6 +2510,7 @@ cc_test(
"//src/llm:genai_servables",
"//src/llm:output_parsers",
":test_llm_output_parser_tests",
":test_chat_template_workarounds",
"//src/test/mediapipe/calculators:mediapipe_test_calculators",
"//src/test/mediapipe/calculators:dependency_free_http_test_calculators",
"@mediapipe//mediapipe/calculators/ovms:ovms_calculator",
Expand Down Expand Up @@ -3035,6 +3036,27 @@ cc_library(
local_defines = COMMON_LOCAL_DEFINES,
)

# Test-only library bundling the chat template analyzer, input workarounds and
# end-to-end test sources together with the libraries they exercise.
# NOTE(review): alwayslink = True presumably keeps the self-registering gtest cases
# from being dropped by the linker when this library is pulled in through a
# cc_test's deps - confirm.
cc_library(
name = "test_chat_template_workarounds",
linkstatic = 1,
alwayslink = True,
srcs = [
"test/llm/chat_template_analyzer_test.cpp",
"test/llm/input_workarounds_test.cpp",
"test/llm/chat_template_end_to_end_test.cpp",
],
deps = [
"@com_google_googletest//:gtest",
"//src/llm:chat_template_analyzer",
"//src/llm:chat_template_probe",
"//src/llm:input_workarounds",
"//third_party:genai",
":test_platform_utils",
],
copts = COPTS_TESTS,
local_defines = COMMON_LOCAL_DEFINES,
)

ovms_cc_library(
name = "capimodule",
hdrs = ["capi_frontend/capimodule.hpp"],
Expand Down
38 changes: 38 additions & 0 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,41 @@ ovms_cc_library(
],
visibility = ["//visibility:public"],
)

# Text-based chat template analyzer (chat_template_analyzer.cpp/.hpp).
# Declares no dependencies of its own.
# NOTE(review): chat_template_caps.hpp is also listed in hdrs of
# chat_template_probe and input_workarounds; consider a dedicated target that
# owns the header so it is exported from a single place.
ovms_cc_library(
name = "chat_template_analyzer",
hdrs = ["chat_template_caps.hpp",
"chat_template_analyzer.hpp"],
srcs = ["chat_template_analyzer.cpp"],
deps = [],
visibility = ["//visibility:public"],
)

# Chat template probe (chat_template_probe.cpp/.hpp); depends on GenAI and the
# OVMS logging library and shares chat_template_caps.hpp with its sibling targets.
ovms_cc_library(
name = "chat_template_probe",
hdrs = ["chat_template_probe.hpp",
"chat_template_caps.hpp"],
srcs = ["chat_template_probe.cpp"],
deps = [
"//third_party:genai",
"//src:libovmslogging",
],
visibility = ["//visibility:public"],
)

# Input workarounds (input_workarounds.cpp/.hpp); depends on RapidJSON, GenAI
# and the OVMS logging library and shares chat_template_caps.hpp with its
# sibling targets.
ovms_cc_library(
name = "input_workarounds",
hdrs = ["input_workarounds.hpp",
"chat_template_caps.hpp"],
srcs = ["input_workarounds.cpp"],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
"//third_party:genai",
"//src:libovmslogging",
],
visibility = ["//visibility:public"],
)

ovms_cc_library(
name = "partial_json_builder",
hdrs = ["io_processing/partial_json_builder.hpp"],
Expand Down Expand Up @@ -344,6 +379,9 @@ ovms_cc_library(
":openai_completions_api_handler",
":openai_responses_handler",
":generation_config_builders",
":chat_template_analyzer",
":chat_template_probe",
":input_workarounds",
"//src:httppayload",
"//src:libhttpclientconnection",
"//src:sse_utils",
Expand Down
145 changes: 145 additions & 0 deletions src/llm/chat_template_analyzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//*****************************************************************************
// Copyright 2025 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "chat_template_analyzer.hpp"

#include <string>

namespace ovms {

// Reports whether `needle` occurs anywhere inside `haystack`.
// An empty `needle` is found in every haystack, including an empty one.
static bool contains(const std::string& haystack, const std::string& needle) {
    const std::string::size_type matchPos = haystack.find(needle);
    const bool found = (matchPos != std::string::npos);
    return found;
}


// TODO: remove comments before analysis
// TODO: expect GenAI to fix bug + dry-runs on separate threads?
// Tier 1 detection: classifies a chat template purely by searching its source text for
// family-specific marker strings. Families are probed in a fixed priority order and the
// first hit wins, so the more specific markers must stay ahead of the generic ones
// (Qwen3-Coder before Hermes3, Devstral before Mistral).
//
// templateSource: raw chat template text; an empty string yields a default-constructed result.
// Returns the detected family/parser names and capability flags. When no tool-calling family
// matches, at most detectedReasoningParser is set.
ChatTemplateAnalysisResult ChatTemplateAnalyzer::analyze(const std::string& templateSource) {
    ChatTemplateAnalysisResult analysis;
    if (templateSource.empty()) {
        return analysis;
    }

    // Name of the matched tool-calling family; stays null when no family marker is found.
    // Every family uses the same string for its model family and its tool parser.
    const char* family = nullptr;

    if (contains(templateSource, "<|channel|>")) {
        // GPT-OSS: probed first because its marker is unique.
        family = "gptoss";
        analysis.detectedReasoningParser = "gptoss";
    } else if (contains(templateSource, "'<|tool_call>call:'") || contains(templateSource, "<|tool_call>call:")) {
        // Gemma4.
        family = "gemma4";
        analysis.detectedReasoningParser = "gemma4";
        analysis.caps.requiresObjectArguments = true;
    } else if (contains(templateSource, "<parameter=") && contains(templateSource, "</parameter>") && contains(templateSource, "<function=")) {
        // Qwen3-Coder: XML-style <parameter=...> tool calls.
        family = "qwen3coder";
        // Think tags indicate reasoning support.
        if (contains(templateSource, "<think>") || contains(templateSource, "</think>")) {
            analysis.detectedReasoningParser = "qwen3";
        }
    } else if (contains(templateSource, "<|assistant_tool_call|>") || contains(templateSource, "<|tool_call_start|>")) {
        // LFM2.
        family = "lfm2";
    } else if (contains(templateSource, "<|tool\xe2\x96\x81" "call\xe2\x96\x81" "begin|>")) {  // <|tool▁call▁begin|> with Unicode ▁ (U+2581)
        // Phi-4. The literal is split so the hex escapes do not swallow the following letters.
        family = "phi4";
    } else if (contains(templateSource, "[TOOL_CALLS]") && contains(templateSource, "[TOOL_RESULTS]")) {
        // Devstral: [TOOL_CALLS] together with [TOOL_RESULTS].
        family = "devstral";
    } else if (contains(templateSource, "[TOOL_CALLS]") || (contains(templateSource, "[AVAILABLE_TOOLS]") && contains(templateSource, "[/AVAILABLE_TOOLS]"))) {
        // Mistral: [TOOL_CALLS] without [TOOL_RESULTS], or an [AVAILABLE_TOOLS] section.
        family = "mistral";
    } else if (contains(templateSource, "<|python_tag|>")) {
        // Llama3.
        family = "llama3";
        analysis.caps.requiresNonNullContent = true;
    } else {
        // Hermes3/Qwen: <tool_call> ... </tool_call> (the <parameter= variant was handled above).
        if (contains(templateSource, "<tool_call>") && contains(templateSource, "</tool_call>")) {
            family = "hermes3";
        }
        // Reasoning tags are checked whether or not a tool parser matched here, which also
        // covers the reasoning-only case (no tool family, but think tags present).
        if (contains(templateSource, "<think>") || contains(templateSource, "content.split('</think>')")) {
            analysis.detectedReasoningParser = "qwen3";
        }
    }

    // All tool-calling families share the same three capability flags.
    if (family != nullptr) {
        analysis.detectedModelFamily = family;
        analysis.detectedToolParser = family;
        analysis.caps.supportsToolCalls = true;
        analysis.caps.supportsTools = true;
        analysis.caps.supportsToolResponses = true;
    }

    return analysis;
}

} // namespace ovms
39 changes: 39 additions & 0 deletions src/llm/chat_template_analyzer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//*****************************************************************************
// Copyright 2025 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <optional>
#include <string>

#include "chat_template_caps.hpp"

namespace ovms {

// Outcome of ChatTemplateAnalyzer::analyze for one chat template.
struct ChatTemplateAnalysisResult {
// Capability flags inferred for the template.
ChatTemplateCaps caps;
// Name of the matched model family; left empty when no family is recognized.
std::string detectedModelFamily;
// Tool parser name for the matched family; has no value when none is detected.
std::optional<std::string> detectedToolParser;
// Reasoning parser name; has no value when the template shows no reasoning markers.
std::optional<std::string> detectedReasoningParser;
};

// Stateless helper that inspects chat template source text; exposes a single static entry point.
class ChatTemplateAnalyzer {
public:
// Analyze the chat template source and return detected capabilities and parser names.
// Uses pattern matching on template source text (Tier 1 detection).
// templateSource: raw text of the chat template to inspect.
// Returns a ChatTemplateAnalysisResult holding the capability flags, the detected model
// family and the optional tool/reasoning parser names.
static ChatTemplateAnalysisResult analyze(const std::string& templateSource);
};

} // namespace ovms
34 changes: 34 additions & 0 deletions src/llm/chat_template_caps.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//*****************************************************************************
// Copyright 2025 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <string>

namespace ovms {

// Capability flags describing what a chat template accepts or requires.
// Only the system-role flag defaults to true; every other flag starts out false.
// NOTE(review): the per-flag meanings below are inferred from the field names - confirm
// against the code that consumes them.
struct ChatTemplateCaps {
    // Presumably: the template handles messages with the "system" role.
    bool supportsSystemRole{true};
    // Presumably: the template renders tool definitions passed with the request.
    bool supportsTools{false};
    // Presumably: the template renders tool calls carried by assistant messages.
    bool supportsToolCalls{false};
    // Presumably: the template renders tool response messages.
    bool supportsToolResponses{false};
    // Presumably: tool call arguments must be supplied as an object.
    bool requiresObjectArguments{false};
    // Presumably: message content must not be null.
    bool requiresNonNullContent{false};
    // Presumably: message content must be supplied in typed form.
    bool requiresTypedContent{false};
    // Presumably: the template can render more than one tool call per message.
    bool supportsParallelToolCalls{false};
    // Presumably: the template makes use of tool call identifiers.
    bool supportsToolCallId{false};
};

} // namespace ovms
Loading