diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 137804cd20..0a08eea3e1 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -54,8 +54,8 @@ def add_common_arguments(parser): parser_text.add_argument('--max_prompt_len', required=False, type=int, default=None, help='Sets NPU specific property for maximum number of tokens in the prompt. ' 'Not effective if target device is not NPU', dest='max_prompt_len') parser_text.add_argument('--prompt_lookup_decoding', action='store_true', help='Set pipeline to use prompt lookup decoding', dest='prompt_lookup_decoding') -parser_text.add_argument('--reasoning_parser', choices=["qwen3", "gptoss"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') -parser_text.add_argument('--tool_parser', choices=["llama3", "phi4", "hermes3", "mistral", "qwen3coder", "gptoss", "devstral", "lfm2"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') +parser_text.add_argument('--reasoning_parser', choices=["qwen3", "gptoss", "lfm2.5", "gemma4"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') +parser_text.add_argument('--tool_parser', choices=["llama3", "phi4", "hermes3", "mistral", "qwen3coder", "gptoss", "devstral", "lfm2", "lfm2.5", "gemma4"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') parser_text.add_argument('--enable_tool_guided_generation', action='store_true', help='Enables enforcing tool schema during generation. 
Requires setting tool_parser', dest='enable_tool_guided_generation') parser_embeddings_ov = subparsers.add_parser('embeddings_ov', help='export model for embeddings endpoint with directory structure aligned with OpenVINO tools') diff --git a/docs/llm/reference.md b/docs/llm/reference.md index 698d05031b..4037897908 100644 --- a/docs/llm/reference.md +++ b/docs/llm/reference.md @@ -284,9 +284,15 @@ __Tool parsers:__ - `devstral` - `gptoss` - `qwen3coder` +- `lfm2` +- `lfm2.5` +- `gemma4` __Reasoning parsers:__ - `qwen3` +- `gptoss` +- `lfm2.5` +- `gemma4` Note that using `tools` might require a chat template other than the original. We recommend using templates from the [vLLM repository](https://github.com/vllm-project/vllm/tree/main/examples) for `hermes3`, `llama3`, `phi4`, `mistral`, `devstral`, `gptoss`, and `qwen3coder` models (if available). Save the selected template as `chat_template.jinja` in the model directory and it will be used instead of the default one. If a template is not available for your model, please refer to the model's documentation or use the default template provided by the model server. diff --git a/docs/parameters.md b/docs/parameters.md index edfddafa7e..edc92f5006 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -134,8 +134,8 @@ Task specific parameters for different tasks (text generation/image generation/e | `--max_prompt_len` | `integer` | Sets NPU specific property for maximum number of tokens in the prompt. | | `--kv_cache_precision` | `string` | Reduced kv cache precision to `u8` lowers the cache size consumption. Accepted values: `u8` or empty (default). | | `--model_distribution_policy` | `string` | TENSOR_PARALLEL distributes tensor to multiple sockets/devices and processes it in parallel. PIPELINE_PARALLEL distributes different tensors to process by each device. Accepted values: `TENSOR_PARALLEL`, `PIPELINE_PARALLEL` or empty (default). 
| -| `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3, gptoss, gemma4] | -| `--tool_parser` | `string` | Type of parser to use for tool calls extraction from model output. Currently supported: [llama3, phi4, hermes3, mistral, qwen3coder, gptoss, devstral, lfm2, gemma4] | +| `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3, gptoss, lfm2.5, gemma4] | +| `--tool_parser` | `string` | Type of parser to use for tool calls extraction from model output. Currently supported: [llama3, phi4, hermes3, mistral, qwen3coder, gptoss, devstral, lfm2, lfm2.5, gemma4] | | `--enable_tool_guided_generation` | `bool` | Enables enforcing tool schema during generation. Requires setting response parser. Default: false. | ### Image generation diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 813326ba9e..8956d7df1a 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -39,6 +39,7 @@ MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" GPT_OSS_MODEL="openai/gpt-oss-20b" DEVSTRAL_MODEL="unsloth/Devstral-Small-2507" LFM2_MODEL="LiquidAI/LFM2-2.6B" +LFM25_MODEL="LiquidAI/LFM2.5-8B-A1B" GEMMA4_MODEL="OpenVINO/gemma-4-E4B-it-int4-ov" if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then echo "Prepare models with python > 3.8."; exit 1 ; fi @@ -220,6 +221,16 @@ if [ ! -f "$1/$LFM2_MODEL/$TOKENIZER_FILE" ]; then echo "[ERROR] Models file $1/$LFM2_MODEL/$TOKENIZER_FILE does not exist." exit 1 fi +if [ -f "$1/$LFM25_MODEL/$TOKENIZER_FILE" ]; then + echo "Models file $1/$LFM25_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +else + mkdir -p $1/$LFM25_MODEL + convert_tokenizer $LFM25_MODEL --with_detokenizer -o $1/$LFM25_MODEL +fi +if [ ! -f "$1/$LFM25_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Models file $1/$LFM25_MODEL/$TOKENIZER_FILE does not exist." 
+ exit 1 +fi if [ -f "$1/$GEMMA4_MODEL/$TOKENIZER_FILE" ]; then echo "Models file $1/$GEMMA4_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." else diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt index b458909bb3..81f083e9b5 100644 --- a/spelling-whitelist.txt +++ b/spelling-whitelist.txt @@ -29,6 +29,7 @@ demos/vlm_npu/README.md:157: mane ==> main, many, maine demos/vlm_npu/README.md:218: mane ==> main, many, maine demos/integration_with_OpenWebUI/README.md:423: Buildin ==> Building, Build in src/test/llm/output_parsers/lfm2_output_parser_test.cpp +src/test/llm/output_parsers/lfm25_output_parser_test.cpp windows_parse_tests.bat:35: seh ==> she windows_parse_tests.bat:119: SEH ==> SHE windows_parse_tests.bat:123: SEH ==> SHE diff --git a/src/llm/BUILD b/src/llm/BUILD index 397069b9de..1509b8a5e7 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -189,9 +189,9 @@ ovms_cc_library( ) ovms_cc_library( - name = "io_processing_lfm2_tool_parser", - hdrs = ["io_processing/lfm2/lfm2_tool_parser.hpp"], - srcs = ["io_processing/lfm2/lfm2_tool_parser.cpp"], + name = "io_processing_lfm2_utils", + hdrs = ["io_processing/lfm2/lfm2_utils.hpp"], + srcs = ["io_processing/lfm2/lfm2_utils.cpp"], deps = [ "@com_github_tencent_rapidjson//:rapidjson", "//src/port:rapidjson_document", @@ -203,6 +203,29 @@ ovms_cc_library( ], visibility = ["//visibility:public"], ) + +ovms_cc_library( + name = "io_processing_lfm2_tool_parser", + hdrs = ["io_processing/lfm2/lfm2_tool_parser.hpp"], + srcs = ["io_processing/lfm2/lfm2_tool_parser.cpp"], + deps = [ + ":io_processing_lfm2_utils", + ], + visibility = ["//visibility:public"], +) + + +ovms_cc_library( + name = "io_processing_lfm25_tool_parser", + hdrs = ["io_processing/lfm2/lfm25_tool_parser.hpp"], + srcs = ["io_processing/lfm2/lfm25_tool_parser.cpp"], + deps = [ + ":io_processing_lfm2_utils", + "//src:libovmslogging", + ], + visibility = ["//visibility:public"], +) + ovms_cc_library( name = "io_processing_gemma4_tool_parser", 
hdrs = ["io_processing/gemma4/gemma4_tool_parser.hpp", "io_processing/gemma4/gemma4_reasoning_parser.hpp"], @@ -236,6 +259,22 @@ ovms_cc_library( visibility = ["//visibility:public"], ) +ovms_cc_library( + name = "io_processing_lfm25_reasoning_parser", + hdrs = ["io_processing/lfm2/lfm25_reasoning_parser.hpp"], + srcs = ["io_processing/lfm2/lfm25_reasoning_parser.cpp"], + deps = [ + "@com_github_tencent_rapidjson//:rapidjson", + "//src/port:rapidjson_document", + "//src:libovmslogging", + "//src:libovmsstring_utils", + ":io_processing_utils", + ":io_processing_base_output_parser", + "//third_party:genai", + ], + visibility = ["//visibility:public"], +) + ovms_cc_library( # TODO split further so we don't have to recompile everything when changing one parser ... name = "output_parsers", hdrs = [ @@ -271,8 +310,10 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w ":io_processing_parser_config_validation", ":io_processing_qwen3coder_tool_parser", ":io_processing_lfm2_tool_parser", + ":io_processing_lfm25_tool_parser", ":io_processing_gemma4_tool_parser", ":io_processing_qwen3_reasoning_parser", + ":io_processing_lfm25_reasoning_parser", ":io_processing_utils", ":apis_tool_schema_wrapper", ], diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp new file mode 100644 index 0000000000..f3191fd6f5 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp @@ -0,0 +1,69 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include "src/port/rapidjson_document.hpp" + +#include "../../../logging.hpp" +#include "lfm25_reasoning_parser.hpp" +#include "../utils.hpp" + +namespace ovms { +void Lfm25ReasoningParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + auto startReasoningIt = std::find(generatedTokens.begin(), generatedTokens.end(), reasoningStartTokenId); + auto endReasoningIt = std::find(generatedTokens.begin(), generatedTokens.end(), reasoningEndTokenId); + + if (startReasoningIt == generatedTokens.end() || endReasoningIt == generatedTokens.end() || startReasoningIt >= endReasoningIt) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Lfm25ReasoningParser: Reasoning start or end token not found in the generated tokens, or in wrong order. 
Start token found: {}, End token found: {}, Start position: {}, End position: {}", + startReasoningIt != generatedTokens.end(), endReasoningIt != generatedTokens.end(), std::distance(generatedTokens.begin(), startReasoningIt), std::distance(generatedTokens.begin(), endReasoningIt)); + return; + } + + auto startPos = std::distance(generatedTokens.begin(), startReasoningIt); + auto endPos = std::distance(generatedTokens.begin(), endReasoningIt); + + std::string reasoningContent = tokenizer.decode(std::vector(startPos + generatedTokens.begin() + 1, endPos + generatedTokens.begin()), ov::genai::skip_special_tokens(true)); + + parsedOutput.reasoning = reasoningContent; +} + +std::optional Lfm25ReasoningParser::parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) { + if (tokens.empty()) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty tokens for Lfm25ReasoningParser"); + return std::nullopt; + } + + if (std::find(tokens.begin(), tokens.end(), reasoningStartTokenId) != tokens.end() || + std::find(tokens.begin(), tokens.end(), reasoningEndTokenId) != tokens.end()) { + return std::nullopt; + } else { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + writer.StartObject(); + writer.String("delta"); + writer.StartObject(); + writer.String("reasoning_content"); + writer.String(chunk.c_str()); + writer.EndObject(); + writer.EndObject(); + rapidjson::Document doc; + doc.Parse(buffer.GetString()); + return doc; + } +} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp new file mode 100644 index 0000000000..afc52c7f56 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp @@ -0,0 +1,54 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// 
you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once +#include "../base_output_parser.hpp" +#include +#include + +namespace ovms { +class Lfm25ReasoningParser : public BaseOutputParser { +protected: + const std::string parsingStartTag = ""; + const std::string parsingEndTag = ""; + + const int64_t reasoningStartTokenId = 124901; // + const int64_t reasoningEndTokenId = 124902; // + +public: + Lfm25ReasoningParser() = delete; + explicit Lfm25ReasoningParser(ov::genai::Tokenizer& tokenizer) : + BaseOutputParser(tokenizer) {} + + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; + std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags{this->parsingStartTag}; + return parsingStartTags; + } + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags{}; + return specialParsingStartTags; + } + const std::string& getParsingEndTag() const override { + return parsingEndTag; + } + + // It may be removed after changing logic in Lfm2ToolParser to use tokens in streaming instead of chunk content, both tool parser and reasoning parser need to have the same value for this function + bool requiresStreamingWithSpecialTokens() const override { + return true; + } +}; +} // namespace ovms 
diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp new file mode 100644 index 0000000000..c1c37e99ef --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp @@ -0,0 +1,107 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "lfm25_tool_parser.hpp" + +namespace ovms { + +const std::string Lfm25ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; +const std::string Lfm25ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; + +const int64_t Lfm25ToolParser::toolCallStartTokenId = 124905; // <|tool_call_start|> +const int64_t Lfm25ToolParser::toolCallEndTokenId = 124906; // <|tool_call_end|> +const int64_t Lfm25ToolParser::reasoningEndTokenId = 124902; // + +bool Lfm25ToolParser::parseNewContent() { + switch (this->currentState) { + case State::Content: { + return parseInContentState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); + } + case State::ToolCallStarted: { + auto wasParsedCorrectly = parseInToolCallState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); + if (wasParsedCorrectly) { + this->toolCallIndex++; + } + return wasParsedCorrectly; + } + case 
State::ToolCallParameters: { + return parseToolCallParametersState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); + } + case State::ToolCallEnded: { + return parseInToolCallEndedState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_END_TAG); + } + case State::AfterToolCall: + break; + } + return false; +} + +std::optional Lfm25ToolParser::parseChunk(const std::string& chunk, const std::vector& /*tokens*/, ov::genai::GenerationFinishReason finishReason) { + if (chunk.empty()) { + return std::nullopt; + } + + this->streamingContent += chunk; + + if (parseNewContent()) { + if (this->currentState == State::ToolCallParameters) { + return BaseOutputParser::wrapFirstDelta(this->toolCall.name, this->toolCallIndex); + } + if (this->currentState == State::ToolCallEnded) { + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); + } + if (this->currentState == State::Content) { + size_t contentEnd = this->streamingContent.find(TOOL_CALL_START_TAG, this->streamingPosition); + std::string content; + if (contentEnd != std::string::npos) { + content = this->streamingContent.substr(this->streamingPosition, contentEnd - this->streamingPosition); + } else { + content = this->streamingContent.substr(this->streamingPosition); + } + this->streamingPosition += content.size(); + cutEOSFromContent(content); + + if (!content.empty()) { + return wrapDeltaContent(content); + } + } + if (this->currentState == State::AfterToolCall) { + this->currentState = State::Content; + } + } + + if (finishReason != ov::genai::GenerationFinishReason::NONE) { + if ((this->currentState == State::ToolCallParameters || this->currentState == State::ToolCallEnded) && !this->toolCall.arguments.empty()) { + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); + } + + if (this->currentState == State::Content && this->streamingPosition < this->streamingContent.size()) { + auto content = 
this->streamingContent.substr(this->streamingPosition); + this->streamingPosition += content.size(); + cutEOSFromContent(content); + + if (!content.empty()) { + return wrapDeltaContent(content); + } + } + } + + return std::nullopt; +} + +void Lfm25ToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + parseUnaryResponse(parsedOutput, generatedTokens, tokenizer, toolCallStartTokenId, toolCallEndTokenId, reasoningEndTokenId); +} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp new file mode 100644 index 0000000000..d07b277077 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp @@ -0,0 +1,71 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include "lfm2_utils.hpp" + +namespace ovms { +class Lfm25ToolParser : public BaseOutputParser { +protected: + static const std::string TOOL_CALL_START_TAG; + static const std::string TOOL_CALL_END_TAG; + + static const int64_t toolCallStartTokenId; + static const int64_t toolCallEndTokenId; + static const int64_t reasoningEndTokenId; + +public: + Lfm25ToolParser() = delete; + explicit Lfm25ToolParser(ov::genai::Tokenizer& tokenizer) : + BaseOutputParser(tokenizer) {} + + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; + std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags = {TOOL_CALL_START_TAG}; + return parsingStartTags; + } + + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector beginningOnlyTags = {}; + return beginningOnlyTags; + } + + const std::vector& getSpecialTagsToErase() const override { + static const std::vector tagsToErase = {EOS_TOKEN_STR}; + return tagsToErase; + } + + const std::string& getParsingEndTag() const override { + return TOOL_CALL_END_TAG; + } + + bool requiresStreamingWithSpecialTokens() const override { + return true; + } + +private: + std::string streamingContent; + size_t streamingPosition{0}; + State currentState{State::Content}; + ToolCall toolCall; + + int toolCallIndex{TOOL_CALL_INDEX_START}; + + bool parseNewContent(); +}; +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp index 056a5bead9..0230097b30 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp @@ -14,293 +14,32 @@ // limitations under the License. 
//***************************************************************************** #include "lfm2_tool_parser.hpp" -#include "../utils.hpp" -#include "../../../logging.hpp" -#include "../../../stringutils.hpp" -#include "rapidjson/error/en.h" -#include -#include -#include namespace ovms { const std::string Lfm2ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; const std::string Lfm2ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; -const std::string Lfm2ToolParser::EOS_TOKEN_STR = "<|im_end|>"; -const std::string Lfm2ToolParser::TOOL_LIST_START_INDICATOR = "["; -const std::string Lfm2ToolParser::TOOL_LIST_END_INDICATOR = "]"; -const std::string Lfm2ToolParser::TOOL_ARGS_START_INDICATOR = "("; -const std::string Lfm2ToolParser::TOOL_ARGS_END_INDICATOR = ")"; -const std::string Lfm2ToolParser::TOOL_SEPARATOR_STR = ", "; - -const int64_t Lfm2ToolParser::botTokenId = 10; -const int64_t Lfm2ToolParser::eotTokenId = 11; - -std::string Lfm2ToolParser::parseArrayParameter(std::string argumentStr) { - int quoteDepth = 0; - - for (size_t i = 1; i < argumentStr.size() - 1; ++i) { - if (argumentStr[i] != '\'') { - continue; - } - - bool isLastElement = (i == argumentStr.size() - 2); - bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; - - if (quoteDepth == 0) { - argumentStr[i] = '"'; - quoteDepth++; - } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement)) { - argumentStr[i] = '"'; - quoteDepth--; - } - } - - return argumentStr; -} - -std::string Lfm2ToolParser::parseObjectParameter(std::string argumentStr) { - int quoteDepth = 0; - - for (size_t i = 1; i < argumentStr.size() - 1; ++i) { - if (argumentStr[i] != '\'') { - continue; - } - - bool isLastElement = (i == argumentStr.size() - 2); - bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; - bool isFollowedByColon = !isLastElement && argumentStr[i + 1] == ':'; - - if (quoteDepth == 0) { - argumentStr[i] = '"'; - quoteDepth++; - } else if (quoteDepth > 0 && 
(isFollowedByComma || isLastElement || isFollowedByColon)) { - argumentStr[i] = '"'; - quoteDepth--; - } - } - - return argumentStr; -} - -std::string Lfm2ToolParser::normalizeArgStr(const std::string& arg) { - if (arg.empty()) { - return arg; - } - - std::string normalized = arg; - trim(normalized); - std::string lower = normalized; - std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); - - if (lower == "true" || lower == "false" || lower == "null") { - return lower; - } - - const char first = normalized.front(); - const char last = normalized.back(); - if (first == '{' && last == '}') { - normalized = parseObjectParameter(normalized); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument contains is an object, replaced single quotes with double quotes for JSON parsing. Modified string: {}", normalized); - } - - if (first == '[' && last == ']') { - normalized = parseArrayParameter(normalized); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is an array, normalized quotes for JSON parsing. Modified string: {}", normalized); - } - - if ((first == '\'' && last == '\'')) { - normalized[0] = '"'; - normalized[normalized.size() - 1] = '"'; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is enclosed in quotes, replaced outer quotes with double quotes for JSON parsing. Modified string: {}", normalized); - } - - rapidjson::Document tempDoc; - rapidjson::Value finalValue; - tempDoc.Parse(normalized.c_str()); - if (tempDoc.HasParseError()) { - auto errorCode = tempDoc.GetParseError(); - auto errorMessage = rapidjson::GetParseError_En(errorCode); - size_t errorOffset = tempDoc.GetErrorOffset(); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse argument string as JSON. 
Argument string: {}, Error: {} Offset: {}", normalized, errorMessage, errorOffset); - - if (first == '\"' && last == '\"') { - normalized = normalized.substr(1, normalized.size() - 2); - } - finalValue.SetString(normalized.c_str(), static_cast(normalized.size()), tempDoc.GetAllocator()); - } else { - finalValue.CopyFrom(tempDoc, tempDoc.GetAllocator()); - } - - { - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - finalValue.Accept(writer); - normalized = buffer.GetString(); - } - - return normalized; -} - -void Lfm2ToolParser::writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer) { - std::string normalized = normalizeArgStr(arg); - - rapidjson::Document doc; - doc.Parse(normalized.c_str()); - - rapidjson::Value& argumentDoc = doc; - writeArgumentOfAnyType(argumentDoc, writer); -} - -Lfm2ToolParser::Argument Lfm2ToolParser::parseSingleArgument(const std::string& argumentStr) { - Lfm2ToolParser::Argument argument; - - size_t equalPos = argumentStr.find('='); - if (equalPos != std::string::npos) { - argument.name = argumentStr.substr(0, equalPos); - argument.value = argumentStr.substr(equalPos + 1); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument - name: {}, value: {}", argument.name, argument.value); - } else { - argument.name = argumentStr; - argument.value = ""; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument string: {} does not contain '=', setting name as entire string and value as empty", argumentStr); - } - return argument; -} - -std::vector Lfm2ToolParser::parseArguments(const std::string& argumentsStr) { - std::vector args; - std::vector parsedArgs; - - size_t argPos = 0; - while (argPos < argumentsStr.length()) { - size_t commaPos = findInStringRespectingSpecialChars(argumentsStr, TOOL_SEPARATOR_STR, argPos); - if (commaPos == std::string::npos) { - auto remainingStr = argumentsStr.substr(argPos); - args.push_back(remainingStr); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "No more commas found, 
adding remaining argument string: {}", remainingStr); - break; - } - auto argStr = argumentsStr.substr(argPos, commaPos - argPos); - args.push_back(argStr); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument string: {}", argStr); - argPos = commaPos + TOOL_SEPARATOR_STR.length(); - } - - for (const std::string& arg : args) { - parsedArgs.push_back(parseSingleArgument(arg)); - } - return parsedArgs; -} - -bool Lfm2ToolParser::parseInContentState() { - size_t toolCallStartTagPos = this->streamingContent.find(TOOL_CALL_START_TAG, this->streamingPosition); - size_t toolCallEndTagPos = this->streamingContent.find(TOOL_CALL_END_TAG, this->streamingPosition); - if (toolCallEndTagPos != std::string::npos && toolCallStartTagPos == std::string::npos) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); - this->streamingPosition = toolCallEndTagPos + TOOL_CALL_END_TAG.length(); - return false; - } - if (toolCallStartTagPos != std::string::npos) { - if (toolCallStartTagPos > this->streamingPosition) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Content found before tool call start tag at position: {}", toolCallStartTagPos); - return true; - } - this->streamingPosition = toolCallStartTagPos + TOOL_CALL_START_TAG.length(); - this->currentState = State::ToolCallStarted; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected start of tool call at position: {}", toolCallStartTagPos); - return false; - } - - return true; -} - -bool Lfm2ToolParser::parseInToolCallState() { - size_t toolListStartPos = this->streamingContent.find(TOOL_LIST_START_INDICATOR, this->streamingPosition); - size_t argsPos = this->streamingContent.find(TOOL_ARGS_START_INDICATOR, this->streamingPosition); - - if (toolListStartPos != std::string::npos) { - this->streamingPosition = toolListStartPos + TOOL_LIST_START_INDICATOR.length(); - } - - if (argsPos == std::string::npos) { - return false; - } - - std::string toolName = 
this->streamingContent.substr(this->streamingPosition, argsPos - this->streamingPosition); - trim(toolName); - this->toolCall = ToolCall{generateRandomId(), toolName, ""}; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); - this->streamingPosition = argsPos + TOOL_ARGS_START_INDICATOR.length(); - this->currentState = State::ToolCallParameters; - this->toolCallIndex++; - return true; -} - -bool Lfm2ToolParser::parseToolCallParametersState() { - size_t pos = findInStringRespectingSpecialChars(this->streamingContent, TOOL_ARGS_END_INDICATOR, this->streamingPosition); - if (pos == std::string::npos) { - return false; - } - std::string argumentsStr = this->streamingContent.substr(this->streamingPosition, pos - this->streamingPosition); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed arguments string: {}", argumentsStr); - std::vector arguments = parseArguments(argumentsStr); - - rapidjson::Document argsDoc(rapidjson::kObjectType); - rapidjson::StringBuffer sb; - rapidjson::Writer argsWriter(sb); - argsWriter.StartObject(); - - for (const Argument& argument : arguments) { - argsWriter.Key(argument.name.c_str()); - writeArgumentToWriter(argument.value, argsWriter); - } - - argsWriter.EndObject(); - this->toolCall.arguments = sb.GetString(); - this->currentState = State::ToolCallEnded; - this->streamingPosition = pos + TOOL_ARGS_END_INDICATOR.length(); - - return true; -} - -bool Lfm2ToolParser::parseInToolCallEndedState() { - size_t pos = this->streamingContent.find(TOOL_LIST_END_INDICATOR, this->streamingPosition); - size_t toolSeparatorPos = this->streamingContent.find(TOOL_SEPARATOR_STR, this->streamingPosition); - size_t toolCallEndTagPos = this->streamingContent.find(TOOL_CALL_END_TAG, this->streamingPosition); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Current state: ToolCallEnded. 
Streaming content from current position: {}", this->streamingContent.substr(this->streamingPosition)); - if (pos == std::string::npos && toolSeparatorPos == std::string::npos && toolCallEndTagPos == std::string::npos) { - return false; - } else if (toolSeparatorPos != std::string::npos && toolSeparatorPos < pos) { - this->streamingPosition = toolSeparatorPos + TOOL_SEPARATOR_STR.length(); - this->currentState = State::ToolCallStarted; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected separator between tool calls at position: {}, expecting another tool call to start", toolSeparatorPos); - } else if (toolCallEndTagPos != std::string::npos) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); - this->streamingPosition = toolCallEndTagPos + TOOL_CALL_END_TAG.length(); - this->currentState = State::AfterToolCall; - } else { - this->streamingPosition = pos + TOOL_LIST_END_INDICATOR.length(); - this->currentState = State::AfterToolCall; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool list at position: {}, returning to content state", pos); - } - return true; -} +const int64_t Lfm2ToolParser::botTokenId = 10; // <|tool_call_start|> +const int64_t Lfm2ToolParser::eotTokenId = 11; // <|tool_call_end|> bool Lfm2ToolParser::parseNewContent() { switch (this->currentState) { case State::Content: { - return parseInContentState(); + return parseInContentState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); } case State::ToolCallStarted: { - return parseInToolCallState(); + auto wasParsedCorrectly = parseInToolCallState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); + if (wasParsedCorrectly) { + this->toolCallIndex++; + } + return wasParsedCorrectly; } case State::ToolCallParameters: { - return parseToolCallParametersState(); + return parseToolCallParametersState(this->streamingContent, 
this->toolCall, this->streamingPosition, this->currentState); } case State::ToolCallEnded: { - return parseInToolCallEndedState(); + return parseInToolCallEndedState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_END_TAG); } case State::AfterToolCall: break; @@ -308,28 +47,6 @@ bool Lfm2ToolParser::parseNewContent() { return false; } -rapidjson::Document Lfm2ToolParser::wrapDeltaContent(const std::string& content) { - rapidjson::Document doc(rapidjson::kObjectType); - rapidjson::Value deltaObj(rapidjson::kObjectType); - deltaObj.AddMember("content", rapidjson::Value(content.c_str(), doc.GetAllocator()), doc.GetAllocator()); - doc.AddMember("delta", deltaObj, doc.GetAllocator()); - return doc; -} - -rapidjson::Document Lfm2ToolParser::wrapDeltaArgs(const std::string& argsStr) { - rapidjson::Document doc(rapidjson::kObjectType); - doc.AddMember("arguments", rapidjson::Value(argsStr.c_str(), doc.GetAllocator()), doc.GetAllocator()); - - return BaseOutputParser::wrapDelta(doc, this->toolCallIndex); -} - -void Lfm2ToolParser::cutEOSFromContent(std::string& content) { - size_t eosPos = content.find(EOS_TOKEN_STR); - if (eosPos != std::string::npos) { - content = content.substr(0, eosPos); - } -} - std::optional Lfm2ToolParser::parseChunk(const std::string& chunk, const std::vector& /*tokens*/, ov::genai::GenerationFinishReason finishReason) { if (chunk.empty()) { return std::nullopt; @@ -342,7 +59,7 @@ std::optional Lfm2ToolParser::parseChunk(const std::string& return BaseOutputParser::wrapFirstDelta(this->toolCall.name, this->toolCallIndex); } if (this->currentState == State::ToolCallEnded) { - return wrapDeltaArgs(this->toolCall.arguments); + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); } if (this->currentState == State::Content) { size_t contentEnd = this->streamingContent.find(TOOL_CALL_START_TAG, this->streamingPosition); @@ -366,7 +83,7 @@ std::optional Lfm2ToolParser::parseChunk(const std::string& if 
(finishReason != ov::genai::GenerationFinishReason::NONE) { if ((this->currentState == State::ToolCallParameters || this->currentState == State::ToolCallEnded) && !this->toolCall.arguments.empty()) { - return wrapDeltaArgs(this->toolCall.arguments); + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); } if (this->currentState == State::Content && this->streamingPosition < this->streamingContent.size()) { @@ -383,99 +100,7 @@ std::optional Lfm2ToolParser::parseChunk(const std::string& return std::nullopt; } -bool Lfm2ToolParser::parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall) { - size_t argsPos = toolStr.find(TOOL_ARGS_START_INDICATOR); - if (argsPos != std::string::npos) { - std::string toolName = toolStr.substr(0, argsPos); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); - - int argsStrLen = toolStr.length() - argsPos - TOOL_ARGS_START_INDICATOR.length() - TOOL_ARGS_END_INDICATOR.length(); - std::string argsStr = toolStr.substr(argsPos + TOOL_ARGS_START_INDICATOR.length(), argsStrLen); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed args string: {}", argsStr); - std::vector arguments = parseArguments(argsStr); - - toolCall.name = toolName; - rapidjson::Document argsDoc(rapidjson::kObjectType); - rapidjson::StringBuffer sb; - rapidjson::Writer argsWriter(sb); - argsWriter.StartObject(); - for (const Lfm2ToolParser::Argument& argument : arguments) { - argsWriter.Key(argument.name.c_str()); - writeArgumentToWriter(argument.value, argsWriter); - } - argsWriter.EndObject(); - toolCall.arguments = sb.GetString(); - toolCall.id = generateRandomId(); - return true; - } - return false; -} - void Lfm2ToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { - std::vector tools; - std::vector> toolCallPositions; - size_t pos = 0; - int mainGuard = 0; - - while (pos != std::string::npos && mainGuard < MAX_TOOL_CALLS) { - size_t start, end; - auto it = 
std::find(generatedTokens.begin() + pos, generatedTokens.end(), botTokenId); - if (it != generatedTokens.end()) { - start = std::distance(generatedTokens.begin(), it); - } else { - break; - } - auto itArgs = std::find(generatedTokens.begin() + start, generatedTokens.end(), eotTokenId); - if (itArgs != generatedTokens.end()) { - end = std::distance(generatedTokens.begin(), itArgs); - } else { - break; - } - - std::string toolListStr = tokenizer.decode(std::vector(generatedTokens.begin() + start + 1, generatedTokens.begin() + end), ov::AnyMap{ov::genai::skip_special_tokens(false)}); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool list string: {}", toolListStr); - int toolGuard = 0; - toolListStr = toolListStr.substr(TOOL_LIST_START_INDICATOR.length(), toolListStr.length() - TOOL_LIST_START_INDICATOR.length() - TOOL_LIST_END_INDICATOR.length()); - - while (!toolListStr.empty() && toolGuard < MAX_TOOLS_PER_CALL) { - size_t toolEndPos = findInStringRespectingSpecialChars(toolListStr, TOOL_ARGS_END_INDICATOR, 0); - std::string singleTool; - if (toolEndPos != std::string::npos) { - singleTool = toolListStr.substr(0, toolEndPos + TOOL_ARGS_END_INDICATOR.length()); - if (toolEndPos + TOOL_ARGS_END_INDICATOR.length() < toolListStr.length()) { - toolListStr = toolListStr.substr(toolEndPos + TOOL_ARGS_END_INDICATOR.length() + TOOL_SEPARATOR_STR.length()); - } else { - toolListStr.clear(); - } - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed single tool string {}", singleTool); - } - - if (!singleTool.empty()) { - tools.push_back(singleTool); - } - toolGuard++; - } - mainGuard++; - - pos = end; - toolCallPositions.emplace_back(start, end); - } - - for (const std::string& tool : tools) { - ToolCall toolCall; - auto wasToolCallParsed = parseSingleToolCall(tool, toolCall); - if (wasToolCallParsed) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool call - name: {}, args: {}", toolCall.name, toolCall.arguments); - parsedOutput.toolCalls.push_back(toolCall); - 
} else { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse tool call from string: {}", tool); - } - } - - std::vector contentWithoutToolCalls = generatedTokens; - for (auto it = toolCallPositions.rbegin(); it != toolCallPositions.rend(); ++it) { - contentWithoutToolCalls.erase(contentWithoutToolCalls.begin() + it->first, contentWithoutToolCalls.begin() + it->second + 1); - } - parsedOutput.content = tokenizer.decode(contentWithoutToolCalls, ov::AnyMap{ov::genai::skip_special_tokens(true)}); + parseUnaryResponse(parsedOutput, generatedTokens, tokenizer, botTokenId, eotTokenId); } } // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp index cd56634eb4..c607d827ff 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp @@ -17,39 +17,19 @@ #include #include #include "src/llm/io_processing/base_output_parser.hpp" +#include "../../../logging.hpp" +#include "./lfm2_utils.hpp" namespace ovms { class Lfm2ToolParser : public BaseOutputParser { protected: static const std::string TOOL_CALL_START_TAG; static const std::string TOOL_CALL_END_TAG; - static const std::string EOS_TOKEN_STR; - - static const std::string TOOL_LIST_START_INDICATOR; - static const std::string TOOL_LIST_END_INDICATOR; - static const std::string TOOL_ARGS_START_INDICATOR; - static const std::string TOOL_ARGS_END_INDICATOR; - static const std::string TOOL_SEPARATOR_STR; static const int64_t botTokenId; static const int64_t eotTokenId; - static constexpr size_t MAX_TOOL_CALLS = 100; - static constexpr size_t MAX_TOOLS_PER_CALL = 100; - static constexpr int TOOL_CALL_INDEX_START = -1; - enum class State { - Content, // Content -> ToolCallStarted (on TOOL_CALL_START_TAG) - ToolCallStarted, // ToolCallStarted -> ToolCallParameters (on TOOL_ARGS_START_INDICATOR, emits name) - ToolCallParameters, // ToolCallParameters -> ToolCallEnded (on TOOL_ARGS_END_INDICATOR, 
emits args) - ToolCallEnded, // ToolCallEnded -> ToolCallStarted (on separator) | AfterToolCall (on end tag/list end) - AfterToolCall // AfterToolCall -> Content - }; - public: - struct Argument { - std::string name; - std::string value; - }; Lfm2ToolParser() = delete; explicit Lfm2ToolParser(ov::genai::Tokenizer& tokenizer) : BaseOutputParser(tokenizer) {} @@ -79,32 +59,14 @@ class Lfm2ToolParser : public BaseOutputParser { return true; } - static std::string normalizeArgStr(const std::string& arg); - static std::string parseArrayParameter(std::string argumentStr); - static std::string parseObjectParameter(std::string argumentStr); - private: - void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer); - - Argument parseSingleArgument(const std::string& argumentStr); - std::vector parseArguments(const std::string& argumentsStr); - void cutEOSFromContent(std::string& content); - - bool parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall); - bool parseNewContent(); - bool parseInContentState(); - bool parseInToolCallState(); - bool parseToolCallParametersState(); - bool parseInToolCallEndedState(); - - rapidjson::Document wrapDeltaContent(const std::string& content); - rapidjson::Document wrapDeltaArgs(const std::string& argsStr); - std::string streamingContent; size_t streamingPosition{0}; State currentState{State::Content}; ToolCall toolCall; int toolCallIndex{TOOL_CALL_INDEX_START}; + + bool parseNewContent(); }; } // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp new file mode 100644 index 0000000000..3ccc74b399 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -0,0 +1,406 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "lfm2_utils.hpp" +#include "../utils.hpp" +#include "../../../logging.hpp" +#include "../../../stringutils.hpp" +#include "rapidjson/error/en.h" +#include +#include +#include + +namespace ovms { +const std::string TOOL_LIST_START_INDICATOR = "["; +const std::string TOOL_LIST_END_INDICATOR = "]"; +const std::string TOOL_ARGS_START_INDICATOR = "("; +const std::string TOOL_ARGS_END_INDICATOR = ")"; +const std::string TOOL_SEPARATOR_STR = ", "; +const std::string EOS_TOKEN_STR = "<|im_end|>"; + +const int TOOL_CALL_INDEX_START = -1; + +std::string parseArrayParameter(std::string argumentStr) { + int quoteDepth = 0; + + for (size_t i = 1; i < argumentStr.size() - 1; ++i) { + if (argumentStr[i] != '\'') { + continue; + } + + bool isLastElement = (i == argumentStr.size() - 2); + bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; + + if (quoteDepth == 0) { + argumentStr[i] = '"'; + quoteDepth++; + } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement)) { + argumentStr[i] = '"'; + quoteDepth--; + } + } + + return argumentStr; +} + +std::string parseObjectParameter(std::string argumentStr) { + int quoteDepth = 0; + + for (size_t i = 1; i < argumentStr.size() - 1; ++i) { + if (argumentStr[i] != '\'') { + continue; + } + + bool isLastElement = (i == argumentStr.size() - 2); + bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; + bool isFollowedByColon = !isLastElement && argumentStr[i + 1] == ':'; + + if (quoteDepth == 0) { 
+ argumentStr[i] = '"'; + quoteDepth++; + } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement || isFollowedByColon)) { + argumentStr[i] = '"'; + quoteDepth--; + } + } + + return argumentStr; +} + +std::string normalizeArgStr(const std::string& arg) { + if (arg.empty()) { + return arg; + } + + std::string normalized = arg; + trim(normalized); + std::string lower = normalized; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + + if (lower == "true" || lower == "false" || lower == "null") { + return lower; + } + + const char first = normalized.front(); + const char last = normalized.back(); + if (first == '{' && last == '}') { + normalized = parseObjectParameter(normalized); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument contains is an object, replaced single quotes with double quotes for JSON parsing. Modified string: {}", normalized); + } + + if (first == '[' && last == ']') { + normalized = parseArrayParameter(normalized); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is an array, normalized quotes for JSON parsing. Modified string: {}", normalized); + } + + if ((first == '\'' && last == '\'')) { + normalized[0] = '"'; + normalized[normalized.size() - 1] = '"'; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is enclosed in quotes, replaced outer quotes with double quotes for JSON parsing. Modified string: {}", normalized); + } + + rapidjson::Document tempDoc; + rapidjson::Value finalValue; + tempDoc.Parse(normalized.c_str()); + if (tempDoc.HasParseError()) { + auto errorCode = tempDoc.GetParseError(); + auto errorMessage = rapidjson::GetParseError_En(errorCode); + size_t errorOffset = tempDoc.GetErrorOffset(); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse argument string as JSON. 
Argument string: {}, Error: {} Offset: {}", normalized, errorMessage, errorOffset); + + if (first == '\"' && last == '\"') { + normalized = normalized.substr(1, normalized.size() - 2); + } + finalValue.SetString(normalized.c_str(), static_cast(normalized.size()), tempDoc.GetAllocator()); + } else { + finalValue.CopyFrom(tempDoc, tempDoc.GetAllocator()); + } + + { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + finalValue.Accept(writer); + normalized = buffer.GetString(); + } + + return normalized; +} + +void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer) { + std::string normalized = normalizeArgStr(arg); + + rapidjson::Document doc; + doc.Parse(normalized.c_str()); + + rapidjson::Value& argumentDoc = doc; + writeArgumentOfAnyType(argumentDoc, writer); +} + +Argument parseSingleArgument(const std::string& argumentStr) { + Argument argument; + + size_t equalPos = argumentStr.find('='); + if (equalPos != std::string::npos) { + argument.name = argumentStr.substr(0, equalPos); + argument.value = argumentStr.substr(equalPos + 1); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument - name: {}, value: {}", argument.name, argument.value); + } else { + argument.name = argumentStr; + argument.value = ""; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument string: {} does not contain '=', setting name as entire string and value as empty", argumentStr); + } + return argument; +} + +std::vector parseArguments(const std::string& argumentsStr) { + std::vector args; + std::vector parsedArgs; + + size_t argPos = 0; + while (argPos < argumentsStr.length()) { + size_t commaPos = findInStringRespectingSpecialChars(argumentsStr, TOOL_SEPARATOR_STR, argPos); + if (commaPos == std::string::npos) { + auto remainingStr = argumentsStr.substr(argPos); + args.push_back(remainingStr); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "No more commas found, adding remaining argument string: {}", remainingStr); + break; + } + auto argStr = 
argumentsStr.substr(argPos, commaPos - argPos); + args.push_back(argStr); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument string: {}", argStr); + argPos = commaPos + TOOL_SEPARATOR_STR.length(); + } + + for (const std::string& arg : args) { + parsedArgs.push_back(parseSingleArgument(arg)); + } + return parsedArgs; +} + +bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallStartTag, const std::string& toolCallEndTag) { + size_t toolCallStartTagPos = streamingContent.find(toolCallStartTag, streamingPosition); + size_t toolCallEndTagPos = streamingContent.find(toolCallEndTag, streamingPosition); + if (toolCallEndTagPos != std::string::npos && toolCallStartTagPos == std::string::npos) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); + streamingPosition = toolCallEndTagPos + toolCallEndTag.length(); + return false; + } + if (toolCallStartTagPos != std::string::npos) { + if (toolCallStartTagPos > streamingPosition) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Content found before tool call start tag at position: {}", toolCallStartTagPos); + return true; + } + currentState = State::ToolCallStarted; + streamingPosition = toolCallStartTagPos + toolCallStartTag.length(); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected start of tool call at position: {}", toolCallStartTagPos); + return false; + } + + return true; +} +bool parseInToolCallState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState) { + size_t toolListStartPos = streamingContent.find(TOOL_LIST_START_INDICATOR, streamingPosition); + size_t argsPos = streamingContent.find(TOOL_ARGS_START_INDICATOR, streamingPosition); + + if (toolListStartPos != std::string::npos) { + streamingPosition = toolListStartPos + TOOL_LIST_START_INDICATOR.length(); + } + + if (argsPos == std::string::npos) { + return 
false; + } + + std::string toolName = streamingContent.substr(streamingPosition, argsPos - streamingPosition); + trim(toolName); + toolCall = ToolCall{generateRandomId(), toolName, ""}; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); + streamingPosition = argsPos + TOOL_ARGS_START_INDICATOR.length(); + currentState = State::ToolCallParameters; + return true; +} + +bool parseToolCallParametersState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState) { + size_t pos = findInStringRespectingSpecialChars(streamingContent, TOOL_ARGS_END_INDICATOR, streamingPosition); + if (pos == std::string::npos) { + return false; + } + std::string argumentsStr = streamingContent.substr(streamingPosition, pos - streamingPosition); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed arguments string: {}", argumentsStr); + std::vector arguments = parseArguments(argumentsStr); + + rapidjson::Document argsDoc(rapidjson::kObjectType); + rapidjson::StringBuffer sb; + rapidjson::Writer argsWriter(sb); + argsWriter.StartObject(); + + for (const Argument& argument : arguments) { + argsWriter.Key(argument.name.c_str()); + writeArgumentToWriter(argument.value, argsWriter); + } + + argsWriter.EndObject(); + toolCall.arguments = sb.GetString(); + currentState = State::ToolCallEnded; + streamingPosition = pos + TOOL_ARGS_END_INDICATOR.length(); + + return true; +} + +bool parseInToolCallEndedState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallEndTag) { + size_t pos = streamingContent.find(TOOL_LIST_END_INDICATOR, streamingPosition); + size_t toolSeparatorPos = streamingContent.find(TOOL_SEPARATOR_STR, streamingPosition); + size_t toolCallEndTagPos = streamingContent.find(toolCallEndTag, streamingPosition); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Current state: ToolCallEnded. 
Streaming content from current position: {}", streamingContent.substr(streamingPosition)); + if (pos == std::string::npos && toolSeparatorPos == std::string::npos && toolCallEndTagPos == std::string::npos) { + return false; + } else if (toolSeparatorPos != std::string::npos && toolSeparatorPos < pos) { + streamingPosition = toolSeparatorPos + TOOL_SEPARATOR_STR.length(); + currentState = State::ToolCallStarted; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected separator between tool calls at position: {}, expecting another tool call to start", toolSeparatorPos); + } else if (toolCallEndTagPos != std::string::npos) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); + streamingPosition = toolCallEndTagPos + toolCallEndTag.length(); + currentState = State::AfterToolCall; + } else { + streamingPosition = pos + TOOL_LIST_END_INDICATOR.length(); + currentState = State::AfterToolCall; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool list at position: {}, returning to content state", pos); + } + return true; +} + +rapidjson::Document wrapDeltaContent(const std::string& content) { + rapidjson::Document doc(rapidjson::kObjectType); + rapidjson::Value deltaObj(rapidjson::kObjectType); + deltaObj.AddMember("content", rapidjson::Value(content.c_str(), doc.GetAllocator()), doc.GetAllocator()); + doc.AddMember("delta", deltaObj, doc.GetAllocator()); + return doc; +} + +rapidjson::Document wrapDeltaArgs(const std::string& argsStr, int toolCallIndex) { + rapidjson::Document doc(rapidjson::kObjectType); + doc.AddMember("arguments", rapidjson::Value(argsStr.c_str(), doc.GetAllocator()), doc.GetAllocator()); + + return BaseOutputParser::wrapDelta(doc, toolCallIndex); +} + +void cutEOSFromContent(std::string& content) { + size_t eosPos = content.find(EOS_TOKEN_STR); + if (eosPos != std::string::npos) { + content = content.substr(0, eosPos); + } +} + +bool parseSingleToolCall(const std::string& toolStr, 
ToolCall& toolCall) { + size_t argsPos = toolStr.find(TOOL_ARGS_START_INDICATOR); + if (argsPos != std::string::npos) { + std::string toolName = toolStr.substr(0, argsPos); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); + + int argsStrLen = toolStr.length() - argsPos - TOOL_ARGS_START_INDICATOR.length() - TOOL_ARGS_END_INDICATOR.length(); + std::string argsStr = toolStr.substr(argsPos + TOOL_ARGS_START_INDICATOR.length(), argsStrLen); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed args string: {}", argsStr); + std::vector arguments = parseArguments(argsStr); + + toolCall.name = toolName; + rapidjson::Document argsDoc(rapidjson::kObjectType); + rapidjson::StringBuffer sb; + rapidjson::Writer argsWriter(sb); + argsWriter.StartObject(); + for (const Argument& argument : arguments) { + argsWriter.Key(argument.name.c_str()); + writeArgumentToWriter(argument.value, argsWriter); + } + argsWriter.EndObject(); + toolCall.arguments = sb.GetString(); + toolCall.id = generateRandomId(); + return true; + } + return false; +} + +void parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& generatedTokens, ov::genai::Tokenizer& tokenizer, const int64_t botTokenId, const int64_t eotTokenId, const std::optional reasoningEndTokenId) { + std::vector tools; + std::vector> toolCallPositions; + size_t pos = 0; + + while (pos != std::string::npos) { + size_t start, end; + auto it = std::find(generatedTokens.begin() + pos, generatedTokens.end(), botTokenId); + if (it != generatedTokens.end()) { + start = std::distance(generatedTokens.begin(), it); + } else { + break; + } + auto itArgs = std::find(generatedTokens.begin() + start, generatedTokens.end(), eotTokenId); + if (itArgs != generatedTokens.end()) { + end = std::distance(generatedTokens.begin(), itArgs); + } else { + break; + } + + std::string toolListStr = tokenizer.decode(std::vector(generatedTokens.begin() + start + 1, generatedTokens.begin() + end), 
ov::AnyMap{ov::genai::skip_special_tokens(false)}); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool list string: {}", toolListStr); + toolListStr = toolListStr.substr(TOOL_LIST_START_INDICATOR.length(), toolListStr.length() - TOOL_LIST_START_INDICATOR.length() - TOOL_LIST_END_INDICATOR.length()); + + while (!toolListStr.empty()) { + size_t toolEndPos = findInStringRespectingSpecialChars(toolListStr, TOOL_ARGS_END_INDICATOR, 0); + std::string singleTool; + if (toolEndPos != std::string::npos) { + singleTool = toolListStr.substr(0, toolEndPos + TOOL_ARGS_END_INDICATOR.length()); + if (toolEndPos + TOOL_ARGS_END_INDICATOR.length() < toolListStr.length()) { + toolListStr = toolListStr.substr(toolEndPos + TOOL_ARGS_END_INDICATOR.length() + TOOL_SEPARATOR_STR.length()); + } else { + toolListStr.clear(); + } + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed single tool string {}", singleTool); + } else { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "No more tool calls found in tool list string: {}", toolListStr); + break; + } + + if (!singleTool.empty()) { + tools.push_back(singleTool); + } + } + pos = end; + toolCallPositions.emplace_back(start, end); + } + + for (const std::string& tool : tools) { + ToolCall toolCall; + auto wasToolCallParsed = parseSingleToolCall(tool, toolCall); + if (wasToolCallParsed) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool call - name: {}, args: {}", toolCall.name, toolCall.arguments); + parsedOutput.toolCalls.push_back(toolCall); + } else { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse tool call from string: {}", tool); + } + } + + std::vector contentWithoutToolCalls = generatedTokens; + for (auto it = toolCallPositions.rbegin(); it != toolCallPositions.rend(); ++it) { + contentWithoutToolCalls.erase(contentWithoutToolCalls.begin() + it->first, contentWithoutToolCalls.begin() + it->second + 1); + } + if (reasoningEndTokenId.has_value()) { + auto reasoningEndIt = 
std::find(contentWithoutToolCalls.begin(), contentWithoutToolCalls.end(), reasoningEndTokenId.value()); + if (reasoningEndIt != contentWithoutToolCalls.end()) { + contentWithoutToolCalls.erase(contentWithoutToolCalls.begin(), reasoningEndIt + 1); + } + } + + parsedOutput.content = tokenizer.decode(contentWithoutToolCalls, ov::AnyMap{ov::genai::skip_special_tokens(true)}); +} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_utils.hpp b/src/llm/io_processing/lfm2/lfm2_utils.hpp new file mode 100644 index 0000000000..f98586cbd7 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm2_utils.hpp @@ -0,0 +1,60 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include "src/llm/io_processing/base_output_parser.hpp" + +namespace ovms { + +extern const std::string TOOL_LIST_START_INDICATOR; +extern const std::string TOOL_LIST_END_INDICATOR; +extern const std::string TOOL_ARGS_START_INDICATOR; +extern const std::string TOOL_ARGS_END_INDICATOR; +extern const std::string TOOL_SEPARATOR_STR; +extern const std::string EOS_TOKEN_STR; +extern const int TOOL_CALL_INDEX_START; + +struct Argument { + std::string name; + std::string value; +}; + +enum class State { + Content, + ToolCallStarted, + ToolCallParameters, + ToolCallEnded, + AfterToolCall +}; + +std::string parseArrayParameter(std::string argumentStr); +std::string parseObjectParameter(std::string argumentStr); +std::string normalizeArgStr(const std::string& arg); +void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer); +Argument parseSingleArgument(const std::string& argumentStr); +std::vector parseArguments(const std::string& argumentsStr); +bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallStartTag, const std::string& toolCallEndTag); +bool parseInToolCallState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState); +bool parseToolCallParametersState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState); +bool parseInToolCallEndedState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallEndTag); +rapidjson::Document wrapDeltaContent(const std::string& content); +rapidjson::Document wrapDeltaArgs(const std::string& argsStr, int toolCallIndex); +void cutEOSFromContent(std::string& content); +bool parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall); +void 
parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& generatedTokens, ov::genai::Tokenizer& tokenizer, const int64_t botTokenId, const int64_t eotTokenId, const std::optional reasoningEndTokenId = std::nullopt); + +} // namespace ovms diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp index e676f2bea6..752353b18c 100644 --- a/src/llm/io_processing/output_parser.cpp +++ b/src/llm/io_processing/output_parser.cpp @@ -32,6 +32,8 @@ #include "gemma4/gemma4_reasoning_parser.hpp" #include "gptoss/reasoning_parser.hpp" #include "lfm2/lfm2_tool_parser.hpp" +#include "lfm2/lfm25_tool_parser.hpp" +#include "lfm2/lfm25_reasoning_parser.hpp" #include "gemma4/gemma4_tool_parser.hpp" namespace ovms { @@ -194,6 +196,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to toolParser = std::make_unique(tokenizer, toolNameSchemaMap); } else if (toolParserName == "lfm2") { toolParser = std::make_unique(tokenizer); + } else if (toolParserName == "lfm2.5") { + toolParser = std::make_unique(tokenizer); } else if (toolParserName == "gemma4") { toolParser = std::make_unique(tokenizer); } else if (!toolParserName.empty()) { @@ -207,6 +211,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to reasoningParser = std::make_unique(tokenizer); } else if (reasoningParserName == "gptoss") { reasoningParser = std::make_unique(tokenizer); + } else if (reasoningParserName == "lfm2.5") { + reasoningParser = std::make_unique(tokenizer); } else if (!reasoningParserName.empty()) { throw std::runtime_error("Unsupported reasoning parser: \"" + reasoningParserName + "\". 
Supported reasoning parsers are: " + getSupportedReasoningParserNamesAsString()); diff --git a/src/llm/io_processing/parser_config_validation.cpp b/src/llm/io_processing/parser_config_validation.cpp index 4f355fc20f..c40411f3a8 100644 --- a/src/llm/io_processing/parser_config_validation.cpp +++ b/src/llm/io_processing/parser_config_validation.cpp @@ -31,6 +31,7 @@ const std::vector& getSupportedToolParserNames() { "qwen3coder", "devstral", "lfm2", + "lfm2.5", "gemma4", }; return names; @@ -41,6 +42,7 @@ const std::vector& getSupportedReasoningParserNames() { "qwen3", "gemma4", "gptoss", + "lfm2.5", }; return names; } diff --git a/src/test/llm/output_parsers/lfm25_output_parser_test.cpp b/src/test/llm/output_parsers/lfm25_output_parser_test.cpp new file mode 100644 index 0000000000..1c06a5a59d --- /dev/null +++ b/src/test/llm/output_parsers/lfm25_output_parser_test.cpp @@ -0,0 +1,869 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../llm/io_processing/base_output_parser.hpp" +#include "../../../llm/io_processing/output_parser.hpp" +#include "../../platform_utils.hpp" + +using namespace ovms; + +#ifdef _WIN32 +const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\LiquidAI\\LFM2.5-8B-A1B"; +#else +// Hardcoded for usage in docker container +const std::string tokenizerPath = "/ovms/src/test/llm_testing/LiquidAI/LFM2.5-8B-A1B"; +#endif + +static std::unique_ptr lfm25Tokenizer; +static const ToolsSchemas_t& EMPTY_TOOLS_SCHEMA = {}; // not used in lfm2.5 + +class LFM25OutputParserTest : public ::testing::Test { +protected: + std::unique_ptr outputParserWithRegularToolParsing; + + static void SetUpTestSuite() { + try { + lfm25Tokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize lfm2.5 tokenizer: " << e.what(); + } catch (...) 
{ + FAIL() << "Failed to initialize lfm2 tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + lfm25Tokenizer.reset(); + } + + void SetUp() override { + // For LFM2 model there is only tool parser available + outputParserWithRegularToolParsing = std::make_unique(*lfm25Tokenizer, "lfm2.5", "lfm2.5", EMPTY_TOOLS_SCHEMA); + } + + void assertChunkEqual(const std::optional& doc, const std::optional& expectedDelta, const std::string& chunk) { + if (!expectedDelta.has_value() && !doc.has_value()) { + return; + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + std::string expected = expectedDelta.value(); + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk; + } else { + FAIL() << "Mismatch between expectedDelta and doc for chunk: " << chunk; + } + } + void assertStreamingVec(const std::vector>>& chunkToDeltaVec) { + for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) { + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, {}, true, finishReason); + if (!expectedDelta.has_value() && !doc.has_value()) { + continue; // Both are nullopt, OK + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + // If both strings contain "id":"...", compare id values by length and alphanumeric, else compare whole strings + std::string expected = expectedDelta.value(); + std::string idKey = "\"id\":\""; + auto docIdPos = docStr.find(idKey); + auto expectedIdPos = expected.find(idKey); + if (docIdPos != std::string::npos && expectedIdPos != std::string::npos) { + auto docIdStart = docIdPos + idKey.size(); + auto docIdEnd = docStr.find("\"", docIdStart); + auto expectedIdStart = expectedIdPos + idKey.size(); + auto expectedIdEnd = 
expected.find("\"", expectedIdStart); + ASSERT_NE(docIdEnd, std::string::npos); + ASSERT_NE(expectedIdEnd, std::string::npos); + std::string docId = docStr.substr(docIdStart, docIdEnd - docIdStart); + std::string expectedId = expected.substr(expectedIdStart, expectedIdEnd - expectedIdStart); + EXPECT_EQ(docId.size(), expectedId.size()) << "ID length mismatch for chunk: " << chunk; + EXPECT_TRUE(std::all_of(docId.begin(), docId.end(), ::isalnum)) << "ID not alphanumeric for chunk: " << chunk; + // Compare everything except the id value + std::string docStrNoId = docStr; + std::string expectedNoId = expected; + docStrNoId.replace(docIdStart, docId.size(), std::string(docId.size(), '*')); + expectedNoId.replace(expectedIdStart, expectedId.size(), std::string(expectedId.size(), '*')); + EXPECT_EQ(docStrNoId, expectedNoId) << "Mismatch for chunk (ignoring id value): " << chunk; + } else { + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk; + } + } else { + std::string expectedStr = expectedDelta.has_value() ? expectedDelta.value() : "std::nullopt"; + std::string docStr = doc.has_value() ? 
[&]() { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + return std::string(buffer.GetString()); + }() + : "std::nullopt"; + FAIL() << "Mismatch between expectedDelta and doc for chunk: " << chunk + << "\nexpectedDelta: " << expectedStr + << "\ndoc: " << docStr; + } + } + } +}; + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithSingleToolCall) { + std::string inputWithProperClosure = "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithNoToolsInTheRequest) { + std::string inputWithProperClosure = "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + std::string testInput = input; + auto generatedTensor = lfm25Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, false); + 
EXPECT_EQ(parsedOutput.content, testInput); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithObjectArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[dummy(config={'name': 'astro_config', 'value': 99})]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "dummy"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"config\":{\"name\":\"astro_config\",\"value\":99}}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[test1(arg1=\"data1, data2\")]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "test1"); + // Parser removes whitespaces, so we expect 
arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"data1, data2\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithListOfStringsAsArgument) { + std::string inputWithProperClosure = "<|tool_call_start|>[generate_DNA_sequence(length=100, preferences=['G', 'C'])]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "generate_DNA_sequence"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"length\":100,\"preferences\":[\"G\",\"C\"]}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParserToolCallWithBooleanArgument) { + std::string inputWithProperClosure = "<|tool_call_start|>[check_status(flag=True)]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + 
EXPECT_EQ(parsedOutput.toolCalls[0].name, "check_status"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"flag\":true}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseTwoToolCallsAtOnce) { + std::string inputWithProperClosure = "<|tool_call_start|>[dummy1(config={'name': 'astro_config', 'value': 99}), dummy2(config={'name': 'second_config', 'value': 199})]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 2); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "dummy1"); + EXPECT_EQ(parsedOutput.toolCalls[1].name, "dummy2"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"config\":{\"name\":\"astro_config\",\"value\":99}}"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"config\":{\"name\":\"second_config\",\"value\":199}}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithArrayArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[sort(array=[42, 17, 89, 5, 33], order=\"descending\")]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, 
ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "sort"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringWithSingleQuotesArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[sort(array=[42, 17, 89, 5, 33], order='descending')]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "sort"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithThreeToolCalls) { + std::string inputWithProperClosure = "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>" + 
"<|tool_call_start|>[another_tool(param1=\"data\", param2=true)]<|tool_call_end|>" + "<|tool_call_start|>[third_tool(key=\"value\")]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 3); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); + auto firstToolCallId = parsedOutput.toolCalls[0].id; + + EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}"); + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); + auto secondToolCallId = parsedOutput.toolCalls[1].id; + EXPECT_NE(firstToolCallId, secondToolCallId); + + EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool"); + EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}"); + EXPECT_EQ(parsedOutput.toolCalls[2].id.empty(), false); + auto thirdToolCallId = parsedOutput.toolCalls[2].id; + EXPECT_NE(firstToolCallId, thirdToolCallId); + EXPECT_NE(secondToolCallId, thirdToolCallId); + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithThreeToolCallsWithContentInBetween) { + std::string inputWithProperClosure = "Before tool calls content. " + "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>" + "This is some content between tool calls." 
+ "<|tool_call_start|>[another_tool(param1=\"data\", param2=true)]<|tool_call_end|>" + " This is some content between second and third tool call. " + "<|tool_call_start|>[third_tool(key=\"value\")]<|tool_call_end|>" + "After tool calls content."; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Before tool calls content. This is some content between tool calls. This is some content between second and third tool call. After tool calls content."); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 3); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); + auto firstToolCallId = parsedOutput.toolCalls[0].id; + + EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}"); + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); + auto secondToolCallId = parsedOutput.toolCalls[1].id; + EXPECT_NE(firstToolCallId, secondToolCallId); + + EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool"); + EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}"); + EXPECT_EQ(parsedOutput.toolCalls[2].id.empty(), false); + auto thirdToolCallId = parsedOutput.toolCalls[2].id; + EXPECT_NE(firstToolCallId, thirdToolCallId); + EXPECT_NE(secondToolCallId, thirdToolCallId); + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithEmptyArguments) { + // Tool call with empty parentheses (no arguments) + std::string input = 
"<|tool_call_start|>[no_args_tool()]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "no_args_tool"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { + std::string input = "This is a regular model response without tool calls."; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + EXPECT_EQ(parsedOutput.reasoning, ""); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { + std::string input = "This is a content part and next will be a tool call.\n\n<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n"); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, 
"{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(LFM25OutputParserTest, HolisticStreaming) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"sort", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(array", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"sort"}}]}})"}, + {"=[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"42", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 17", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 89", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 5", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 33", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"],", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" order", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"=\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"desc", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ending", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"\"),", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"}}]}})"}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", 
ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ANOTHER_CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"ANOTHER_CONTENT_AFTER_TOOL_CALL"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithBiggerChunks) { + std::vector>> chunkToDeltaVec{ + {"SOME_CONTENT", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"SOME_CONTENT"}})"}, + {"MORE_CONTENT<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"MORE_CONTENT"}})"}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"sort(array=", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"sort"}}]}})"}, + {"[42, 17, 89, 5, 33], order=\"descending\"", 
ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {")]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ANOTHER_CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"ANOTHER_CONTENT_AFTER_TOOL_CALL"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithToolCallAndFinishReason) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, 
R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|><|im_end|>", ov::genai::GenerationFinishReason::STOP, std::nullopt}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithToolCallAndEOSToken) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|im_end|>", 
ov::genai::GenerationFinishReason::STOP, std::nullopt}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithToolCallAndEOSTokenAndContent) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"SOME_CONTENT<|im_end|>", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"content":"SOME_CONTENT"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + 
+TEST_F(LFM25OutputParserTest, StreamingWithContentBetweenToolCalls) { + std::vector>> chunkToDeltaVec{ + // Tool call phase + // Starting first tool. Collecting chunk until full name is received. Don't return until then. + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"sort", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(array", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"sort"}}]}})"}, + {"=[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"42", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 17", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 89", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 5", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 33", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"],", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" order", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"=\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"desc", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ending", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"\")]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, 
std::nullopt}, + {"Some ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"Some "}})"}, + {"content ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"content "}})"}, + {"between ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"between "}})"}, + {"tool ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"tool "}})"}, + {"calls.", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"calls."}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov ::genai ::GenerationFinishReason ::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ANOTHER_CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, 
R"({"delta":{"content":"ANOTHER_CONTENT_AFTER_TOOL_CALL"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"solve", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(e", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":2,"function":{"name":"solve"}}]}})"}, + {"quation", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"=\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"2", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"*", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"x", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"+", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"5)", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" =", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 13", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"\")]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{\"equation\":\"2*(x+5) = 13\"}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"And some content after second tool call", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"And some content after second tool call"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, ToolCallsWithoutToolsInTheRequestStreaming) { + std::vector>> chunkToDeltaVec{ + // Tool parser is available, but tools are not in the request so every chunk is just a regular content + {"<|tool_call_start|>\n", "{\"delta\":{\"content\":\"<|tool_call_start|>\\n\"}}"}, + {"{\"", "{\"delta\":{\"content\":\"{\\\"\"}}"}, + {"name", "{\"delta\":{\"content\":\"name\"}}"}, + {"\":", "{\"delta\":{\"content\":\"\\\":\"}}"}, + {" \"", "{\"delta\":{\"content\":\" \\\"\"}}"}, + {"super", 
"{\"delta\":{\"content\":\"super\"}}"}, + {"_tool", "{\"delta\":{\"content\":\"_tool\"}}"}, + {"_number", "{\"delta\":{\"content\":\"_number\"}}"}, + {"_two", "{\"delta\":{\"content\":\"_two\"}}"}, + {"\",", "{\"delta\":{\"content\":\"\\\",\"}}"}, + {" \"", "{\"delta\":{\"content\":\" \\\"\"}}"}, + {"arguments", "{\"delta\":{\"content\":\"arguments\"}}"}, + {"\":", "{\"delta\":{\"content\":\"\\\":\"}}"}, + {" {", "{\"delta\":{\"content\":\" {\"}}"}, + {"\"", "{\"delta\":{\"content\":\"\\\"\"}}"}, + {"arg1", "{\"delta\":{\"content\":\"arg1\"}}"}, + {"\": ", "{\"delta\":{\"content\":\"\\\": \"}}"}, + {"\"", "{\"delta\":{\"content\":\"\\\"\"}}"}, + {"val{{{ue1", "{\"delta\":{\"content\":\"val{{{ue1\"}}"}, + {"\"", "{\"delta\":{\"content\":\"\\\"\"}}"}, + {"}", "{\"delta\":{\"content\":\"}\"}}"}, + {"}", "{\"delta\":{\"content\":\"}\"}}"}, + {"<|tool_call_end|>\n", "{\"delta\":{\"content\":\"<|tool_call_end|>\\n\"}}"}, + }; + + for (const auto& [chunk, expectedDelta] : chunkToDeltaVec) { + // The boolean passed to parseChunk (third argument) is false as we simulate the case where tools have not been provided in the request + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, {}, false, ov::genai::GenerationFinishReason::NONE); + assertChunkEqual(doc, expectedDelta, chunk); + } +} + +// Malformed tool calls + +TEST_F(LFM25OutputParserTest, ParseToolCallWithMissingParentheses) { + // "[broken_tool]" carries no "(...)" argument list, so no tool call is expected to be extracted + std::string input = "<|tool_call_start|>[broken_tool]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithMissingClosingParenthesis) { + std::string input = "<|tool_call_start|>[broken_tool(arg1=\"value1\"]<|tool_call_end|>"; + auto 
generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithArgumentMissingEquals) { + // Argument without '=' sign - parseSingleArgument sets isValid = false + std::string input = "<|tool_call_start|>[broken(malformed_arg)]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + // The tool call is parsed but the argument value will be empty and invalid + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "broken"); +} + +// Tests with special characters +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingComparison) { + std::string input = R"x(<|tool_call_start|>[search(query="price >= 100, (sale)", limit=5)]<|tool_call_end|>)x"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "search"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"x({"query":"price >= 100, (sale)","limit":5})x"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingBracesAndBrackets) { + std::string input = 
R"(<|tool_call_start|>[format(template="Hello {name}, items: [a, b, c]", count=3)]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "format"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"template":"Hello {name}, items: [a, b, c]","count":3})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingSpecialCharacters) { + std::string impl = "import package\nimport package2\n\ndef func(a, b):\n\td={\"python\": \"dict\"}\n\tl = [\"list \\\"with escaped text\\\"\", 123, []]\n\treturn f\"formatted {a} and {b}\""; + std::string input = R"(<|tool_call_start|>[execute(code=")" + impl + R"(")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "execute"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"code":"import package\nimport package2\n\ndef func(a, b):\n\td={\"python\": \"dict\"}\n\tl = [\"list \\\"with escaped text\\\"\", 123, []]\n\treturn f\"formatted {a} and {b}\""})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingEscapedQuotes) { + std::string input = R"x(<|tool_call_start|>[execute(code="print(\"hello world\")", verbose=true)]<|tool_call_end|>)x"; + auto generatedTensor = 
lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "execute"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"x({"code":"print(\"hello world\")","verbose":true})x"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingApostrophes) { + std::string input = R"(<|tool_call_start|>[log(message="it's a test, isn't it?", level="warn")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "log"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"message":"it's a test, isn't it?","level":"warn"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingBackslashes) { + std::string input = R"(<|tool_call_start|>[read_file(path="C:\Users\test\file.txt", encoding="utf-8")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "read_file"); + 
EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"path":"C:\\Users\\test\\file.txt","encoding":"utf-8"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsArrayWithStringsContainingQuotes) { + std::string input = R"(<|tool_call_start|>[save(lines=['it's the wonderful day', 'My name's Jan', 'That's Johns' car.'])]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "save"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"lines":["it's the wonderful day","My name's Jan","That's Johns' car."]})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentContainingSingleQuotes) { + std::string input = R"(<|tool_call_start|>[save(line="I've had line with single quotes")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "save"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"line":"I've had line with single quotes"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsObjectWithStringsContainingQuotes) { + std::string input = R"(<|tool_call_start|>[save(obj={'name':'it's the wonderful day', 'greeting':'Hello, my name's Jan', 'note':'That's Johns' car.'})]<|tool_call_end|>)"; + auto generatedTensor = 
lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "save"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"obj":{"name":"it's the wonderful day","greeting":"Hello, my name's Jan","note":"That's Johns' car."}})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingNestedJSON) { + std::string input = R"(<|tool_call_start|>[send(payload="{'key': 'value', 'count': 42}", endpoint="api")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "send"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"payload":"{'key': 'value', 'count': 42}","endpoint":"api"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithEmptyStringArgument) { + std::string input = R"(<|tool_call_start|>[create(name="", value=0)]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "create"); + 
EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"name":"","value":0})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithUnicodeCharactersInArguments) { + std::string input = R"(<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "translate"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"text":"zażółć gęślą jaźń","lang":"pl"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithPythonCodeAsArgument) { + std::string input = R"x(<|tool_call_start|>[string_tool(param=" + if __name__ == "__main__": + addresses = {} + addresses["Hodor"] = """The door""" + addresses["Arya"] = "Winterfell" + for name, address in addresses.items(): + print(f'\n\t{name} lives at {address}\n\r')")]<|tool_call_end|>)x"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "string_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"x({"param":"\n if __name__ == \"__main__\":\n addresses = {}\n addresses[\"Hodor\"] = \"\"\"The door\"\"\"\n addresses[\"Arya\"] = \"Winterfell\"\n for name, address in addresses.items():\n print(f'\\n\\t{name} lives at {address}\\n\\r')"})x"); +} + +TEST_F(LFM25OutputParserTest, 
ParseToolCallWithReasoning) { + // Text preceding the tool call is expected to come back in parsedOutput.reasoning, with empty content and a single parsed tool call. + std::string input = R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...]<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "translate"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"text":"zażółć gęślą jaźń","lang":"pl"})"); + EXPECT_EQ(parsedOutput.reasoning, R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...])"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithReasoningAndContent) { + std::string input = R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. 
[...]<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|> This is the content after the tool call.)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, " This is the content after the tool call."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "translate"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"text":"zażółć gęślą jaźń","lang":"pl"})"); + EXPECT_EQ(parsedOutput.reasoning, R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...])"); +} + +TEST_F(LFM25OutputParserTest, ParseOutputWithReasoningAndContent) { + std::string input = R"(User wants me to answer what is the difference between "foo" and "bar". I should answer with a short explanation. [...] 
The difference between "foo" and "bar" is that "foo" is often used as a placeholder name in programming, while "bar" is another placeholder name that is commonly used alongside "foo".)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, " The difference between \"foo\" and \"bar\" is that \"foo\" is often used as a placeholder name in programming, while \"bar\" is another placeholder name that is commonly used alongside \"foo\"."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + EXPECT_EQ(parsedOutput.reasoning, R"(User wants me to answer what is the difference between "foo" and "bar". I should answer with a short explanation. [...])"); +} diff --git a/src/test/llm/output_parsers/lfm2_output_parser_test.cpp b/src/test/llm/output_parsers/lfm2_output_parser_test.cpp index e20920aa6b..5b698d2a8c 100644 --- a/src/test/llm/output_parsers/lfm2_output_parser_test.cpp +++ b/src/test/llm/output_parsers/lfm2_output_parser_test.cpp @@ -167,8 +167,6 @@ TEST_F(LFM2OutputParserTest, ParseToolCallOutputWithNoToolsInTheRequest) { TEST_F(LFM2OutputParserTest, ParseToolCallWithObjectArguments) { std::string inputWithProperClosure = "<|tool_call_start|>[dummy(config={'name': 'astro_config', 'value': 99})]<|tool_call_end|>"; - // LFM2 may produce last tool call without closing tag, so we test both cases - // The results should be identical std::vector inputs = {inputWithProperClosure}; for (auto& input : inputs) { auto generatedTensor = lfm2Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; @@ -188,8 +186,6 @@ TEST_F(LFM2OutputParserTest, ParseToolCallWithObjectArguments) { TEST_F(LFM2OutputParserTest, ParseToolCallWithStringArguments) { std::string inputWithProperClosure = 
"<|tool_call_start|>[test1(arg1=\"data1, data2\")]<|tool_call_end|>"; - // LFM2 may produce last tool call without closing tag, so we test both cases - // The results should be identical std::vector inputs = {inputWithProperClosure}; for (auto& input : inputs) { auto generatedTensor = lfm2Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; diff --git a/src/test/llm/output_parsers/parser_config_validation_test.cpp b/src/test/llm/output_parsers/parser_config_validation_test.cpp index f7c841ccaa..f173627b3f 100644 --- a/src/test/llm/output_parsers/parser_config_validation_test.cpp +++ b/src/test/llm/output_parsers/parser_config_validation_test.cpp @@ -52,7 +52,7 @@ class ParserConfigValidationTest : public ::testing::Test { TEST_F(ParserConfigValidationTest, RegistryHasExpectedToolParsers) { const auto& names = getSupportedToolParserNames(); for (const auto& expected : {"llama3", "hermes3", "phi4", "mistral", "gptoss", - "qwen3coder", "devstral", "lfm2", "gemma4"}) { + "qwen3coder", "devstral", "lfm2", "lfm2.5", "gemma4"}) { EXPECT_NE(std::find(names.begin(), names.end(), expected), names.end()) << "Expected tool parser '" << expected << "' missing from registry"; } @@ -63,7 +63,7 @@ TEST_F(ParserConfigValidationTest, RegistryHasExpectedToolParsers) { TEST_F(ParserConfigValidationTest, RegistryHasExpectedReasoningParsers) { const auto& names = getSupportedReasoningParserNames(); - for (const auto& expected : {"qwen3", "gemma4", "gptoss"}) { + for (const auto& expected : {"qwen3", "gemma4", "gptoss", "lfm2.5"}) { EXPECT_NE(std::find(names.begin(), names.end(), expected), names.end()) << "Expected reasoning parser '" << expected << "' missing from registry"; } @@ -79,6 +79,7 @@ TEST_F(ParserConfigValidationTest, SupportedNamesStringContainsAllParsers) { const std::string reasoningNames = getSupportedReasoningParserNamesAsString(); EXPECT_NE(reasoningNames.find("qwen3"), std::string::npos); EXPECT_NE(reasoningNames.find("gptoss"), 
std::string::npos); + EXPECT_NE(reasoningNames.find("lfm2.5"), std::string::npos); } TEST_F(ParserConfigValidationTest, OutputParserThrowsOnUnknownToolParser) {