openvinotoolkit · exzile · Jun 26, 2026
diff --git a/docs/llm/reference.md b/docs/llm/reference.md
@@ -109,6 +109,7 @@ The calculator supports the following `node_options` for tuning the pipeline con
 -    `optional string tool_parser` - name of the parser to use for tool calls extraction from model output before creating a response;
 -    `optional bool enable_tool_guided_generation` - enable enforcing tool schema during generation. Requires setting response parser. [default = false];
 -    `optional SparseAttentionConfig sparse_attention_config` - Sparse attention configuration. Disabled if not specified.
+-    `optional string generation_config_path` - path to a `generation_config.json` holding the default generation parameters for this node. The path may be absolute or relative to `models_path` (or to its parent directory when `models_path` points to a file, e.g. a GGUF). When unset or empty, `generation_config.json` from `models_path` is used. When set, the file must exist; otherwise the node fails to initialize. This lets several deployments backed by the same model weights use different generation defaults without duplicating the model directory.
 
 ### Caching settings
 The value of `cache_size` might have performance and stability implications. It is used for storing LLM model KV cache data. Adjust it based on your environment capabilities, model size and expected level of concurrency.

diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp
@@ -140,9 +140,13 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
     }
     auto properties = std::static_pointer_cast<ContinuousBatchingServableProperties>(servable->getProperties());
     properties->modelsPath = parsedModelsPath;
-    std::filesystem::path modelGenerationConfigPath = std::filesystem::path(parsedModelsPath) / "generation_config.json";
-    if (std::filesystem::exists(modelGenerationConfigPath)) {
-        properties->baseGenerationConfig = ov::genai::GenerationConfig(modelGenerationConfigPath.string());
+    std::string generationConfigPath;
+    status = resolveGenerationConfigPath(generationConfigPath, parsedModelsPath, nodeOptions);
+    if (!status.ok()) {
+        return status;
+    }
+    if (std::filesystem::exists(generationConfigPath)) {
+        properties->baseGenerationConfig = ov::genai::GenerationConfig(generationConfigPath);
     }
     if (nodeOptions.has_tool_parser()) {
         properties->toolParserName = nodeOptions.tool_parser();

diff --git a/src/llm/language_model/legacy/servable_initializer.cpp b/src/llm/language_model/legacy/servable_initializer.cpp
@@ -50,9 +50,13 @@ Status LegacyServableInitializer::initialize(std::shared_ptr<GenAiServable>& ser
     auto properties = std::static_pointer_cast<LegacyServableProperties>(servable->getProperties());
 
     properties->modelsPath = parsedModelsPath;
-    std::filesystem::path modelGenerationConfigPath = std::filesystem::path(parsedModelsPath) / "generation_config.json";
-    if (std::filesystem::exists(modelGenerationConfigPath)) {
-        properties->baseGenerationConfig = ov::genai::GenerationConfig(modelGenerationConfigPath.string());
+    std::string generationConfigPath;
+    status = resolveGenerationConfigPath(generationConfigPath, parsedModelsPath, nodeOptions);
+    if (!status.ok()) {
+        return status;
+    }
+    if (std::filesystem::exists(generationConfigPath)) {
+        properties->baseGenerationConfig = ov::genai::GenerationConfig(generationConfigPath);
     }
 
     if (nodeOptions.has_tool_parser()) {

diff --git a/src/llm/llm_calculator.proto b/src/llm/llm_calculator.proto
@@ -150,4 +150,11 @@ message LLMCalculatorOptions {
     }
 
     optional ChatTemplateMode chat_template_mode = 26;
+
+    // Optional path to a generation_config.json holding the default generation
+    // parameters for this node. Absolute, or relative to models_path. When unset,
+    // generation_config.json from models_path is used. Allows several deployments
+    // backed by the same model weights to use different generation defaults
+    // without duplicating the model directory.
+    optional string generation_config_path = 27;
 }
diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp
@@ -334,6 +334,30 @@ Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string
     return StatusCode::LLM_NODE_PATH_DOES_NOT_EXIST_AND_NOT_GGUFFILE;
 }
 
+// Resolves the node's generation config path into outPath: <parsedModelsPath>/generation_config.json (existence is not
+// checked here) unless a non-empty generation_config_path is set. A relative override is resolved against models_path
+// (its parent when that is a file, e.g. a GGUF) and must be a regular file - exists() alone would accept a directory.
+Status resolveGenerationConfigPath(std::string& outPath, const std::string& parsedModelsPath, const mediapipe::LLMCalculatorOptions& nodeOptions) {
+    outPath = (std::filesystem::path(parsedModelsPath) / "generation_config.json").string();
+    if (!nodeOptions.has_generation_config_path() || nodeOptions.generation_config_path().empty()) {
+        return StatusCode::OK;
+    }
+    std::filesystem::path overridePath(nodeOptions.generation_config_path());
+    if (overridePath.is_relative()) {
+        std::filesystem::path base(parsedModelsPath);
+        if (!std::filesystem::is_directory(base)) {
+            base = base.parent_path();
+        }
+        overridePath = base / overridePath;
+    }
+    if (!std::filesystem::is_regular_file(overridePath)) {
+        SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node generation_config_path: {} does not exist or is not a regular file.", overridePath.string());
+        return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
+    }
+    outPath = overridePath.string();
+    return StatusCode::OK;
+}
+
 std::optional<uint32_t> parseMaxModelLength(std::string& modelsPath) {
     std::string configPath = FileSystem::appendSlash(modelsPath) + "config.json";
     std::optional<uint32_t> maxModelLength;

diff --git a/src/llm/servable_initializer.hpp b/src/llm/servable_initializer.hpp
@@ -61,6 +61,7 @@ class GenAiServableInitializer {
     virtual Status initialize(std::shared_ptr<GenAiServable>& servable, const mediapipe::LLMCalculatorOptions& nodeOptions, std::string graphPath) = 0;
 };
 Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath);
+Status resolveGenerationConfigPath(std::string& outPath, const std::string& parsedModelsPath, const mediapipe::LLMCalculatorOptions& nodeOptions);
 std::optional<uint32_t> parseMaxModelLength(std::string& modelsPath);
 Status determinePipelineType(PipelineType& pipelineType, const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath);
 Status initializeGenAiServable(std::shared_ptr<GenAiServable>& servable, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath);

diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp
@@ -4425,6 +4425,54 @@ TEST_F(LLMVLMOptionsHttpTest, LLMVLMNodeOptionsCheckPluginConfig) {
     LLMNodeOptionsCheckPluginConfig(modelsPath);
 }
 
+// Unit test for the per-node generation_config.json path override (issue #4233). EXPECT_* is used so the final cleanup always runs.
+TEST(LLMGenerationConfigPath, ResolveGenerationConfigPath) {
+    std::filesystem::path base = std::filesystem::temp_directory_path() / "ovms_gencfg_test";
+    std::filesystem::remove_all(base);
+    std::filesystem::path modelDir = base / "model";
+    std::filesystem::path overrideDir = base / "overrides";
+    std::filesystem::create_directories(modelDir);
+    std::filesystem::create_directories(overrideDir);
+    auto writeFile = [](const std::filesystem::path& p) {
+        std::ofstream(p) << "{}";
+    };
+    writeFile(modelDir / "generation_config.json");
+    writeFile(modelDir / "node_generation_config.json");
+    writeFile(overrideDir / "custom_generation_config.json");
+
+    // Case 1: no override -> default generation_config.json inside the model dir.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        std::string outPath;
+        EXPECT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+        EXPECT_EQ(std::filesystem::path(outPath), modelDir / "generation_config.json");
+    }
+    // Case 2: explicit absolute override path.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        nodeOptions.set_generation_config_path((overrideDir / "custom_generation_config.json").string());
+        std::string outPath;
+        EXPECT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+        EXPECT_EQ(std::filesystem::path(outPath), overrideDir / "custom_generation_config.json");
+    }
+    // Case 3: relative override resolves against models_path; the name differs from the default so the default cannot satisfy it.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        nodeOptions.set_generation_config_path("node_generation_config.json");
+        std::string outPath;
+        EXPECT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+        EXPECT_EQ(std::filesystem::path(outPath), modelDir / "node_generation_config.json");
+    }
+    // Case 4: explicit override that does not exist -> error.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        nodeOptions.set_generation_config_path((overrideDir / "missing.json").string());
+        std::string outPath;
+        EXPECT_NE(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+    }
+
+    std::filesystem::remove_all(base);
+}
 void LLMNodeOptionsCheckNonDefault(std::string& modelsPath) {
     std::string testPbtxt = R"(
         input_stream: "HTTP_REQUEST_PAYLOAD:input"