diff --git a/docs/llm/reference.md b/docs/llm/reference.md index 698d05031b..3aae2d9d82 100644 --- a/docs/llm/reference.md +++ b/docs/llm/reference.md @@ -109,6 +109,7 @@ The calculator supports the following `node_options` for tuning the pipeline con - `optional string tool_parser` - name of the parser to use for tool calls extraction from model output before creating a response; - `optional bool enable_tool_guided_generation` - enable enforcing tool schema during generation. Requires setting response parser. [default = false]; - `optional SparseAttentionConfig sparse_attention_config` - Sparse attention configuration. Disabled if not specified. +- `optional string generation_config_path` - path to a `generation_config.json` holding the default generation parameters for this node. Absolute, or relative to `models_path` (relative to its parent directory when `models_path` points at a file, e.g. a GGUF). When unset, `generation_config.json` from `models_path` is used if it is present. When set, the file must exist; otherwise the node fails to initialize. Lets several deployments backed by the same model weights use different generation defaults without duplicating the model directory. ### Caching settings The value of `cache_size` might have performance and stability implications. It is used for storing LLM model KV cache data. Adjust it based on your environment capabilities, model size and expected level of concurrency.
diff --git a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index 1aaff99844..cdc66000bd 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -140,9 +140,13 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr(servable->getProperties()); properties->modelsPath = parsedModelsPath; - std::filesystem::path modelGenerationConfigPath = std::filesystem::path(parsedModelsPath) / "generation_config.json"; - if (std::filesystem::exists(modelGenerationConfigPath)) { - properties->baseGenerationConfig = ov::genai::GenerationConfig(modelGenerationConfigPath.string()); + std::string generationConfigPath; + status = resolveGenerationConfigPath(generationConfigPath, parsedModelsPath, nodeOptions); + if (!status.ok()) { + return status; + } + if (std::filesystem::exists(generationConfigPath)) { + properties->baseGenerationConfig = ov::genai::GenerationConfig(generationConfigPath); } if (nodeOptions.has_tool_parser()) { properties->toolParserName = nodeOptions.tool_parser(); diff --git a/src/llm/language_model/legacy/servable_initializer.cpp b/src/llm/language_model/legacy/servable_initializer.cpp index 52d041f74d..65c371feb9 100644 --- a/src/llm/language_model/legacy/servable_initializer.cpp +++ b/src/llm/language_model/legacy/servable_initializer.cpp @@ -50,9 +50,13 @@ Status LegacyServableInitializer::initialize(std::shared_ptr& ser auto properties = std::static_pointer_cast(servable->getProperties()); properties->modelsPath = parsedModelsPath; - std::filesystem::path modelGenerationConfigPath = std::filesystem::path(parsedModelsPath) / "generation_config.json"; - if (std::filesystem::exists(modelGenerationConfigPath)) { - properties->baseGenerationConfig = ov::genai::GenerationConfig(modelGenerationConfigPath.string()); + std::string generationConfigPath; + 
status = resolveGenerationConfigPath(generationConfigPath, parsedModelsPath, nodeOptions); + if (!status.ok()) { + return status; + } + if (std::filesystem::exists(generationConfigPath)) { + properties->baseGenerationConfig = ov::genai::GenerationConfig(generationConfigPath); } if (nodeOptions.has_tool_parser()) { diff --git a/src/llm/llm_calculator.proto b/src/llm/llm_calculator.proto index ce252ea899..aa08b41f1e 100644 --- a/src/llm/llm_calculator.proto +++ b/src/llm/llm_calculator.proto @@ -150,4 +150,11 @@ message LLMCalculatorOptions { } optional ChatTemplateMode chat_template_mode = 26; + + // Optional path to a generation_config.json holding the default generation + // parameters for this node. Absolute, or relative to models_path. When unset, + // generation_config.json from models_path is used. Allows several deployments + // backed by the same model weights to use different generation defaults + // without duplicating the model directory. + optional string generation_config_path = 27; } diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index 90673fdbc3..7ae25bb7b8 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -334,6 +334,42 @@ Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string return StatusCode::LLM_NODE_PATH_DOES_NOT_EXIST_AND_NOT_GGUFFILE; }
+// Resolves the generation_config.json that a node loads its default generation parameters from.
+//
+// @param outPath           Receives the resolved path. It is first set to the default
+//                          (generation_config.json inside parsedModelsPath) and replaced by
+//                          the override only when the override is valid.
+// @param parsedModelsPath  Resolved models_path of the node: a directory, or a file such as a GGUF.
+// @param nodeOptions       Node options; generation_config_path is the optional override.
+// @return StatusCode::OK when no override is configured (the default path is not checked for
+//         existence here) or when the override names an existing regular file;
+//         StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST otherwise.
+Status resolveGenerationConfigPath(std::string& outPath, const std::string& parsedModelsPath, const mediapipe::LLMCalculatorOptions& nodeOptions) {
+    const std::filesystem::path modelsPath(parsedModelsPath);
+    outPath = (modelsPath / "generation_config.json").string();
+    if (!nodeOptions.has_generation_config_path() || nodeOptions.generation_config_path().empty()) {
+        return StatusCode::OK;
+    }
+    // Explicit per-node override. Non-throwing filesystem overloads are used so that an
+    // inaccessible path is reported through the returned Status instead of an exception.
+    std::error_code ec;
+    std::filesystem::path overridePath(nodeOptions.generation_config_path());
+    if (overridePath.is_relative()) {
+        // Resolve against models_path, or against its parent directory when
+        // models_path points at a file (e.g. a GGUF).
+        const bool modelsPathIsDirectory = std::filesystem::is_directory(modelsPath, ec);
+        overridePath = (modelsPathIsDirectory ? modelsPath : modelsPath.parent_path()) / overridePath;
+    }
+    // Require a regular file: a directory would satisfy a plain exists() check and
+    // then be handed to the configuration loader as if it were a JSON file.
+    if (!std::filesystem::is_regular_file(overridePath, ec)) {
+        SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node generation_config_path: {} does not exist or is not a regular file.", overridePath.string());
+        return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
+    }
+    outPath = overridePath.string();
+    return StatusCode::OK;
+}
+
 std::optional parseMaxModelLength(std::string& modelsPath) { std::string configPath = FileSystem::appendSlash(modelsPath) + "config.json"; std::optional maxModelLength; diff --git a/src/llm/servable_initializer.hpp b/src/llm/servable_initializer.hpp index d742db9c3e..752fcdd443 100644 --- a/src/llm/servable_initializer.hpp +++ b/src/llm/servable_initializer.hpp @@ -61,6 +61,7 @@ class GenAiServableInitializer { virtual Status initialize(std::shared_ptr& servable, const mediapipe::LLMCalculatorOptions& nodeOptions, std::string graphPath) = 0; }; Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath); +Status resolveGenerationConfigPath(std::string& outPath, const std::string& parsedModelsPath, const mediapipe::LLMCalculatorOptions& nodeOptions); std::optional parseMaxModelLength(std::string& modelsPath); Status determinePipelineType(PipelineType& pipelineType, const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath); Status initializeGenAiServable(std::shared_ptr& servable, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath); diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index e13cf29919..3598a953a1 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -4425,6 +4425,54 @@ TEST_F(LLMVLMOptionsHttpTest, LLMVLMNodeOptionsCheckPluginConfig) {
LLMNodeOptionsCheckPluginConfig(modelsPath); }
+// Unit test for the per-node generation_config.json path override (issue #4233).
+// Covers the default path, absolute and relative overrides, and a missing override file.
+TEST(LLMGenerationConfigPath, ResolveGenerationConfigPath) {
+    const std::filesystem::path base = std::filesystem::temp_directory_path() / "ovms_gencfg_test";
+    std::filesystem::remove_all(base);  // start clean even if an earlier run aborted before its cleanup
+    const std::filesystem::path modelDir = base / "model";
+    const std::filesystem::path overrideDir = base / "overrides";
+    const std::filesystem::path overrideFile = overrideDir / "custom_generation_config.json";
+    std::filesystem::create_directories(modelDir);
+    std::filesystem::create_directories(overrideDir);
+    auto writeFile = [](const std::filesystem::path& p) { std::ofstream{p} << "{}"; };
+    writeFile(modelDir / "generation_config.json");
+    writeFile(overrideFile);
+
+    // Case 1: no override -> default generation_config.json inside the model dir.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        std::string outPath;
+        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+        ASSERT_EQ(std::filesystem::path(outPath), modelDir / "generation_config.json");
+    }
+    // Case 2: explicit absolute override path.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        nodeOptions.set_generation_config_path(overrideFile.string());
+        std::string outPath;
+        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+        ASSERT_EQ(std::filesystem::path(outPath), overrideFile);
+    }
+    // Case 3: explicit relative override path is resolved against models_path. The target
+    // differs from the default file, so silently falling back to the default fails this case.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        nodeOptions.set_generation_config_path("../overrides/custom_generation_config.json");
+        std::string outPath;
+        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+        ASSERT_TRUE(std::filesystem::equivalent(outPath, overrideFile));
+    }
+    // Case 4: explicit override that does not exist -> error.
+    {
+        mediapipe::LLMCalculatorOptions nodeOptions;
+        nodeOptions.set_generation_config_path((overrideDir / "missing.json").string());
+        std::string outPath;
+        ASSERT_NE(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
+    }
+
+    std::filesystem::remove_all(base);
+}
 void LLMNodeOptionsCheckNonDefault(std::string& modelsPath) { std::string testPbtxt = R"( input_stream: "HTTP_REQUEST_PAYLOAD:input"