Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/llm/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ The calculator supports the following `node_options` for tuning the pipeline con
- `optional string tool_parser` - name of the parser to use for tool calls extraction from model output before creating a response;
- `optional bool enable_tool_guided_generation` - enable enforcing tool schema during generation. Requires setting response parser. [default = false];
- `optional SparseAttentionConfig sparse_attention_config` - Sparse attention configuration. Disabled if not specified.
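- `optional string generation_config_path` - path to a `generation_config.json` file holding the default generation parameters for this node. The path may be absolute or relative; a relative path is resolved against `models_path` (or against its parent directory when `models_path` points to a file, e.g. a GGUF file). When unset or empty, `generation_config.json` from `models_path` is used. If the option is set and the path does not exist, node initialization fails. This lets several deployments backed by the same model weights use different generation defaults without duplicating the model directory.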

### Caching settings
The value of `cache_size` might have performance and stability implications. It is used for storing LLM model KV cache data. Adjust it based on your environment capabilities, model size and expected level of concurrency.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,13 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
}
auto properties = std::static_pointer_cast<ContinuousBatchingServableProperties>(servable->getProperties());
properties->modelsPath = parsedModelsPath;
std::filesystem::path modelGenerationConfigPath = std::filesystem::path(parsedModelsPath) / "generation_config.json";
if (std::filesystem::exists(modelGenerationConfigPath)) {
properties->baseGenerationConfig = ov::genai::GenerationConfig(modelGenerationConfigPath.string());
std::string generationConfigPath;
status = resolveGenerationConfigPath(generationConfigPath, parsedModelsPath, nodeOptions);
if (!status.ok()) {
return status;
}
if (std::filesystem::exists(generationConfigPath)) {
properties->baseGenerationConfig = ov::genai::GenerationConfig(generationConfigPath);
}
if (nodeOptions.has_tool_parser()) {
properties->toolParserName = nodeOptions.tool_parser();
Expand Down
10 changes: 7 additions & 3 deletions src/llm/language_model/legacy/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ Status LegacyServableInitializer::initialize(std::shared_ptr<GenAiServable>& ser
auto properties = std::static_pointer_cast<LegacyServableProperties>(servable->getProperties());

properties->modelsPath = parsedModelsPath;
std::filesystem::path modelGenerationConfigPath = std::filesystem::path(parsedModelsPath) / "generation_config.json";
if (std::filesystem::exists(modelGenerationConfigPath)) {
properties->baseGenerationConfig = ov::genai::GenerationConfig(modelGenerationConfigPath.string());
std::string generationConfigPath;
status = resolveGenerationConfigPath(generationConfigPath, parsedModelsPath, nodeOptions);
if (!status.ok()) {
return status;
}
if (std::filesystem::exists(generationConfigPath)) {
properties->baseGenerationConfig = ov::genai::GenerationConfig(generationConfigPath);
}

if (nodeOptions.has_tool_parser()) {
Expand Down
7 changes: 7 additions & 0 deletions src/llm/llm_calculator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,11 @@ message LLMCalculatorOptions {
}

optional ChatTemplateMode chat_template_mode = 26;

// Optional path to a generation_config.json holding the default generation
// parameters for this node. Absolute, or relative to models_path. When unset,
// generation_config.json from models_path is used. Allows several deployments
// backed by the same model weights to use different generation defaults
// without duplicating the model directory.
optional string generation_config_path = 27;
}
24 changes: 24 additions & 0 deletions src/llm/servable_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,30 @@ Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string
return StatusCode::LLM_NODE_PATH_DOES_NOT_EXIST_AND_NOT_GGUFFILE;
}

// Resolves which generation_config.json should provide the default generation
// parameters for an LLM node.
//
// outPath          - receives the resolved path. It is first set to
//                    <parsedModelsPath>/generation_config.json and is only
//                    overwritten when a valid override is configured, so it
//                    still holds the default path when an error is returned.
// parsedModelsPath - already parsed models path (a directory, or a file).
// nodeOptions      - node options; generation_config_path is the optional override.
//
// Returns StatusCode::OK when no override is configured (the default path is NOT
// checked for existence here) or when the override points to an existing
// non-directory entry. Returns StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST when
// the override does not exist or is a directory.
Status resolveGenerationConfigPath(std::string& outPath, const std::string& parsedModelsPath, const mediapipe::LLMCalculatorOptions& nodeOptions) {
    // Default: generation_config.json inside the model directory.
    outPath = (std::filesystem::path(parsedModelsPath) / "generation_config.json").string();
    if (!nodeOptions.has_generation_config_path() || nodeOptions.generation_config_path().empty()) {
        return StatusCode::OK;
    }
    // Non-throwing filesystem queries: failures are reported through Status, so a
    // filesystem error (e.g. permission denied) must not escape as an exception.
    // With an error_code the queries simply report false on failure.
    std::error_code ec;
    // Explicit per-node override. A relative path is resolved against models_path
    // (its parent directory when models_path points at a file, e.g. a GGUF).
    std::filesystem::path overridePath(nodeOptions.generation_config_path());
    if (overridePath.is_relative()) {
        std::filesystem::path base(parsedModelsPath);
        if (!std::filesystem::is_directory(base, ec)) {
            base = base.parent_path();
        }
        overridePath = base / overridePath;
    }
    if (!std::filesystem::exists(overridePath, ec)) {
        SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node generation_config_path: {} does not exist.", overridePath.string());
        return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
    }
    // A directory exists but cannot be read as a generation config file; reject it
    // here instead of handing it to the caller as a config file path.
    if (std::filesystem::is_directory(overridePath, ec)) {
        SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node generation_config_path: {} is a directory, expected a file.", overridePath.string());
        return StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST;
    }
    outPath = overridePath.string();
    return StatusCode::OK;
}

std::optional<uint32_t> parseMaxModelLength(std::string& modelsPath) {
std::string configPath = FileSystem::appendSlash(modelsPath) + "config.json";
std::optional<uint32_t> maxModelLength;
Expand Down
1 change: 1 addition & 0 deletions src/llm/servable_initializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class GenAiServableInitializer {
virtual Status initialize(std::shared_ptr<GenAiServable>& servable, const mediapipe::LLMCalculatorOptions& nodeOptions, std::string graphPath) = 0;
};
Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath);
// Resolves the generation_config.json path for a node into outPath.
// Without a (non-empty) generation_config_path in nodeOptions, outPath is
// <parsedModelsPath>/generation_config.json and OK is returned without checking
// that the file exists. With an override, a relative path is resolved against
// parsedModelsPath (its parent directory when parsedModelsPath is not a
// directory); a non-OK status is returned when the resolved override does not exist.
Status resolveGenerationConfigPath(std::string& outPath, const std::string& parsedModelsPath, const mediapipe::LLMCalculatorOptions& nodeOptions);
std::optional<uint32_t> parseMaxModelLength(std::string& modelsPath);
Status determinePipelineType(PipelineType& pipelineType, const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath);
Status initializeGenAiServable(std::shared_ptr<GenAiServable>& servable, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath);
Expand Down
48 changes: 48 additions & 0 deletions src/test/llm/llmnode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4425,6 +4425,54 @@ TEST_F(LLMVLMOptionsHttpTest, LLMVLMNodeOptionsCheckPluginConfig) {
LLMNodeOptionsCheckPluginConfig(modelsPath);
}

// Unit test for the per-node generation_config.json path override (issue #4233).
// Builds a scratch layout under the system temp directory:
//   <base>/model/generation_config.json
//   <base>/model/alt_generation_config.json
//   <base>/model/model.gguf
//   <base>/overrides/custom_generation_config.json
// and checks how resolveGenerationConfigPath picks the config path.
TEST(LLMGenerationConfigPath, ResolveGenerationConfigPath) {
    std::filesystem::path base = std::filesystem::temp_directory_path() / "ovms_gencfg_test";
    // Start from a clean state in case a previous run aborted before cleanup.
    std::filesystem::remove_all(base);
    std::filesystem::path modelDir = base / "model";
    std::filesystem::path overrideDir = base / "overrides";
    std::filesystem::create_directories(modelDir);
    std::filesystem::create_directories(overrideDir);
    auto writeFile = [](const std::filesystem::path& p) {
        std::ofstream ofs(p);
        ofs << "{}";
    };
    writeFile(modelDir / "generation_config.json");
    writeFile(modelDir / "alt_generation_config.json");
    writeFile(modelDir / "model.gguf");
    writeFile(overrideDir / "custom_generation_config.json");

    // Case 1: no override -> default generation_config.json inside the model dir.
    {
        mediapipe::LLMCalculatorOptions nodeOptions;
        std::string outPath;
        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
        ASSERT_EQ(std::filesystem::path(outPath), modelDir / "generation_config.json");
    }
    // Case 1b: override set to an empty string is treated like no override.
    {
        mediapipe::LLMCalculatorOptions nodeOptions;
        nodeOptions.set_generation_config_path("");
        std::string outPath;
        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
        ASSERT_EQ(std::filesystem::path(outPath), modelDir / "generation_config.json");
    }
    // Case 2: explicit absolute override path.
    {
        mediapipe::LLMCalculatorOptions nodeOptions;
        nodeOptions.set_generation_config_path((overrideDir / "custom_generation_config.json").string());
        std::string outPath;
        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
        ASSERT_EQ(std::filesystem::path(outPath), overrideDir / "custom_generation_config.json");
    }
    // Case 3: explicit relative override path is resolved against models_path.
    // Uses a file name different from the default so the result cannot be
    // confused with the no-override fallback.
    {
        mediapipe::LLMCalculatorOptions nodeOptions;
        nodeOptions.set_generation_config_path("alt_generation_config.json");
        std::string outPath;
        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
        ASSERT_EQ(std::filesystem::path(outPath), modelDir / "alt_generation_config.json");
    }
    // Case 3b: models_path points at a file (e.g. a GGUF) -> a relative override
    // is resolved against the parent directory of that file.
    {
        mediapipe::LLMCalculatorOptions nodeOptions;
        nodeOptions.set_generation_config_path("alt_generation_config.json");
        std::string outPath;
        ASSERT_EQ(ovms::resolveGenerationConfigPath(outPath, (modelDir / "model.gguf").string(), nodeOptions), ovms::StatusCode::OK);
        ASSERT_EQ(std::filesystem::path(outPath), modelDir / "alt_generation_config.json");
    }
    // Case 4: explicit override that does not exist -> error.
    {
        mediapipe::LLMCalculatorOptions nodeOptions;
        nodeOptions.set_generation_config_path((overrideDir / "missing.json").string());
        std::string outPath;
        ASSERT_NE(ovms::resolveGenerationConfigPath(outPath, modelDir.string(), nodeOptions), ovms::StatusCode::OK);
    }

    std::filesystem::remove_all(base);
}
void LLMNodeOptionsCheckNonDefault(std::string& modelsPath) {
std::string testPbtxt = R"(
input_stream: "HTTP_REQUEST_PAYLOAD:input"
Expand Down