diff --git a/src/modelinstance.cpp b/src/modelinstance.cpp index 438b902b5e..074cd0ec2e 100644 --- a/src/modelinstance.cpp +++ b/src/modelinstance.cpp @@ -1023,6 +1023,14 @@ plugin_config_t ModelInstance::prepareDefaultPluginConfig(const ModelConfig& con Status ModelInstance::loadOVCompiledModel(const ModelConfig& config) { plugin_config_t pluginConfig = prepareDefaultPluginConfig(config); + if (config.getTargetDevice() == "CPU") { + Status status = applyDefaultCpuProperties(pluginConfig); + if (!status.ok()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to apply default CPU properties for model: {}; version: {}; error: {}", + getName(), getVersion(), status.string()); + return status; + } + } try { loadCompiledModelPtr(pluginConfig); } catch (ov::Exception& e) { diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index 25f6d49b2f..544ad2a69b 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -91,19 +91,7 @@ ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistr modelCacheDirectory(modelCacheDirectory), metricRegistry(registry), pythonBackend(pythonBackend) { - try { - this->ieCore = std::make_unique(); - ov::AnyMap cpuProperties; - Status status = applyDefaultCpuProperties(cpuProperties); - if (!status.ok()) { - SPDLOG_CRITICAL("Failed to apply default CPU properties. Reason: {}", status.string()); - throw std::runtime_error("Failed to apply default CPU properties"); - } - this->ieCore->set_property("CPU", cpuProperties); - } catch (const std::exception& ex) { - SPDLOG_CRITICAL("Failed to initialize OpenVINO Core with CPU properties. Reason: {}", ex.what()); - throw; - } + this->ieCore = std::make_unique(); OV_LOGGER("ov::Core(): {}", reinterpret_cast(this->ieCore.get())); // Take --cache_dir from CLI diff --git a/src/ov_utils.cpp b/src/ov_utils.cpp index 92ac74a168..aeb4ca1e11 100644 --- a/src/ov_utils.cpp +++ b/src/ov_utils.cpp @@ -150,47 +150,48 @@ Status validatePluginConfiguration(const plugin_config_t& pluginConfig, const st return StatusCode::OK; } -Status applyDefaultCpuProperties(ov::AnyMap& properties) { -#ifdef __linux__ - try { - if (!isRunningInDocker()) { - return StatusCode::OK; - } - const uint16_t coreCount = getCoreCount(); - - if (properties.find(ov::hint::enable_cpu_pinning.name()) == properties.end()) { - const bool cpuPinning = getDockerCpuQuota() <= 0; - properties[ov::hint::enable_cpu_pinning.name()] = cpuPinning; - SPDLOG_DEBUG("applyDefaultCpuProperties: setting enable_cpu_pinning to {}", cpuPinning); - } +Status applyDefaultCpuProperties(ov::AnyMap& properties, uint16_t coreCount, uint16_t physicalCoresPerSocket, uint16_t socketsCount, uint16_t dockerCpuQuota) { + if (properties.find(ov::hint::enable_cpu_pinning.name()) == properties.end()) { + const bool cpuPinning = dockerCpuQuota <= 0; + properties[ov::hint::enable_cpu_pinning.name()] = cpuPinning; + SPDLOG_DEBUG("applyDefaultCpuProperties, DockerCPUQuota: {} - setting enable_cpu_pinning to {}", dockerCpuQuota, cpuPinning); + } - bool isThroughput = false; - const auto perfIt = properties.find(ov::hint::performance_mode.name()); - if (perfIt != properties.end()) { + bool isThroughput = false; + const auto perfIt = properties.find(ov::hint::performance_mode.name()); + if (perfIt != properties.end()) { + try { + isThroughput = (perfIt->second.as() == ov::hint::PerformanceMode::THROUGHPUT); + } catch (...) { try { - isThroughput = (perfIt->second.as() == ov::hint::PerformanceMode::THROUGHPUT); + isThroughput = (perfIt->second.as() == "THROUGHPUT"); } catch (...) { - try { - isThroughput = (perfIt->second.as() == "THROUGHPUT"); - } catch (...) { - } - } - if (isThroughput && properties.find(ov::num_streams.name()) == properties.end()) { - properties[ov::num_streams.name()] = static_cast(coreCount); - SPDLOG_DEBUG("applyDefaultCpuProperties: setting num_streams to {} (THROUGHPUT hint active)", coreCount); } } + if (isThroughput && properties.find(ov::num_streams.name()) == properties.end()) { + int numStreams = std::min(static_cast(coreCount), static_cast(physicalCoresPerSocket * socketsCount)); + properties[ov::num_streams.name()] = numStreams; + SPDLOG_DEBUG("applyDefaultCpuProperties, CoreCount: {}, PhysicalCoresPerSocket: {}, SocketsCount: {} - setting num_streams to {} (THROUGHPUT hint active)", coreCount, physicalCoresPerSocket, socketsCount, numStreams); + } + } - if (properties.find(ov::inference_num_threads.name()) == properties.end()) { - int numThreads; - if (isThroughput) { - numThreads = static_cast(coreCount); - } else { - numThreads = std::min(static_cast(coreCount), static_cast(getPhysicalCoresPerSocket())); - } + if (properties.find(ov::inference_num_threads.name()) == properties.end()) { + if (coreCount <= physicalCoresPerSocket * socketsCount) { + int numThreads = static_cast(coreCount); properties[ov::inference_num_threads.name()] = numThreads; - SPDLOG_DEBUG("applyDefaultCpuProperties: setting inference_num_threads to {}", numThreads); + SPDLOG_DEBUG("applyDefaultCpuProperties: CoreCount: {}, PhysicalCoresPerSocket: {}, SocketsCount: {}, setting inference_num_threads to {}", coreCount, physicalCoresPerSocket, socketsCount, numThreads); + } + } + return StatusCode::OK; +} + +Status applyDefaultCpuProperties(ov::AnyMap& properties) { +#ifdef __linux__ + try { + if (!isRunningInDocker()) { + return StatusCode::OK; } + return applyDefaultCpuProperties(properties, getCoreCount(), getPhysicalCoresPerSocket(), getSocketsCount(), getDockerCpuQuota()); } catch (const std::exception& ex) { SPDLOG_WARN("Exception while applying default CPU properties: {}", ex.what()); } catch (...) { diff --git a/src/ov_utils.hpp b/src/ov_utils.hpp index c1ca92ecb4..cc62209d1d 100644 --- a/src/ov_utils.hpp +++ b/src/ov_utils.hpp @@ -59,6 +59,9 @@ Status validatePluginConfiguration(const plugin_config_t& pluginConfig, const st // Returns StatusCode::INTERNAL_ERROR on any OpenVINO exception. Status applyDefaultCpuProperties(ov::AnyMap& properties); +// Testable overload accepting explicit system parameters instead of querying the OS. +Status applyDefaultCpuProperties(ov::AnyMap& properties, uint16_t coreCount, uint16_t physicalCoresPerSocket, uint16_t socketsCount, uint16_t dockerCpuQuota); + // Logging // #1 model/global plugin CompiledMode:DUMMY / Global OpenVINO plugin:CPU // #2 version/_ diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index 0265e3e415..aa2895bc22 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -154,6 +154,24 @@ uint16_t getDockerCpuQuota() { return 0; // No quota set } +uint16_t getSocketsCount() { + std::set uniqueSockets; + std::ifstream cpuInfo("/proc/cpuinfo"); + if (!cpuInfo.is_open()) { + return 1; // Default to 1 socket if unable to read + } + std::string line; + while (std::getline(cpuInfo, line)) { + if (line.find("physical id") != std::string::npos) { + uniqueSockets.insert(line); + } + } + if (uniqueSockets.empty()) { + return 1; // Fallback when "physical id" is not present in /proc/cpuinfo + } + return static_cast(uniqueSockets.size()); +} + uint16_t getPhysicalCoresPerSocket() { std::set uniqueCores; std::ifstream cpuInfo("/proc/cpuinfo"); @@ -161,9 +179,19 @@ uint16_t getPhysicalCoresPerSocket() { return std::max(static_cast(std::thread::hardware_concurrency()), 1); } std::string line; + std::string currentPhysicalId; + std::string firstPhysicalId; while (std::getline(cpuInfo, line)) { + if (line.find("physical id") != std::string::npos) { + currentPhysicalId = line; + if (firstPhysicalId.empty()) { + firstPhysicalId = line; + } + } if (line.find("core id") != std::string::npos) { - uniqueCores.insert(line); + if (currentPhysicalId == firstPhysicalId) { + uniqueCores.insert(line); + } } } if (uniqueCores.empty()) { diff --git a/src/systeminfo.hpp b/src/systeminfo.hpp index fe47c758a5..4595bccd7a 100644 --- a/src/systeminfo.hpp +++ b/src/systeminfo.hpp @@ -40,5 +40,12 @@ uint16_t getDockerCpuQuota(); * @return uint16_t Number of physical cores, or hardware_concurrency if detection fails */ uint16_t getPhysicalCoresPerSocket(); + +/** + * @brief Get number of CPU sockets + * @return uint16_t Number of CPU sockets, or 1 if detection fails + */ +uint16_t getSocketsCount(); + #endif } // namespace ovms diff --git a/src/test/ov_utils_test.cpp b/src/test/ov_utils_test.cpp index 579426b99d..9635bee46a 100644 --- a/src/test/ov_utils_test.cpp +++ b/src/test/ov_utils_test.cpp @@ -240,3 +240,112 @@ TEST(OVUtils, ValidatePluginConfigurationAllowEnableMmap) { auto model = ieCore.read_model(std::filesystem::current_path().u8string() + "/src/test/dummy/1/dummy.xml", {}, pluginConfig); auto compiledModel = ieCore.compile_model(model, "CPU", pluginConfig); } + +#ifdef __linux__ +#include "../systeminfo.hpp" + +// Tests using the testable overload with explicit system parameters + +TEST(OVUtils, ApplyDefaultCpuPropertiesLatencyConstrainedContainer) { + // Simulate: 8 cores available in docker, 24 physical cores per socket, 2 sockets + ov::AnyMap properties; + properties[ov::hint::performance_mode.name()] = "LATENCY"; + + auto status = ovms::applyDefaultCpuProperties(properties, /*coreCount=*/8, /*physicalCoresPerSocket=*/24, /*socketsCount=*/2, /*dockerCpuQuota=*/8); + ASSERT_TRUE(status.ok()); + + // coreCount(8) <= physicalCoresPerSocket(24) * socketsCount(2) = 48, so threads should be set + ASSERT_NE(properties.find(ov::inference_num_threads.name()), properties.end()); + EXPECT_EQ(properties[ov::inference_num_threads.name()].as(), 8); + + // NUM_STREAMS should not be set for LATENCY mode + EXPECT_EQ(properties.find(ov::num_streams.name()), properties.end()); + + // CPU pinning: dockerCpuQuota > 0 → pinning disabled + ASSERT_NE(properties.find(ov::hint::enable_cpu_pinning.name()), properties.end()); + EXPECT_EQ(properties[ov::hint::enable_cpu_pinning.name()].as(), false); +} + +TEST(OVUtils, ApplyDefaultCpuPropertiesThroughputConstrainedContainer) { + // Simulate: 8 cores available in docker, 24 physical cores per socket, 2 sockets + ov::AnyMap properties; + properties[ov::hint::performance_mode.name()] = "THROUGHPUT"; + + auto status = ovms::applyDefaultCpuProperties(properties, /*coreCount=*/8, /*physicalCoresPerSocket=*/24, /*socketsCount=*/2, /*dockerCpuQuota=*/8); + ASSERT_TRUE(status.ok()); + + // num_streams = min(8, 24*2) = 8 + ASSERT_NE(properties.find(ov::num_streams.name()), properties.end()); + EXPECT_EQ(properties[ov::num_streams.name()].as(), 8); + + // inference_num_threads = 8 (coreCount <= totalPhysical) + ASSERT_NE(properties.find(ov::inference_num_threads.name()), properties.end()); + EXPECT_EQ(properties[ov::inference_num_threads.name()].as(), 8); +} + +TEST(OVUtils, ApplyDefaultCpuPropertiesThroughputUnconstrainedContainer) { + // Simulate: 96 cores available, 24 physical cores per socket, 2 sockets (48 physical total) + // coreCount(96) > physicalCoresPerSocket(24) * socketsCount(2) = 48 + ov::AnyMap properties; + properties[ov::hint::performance_mode.name()] = "THROUGHPUT"; + + auto status = ovms::applyDefaultCpuProperties(properties, /*coreCount=*/96, /*physicalCoresPerSocket=*/24, /*socketsCount=*/2, /*dockerCpuQuota=*/0); + ASSERT_TRUE(status.ok()); + + // num_streams = min(96, 48) = 48 + ASSERT_NE(properties.find(ov::num_streams.name()), properties.end()); + EXPECT_EQ(properties[ov::num_streams.name()].as(), 48); + + // inference_num_threads NOT set (coreCount > totalPhysical, let OV decide) + EXPECT_EQ(properties.find(ov::inference_num_threads.name()), properties.end()); + + // CPU pinning: dockerCpuQuota == 0 → pinning enabled + ASSERT_NE(properties.find(ov::hint::enable_cpu_pinning.name()), properties.end()); + EXPECT_EQ(properties[ov::hint::enable_cpu_pinning.name()].as(), true); +} + +TEST(OVUtils, ApplyDefaultCpuPropertiesLatencyUnconstrainedContainer) { + // Simulate: 96 cores (with HT), 24 physical cores per socket, 2 sockets + ov::AnyMap properties; + properties[ov::hint::performance_mode.name()] = "LATENCY"; + + auto status = ovms::applyDefaultCpuProperties(properties, /*coreCount=*/96, /*physicalCoresPerSocket=*/24, /*socketsCount=*/2, /*dockerCpuQuota=*/0); + ASSERT_TRUE(status.ok()); + + // coreCount(96) > 48, so inference_num_threads should NOT be set + EXPECT_EQ(properties.find(ov::inference_num_threads.name()), properties.end()); + EXPECT_EQ(properties.find(ov::num_streams.name()), properties.end()); +} + +TEST(OVUtils, ApplyDefaultCpuPropertiesNoPerformanceHint) { + // No PERFORMANCE_HINT set - should only set pinning and threads + ov::AnyMap properties; + + auto status = ovms::applyDefaultCpuProperties(properties, /*coreCount=*/16, /*physicalCoresPerSocket=*/24, /*socketsCount=*/1, /*dockerCpuQuota=*/16); + ASSERT_TRUE(status.ok()); + + // No num_streams (no throughput hint) + EXPECT_EQ(properties.find(ov::num_streams.name()), properties.end()); + + // inference_num_threads set (16 <= 24) + ASSERT_NE(properties.find(ov::inference_num_threads.name()), properties.end()); + EXPECT_EQ(properties[ov::inference_num_threads.name()].as(), 16); +} + +TEST(OVUtils, ApplyDefaultCpuPropertiesDoesNotOverrideExistingValues) { + // Pre-set values should not be overwritten + ov::AnyMap properties; + properties[ov::hint::performance_mode.name()] = "THROUGHPUT"; + properties[ov::num_streams.name()] = 4; + properties[ov::inference_num_threads.name()] = 12; + properties[ov::hint::enable_cpu_pinning.name()] = true; + + auto status = ovms::applyDefaultCpuProperties(properties, /*coreCount=*/8, /*physicalCoresPerSocket=*/24, /*socketsCount=*/2, /*dockerCpuQuota=*/8); + ASSERT_TRUE(status.ok()); + + // All values should remain unchanged + EXPECT_EQ(properties[ov::num_streams.name()].as(), 4); + EXPECT_EQ(properties[ov::inference_num_threads.name()].as(), 12); + EXPECT_EQ(properties[ov::hint::enable_cpu_pinning.name()].as(), true); +} +#endif