diff --git a/src/BUILD b/src/BUILD index df50004d5b..da01fe8155 100644 --- a/src/BUILD +++ b/src/BUILD @@ -165,6 +165,31 @@ ovms_cc_library( name = "servable", hdrs = ["servable.hpp"], ) +ovms_cc_library( + name = "libovms_servable_definition", + hdrs = ["servable_definition.hpp"], +) +ovms_cc_library( + name = "libovms_single_version_servable_definition", + srcs = [ + "servable_definition_unload_guard.cpp", + "single_version_servable_definition.cpp", + ], + hdrs = [ + "servable_definition_unload_guard.hpp", + "single_version_servable_definition.hpp", + "tensorinfo_fwd.hpp", + ], + deps = [ + "servable", + "libovms_servable_definition", + "libovms_dags_pipelinedefinitionstatus", + "libovms_tensorinfo", + "libovmslogging", + "libovmsmodelversion", + "libovmsstatus", + ], +) cc_library( name = "ovms_header", hdrs = ["ovms.h"], @@ -482,6 +507,120 @@ ovms_cc_library( ], visibility = ["//visibility:public",], ) +ovms_cc_library( + name = "node_initializer", + srcs = [ + "mediapipe_internal/node_initializer.cpp", + ], + hdrs = [ + "mediapipe_internal/node_initializer.hpp", + "mediapipe_internal/graph_side_packets.hpp", + ], + deps = [ + "libovmsstatus", + ], + visibility = ["//visibility:public",], +) +ovms_cc_library( + name = "libovms_mediapipe", + srcs = [ + "mediapipe_internal/mediapipefactory.cpp", + "mediapipe_internal/mediapipefactory.hpp", + "mediapipe_internal/mediapipegraphconfig.hpp", + "mediapipe_internal/mediapipegraphconfig.cpp", + "mediapipe_internal/mediapipegraphdefinition.cpp", + "mediapipe_internal/mediapipegraphdefinition.hpp", + "mediapipe_internal/mediapipegraphexecutor.cpp", + "mediapipe_internal/mediapipegraphexecutor.hpp", + "copyable_object_wrapper.hpp", + ], + deps = select({ + "//:not_disable_python": [ + "//src/python:libovmspythonmodule", + ], + "//:disable_python": [] + }) + [ + "node_initializer", + "libovmsmediapipe_utils", + "libovms_dags_pipelinedefinitionstatus", + "libovms_single_version_servable_definition", + "libovms_tensorinfo", + "libovmslogging", + "libovmsstatus", + "model_metric_reporter", + "libovmsmetrics", + "libovms_ov_utils", + "libovmsstring_utils", + "libovms_servable_name_checker", + "libovms_metric_provider", + "libovmsfilesystem", + "libovms_version", + "libovms_execution_context", + "libovmstimer", + "libovmsprofiler", + "@com_github_tencent_rapidjson//:rapidjson", + "//src/port:rapidjson_stringbuffer", + "//src/port:rapidjson_writer", + "@mediapipe//mediapipe/framework:calculator_graph", + "@mediapipe//mediapipe/framework/port:parse_text_proto", + ], + visibility = ["//visibility:public",], + additional_copts = COPTS_PYTHON + COPTS_MEDIAPIPE, + alwayslink = 1, +) +ovms_cc_library( + name = "libovms_mediapipe_kfs_executor", + srcs = [ + "kfs_frontend/kfs_graph_executor_impl.cpp", + "kfs_frontend/kfs_graph_executor_impl.hpp", + ], + deps = select({ + "//:not_disable_python": [ + "//src/python:libovmspythonmodule", + ], + "//:disable_python": [] + }) + [ + "libovms_mediapipe", + "predict_request_validation_utils", + "tfs_utils", + "libovms_kfs_utils", + "libovms_kfs_grpc_inference_service_h", + "//src/kfserving_api:kfserving_api_cpp", + "opencv_dep", + "@mediapipe//mediapipe/framework/formats:image_frame", + "@mediapipe//mediapipe/framework/formats:image_frame_opencv", + "@mediapipe//mediapipe/framework/formats:tensor", + ], + visibility = ["//visibility:public",], + additional_copts = COPTS_PYTHON + COPTS_MEDIAPIPE, + alwayslink = 1, +) +ovms_cc_library( + name = "libovms_mediapipe_http_executor", + srcs = [ + "http_frontend/http_graph_executor_impl.cpp", + "http_frontend/http_graph_executor_impl.hpp", + ], + deps = select({ + "//:not_disable_python": [ + "//src/python:libovmspythonmodule", + ], + "//:disable_python": [] + }) + [ + "libovms_mediapipe", + "httppayload", + "libhttp_async_writer_interface", + "opencv_dep", + "//src/port:rapidjson_stringbuffer", + "//src/port:rapidjson_writer", + "@mediapipe//mediapipe/framework/formats:image_frame", + "@mediapipe//mediapipe/framework/formats:image_frame_opencv", + "@mediapipe//mediapipe/framework/formats:tensor", + ], + visibility = ["//visibility:public",], + additional_copts = COPTS_PYTHON + COPTS_MEDIAPIPE, + alwayslink = 1, +) ovms_cc_library( name = "ovms_lib", hdrs = [ @@ -508,8 +647,6 @@ ovms_cc_library( "dags/gatherexitnodeinputhandler.hpp", "dags/pipelinedefinition.hpp", "dags/pipelinedefinition.cpp", - "dags/pipelinedefinitionunloadguard.cpp", - "dags/pipelinedefinitionunloadguard.hpp", "dags/pipeline_factory.cpp", "dags/pipeline_factory.hpp", "get_model_metadata_impl.cpp", @@ -547,25 +684,7 @@ ovms_cc_library( "server.hpp", "statefulmodelinstance.cpp", "statefulmodelinstance.hpp", - ] + select({ - "//:not_disable_mediapipe": [ - "copyable_object_wrapper.hpp", - "mediapipe_internal/mediapipefactory.cpp", - "mediapipe_internal/mediapipefactory.hpp", - "mediapipe_internal/mediapipegraphconfig.hpp", - "mediapipe_internal/mediapipegraphconfig.cpp", - "mediapipe_internal/mediapipegraphdefinition.cpp", - "mediapipe_internal/mediapipegraphdefinition.hpp", - "mediapipe_internal/mediapipegraphexecutor.cpp", - "mediapipe_internal/mediapipegraphexecutor.hpp", - "mediapipe_internal/packettypes.hpp", - "kfs_frontend/kfs_graph_executor_impl.cpp", - "kfs_frontend/kfs_graph_executor_impl.hpp", - "http_frontend/http_graph_executor_impl.cpp", - "http_frontend/http_graph_executor_impl.hpp", - ], - "//:disable_mediapipe" : [], - }), + ], deps = select({ "//:not_disable_python": [ "//src/python:libovmspythonmodule", @@ -574,16 +693,29 @@ ovms_cc_library( }) + select({ "//conditions:default": [], "//:not_disable_mediapipe" : [ + "libovms_mediapipe", + "libovms_mediapipe_kfs_executor", + "libovms_mediapipe_http_executor", + "@mediapipe_calculators//:mediapipe_calculators", + "@mediapipe//mediapipe/graphs/holistic_tracking:holistic_tracking_to_render_data", + "@mediapipe//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps", + "@mediapipe//mediapipe/calculators/tensor:image_to_tensor_calculator", + "@mediapipe//mediapipe/modules/holistic_landmark:holistic_landmark_cpu", + "@mediapipe//mediapipe/calculators/geti/inference:inference_calculators", + "@mediapipe//mediapipe/calculators/geti/utils:utils", + "@mediapipe//mediapipe/calculators/geti/utils:emptylabel_calculators", + "@mediapipe//mediapipe/calculators/geti/serialization:calculators", + "//src/llm:openai_completions_api_handler", "//src/image_gen:image_gen_calculator", "//src/audio/speech_to_text:s2t_calculator", "//src/audio/text_to_speech:t2s_calculator", "//src/audio:audio_utils", "//src/image_gen:imagegen_init", - "//src/llm:openai_completions_api_handler", "//src/embeddings:embeddingscalculator_ov", "//src/rerank:rerankcalculator", "//src/rerank:rerankcalculator_ov", - "//src/llm:llmcalculator",], + "//src/llm:llmcalculator", + ], }) + select({ "//:enable_drogon": [ "libdrogon_http_server", @@ -682,6 +814,10 @@ ovms_cc_library( "kfs_backend_impl", "tfs_backend_impl", "anonymous_input_name", + "libovms_servable_name_checker", + "libovms_metric_provider", + "libovms_servable_definition", + "libovms_single_version_servable_definition", ] + select({ "//:not_disable_cloud": [ "libovmsazurefilesystem", @@ -694,22 +830,7 @@ ovms_cc_library( "@windows_opencl//:opencl", "@windows_opencl2//:opencl2", ], - }) - + select({ - "//conditions:default": [ - "@mediapipe_calculators//:mediapipe_calculators", # Need this dependencies here because we use ovms/src - cannot add in ovms_dependencies because we copy src directory later in Dockerfile - "@mediapipe//mediapipe/graphs/holistic_tracking:holistic_tracking_to_render_data", - "@mediapipe//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps", - "@mediapipe//mediapipe/calculators/tensor:image_to_tensor_calculator", - "@mediapipe//mediapipe/modules/holistic_landmark:holistic_landmark_cpu", - "libovmsmediapipe_utils", - "@mediapipe//mediapipe/calculators/geti/inference:inference_calculators", - "@mediapipe//mediapipe/calculators/geti/utils:utils", - "@mediapipe//mediapipe/calculators/geti/utils:emptylabel_calculators", - "@mediapipe//mediapipe/calculators/geti/serialization:calculators", - ], - "//:disable_mediapipe" : [], - }), + }), visibility = ["//visibility:public",], additional_copts = COPTS_PYTHON + COPTS_MEDIAPIPE + COPTS_DROGON + COPTS_OV_TRACE, additional_linkopts = select({ @@ -1104,40 +1225,6 @@ ovms_cc_library( visibility = ["//visibility:public"], ) -# Unused -ovms_cc_library( # TODO split dependencies - name = "libovmshttpservermodule", - hdrs = [ - "http_server.hpp", - "http_rest_api_handler.hpp", - "httpservermodule.hpp", - ], - srcs = [ - "http_rest_api_handler.cpp", - "http_server.cpp", - "httpservermodule.cpp", - ], - deps = [ - "@com_github_jupp0r_prometheus_cpp//core", - "@mediapipe//mediapipe/framework:calculator_framework", - "@tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto", - "@tensorflow_serving//tensorflow_serving/util:threadpool_executor", - "@tensorflow_serving//tensorflow_serving/util:json_tensor", - "libovms_module", - "libovmslogging", - "libovmsprofiler", - "libovmsstatus", - "libdrogon_http_server", - "//src/kfserving_api:kfserving_api_cpp", - ] + select({ - "//:not_disable_python": [ - "//src/python:libovmspythonmodule", - ], - "//:disable_python": [] - }), - visibility = ["//visibility:public"], -) - ovms_cc_library( name = "libovmsschema", hdrs = ["schema.hpp",], @@ -1188,7 +1275,8 @@ ovms_cc_library( name = "libovmsstatus", hdrs = ["status.hpp",], srcs = ["status.cpp",], - deps = ["libovmslogging"], + deps = ["libovmslogging", + "@fmtlib",], visibility = ["//visibility:public"], ) @@ -1203,6 +1291,20 @@ ovms_cc_library( visibility = ["//visibility:public",], ) +ovms_cc_library( + name = "libovms_servable_name_checker", + hdrs = ["servable_name_checker.hpp",], + deps = [], + visibility = ["//visibility:public"], +) + +ovms_cc_library( + name = "libovms_metric_provider", + hdrs = ["metric_provider.hpp",], + deps = [], + visibility = ["//visibility:public"], +) + ovms_cc_library( # make ovms_lib dependent, use share doptions name = "libovmsstring_utils", hdrs = ["stringutils.hpp",], @@ -1718,6 +1820,7 @@ ovms_cc_library( hdrs = ["modelversionstatus.hpp",], srcs = ["modelversionstatus.cpp",], deps = [ + "@fmtlib", "libovmslogging", "libovmsmodelversion", ], @@ -1772,12 +1875,12 @@ ovms_cc_library( name = "nodeinfo", hdrs = ["dags/nodeinfo.hpp",], deps = [ + "@fmtlib", "libovms_threadsafequeue", "libovmsmodelversion", "libovms_tensorinfo", "node_library", "libovms_dags_aliases", - "libovmslogging", ], visibility = ["//visibility:public"], ) @@ -1797,25 +1900,7 @@ ovms_cc_library( deps = [ "libovmsstatus", "@com_github_grpc_grpc//:grpc++", - "libovmslogging", - ], - visibility = ["//visibility:public"], -) -ovms_cc_library( - # TODO needs further splitting - name = "libovms_dags_pipelinedefinition", - hdrs = ["dags/pipelinedefinition.hpp",], - srcs = ["dags/pipelinedefinition.cpp",], - deps = [ - "@com_github_tencent_rapidjson//:rapidjson", - "libovms_kfs_utils", - "tfs_utils", - "libovms_dags_aliases", - "nodeinfo", - "libovms_dags_pipelinedefinitionstatus", - "libovms_tensorinfo", - "notifyreceiver", - "libovmsmodelversion", + "@fmtlib", ], visibility = ["//visibility:public"], ) diff --git a/src/audio/speech_to_text/BUILD b/src/audio/speech_to_text/BUILD index ec08ba386b..f260731db2 100644 --- a/src/audio/speech_to_text/BUILD +++ b/src/audio/speech_to_text/BUILD @@ -26,7 +26,8 @@ ovms_cc_library( ovms_cc_library( name = "s2t_calculator", - srcs = ["s2t_calculator.cc"], + srcs = ["s2t_calculator.cc", + "stt_node_initializer.cpp"], deps = [ "@mediapipe//mediapipe/framework:calculator_framework", "//src:httppayload", @@ -39,6 +40,8 @@ ovms_cc_library( "//third_party:genai", "//src/audio:audio_utils", "//src:libmodelconfigjsonparser", + "//src:node_initializer", + "//src:libovmsstring_utils", ], visibility = ["//visibility:public"], alwayslink = 1, diff --git a/src/audio/speech_to_text/stt_node_initializer.cpp b/src/audio/speech_to_text/stt_node_initializer.cpp new file mode 100644 index 0000000000..6e7a2be022 --- /dev/null +++ b/src/audio/speech_to_text/stt_node_initializer.cpp @@ -0,0 +1,73 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include "../../mediapipe_internal/graph_side_packets.hpp" +#include "../../mediapipe_internal/node_initializer.hpp" +#include "../../stringutils.hpp" +#include "s2t_servable.hpp" +#include "mediapipe/framework/calculator.pb.h" +#include "src/audio/speech_to_text/s2t_calculator.pb.h" + +#include "../../logging.hpp" + +namespace ovms { +class SttNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "S2tCalculator"; + +public: + bool matches(const std::string& calculatorName) const override { + return endsWith(calculatorName, CALCULATOR_NAME); + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* /*pythonBackend*/) override { + auto& sttServableMap = sidePackets.sttServableMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("SpeechToText node missing options in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("SpeechToText node name is missing in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (sttServableMap.find(nodeName) != sttServableMap.end()) { + SPDLOG_ERROR("SpeechToText node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + mediapipe::S2tCalculatorOptions nodeOptions; + const auto& calculatorOptions = nodeConfig.node_options(0); + if (!calculatorOptions.UnpackTo(&nodeOptions)) { + SPDLOG_ERROR("Failed to unpack calculator options"); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + auto servable = std::make_shared(nodeOptions, basePath); + sttServableMap.insert(std::pair>(nodeName, std::move(servable))); + return StatusCode::OK; + } +}; + +static bool sttNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/audio/text_to_speech/BUILD b/src/audio/text_to_speech/BUILD index 854c67c78d..80992c9660 100644 --- a/src/audio/text_to_speech/BUILD +++ b/src/audio/text_to_speech/BUILD @@ -34,7 +34,8 @@ ovms_cc_library( ovms_cc_library( name = "t2s_calculator", - srcs = ["t2s_calculator.cc"], + srcs = ["t2s_calculator.cc", + "tts_node_initializer.cpp"], deps = [ "@mediapipe//mediapipe/framework:calculator_framework", "//src:httppayload", @@ -46,6 +47,8 @@ ovms_cc_library( ":t2s_servable", "//src/audio:audio_utils", "//src:libmodelconfigjsonparser", + "//src:node_initializer", + "//src:libovmsstring_utils", ], visibility = ["//visibility:public"], alwayslink = 1, diff --git a/src/audio/text_to_speech/tts_node_initializer.cpp b/src/audio/text_to_speech/tts_node_initializer.cpp new file mode 100644 index 0000000000..ad2d49a16a --- /dev/null +++ b/src/audio/text_to_speech/tts_node_initializer.cpp @@ -0,0 +1,79 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include + +#include "../../mediapipe_internal/graph_side_packets.hpp" +#include "../../mediapipe_internal/node_initializer.hpp" +#include "../../stringutils.hpp" +#include "t2s_servable.hpp" +#include "mediapipe/framework/calculator.pb.h" +#include "src/audio/text_to_speech/t2s_calculator.pb.h" + +#include "../../logging.hpp" + +namespace ovms { +class TtsNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "T2sCalculator"; + +public: + bool matches(const std::string& calculatorName) const override { + return endsWith(calculatorName, CALCULATOR_NAME); + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* /*pythonBackend*/) override { + auto& ttsServableMap = sidePackets.ttsServableMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("TextToSpeech node missing options in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("TextToSpeech node name is missing in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (ttsServableMap.find(nodeName) != ttsServableMap.end()) { + SPDLOG_ERROR("TextToSpeech node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + mediapipe::T2sCalculatorOptions nodeOptions; + const auto& calculatorOptions = nodeConfig.node_options(0); + if (!calculatorOptions.UnpackTo(&nodeOptions)) { + SPDLOG_ERROR("Failed to unpack calculator options"); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + try { + auto servable = std::make_shared(nodeOptions.models_path(), nodeOptions.target_device(), nodeOptions.voices(), nodeOptions.plugin_config(), basePath); + ttsServableMap.insert(std::pair>(nodeName, std::move(servable))); + } catch (const std::runtime_error& e) { + SPDLOG_ERROR("TextToSpeech node name: {} initialization failed: {}. ", nodeName, e.what()); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + return StatusCode::OK; + } +}; + +static bool ttsNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/azurestorage.hpp b/src/azurestorage.hpp index f45b2e8084..3a0c3ae58d 100644 --- a/src/azurestorage.hpp +++ b/src/azurestorage.hpp @@ -21,7 +21,6 @@ #include #include -#include "logging.hpp" #include "status.hpp" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" diff --git a/src/capi_frontend/capi.cpp b/src/capi_frontend/capi.cpp index f6a4b2b159..13e3dc3202 100644 --- a/src/capi_frontend/capi.cpp +++ b/src/capi_frontend/capi.cpp @@ -28,14 +28,11 @@ #pragma warning(pop) #include "../dags/pipeline.hpp" +#include "../dags/pipeline_factory.hpp" #include "../dags/pipelinedefinition.hpp" -#include "../dags/pipelinedefinitionstatus.hpp" -#include "../dags/pipelinedefinitionunloadguard.hpp" +#include "../servable_definition_unload_guard.hpp" #include "../execution_context.hpp" #include "../version.hpp" -#if (MEDIAPIPE_DISABLE == 0) -#include "../mediapipe_internal/mediapipegraphdefinition.hpp" -#endif #include "../model_service.hpp" #include "../modelinstance.hpp" #include "capi_request_utils.hpp" // TODO @atobisze must be before executor @@ -49,8 +46,11 @@ #include "../ovms.h" // NOLINT #include "../prediction_service.hpp" #include "../profiler.hpp" +#include "../dags/pipelinedefinitionstatus.hpp" +#include "../servable_definition.hpp" #include "../servablemanagermodule.hpp" #include "../server.hpp" +#include "../single_version_servable_definition.hpp" #include "../status.hpp" #include "../timer.hpp" #include "buffer.hpp" @@ -73,7 +73,7 @@ using ovms::ModelInstanceUnloadGuard; using ovms::ModelManager; using ovms::Pipeline; using ovms::PipelineDefinition; -using ovms::PipelineDefinitionUnloadGuard; +using ovms::ServableDefinitionUnloadGuard; using ovms::ServableManagerModule; using ovms::Server; using ovms::Status; @@ -120,10 +120,10 @@ static Status getPipeline(ovms::Server& server, const InferenceRequest* request, if (!status.ok()) { return status; } - return modelManager->createPipeline(pipelinePtr, request->getServableName(), request, response); + return modelManager->getPipelineFactory().create(pipelinePtr, request->getServableName(), request, response, *modelManager); } -static Status getPipelineDefinition(Server& server, const std::string& servableName, PipelineDefinition** pipelineDefinition, std::unique_ptr& unloadGuard) { +static Status getPipelineDefinition(Server& server, const std::string& servableName, PipelineDefinition** pipelineDefinition, std::unique_ptr& unloadGuard) { ModelManager* modelManager{nullptr}; Status status = getModelManager(server, &modelManager); if (!status.ok()) { @@ -1190,21 +1190,16 @@ DLL_PUBLIC OVMS_Status* OVMS_GetServableState(OVMS_Server* serverPtr, const char std::shared_ptr modelInstance = modelManager->findModelInstance(servableName, servableVersion); if (modelInstance == nullptr) { - SPDLOG_DEBUG("Requested model: {} does not exist. Searching for pipeline with that name...", servableName); - PipelineDefinition* pipelineDefinition = nullptr; - pipelineDefinition = modelManager->getPipelineFactory().findDefinitionByName(servableName); - if (!pipelineDefinition) { -#if (MEDIAPIPE_DISABLE == 0) - ovms::MediapipeGraphDefinition* mediapipeDefinition = modelManager->getMediapipeFactory().findDefinitionByName(servableName); - if (mediapipeDefinition) { - *state = convertToServableState(mediapipeDefinition->getStateCode()); - return nullptr; - } -#endif + SPDLOG_DEBUG("Requested model: {} does not exist. Searching for definition with that name...", servableName); + auto* definition = modelManager->findServableDefinition(servableName); + if (!definition) { return reinterpret_cast(new Status(StatusCode::MODEL_NAME_MISSING)); } - *state = convertToServableState(pipelineDefinition->getStateCode()); - + auto* svsd = dynamic_cast(definition); + if (!svsd) { + return reinterpret_cast(new Status(StatusCode::MODEL_NAME_MISSING)); + } + *state = convertToServableState(svsd->getStatus().getStateCode()); return nullptr; } if (!status.ok()) { @@ -1274,7 +1269,7 @@ DLL_PUBLIC OVMS_Status* OVMS_GetServableMetadata(OVMS_Server* serverPtr, const c if (status == StatusCode::MODEL_NAME_MISSING) { SPDLOG_DEBUG("Requested model: {} does not exist. Searching for pipeline with that name...", servableName); PipelineDefinition* pipelineDefinition = nullptr; - std::unique_ptr unloadGuard; + std::unique_ptr unloadGuard; status = getPipelineDefinition(server, servableName, &pipelineDefinition, unloadGuard); if (!status.ok() || !pipelineDefinition) { return reinterpret_cast(new Status(std::move(status))); diff --git a/src/capi_frontend/capi_request_utils.hpp b/src/capi_frontend/capi_request_utils.hpp index 7d22e1c7ce..565a17e417 100644 --- a/src/capi_frontend/capi_request_utils.hpp +++ b/src/capi_frontend/capi_request_utils.hpp @@ -19,10 +19,10 @@ #include #include "../ovms.h" // NOLINT +#include "src/logging.hpp" #include "../precision.hpp" #include "inferencerequest.hpp" #include "../shape.hpp" -#include "../logging.hpp" #include "../status.hpp" // TODO move impl @atobisze #include "../extractchoice.hpp" #include "../requesttensorextractor.hpp" diff --git a/src/color_format_configuration.cpp b/src/color_format_configuration.cpp index 0b6640bc88..f1973f53da 100644 --- a/src/color_format_configuration.cpp +++ b/src/color_format_configuration.cpp @@ -18,6 +18,8 @@ #include #include +#include "logging.hpp" + namespace ovms { const char ColorFormatConfiguration::COLOR_FORMAT_DELIMITER = ':'; diff --git a/src/dags/entry_node.hpp b/src/dags/entry_node.hpp index c146b5021f..018aabdde2 100644 --- a/src/dags/entry_node.hpp +++ b/src/dags/entry_node.hpp @@ -21,7 +21,6 @@ #include -#include "../logging.hpp" #include "../ovms.h" // NOLINT #include "../regularovtensorfactory.hpp" #include "../tensorinfo.hpp" diff --git a/src/dags/gatherexitnodeinputhandler.hpp b/src/dags/gatherexitnodeinputhandler.hpp index 9432e3b2ac..14ac2aa522 100644 --- a/src/dags/gatherexitnodeinputhandler.hpp +++ b/src/dags/gatherexitnodeinputhandler.hpp @@ -23,7 +23,6 @@ #include "../capi_frontend/capi_utils.hpp" #include "../capi_frontend/capi_dag_utils.hpp" #include "../kfs_frontend/kfs_utils.hpp" -#include "../logging.hpp" #include "../profiler.hpp" #include "../status.hpp" #include "../tfs_frontend/tfs_utils.hpp" diff --git a/src/dags/nodeinfo.hpp b/src/dags/nodeinfo.hpp index 503c9397df..6a20f31717 100644 --- a/src/dags/nodeinfo.hpp +++ b/src/dags/nodeinfo.hpp @@ -24,11 +24,12 @@ #include #include +#include + #include "../modelversion.hpp" #include "../tensorinfo.hpp" #include "aliases.hpp" #include "node_library.hpp" -#include "../logging.hpp" namespace ovms { diff --git a/src/dags/pipeline_factory.cpp b/src/dags/pipeline_factory.cpp index 003c627721..bc13f01e8f 100644 --- a/src/dags/pipeline_factory.cpp +++ b/src/dags/pipeline_factory.cpp @@ -15,6 +15,12 @@ //***************************************************************************** #include "pipeline_factory.hpp" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wall" +#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" +#pragma GCC diagnostic pop +#include "../kfs_frontend/kfs_utils.hpp" + #include "../capi_frontend/inferencerequest.hpp" #include "../capi_frontend/inferenceresponse.hpp" #include "../logging.hpp" diff --git a/src/dags/pipeline_factory.hpp b/src/dags/pipeline_factory.hpp index d8f447f669..4bb4c6b388 100644 --- a/src/dags/pipeline_factory.hpp +++ b/src/dags/pipeline_factory.hpp @@ -24,11 +24,6 @@ #include #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wall" -#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" -#pragma GCC diagnostic pop -#include "../kfs_frontend/kfs_grpc_inference_service.hpp" #include "nodeinfo.hpp" namespace ovms { diff --git a/src/dags/pipelinedefinition.cpp b/src/dags/pipelinedefinition.cpp index 434859c2ad..dcc76405cb 100644 --- a/src/dags/pipelinedefinition.cpp +++ b/src/dags/pipelinedefinition.cpp @@ -20,12 +20,14 @@ #include #include "../logging.hpp" +#include "../model.hpp" #include "../model_metric_reporter.hpp" #include "../modelinstance.hpp" #include "../modelinstanceunloadguard.hpp" #include "../modelmanager.hpp" #include "../ov_utils.hpp" #include "../prediction_service_utils.hpp" +#include "../servable_definition_unload_guard.hpp" #include "../status.hpp" #include "custom_node.hpp" #include "custom_node_library_internal_manager_wrapper.hpp" @@ -36,7 +38,6 @@ #include "nodeinfo.hpp" #include "nodestreamidguard.hpp" #include "pipeline.hpp" -#include "pipelinedefinitionunloadguard.hpp" namespace ovms { const std::string PipelineDefinition::SCHEDULER_CLASS_NAME{"Pipeline"}; @@ -59,25 +60,19 @@ PipelineDefinition::PipelineDefinition(const std::string& pipelineName, const pipeline_connections_t& connections, MetricRegistry* registry, const MetricConfig* metricConfig) : - pipelineName(pipelineName), + SingleVersionServableDefinition(pipelineName), nodeInfos(nodeInfos), connections(connections), reporter(std::make_unique(metricConfig, registry, pipelineName, VERSION)), - status(SCHEDULER_CLASS_NAME, this->pipelineName) {} + status(SCHEDULER_CLASS_NAME, getName()) {} Status PipelineDefinition::validate(ModelManager& manager) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of pipeline: {}", getName()); ValidationResultNotifier notifier(status, loadedNotify); - if (manager.modelExists(this->pipelineName)) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Pipeline name: {} is already occupied by model.", pipelineName); + if (manager.servableExists(getName(), ServableType::Model | ServableType::Mediapipe)) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Pipeline name: {} is already occupied by model or mediapipe graph.", getName()); return StatusCode::PIPELINE_NAME_OCCUPIED; } -#if (MEDIAPIPE_DISABLE == 0) - if (manager.getMediapipeFactory().definitionExists(this->pipelineName)) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Pipeline name: {} is already occupied by mediapipe graph.", pipelineName); - return StatusCode::PIPELINE_NAME_OCCUPIED; - } -#endif Status validationResult = initializeNodeResources(manager); if (!validationResult.ok()) { return validationResult; @@ -118,7 +113,7 @@ Status PipelineDefinition::initializeNodeResources(ModelManager& manager) { auto params = createCustomNodeParamArray(nodeInfo.parameters); int paramsLength = nodeInfo.parameters.size(); if (!nodeInfo.library.isValid()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Pipeline: {} node: {} refers to invalid library", pipelineName, nodeInfo.nodeName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Pipeline: {} node: {} refers to invalid library", getName(), nodeInfo.nodeName); return StatusCode::PIPELINE_DEFINITION_INVALID_NODE_LIBRARY; } auto status = nodeInfo.library.initialize(&customNodeLibraryInternalManager, params.get(), paramsLength); @@ -188,50 +183,12 @@ void PipelineDefinition::retire(ModelManager& manager) { this->connections.clear(); } -Status PipelineDefinition::waitForLoaded(std::unique_ptr& unloadGuard, const uint32_t waitForLoadedTimeoutMicroseconds) { - unloadGuard = std::make_unique(*this); - - const uint32_t waitLoadedTimestepMicroseconds = 1000; - const uint32_t waitCheckpoints = waitForLoadedTimeoutMicroseconds / waitLoadedTimestepMicroseconds; - uint32_t waitCheckpointsCounter = waitCheckpoints; - std::mutex cvMtx; - std::unique_lock cvLock(cvMtx); - while (waitCheckpointsCounter-- != 0) { - if (status.isAvailable()) { - SPDLOG_DEBUG("Successfully waited for pipeline definition: {}", getName()); - return StatusCode::OK; - } - unloadGuard.reset(); - if (!status.canEndLoaded()) { - if (status.getStateCode() != PipelineDefinitionStateCode::RETIRED) { - SPDLOG_DEBUG("Waiting for pipeline definition: {} ended due to timeout.", getName()); - return StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET; - } else { - SPDLOG_DEBUG("Waiting for pipeline definition: {} ended since it failed to load.", getName()); - return StatusCode::PIPELINE_DEFINITION_NOT_LOADED_ANYMORE; - } - } - SPDLOG_DEBUG("Waiting for available state for pipeline: {}, with timestep: {}us timeout: {}us check count: {}", - getName(), waitLoadedTimestepMicroseconds, waitForLoadedTimeoutMicroseconds, waitCheckpointsCounter); - loadedNotify.wait_for(cvLock, - std::chrono::microseconds(waitLoadedTimestepMicroseconds), - [this]() { - return this->status.isAvailable() || - !this->status.canEndLoaded(); - }); - unloadGuard = std::make_unique(*this); - } - if (!status.isAvailable()) { - if (status.getStateCode() != PipelineDefinitionStateCode::RETIRED) { - SPDLOG_DEBUG("Waiting for pipeline definition: {} ended due to timeout.", getName()); - return StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET; - } else { - SPDLOG_DEBUG("Waiting for pipeline definition: {} ended since it failed to load.", getName()); - return StatusCode::PIPELINE_DEFINITION_NOT_LOADED_ANYMORE; - } - } - SPDLOG_DEBUG("Successfully waited for pipeline definition: {}", getName()); - return StatusCode::OK; +StatusCode PipelineDefinition::notLoadedYetCode() const { + return StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET; +} + +StatusCode PipelineDefinition::notLoadedAnymoreCode() const { + return StatusCode::PIPELINE_DEFINITION_NOT_LOADED_ANYMORE; } template @@ -239,7 +196,7 @@ Status PipelineDefinition::create(std::unique_ptr& pipeline, const RequestType* request, ResponseType* response, ModelManager& manager) { - std::unique_ptr unloadGuard; + std::unique_ptr unloadGuard; Status status = waitForLoaded(unloadGuard); if (!status.ok()) { return status; @@ -300,7 +257,7 @@ Status PipelineDefinition::create(std::unique_ptr& pipeline, } #pragma warning(push) #pragma warning(disable : 6011) - pipeline = std::make_unique(*entry, *exit, *this->reporter, pipelineName); + pipeline = std::make_unique(*entry, *exit, *this->reporter, getName()); #pragma warning(pop) for (auto& kv : nodes) { pipeline->push(std::move(kv.second)); @@ -515,7 +472,7 @@ class NodeValidator { } if (shape.size() < 3) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Validation of pipeline: {} definition failed. Node: {} demultiply cannot occur due to not enough shape dimensions: {}", - this->pipelineName, + pipelineName, demultiplicatorNodeInfo.nodeName, shape.size()); return StatusCode::PIPELINE_NOT_ENOUGH_SHAPE_DIMENSIONS_TO_DEMULTIPLY; @@ -525,7 +482,7 @@ class NodeValidator { auto demultiplyDimension = Dimension(demultiplicatorNodeInfo.demultiplyCount.value()); if (!shape[0].partiallyFitsInto(demultiplyDimension)) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Validation of pipeline: {} definition failed. Demultiply count: {} of node: {} does not match tensor first dimension value: {}", - this->pipelineName, + pipelineName, demultiplicatorNodeInfo.demultiplyCount.value(), demultiplicatorNodeInfo.nodeName, shape[0].toString()); @@ -533,14 +490,14 @@ class NodeValidator { } } else { SPDLOG_LOGGER_WARN(modelmanager_logger, "Pipeline: {}; Demultiply count: {} of node: {} is fixed while first dimension value of node library is not: {}. This pipeline may fail at execution stage.", - this->pipelineName, + pipelineName, demultiplicatorNodeInfo.demultiplyCount.value(), demultiplicatorNodeInfo.nodeName, shape[0].toString()); } } else if (!shape[0].isAny()) { SPDLOG_LOGGER_WARN(modelmanager_logger, "Pipeline: {}; Demultiply count: {} of node: {} is dynamic while first dimension value of gather node is not: {}. This pipeline may fail at execution stage.", - this->pipelineName, + pipelineName, demultiplicatorNodeInfo.demultiplyCount.value(), demultiplicatorNodeInfo.nodeName, shape[0].toString()); @@ -593,7 +550,7 @@ class NodeValidator { result = influenceShapeWithDemultiplexer(tensorInputShape, *demultiplicatorNode); if (!result.ok()) { SPDLOG_LOGGER_ERROR(dag_executor_logger, "Validation of pipeline: {} definition failed. Demultiply count: {} of gather_from node: {} does not match tensor first dimension value: {} of node: {}", - this->pipelineName, + pipelineName, demultiplicatorNode->demultiplyCount.value(), demultiplicatorNode->nodeName, tensorInputShape[1].toString(), @@ -602,7 +559,7 @@ class NodeValidator { } } else if (dependantNodeInfo.gatherFromNode.size() > 1) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Validation of pipeline: {} definition failed. Manual gathering from multiple nodes is not supported in node name: {}", - this->pipelineName, + pipelineName, dependantNodeInfo.nodeName); return StatusCode::PIPELINE_MANUAL_GATHERING_FROM_MULTIPLE_NODES_NOT_SUPPORTED; } @@ -761,7 +718,7 @@ class NodeValidator { dependantNodeInfo, this->inputsInfo, dependantNodeInfo.library.getInputsInfo, - this->pipelineName, + pipelineName, getCNLIMWrapperPtr(nodeResources.at(dependantNodeInfo.nodeName))); if (!result.ok()) { return result; @@ -770,7 +727,7 @@ class NodeValidator { dependantNodeInfo, this->outputsInfo, dependantNodeInfo.library.getOutputsInfo, - this->pipelineName, + pipelineName, getCNLIMWrapperPtr(nodeResources.at(dependantNodeInfo.nodeName))); if (!result.ok()) { return result; @@ -789,7 +746,7 @@ class NodeValidator { dependencyNodeInfo, this->dependencyInputsInfo, dependencyNodeInfo.library.getInputsInfo, - this->pipelineName, + pipelineName, getCNLIMWrapperPtr(nodeResources.at(dependencyNodeInfo.nodeName))); if (!result.ok()) { return result; @@ -798,7 +755,7 @@ class NodeValidator { dependencyNodeInfo, this->dependencyOutputsInfo, dependencyNodeInfo.library.getOutputsInfo, - this->pipelineName, + pipelineName, getCNLIMWrapperPtr(nodeResources.at(dependencyNodeInfo.nodeName))); if (!result.ok()) { return result; @@ -887,7 +844,7 @@ class NodeValidator { }; Status PipelineDefinition::validateNode(ModelManager& manager, const NodeInfo& dependantNodeInfo, const bool isMultiBatchAllowed) { - NodeValidator validator(this->pipelineName, manager, dependantNodeInfo, connections, nodeInfos, nodeResources, isMultiBatchAllowed); + NodeValidator validator(getName(), manager, dependantNodeInfo, connections, nodeInfos, nodeResources, isMultiBatchAllowed); return validator.validate(); } @@ -1038,22 +995,22 @@ Status PipelineDefinition::validateNodes(ModelManager& manager) { [](const NodeInfo& info) { return info.kind == NodeKind::EXIT; }); if (entryNodeCount <= 0) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} is missing request node", pipelineName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} is missing request node", getName()); return StatusCode::PIPELINE_MISSING_ENTRY_OR_EXIT; } if (exitNodeCount <= 0) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} is missing response node", pipelineName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} is missing response node", getName()); return StatusCode::PIPELINE_MISSING_ENTRY_OR_EXIT; } if (entryNodeCount > 1) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple request nodes", pipelineName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple request nodes", getName()); return StatusCode::PIPELINE_MULTIPLE_ENTRY_NODES; } if (exitNodeCount > 1) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple response nodes", pipelineName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple response nodes", getName()); return StatusCode::PIPELINE_MULTIPLE_EXIT_NODES; } @@ -1070,7 +1027,7 @@ Status PipelineDefinition::validateNodes(ModelManager& manager) { this->nodeInfos.end(), [](const NodeInfo& info) { return info.demultiplyCount.has_value(); }); if (isAnyNodeDynamicDemultiplexer && (demultiplexerCount > 1)) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple demultiplexers with at least one dynamic.", pipelineName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple demultiplexers with at least one dynamic.", getName()); return StatusCode::NOT_IMPLEMENTED; } @@ -1081,7 +1038,7 @@ Status PipelineDefinition::validateNodes(ModelManager& manager) { }; if (std::count_if(nodeInfos.begin(), nodeInfos.end(), findByName) > 1) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple nodes with name: {}", pipelineName, node.nodeName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "PipelineDefinition: {} has multiple nodes with name: {}", getName(), node.nodeName); return StatusCode::PIPELINE_NODE_NAME_DUPLICATE; } @@ -1376,7 +1333,7 @@ Shape PipelineDefinition::getNodeGatherShape(const NodeInfo& info) const { someNodeInfo, nodeOutputsInfo, someNodeInfo.library.getOutputsInfo, - this->pipelineName, + getName(), getCNLIMWrapperPtr(nodeResources.at(someNodeInfo.nodeName))); if (!result.ok()) { SPDLOG_ERROR("Failed to read node: {} library metadata with error: {}", nodeName, result.string()); diff --git a/src/dags/pipelinedefinition.hpp b/src/dags/pipelinedefinition.hpp index b45527dcfc..33322dfad6 100644 --- a/src/dags/pipelinedefinition.hpp +++ b/src/dags/pipelinedefinition.hpp @@ -15,8 +15,6 @@ //***************************************************************************** #pragma once -#include -#include #include #include #include @@ -26,10 +24,10 @@ #include #include -#include "../kfs_frontend/kfs_utils.hpp" -#include "../tfs_frontend/tfs_utils.hpp" +#include "../model_metric_reporter.hpp" #include "../modelversion.hpp" #include "../notifyreceiver.hpp" +#include "../single_version_servable_definition.hpp" #include "../tensorinfo.hpp" #include "aliases.hpp" #include "nodeinfo.hpp" @@ -40,15 +38,12 @@ struct CNLIMWrapper; class MetricConfig; class MetricRegistry; class ModelManager; -class ServableMetricReporter; class NodeValidator; class Pipeline; -class PipelineDefinitionUnloadGuard; class Status; -class PipelineDefinition : public NotifyReceiver { +class PipelineDefinition : public SingleVersionServableDefinition, public NotifyReceiver { friend NodeValidator; - friend PipelineDefinitionUnloadGuard; struct ValidationResultNotifier { ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) : status(status), @@ -68,7 +63,6 @@ class PipelineDefinition : public NotifyReceiver { std::condition_variable& loadedNotify; }; - const std::string pipelineName; std::vector nodeInfos; std::map> nodeResources = {}; pipeline_connections_t connections; @@ -79,11 +73,6 @@ class PipelineDefinition : public NotifyReceiver { private: mutable std::shared_mutex metadataMtx; - std::atomic requestsHandlesCounter = 0; - std::condition_variable loadedNotify; - - // Pipelines are not versioned and any available definition has constant version equal 1. - static constexpr model_version_t VERSION = 1; std::unique_ptr reporter; @@ -99,7 +88,6 @@ class PipelineDefinition : public NotifyReceiver { Shape getNodeGatherShape(const NodeInfo& info) const; public: - static constexpr uint64_t WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS = 500000; PipelineDefinition(const std::string& pipelineName, const std::vector& nodeInfos, const pipeline_connections_t& connections, @@ -129,15 +117,15 @@ class PipelineDefinition : public NotifyReceiver { std::vector calculateNodeInfosDiff(const std::vector& nodeInfos); void deinitializeNodeResources(const std::vector& nodeInfosDiff); - const std::string& getName() const override { return pipelineName; } + const std::string& getName() const override { return SingleVersionServableDefinition::getName(); } const PipelineDefinitionStateCode getStateCode() const { return status.getStateCode(); } - const model_version_t getVersion() const { return VERSION; } + bool isAvailable() const override { return status.isAvailable(); } void receiveNotification(const std::string& ownerDetails) override { this->status.handle(UsedModelChangedEvent(ownerDetails)); } - const PipelineDefinitionStatus& getStatus() const { + const PipelineDefinitionStatus& getStatus() const override { return this->status; } @@ -148,15 +136,15 @@ class PipelineDefinition : public NotifyReceiver { void makeSubscriptions(ModelManager& manager); void resetSubscriptions(ModelManager& manager); - ServableMetricReporter& getMetricReporter() const { return *this->reporter; } + ServableMetricReporter& getMetricReporter() const override { return *this->reporter; } protected: Status updateInputsInfo(const ModelManager& manager); Status updateOutputsInfo(const ModelManager& manager); public: - const tensor_map_t getInputsInfo() const; - const tensor_map_t getOutputsInfo() const; + const tensor_map_t getInputsInfo() const override; + const tensor_map_t getOutputsInfo() const override; private: static Status getCustomNodeMetadata(const NodeInfo& customNodeInfo, tensor_map_t& inputsInfo, metadata_fn callback, const std::string& pipelineName, void* customNodeLibraryInternalManager); @@ -175,16 +163,10 @@ class PipelineDefinition : public NotifyReceiver { const Aliases& aliases, const Shape& gatherShape) const; - void increaseRequestsHandlesCount() { - ++requestsHandlesCounter; - } - - void decreaseRequestsHandlesCount() { - --requestsHandlesCounter; - } + StatusCode notLoadedYetCode() const override; + StatusCode notLoadedAnymoreCode() const override; public: static const std::string SCHEDULER_CLASS_NAME; - Status waitForLoaded(std::unique_ptr& unloadGuard, const uint32_t waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS); }; } // namespace ovms diff --git a/src/embeddings/BUILD b/src/embeddings/BUILD index 267b796354..77569dde98 100644 --- a/src/embeddings/BUILD +++ b/src/embeddings/BUILD @@ -68,7 +68,8 @@ mediapipe_proto_library( ovms_cc_library( name = "embeddingscalculator_ov", hdrs = [], - srcs = ["embeddings_calculator_ov.cc"], + srcs = ["embeddings_calculator_ov.cc", + "embeddings_node_initializer.cpp"], deps = [ "@mediapipe//mediapipe/framework:calculator_framework", "@com_github_tencent_rapidjson//:rapidjson", @@ -85,6 +86,8 @@ ovms_cc_library( "//src:libovms_execution_context", ":embeddings_api", "//third_party:openvino", + "//src:node_initializer", + "//src:libovmsstring_utils", ], visibility = ["//visibility:public"], alwayslink = 1, diff --git a/src/embeddings/embeddings_node_initializer.cpp b/src/embeddings/embeddings_node_initializer.cpp new file mode 100644 index 0000000000..c706a87fcf --- /dev/null +++ b/src/embeddings/embeddings_node_initializer.cpp @@ -0,0 +1,80 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include "../mediapipe_internal/graph_side_packets.hpp" +#include "../mediapipe_internal/node_initializer.hpp" +#include "../stringutils.hpp" +#include "embeddings_servable.hpp" +#include "mediapipe/framework/calculator.pb.h" +#include "src/embeddings/embeddings_calculator_ov.pb.h" + +#include "../logging.hpp" + +namespace ovms { +class EmbeddingsNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "EmbeddingsCalculatorOV"; + +public: + bool matches(const std::string& calculatorName) const override { + return endsWith(calculatorName, CALCULATOR_NAME); + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* /*pythonBackend*/) override { + auto& embeddingsServableMap = sidePackets.embeddingsServableMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("Embeddings node missing options in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("Embeddings node name is missing in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (embeddingsServableMap.find(nodeName) != embeddingsServableMap.end()) { + SPDLOG_ERROR("Embeddings node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + mediapipe::EmbeddingsCalculatorOVOptions nodeOptions; + nodeConfig.node_options(0).UnpackTo(&nodeOptions); + auto servable = std::make_shared( + nodeOptions.models_path(), + nodeOptions.target_device(), + nodeOptions.plugin_config(), + basePath, + nodeOptions.pooling(), + nodeOptions.normalize_embeddings()); + servable->initialize( + nodeOptions.models_path(), + nodeOptions.target_device(), + nodeOptions.plugin_config(), + basePath); + embeddingsServableMap.insert(std::pair>(nodeName, std::move(servable))); + return StatusCode::OK; + } +}; + +static bool embeddingsNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/get_model_metadata_impl.cpp b/src/get_model_metadata_impl.cpp index 195df6da95..ae4202649c 100644 --- a/src/get_model_metadata_impl.cpp +++ b/src/get_model_metadata_impl.cpp @@ -17,10 +17,11 @@ #include +#include "dags/pipeline_factory.hpp" #include "dags/pipelinedefinition.hpp" -#include "dags/pipelinedefinitionstatus.hpp" -#include "dags/pipelinedefinitionunloadguard.hpp" +#include "servable_definition_unload_guard.hpp" #include "execution_context.hpp" +#include "model.hpp" #include "modelinstance.hpp" #include "modelinstanceunloadguard.hpp" #include "modelmanager.hpp" @@ -168,7 +169,7 @@ Status GetModelMetadataImpl::buildResponse( const ModelManager& manager) { // 0 meaning immediately return unload guard if possible, otherwise do not wait for available state - std::unique_ptr unloadGuard; + std::unique_ptr unloadGuard; auto status = pipelineDefinition.waitForLoaded(unloadGuard, 0); if (!status.ok()) { return status; diff --git a/src/grpc_utils.hpp b/src/grpc_utils.hpp index d9222f68f1..17baed5c3e 100644 --- a/src/grpc_utils.hpp +++ b/src/grpc_utils.hpp @@ -15,9 +15,9 @@ //***************************************************************************** #pragma once #include -#include -#include "logging.hpp" +#include +#include namespace ovms { class Status; diff --git a/src/http_rest_api_handler.cpp b/src/http_rest_api_handler.cpp index afe163e6dc..e45bbadfd8 100644 --- a/src/http_rest_api_handler.cpp +++ b/src/http_rest_api_handler.cpp @@ -38,8 +38,9 @@ #include "config.hpp" #include "dags/pipeline.hpp" +#include "dags/pipeline_factory.hpp" #include "dags/pipelinedefinition.hpp" -#include "dags/pipelinedefinitionunloadguard.hpp" +#include "servable_definition_unload_guard.hpp" #include "execution_context.hpp" #include "filesystem.hpp" #include "get_model_metadata_impl.hpp" @@ -68,6 +69,7 @@ #include "http_payload.hpp" #include "http_frontend/http_client_connection.hpp" #include "http_frontend/http_graph_executor_impl.hpp" +#include "mediapipe_internal/mediapipefactory.hpp" #include "mediapipe_internal/mediapipegraphexecutor.hpp" #endif @@ -1153,7 +1155,7 @@ Status HttpRestApiHandler::processPredictRequest( if (this->modelManager.modelExists(modelName)) { SPDLOG_DEBUG("Found model with name: {}. Searching for requested version...", modelName); status = processSingleModelRequest(modelName, modelVersion, request, requestOrder, responseProto, reporterOut); - } else if (this->modelManager.pipelineDefinitionExists(modelName)) { + } else if (this->modelManager.servableExists(modelName, ServableType::Pipeline)) { SPDLOG_DEBUG("Found pipeline with name: {}", modelName); status = processPipelineRequest(modelName, request, requestOrder, responseProto, reporterOut); } else { @@ -1247,7 +1249,7 @@ Status HttpRestApiHandler::getPipelineInputsAndReporter(const std::string& model if (!pipelineDefinition) { return StatusCode::MODEL_MISSING; } - std::unique_ptr unloadGuard; + std::unique_ptr unloadGuard; Status status = pipelineDefinition->waitForLoaded(unloadGuard); if (!status.ok()) { return status; @@ -1288,7 +1290,7 @@ Status HttpRestApiHandler::processPipelineRequest(const std::string& modelName, tensorflow::serving::PredictRequest& requestProto = requestParser.getProto(); requestProto.mutable_model_spec()->set_name(modelName); - status = this->modelManager.createPipeline(pipelinePtr, modelName, &requestProto, &responseProto); + status = this->modelManager.getPipelineFactory().create(pipelinePtr, modelName, &requestProto, &responseProto, this->modelManager); if (!status.ok()) { INCREMENT_IF_ENABLED(reporterOut->getInferRequestMetric(executionContext, false)); return status; diff --git a/src/image_gen/BUILD b/src/image_gen/BUILD index 87056d4934..243044e3ed 100644 --- a/src/image_gen/BUILD +++ b/src/image_gen/BUILD @@ -74,7 +74,8 @@ ovms_cc_library( ovms_cc_library( name = "image_gen_calculator", - srcs = ["http_image_gen_calculator.cc"], + srcs = ["http_image_gen_calculator.cc", + "image_gen_node_initializer.cpp"], deps = [ "@mediapipe//mediapipe/framework:calculator_framework", "//src:httppayload", @@ -83,7 +84,11 @@ ovms_cc_library( ":pipelines", "//src:image_conversion", ":imagegenutils", - "//third_party:genai",], + "//third_party:genai", + ":imagegen_init", + "//src:node_initializer", + "//src:libovmsstring_utils", + "//third_party:openvino",], visibility = ["//visibility:public"], alwayslink = 1, ) diff --git a/src/image_gen/image_gen_node_initializer.cpp b/src/image_gen/image_gen_node_initializer.cpp new file mode 100644 index 0000000000..c2f5c5b50c --- /dev/null +++ b/src/image_gen/image_gen_node_initializer.cpp @@ -0,0 +1,84 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include + +#include + +#include "../mediapipe_internal/graph_side_packets.hpp" +#include "../mediapipe_internal/node_initializer.hpp" +#include "../stringutils.hpp" +#include "imagegen_init.hpp" +#include "pipelines.hpp" +#include "mediapipe/framework/calculator.pb.h" + +#include "../logging.hpp" + +namespace ovms { +class ImageGenNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "ImageGenCalculator"; + +public: + bool matches(const std::string& calculatorName) const override { + return endsWith(calculatorName, CALCULATOR_NAME); + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* /*pythonBackend*/) override { + auto& imageGenPipelinesMap = sidePackets.imageGenPipelinesMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("Image Gen node missing options in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("Image Gen node name is missing in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (imageGenPipelinesMap.find(nodeName) != imageGenPipelinesMap.end()) { + SPDLOG_ERROR("Image Gen node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + auto statusOrArgs = prepareImageGenPipelineArgs(nodeConfig.node_options(0), basePath); + if (std::holds_alternative(statusOrArgs)) { + SPDLOG_ERROR("Failed to prepare Image Gen pipeline args for node: {}. Error: {}", graphName, std::get(statusOrArgs).string()); + return std::get(statusOrArgs); + } + std::shared_ptr servable; + try { + servable = std::make_shared(std::get(statusOrArgs)); + } catch (ov::Exception& e) { + SPDLOG_ERROR("Failed to create Image Generation pipelines: {}. Error: {}", graphName, e.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) { + SPDLOG_ERROR("Failed to create Image Generation pipelines: {}. Unknown error", graphName); + return StatusCode::INTERNAL_ERROR; + } + imageGenPipelinesMap.insert(std::pair>(nodeName, std::move(servable))); + return StatusCode::OK; + } +}; + +static bool imageGenNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/image_gen/imagegen_init.cpp b/src/image_gen/imagegen_init.cpp index a96cbd764c..df9bde7059 100644 --- a/src/image_gen/imagegen_init.cpp +++ b/src/image_gen/imagegen_init.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "absl/strings/str_replace.h" #include "absl/strings/ascii.h" diff --git a/src/inference_request_common.hpp b/src/inference_request_common.hpp index a0f79f2460..b6405827e5 100644 --- a/src/inference_request_common.hpp +++ b/src/inference_request_common.hpp @@ -22,7 +22,6 @@ #include #include -#include "logging.hpp" #include "shape.hpp" #include "anonymous_input_name.hpp" #include "status.hpp" diff --git a/src/kfs_frontend/kfs_grpc_inference_service.cpp b/src/kfs_frontend/kfs_grpc_inference_service.cpp index e3499caa9f..e7da2f8eb1 100644 --- a/src/kfs_frontend/kfs_grpc_inference_service.cpp +++ b/src/kfs_frontend/kfs_grpc_inference_service.cpp @@ -26,9 +26,9 @@ #include "kfs_utils.hpp" #include "kfs_request_utils.hpp" #include "../dags/pipeline.hpp" -#include "../dags/pipelinedefinition.hpp" +#include "../dags/pipeline_factory.hpp" #include "../dags/pipelinedefinitionstatus.hpp" -#include "../dags/pipelinedefinitionunloadguard.hpp" +#include "../servable_definition_unload_guard.hpp" #include "../execution_context.hpp" #include "../grpc_utils.hpp" #if (MEDIAPIPE_DISABLE == 0) @@ -36,19 +36,22 @@ // kfs_graph_executor_impl needs to be included before mediapipegraphexecutor // because it contains functions required by graph execution template #include "kfs_graph_executor_impl.hpp" -#include "../mediapipe_internal/mediapipegraphdefinition.hpp" #include "../mediapipe_internal/mediapipegraphexecutor.hpp" // clang-format on #endif #include "../metric.hpp" +#include "../model.hpp" #include "../modelinstance.hpp" #include "../deserialization_main.hpp" #include "../inference_executor.hpp" #include "../modelinstanceunloadguard.hpp" #include "../modelmanager.hpp" #include "../ovinferrequestsqueue.hpp" +#include "../servable_definition.hpp" +#include "../servable_definition_unload_guard.hpp" #include "../servablemanagermodule.hpp" #include "../server.hpp" +#include "../single_version_servable_definition.hpp" #include "../status.hpp" #include "../stringutils.hpp" #include "../tensorinfo.hpp" @@ -85,7 +88,7 @@ Status KFSInferenceServiceImpl::getPipeline(const KFSRequest* request, KFSResponse* response, std::unique_ptr& pipelinePtr) { OVMS_PROFILE_FUNCTION(); - return this->modelManager.createPipeline(pipelinePtr, request->model_name(), request, response); + return this->modelManager.getPipelineFactory().create(pipelinePtr, request->model_name(), request, response, this->modelManager); } const std::string PLATFORM = "OpenVINO"; @@ -118,25 +121,18 @@ Status KFSInferenceServiceImpl::getModelReady(const KFSGetModelStatusRequest* re auto model = manager.findModelByName(name); SPDLOG_DEBUG("ModelReady requested name: {}, version: {}", name, versionString); if (model == nullptr) { - SPDLOG_DEBUG("ModelReady requested model {} is missing, trying to find pipeline with such name", name); - auto pipelineDefinition = manager.getPipelineFactory().findDefinitionByName(name); - if (!pipelineDefinition) { -#if (MEDIAPIPE_DISABLE == 0) - SPDLOG_DEBUG("ModelReady requested pipeline {} is missing, trying to find mediapipe with such name", name); - auto mediapipeGraphDefinition = manager.getMediapipeFactory().findDefinitionByName(name); - if (!mediapipeGraphDefinition) { - return StatusCode::MODEL_NAME_MISSING; - } - auto status = buildResponse(*mediapipeGraphDefinition, response); - INCREMENT_IF_ENABLED(mediapipeGraphDefinition->getMetricReporter().getModelReadyMetric(executionContext, status.ok())); - return status; -#else + SPDLOG_DEBUG("ModelReady requested model {} is missing, trying to find definition with such name", name); + auto* definition = manager.findServableDefinition(name); + if (!definition) { return StatusCode::MODEL_NAME_MISSING; -#endif } - auto status = buildResponse(*pipelineDefinition, response); - INCREMENT_IF_ENABLED(pipelineDefinition->getMetricReporter().getModelReadyMetric(executionContext, status.ok())); - return status; + auto* svsd = dynamic_cast(definition); + if (!svsd) { + return StatusCode::MODEL_NAME_MISSING; + } + response->set_ready(svsd->isAvailable()); + INCREMENT_IF_ENABLED(svsd->getMetricReporter().getModelReadyMetric(executionContext, true)); + return StatusCode::OK; } std::shared_ptr instance = nullptr; if (!versionString.empty()) { @@ -201,24 +197,17 @@ Status KFSInferenceServiceImpl::ModelMetadataImpl(::grpc::ServerContext* context auto model = this->modelManager.findModelByName(name); SPDLOG_DEBUG("ModelMetadata requested name: {}, version: {}", name, versionString); if (model == nullptr) { - SPDLOG_DEBUG("GetModelMetadata: Model {} is missing, trying to find pipeline with such name", name); - auto pipelineDefinition = this->modelManager.getPipelineFactory().findDefinitionByName(name); - if (!pipelineDefinition) { -#if (MEDIAPIPE_DISABLE == 0) - SPDLOG_DEBUG("GetModelMetadata: Pipeline {} is missing, trying to find mediapipe with such name", name); - auto mediapipeGraphDefinition = this->modelManager.getMediapipeFactory().findDefinitionByName(name); - if (!mediapipeGraphDefinition) { - return StatusCode::MODEL_NAME_MISSING; - } - auto status = buildResponse(*mediapipeGraphDefinition, response); - INCREMENT_IF_ENABLED(mediapipeGraphDefinition->getMetricReporter().getModelMetadataMetric(executionContext, status.ok())); - return status; -#else - return Status(StatusCode::MODEL_NAME_MISSING); -#endif + SPDLOG_DEBUG("GetModelMetadata: Model {} is missing, trying to find definition with such name", name); + auto* definition = this->modelManager.findServableDefinition(name); + if (!definition) { + return StatusCode::MODEL_NAME_MISSING; + } + auto* svsd = dynamic_cast(definition); + if (!svsd) { + return StatusCode::MODEL_NAME_MISSING; } - auto status = buildResponse(*pipelineDefinition, response); - INCREMENT_IF_ENABLED(pipelineDefinition->getMetricReporter().getModelMetadataMetric(executionContext, status.ok())); + auto status = buildResponse(*svsd, response); + INCREMENT_IF_ENABLED(svsd->getMetricReporter().getModelMetadataMetric(executionContext, status.ok())); return status; } std::shared_ptr instance = nullptr; @@ -369,24 +358,13 @@ Status KFSInferenceServiceImpl::buildResponse( } Status KFSInferenceServiceImpl::buildResponse( - PipelineDefinition& pipelineDefinition, - KFSGetModelStatusResponse* response) { - bool isReady = pipelineDefinition.getStatus().isAvailable(); - SPDLOG_DEBUG("Creating ModelReady response for pipeline: {}; ready: {}", pipelineDefinition.getName(), isReady); - response->set_ready(isReady); - return StatusCode::OK; -} - -#if (MEDIAPIPE_DISABLE == 0) -Status KFSInferenceServiceImpl::buildResponse( - MediapipeGraphDefinition& definition, + SingleVersionServableDefinition& definition, KFSGetModelStatusResponse* response) { bool isReady = definition.getStatus().isAvailable(); - SPDLOG_DEBUG("Creating ModelReady response for mediapipe: {}; ready: {}", definition.getName(), isReady); + SPDLOG_DEBUG("Creating ModelReady response for definition: {}; ready: {}", definition.getName(), isReady); response->set_ready(isReady); return StatusCode::OK; } -#endif static void addReadyVersions(Model& model, model_version_t versionAvailableDuringInitialCheck, @@ -443,61 +421,33 @@ KFSInferenceServiceImpl::KFSInferenceServiceImpl(const Server& server) : } Status KFSInferenceServiceImpl::buildResponse( - PipelineDefinition& pipelineDefinition, + SingleVersionServableDefinition& definition, KFSModelMetadataResponse* response) { - std::unique_ptr unloadGuard; + std::unique_ptr unloadGuard; // 0 meaning immediately return unload guard if possible, otherwise do not wait for available state - auto status = pipelineDefinition.waitForLoaded(unloadGuard, 0); + auto status = definition.waitForLoaded(unloadGuard, 0); if (!status.ok()) { return status; } response->Clear(); - response->set_name(pipelineDefinition.getName()); + response->set_name(definition.getName()); response->add_versions("1"); response->set_platform(PLATFORM); - for (const auto& input : pipelineDefinition.getInputsInfo()) { + for (const auto& input : definition.getInputsInfo()) { convert(input, response->add_inputs()); } - for (const auto& output : pipelineDefinition.getOutputsInfo()) { + for (const auto& output : definition.getOutputsInfo()) { convert(output, response->add_outputs()); } return StatusCode::OK; } -#if (MEDIAPIPE_DISABLE == 0) -Status KFSInferenceServiceImpl::buildResponse( - MediapipeGraphDefinition& mediapipeGraphDefinition, - KFSModelMetadataResponse* response) { - std::unique_ptr unloadGuard; - // 0 meaning immediately return unload guard if possible, otherwise do not wait for available state - auto status = mediapipeGraphDefinition.waitForLoaded(unloadGuard, 0); - if (!status.ok()) { - return status; - } - - response->Clear(); - response->set_name(mediapipeGraphDefinition.getName()); - response->add_versions("1"); - response->set_platform(PLATFORM); - - for (const auto& input : mediapipeGraphDefinition.getInputsInfo()) { - convert(input, response->add_inputs()); - } - - for (const auto& output : mediapipeGraphDefinition.getOutputsInfo()) { - convert(output, response->add_outputs()); - } - - return StatusCode::OK; -} -#endif - void KFSInferenceServiceImpl::convert( const std::pair>& from, KFSModelMetadataResponse::TensorMetadata* to) { diff --git a/src/kfs_frontend/kfs_grpc_inference_service.hpp b/src/kfs_frontend/kfs_grpc_inference_service.hpp index 2e787d8d47..ad5e34b756 100644 --- a/src/kfs_frontend/kfs_grpc_inference_service.hpp +++ b/src/kfs_frontend/kfs_grpc_inference_service.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include "kfs_utils.hpp" #include "src/kfserving_api/grpc_predict_v2.grpc.pb.h" @@ -34,7 +34,6 @@ struct KFSModelExtraMetadata { namespace ovms { struct ExecutionContext; -class MediapipeGraphDefinition; class Model; class ModelInstance; class ModelInstanceUnloadGuard; @@ -42,9 +41,9 @@ class ModelManager; class ServableMetricReporter; class Pipeline; class Server; +class SingleVersionServableDefinition; class Status; class TensorInfo; -class PipelineDefinition; class KFSInferenceServiceImpl : public GRPCInferenceService::Service { protected: @@ -66,11 +65,9 @@ class KFSInferenceServiceImpl : public GRPCInferenceService::Service { ::grpc::Status ModelInfer(::grpc::ServerContext* context, const KFSRequest* request, KFSResponse* response) override; ::grpc::Status ModelStreamInfer(::grpc::ServerContext* context, ::grpc::ServerReaderWriter<::inference::ModelStreamInferResponse, ::inference::ModelInferRequest>* stream) override; static Status buildResponse(Model& model, ModelInstance& instance, KFSModelMetadataResponse* response, KFSModelExtraMetadata& extraMetadata); - static Status buildResponse(PipelineDefinition& pipelineDefinition, KFSModelMetadataResponse* response); + static Status buildResponse(SingleVersionServableDefinition& definition, KFSModelMetadataResponse* response); static Status buildResponse(std::shared_ptr instance, KFSGetModelStatusResponse* response); - static Status buildResponse(PipelineDefinition& pipelineDefinition, KFSGetModelStatusResponse* response); - static Status buildResponse(MediapipeGraphDefinition& pipelineDefinition, KFSGetModelStatusResponse* response); - static Status buildResponse(MediapipeGraphDefinition& mediapipeGraphDefinition, KFSModelMetadataResponse* response); + static Status buildResponse(SingleVersionServableDefinition& definition, KFSGetModelStatusResponse* response); static void convert(const std::pair>& from, KFSModelMetadataResponse::TensorMetadata* to); static Status getModelReady(const KFSGetModelStatusRequest* request, KFSGetModelStatusResponse* response, const ModelManager& manager, ExecutionContext executionContext); diff --git a/src/kfs_frontend/validation.hpp b/src/kfs_frontend/validation.hpp index 5dab096d64..b88e422b0a 100644 --- a/src/kfs_frontend/validation.hpp +++ b/src/kfs_frontend/validation.hpp @@ -24,7 +24,6 @@ #include "kfs_utils.hpp" #include "../precision.hpp" #include "../predict_request_validation_utils.hpp" -#include "../logging.hpp" #include "../profiler.hpp" #include "../tensorinfo.hpp" #include "../status.hpp" diff --git a/src/llm/BUILD b/src/llm/BUILD index ae37d936ca..50c43ec531 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -19,7 +19,8 @@ load("//:common_settings.bzl", "ovms_cc_library", "PYBIND_DEPS", "COPTS_PYTHON") ovms_cc_library( name = "llmcalculator", - srcs = ["http_llm_calculator.cc"], + srcs = ["http_llm_calculator.cc", + "llm_node_initializer.cpp"], deps = [ "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", @@ -28,7 +29,10 @@ ovms_cc_library( "//src:libovmsprofiler", ":genai_servables", "//src:httppayload", - "//third_party:genai",], + "//third_party:genai", + "//src:node_initializer", + "//src:libovmslogging", + "//src:libovmsstring_utils",], visibility = ["//visibility:public"], additional_copts = COPTS_PYTHON, alwayslink = 1, # needed, so the calculator can be registered by MediaPipe diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp index 6898b51604..64859a9fee 100644 --- a/src/llm/apis/openai_completions.cpp +++ b/src/llm/apis/openai_completions.cpp @@ -25,6 +25,8 @@ #include #include +#include + #include "openai_json_response.hpp" #include "../../logging.hpp" diff --git a/src/llm/llm_node_initializer.cpp b/src/llm/llm_node_initializer.cpp new file mode 100644 index 0000000000..1f7a1ca892 --- /dev/null +++ b/src/llm/llm_node_initializer.cpp @@ -0,0 +1,72 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include "../mediapipe_internal/graph_side_packets.hpp" +#include "../mediapipe_internal/node_initializer.hpp" +#include "../stringutils.hpp" +#include "servable.hpp" +#include "servable_initializer.hpp" +#include "mediapipe/framework/calculator.pb.h" + +#include "../logging.hpp" + +namespace ovms { +class LlmNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "LLMCalculator"; + +public: + bool matches(const std::string& calculatorName) const override { + return endsWith(calculatorName, CALCULATOR_NAME); + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* /*pythonBackend*/) override { + auto& genAiServableMap = sidePackets.genAiServableMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("LLM node missing options in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("LLM node name is missing in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (genAiServableMap.find(nodeName) != genAiServableMap.end()) { + SPDLOG_ERROR("LLM node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + std::shared_ptr servable; + Status status = initializeGenAiServable(servable, nodeConfig, basePath); + if (!status.ok()) { + SPDLOG_ERROR("Failed to process LLM node graph {}", graphName); + return status; + } + genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); + return StatusCode::OK; + } +}; + +static bool llmNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/logging.cpp b/src/logging.cpp index e89fce9a07..c0974c3a4e 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -15,6 +15,9 @@ //***************************************************************************** #include "logging.hpp" +#include +#include + #if (MEDIAPIPE_DISABLE == 0) #include #endif diff --git a/src/logging.hpp b/src/logging.hpp index 011458fe49..98b842f4d3 100644 --- a/src/logging.hpp +++ b/src/logging.hpp @@ -18,9 +18,6 @@ #include #include -#include -#include -#include #include namespace ovms { diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp new file mode 100644 index 0000000000..b509dc36eb --- /dev/null +++ b/src/mediapipe_internal/graph_side_packets.hpp @@ -0,0 +1,65 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once +#include +#include +#include + +namespace ovms { +class PythonNodeResources; +class GenAiServable; +struct ImageGenerationPipelines; +struct EmbeddingsServable; +struct RerankServable; +struct SttServable; +class TtsServable; + +using PythonNodeResourcesMap = std::unordered_map>; +using GenAiServableMap = std::unordered_map>; +using RerankServableMap = std::unordered_map>; +using SttServableMap = std::unordered_map>; +using TtsServableMap = std::unordered_map>; +using EmbeddingsServableMap = std::unordered_map>; +using ImageGenerationPipelinesMap = std::unordered_map>; + +struct GraphSidePackets { + PythonNodeResourcesMap pythonNodeResourcesMap; + GenAiServableMap genAiServableMap; + ImageGenerationPipelinesMap imageGenPipelinesMap; + EmbeddingsServableMap embeddingsServableMap; + RerankServableMap rerankServableMap; + SttServableMap sttServableMap; + TtsServableMap ttsServableMap; + void clear() { + pythonNodeResourcesMap.clear(); + genAiServableMap.clear(); + imageGenPipelinesMap.clear(); + embeddingsServableMap.clear(); + rerankServableMap.clear(); + sttServableMap.clear(); + ttsServableMap.clear(); + } + bool empty() { + return (pythonNodeResourcesMap.empty() && + genAiServableMap.empty() && + imageGenPipelinesMap.empty() && + embeddingsServableMap.empty() && + rerankServableMap.empty() && + sttServableMap.empty() && + ttsServableMap.empty()); + } +}; +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipefactory.cpp b/src/mediapipe_internal/mediapipefactory.cpp index aa3689ae31..03a714fafc 100644 --- a/src/mediapipe_internal/mediapipefactory.cpp +++ b/src/mediapipe_internal/mediapipefactory.cpp @@ -25,16 +25,9 @@ #include #include #include -#pragma warning(push) -#pragma warning(disable : 6001 4324 6385 6326 6308 6387 6246) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wall" -#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" -#pragma GCC diagnostic pop -#pragma warning(pop) -#include "../kfs_frontend/kfs_grpc_inference_service.hpp" #include "../logging.hpp" -#include "../modelmanager.hpp" +#include "../metric_provider.hpp" +#include "../servable_name_checker.hpp" #include "../status.hpp" #include "../stringutils.hpp" #pragma warning(push) @@ -62,13 +55,14 @@ MediapipeFactory::MediapipeFactory(PythonBackend* pythonBackend) { Status MediapipeFactory::createDefinition(const std::string& pipelineName, const MediapipeGraphConfig& config, - ModelManager& manager) { + MetricProvider& metrics, + const ServableNameChecker& checker) { if (definitionExists(pipelineName)) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Mediapipe graph definition: {} is already created", pipelineName); return StatusCode::PIPELINE_DEFINITION_ALREADY_EXIST; } - std::shared_ptr graphDefinition = std::make_shared(pipelineName, config, manager.getMetricRegistry(), &manager.getMetricConfig(), pythonBackend); - auto stat = graphDefinition->validate(manager); + std::shared_ptr graphDefinition = std::make_shared(pipelineName, config, metrics.getMetricRegistry(), &metrics.getMetricConfig(), pythonBackend); + auto stat = graphDefinition->validate(checker); if (stat.getCode() == StatusCode::MEDIAPIPE_GRAPH_NAME_OCCUPIED) { return stat; } @@ -94,19 +88,18 @@ MediapipeGraphDefinition* MediapipeFactory::findDefinitionByName(const std::stri Status MediapipeFactory::reloadDefinition(const std::string& name, const MediapipeGraphConfig& config, - ModelManager& manager) { + const ServableNameChecker& checker) { auto mgd = findDefinitionByName(name); if (mgd == nullptr) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Requested to reload mediapipe graph definition but it does not exist: {}", name); return StatusCode::INTERNAL_ERROR; } SPDLOG_LOGGER_INFO(modelmanager_logger, "Reloading mediapipe graph: {}", name); - return mgd->reload(manager, config); + return mgd->reload(checker, config); } Status MediapipeFactory::create(std::unique_ptr& pipeline, - const std::string& name, - ModelManager& manager) const { + const std::string& name) const { std::shared_lock lock(definitionsMtx); auto it = definitions.find(name); if (it == definitions.end()) { @@ -117,17 +110,17 @@ Status MediapipeFactory::create(std::unique_ptr& pipelin return definition.create(pipeline); } -void MediapipeFactory::retireOtherThan(std::set&& graphsInConfigFile, ModelManager& manager) { +void MediapipeFactory::retireOtherThan(std::set&& graphsInConfigFile) { std::for_each(definitions.begin(), definitions.end(), - [&graphsInConfigFile, &manager](auto& nameDefinitionPair) { + [&graphsInConfigFile](auto& nameDefinitionPair) { if (graphsInConfigFile.find(nameDefinitionPair.second->getName()) == graphsInConfigFile.end() && nameDefinitionPair.second->getStateCode() != PipelineDefinitionStateCode::RETIRED) { - nameDefinitionPair.second->retire(manager); + nameDefinitionPair.second->retire(); } }); } -Status MediapipeFactory::revalidatePipelines(ModelManager&) { +Status MediapipeFactory::revalidatePipelines() { SPDLOG_LOGGER_WARN(modelmanager_logger, "revalidation of mediapipe graphs not implemented yet"); return StatusCode::OK; } diff --git a/src/mediapipe_internal/mediapipefactory.hpp b/src/mediapipe_internal/mediapipefactory.hpp index e48146b0f0..0d03fcc7a4 100644 --- a/src/mediapipe_internal/mediapipefactory.hpp +++ b/src/mediapipe_internal/mediapipefactory.hpp @@ -25,18 +25,10 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6001 4324 6308 6387 6246) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wall" -#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" -#pragma GCC diagnostic pop -#pragma warning(pop) -#include "../kfs_frontend/kfs_grpc_inference_service.hpp" - namespace ovms { -class ModelManager; +class MetricProvider; +class ServableNameChecker; class Status; class MediapipeGraphConfig; class MediapipeGraphDefinition; @@ -53,30 +45,22 @@ class MediapipeFactory { MediapipeFactory(PythonBackend* pythonBackend = nullptr); Status createDefinition(const std::string& pipelineName, const MediapipeGraphConfig& config, - ModelManager& manager); + MetricProvider& metrics, + const ServableNameChecker& checker); bool definitionExists(const std::string& name) const; -private: - template - Status createInternal(std::unique_ptr& pipeline, - const std::string& name, - const RequestType* request, - ResponseType* response, - ModelManager& manager) const; - public: Status create(std::unique_ptr& pipeline, - const std::string& name, - ModelManager& manager) const; + const std::string& name) const; MediapipeGraphDefinition* findDefinitionByName(const std::string& name) const; Status reloadDefinition(const std::string& pipelineName, const MediapipeGraphConfig& config, - ModelManager& manager); + const ServableNameChecker& checker); - void retireOtherThan(std::set&& pipelinesInConfigFile, ModelManager& manager); - Status revalidatePipelines(ModelManager&); + void retireOtherThan(std::set&& pipelinesInConfigFile); + Status revalidatePipelines(); const std::vector getMediapipePipelinesNames() const; const std::vector getNamesOfAvailableMediapipePipelines() const; ~MediapipeFactory(); diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index 9047765e75..c7bceef724 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -26,18 +26,11 @@ #include "../execution_context.hpp" #include "../filesystem.hpp" -#include "../kfs_frontend/kfs_utils.hpp" -#include "../kfs_frontend/kfs_request_utils.hpp" -#include "../deserialization_main.hpp" #include "../metric.hpp" #include "../model_metric_reporter.hpp" -#include "../modelmanager.hpp" #include "../ov_utils.hpp" -#include "../llm/servable.hpp" -#include "../llm/servable_initializer.hpp" -#if (PYTHON_DISABLE == 0) -#include "../python/pythonnoderesources.hpp" -#endif +#include "../servable_definition_unload_guard.hpp" +#include "../servable_name_checker.hpp" #include "../status.hpp" #include "../stringutils.hpp" #include "../tensorinfo.hpp" @@ -47,24 +40,13 @@ #include "mediapipe/framework/port/status.h" #include "mediapipe_utils.hpp" #include "mediapipegraphexecutor.hpp" -#include "src/embeddings/embeddings_calculator_ov.pb.h" -#include "src/rerank/rerank_calculator_ov.pb.h" - -#include "src/image_gen/pipelines.hpp" -#include "src/image_gen/imagegen_init.hpp" -#include "src/image_gen/image_gen_calculator.pb.h" +#include "node_initializer.hpp" namespace ovms { MediapipeGraphConfig MediapipeGraphDefinition::MGC; const std::string MediapipeGraphDefinition::SCHEDULER_CLASS_NAME{"Mediapipe"}; -const std::string MediapipeGraphDefinition::PYTHON_NODE_CALCULATOR_NAME{"PythonExecutorCalculator"}; -const std::string MediapipeGraphDefinition::LLM_NODE_CALCULATOR_NAME{"LLMCalculator"}; -const std::string MediapipeGraphDefinition::IMAGE_GEN_CALCULATOR_NAME{"ImageGenCalculator"}; -const std::string MediapipeGraphDefinition::STT_NODE_CALCULATOR_NAME{"S2tCalculator"}; -const std::string MediapipeGraphDefinition::TTS_NODE_CALCULATOR_NAME{"T2sCalculator"}; -const std::string MediapipeGraphDefinition::EMBEDDINGS_NODE_CALCULATOR_NAME{"EmbeddingsCalculatorOV"}; -const std::string MediapipeGraphDefinition::RERANK_NODE_CALCULATOR_NAME{"RerankCalculatorOV"}; + MediapipeGraphDefinition::~MediapipeGraphDefinition() = default; @@ -127,14 +109,14 @@ Status MediapipeGraphDefinition::dryInitializeTest() { } return StatusCode::OK; } -Status MediapipeGraphDefinition::validate(ModelManager& manager) { +Status MediapipeGraphDefinition::validate(const ServableNameChecker& checker) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName()); if (!this->sidePacketMaps.empty()) { SPDLOG_ERROR("Internal Error: MediaPipe definition is in unexpected state."); return StatusCode::INTERNAL_ERROR; } ValidationResultNotifier notifier(this->status, this->loadedNotify); - if (manager.modelExists(this->getName()) || manager.pipelineDefinitionExists(this->getName())) { + if (checker.servableExists(this->getName(), ServableType::Model | ServableType::Pipeline)) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Mediapipe graph name: {} is already occupied by model or pipeline.", this->getName()); return StatusCode::MEDIAPIPE_GRAPH_NAME_OCCUPIED; } @@ -192,8 +174,8 @@ MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, MetricRegistry* registry, const MetricConfig* metricConfig, PythonBackend* pythonBackend) : - name(name), - status(SCHEDULER_CLASS_NAME, this->name), + SingleVersionServableDefinition(name), + status(SCHEDULER_CLASS_NAME, getName()), pythonBackend(pythonBackend), reporter(std::make_unique(metricConfig, registry, name)) { mgconfig = config; @@ -254,7 +236,7 @@ Status MediapipeGraphDefinition::createOutputsInfo() { } Status MediapipeGraphDefinition::create(std::unique_ptr& pipeline) { - std::unique_ptr unloadGuard; + std::unique_ptr unloadGuard; Status status = waitForLoaded(unloadGuard); if (!status.ok()) { SPDLOG_DEBUG("Failed to execute mediapipe graph: {} since it is not available", getName()); @@ -332,7 +314,7 @@ Status MediapipeGraphDefinition::setStreamTypes() { return StatusCode::OK; } -Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGraphConfig& config) { +Status MediapipeGraphDefinition::reload(const ServableNameChecker& checker, const MediapipeGraphConfig& config) { // block creating new unloadGuards this->status.handle(ReloadEvent()); while (requestsHandlesCounter > 0) { @@ -340,10 +322,10 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr } this->mgconfig = config; this->sidePacketMaps.clear(); - return validate(manager); + return validate(checker); } -void MediapipeGraphDefinition::retire(ModelManager& manager) { +void MediapipeGraphDefinition::retire() { this->sidePacketMaps.clear(); this->status.handle(RetireEvent()); } @@ -356,273 +338,24 @@ bool MediapipeGraphDefinition::isReloadRequired(const MediapipeGraphConfig& conf return getMediapipeGraphConfig().isReloadRequired(config); } -Status MediapipeGraphDefinition::waitForLoaded(std::unique_ptr& unloadGuard, const uint32_t waitForLoadedTimeoutMicroseconds) { - unloadGuard = std::make_unique(*this); - - const uint32_t waitLoadedTimestepMicroseconds = 1000; - const uint32_t waitCheckpoints = waitForLoadedTimeoutMicroseconds / waitLoadedTimestepMicroseconds; - uint32_t waitCheckpointsCounter = waitCheckpoints; - std::mutex cvMtx; - std::unique_lock cvLock(cvMtx); - while (waitCheckpointsCounter-- != 0) { - if (status.isAvailable()) { - SPDLOG_DEBUG("Successfully waited for mediapipe definition: {}", getName()); - return StatusCode::OK; - } - unloadGuard.reset(); - if (!status.canEndLoaded()) { - if (status.getStateCode() != PipelineDefinitionStateCode::RETIRED) { - SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended due to timeout.", getName()); - return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET; - } else { - SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended since it failed to load.", getName()); - return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE; - } - } - SPDLOG_DEBUG("Waiting for available state for mediapipe: {}, with timestep: {}us timeout: {}us check count: {}", - getName(), waitLoadedTimestepMicroseconds, waitForLoadedTimeoutMicroseconds, waitCheckpointsCounter); - loadedNotify.wait_for(cvLock, - std::chrono::microseconds(waitLoadedTimestepMicroseconds), - [this]() { - return this->status.isAvailable() || - !this->status.canEndLoaded(); - }); - unloadGuard = std::make_unique(*this); - } - if (!status.isAvailable()) { - if (status.getStateCode() != PipelineDefinitionStateCode::RETIRED) { - SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended due to timeout.", getName()); - return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET; - } else { - SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended since it failed to load.", getName()); - return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE; - } - } - SPDLOG_DEBUG("Successfully waited for mediapipe definition: {}", getName()); - return StatusCode::OK; +StatusCode MediapipeGraphDefinition::notLoadedYetCode() const { + return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET; } -template -class ResourcesCleaningGuard { -public: - bool shouldCleanup{true}; - T& resources; - ResourcesCleaningGuard(T& resources) : - resources(resources) {} - ~ResourcesCleaningGuard() { - if (shouldCleanup) { - resources.clear(); - } - } - void disableCleaning() { - shouldCleanup = false; - } -}; +StatusCode MediapipeGraphDefinition::notLoadedAnymoreCode() const { + return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE; +} Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); + auto& registry = NodeInitializerRegistry::instance(); for (int i = 0; i < config.node().size(); i++) { -#if (PYTHON_DISABLE == 0) - auto& pythonNodeResourcesMap = this->sidePacketMaps.pythonNodeResourcesMap; - if (config.node(i).calculator() == PYTHON_NODE_CALCULATOR_NAME) { - ResourcesCleaningGuard pythonResourcesCleaningGuard(pythonNodeResourcesMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Python node missing options in graph: {}. ", this->name); - return StatusCode::PYTHON_NODE_MISSING_OPTIONS; - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Python node name is missing in graph: {}. ", this->name); - return StatusCode::PYTHON_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (pythonNodeResourcesMap.find(nodeName) != pythonNodeResourcesMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Python node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::PYTHON_NODE_NAME_ALREADY_EXISTS; - } - - std::shared_ptr nodeResources = nullptr; - Status status = PythonNodeResources::createPythonNodeResources(nodeResources, config.node(i), pythonBackend, mgconfig.getBasePath()); - if (nodeResources == nullptr || !status.ok()) { - SPDLOG_ERROR("Failed to process python node graph {}", this->name); - return status; - } - - pythonNodeResourcesMap.insert(std::pair>(nodeName, std::move(nodeResources))); - pythonResourcesCleaningGuard.disableCleaning(); - } -#endif - // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI) - if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) { - auto& genAiServableMap = this->sidePacketMaps.genAiServableMap; - ResourcesCleaningGuard genAiServablesCleaningGuard(genAiServableMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_OPTIONS; - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node name is missing in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (genAiServableMap.find(nodeName) != genAiServableMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; - } - std::shared_ptr servable; - Status status = initializeGenAiServable(servable, config.node(i), mgconfig.getBasePath()); - if (!status.ok()) { - SPDLOG_ERROR("Failed to process LLM node graph {}", this->name); - return status; - } - genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); - genAiServablesCleaningGuard.disableCleaning(); - } - // Passed to both calculators that require Image Generation pipelines - if (endsWith(config.node(i).calculator(), IMAGE_GEN_CALCULATOR_NAME)) { - auto& imageGenPipelinesMap = this->sidePacketMaps.imageGenPipelinesMap; - ResourcesCleaningGuard guard(imageGenPipelinesMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node missing options in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_OPTIONS; // TODO: create new error code - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node name is missing in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (imageGenPipelinesMap.find(nodeName) != imageGenPipelinesMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; - } - auto statusOrArgs = prepareImageGenPipelineArgs(config.node(i).node_options(0), mgconfig.getBasePath()); - if (std::holds_alternative(statusOrArgs)) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to prepare Image Gen pipeline args for node: {}. Error: {}", this->name, std::get(statusOrArgs).string()); - return std::get(statusOrArgs); - } - std::shared_ptr servable; - try { - servable = std::make_shared(std::get(statusOrArgs)); - } catch (ov::Exception& e) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create Image Generation pipelines: {}. Error: {}", this->name, e.what()); - return StatusCode::INTERNAL_ERROR; - } catch (...) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create Image Generation pipelines: {}. Unknown error", this->name); - return StatusCode::INTERNAL_ERROR; - } - imageGenPipelinesMap.insert(std::pair>(nodeName, std::move(servable))); - guard.disableCleaning(); - } - if (endsWith(config.node(i).calculator(), EMBEDDINGS_NODE_CALCULATOR_NAME)) { - auto& embeddingsServableMap = this->sidePacketMaps.embeddingsServableMap; - ResourcesCleaningGuard embeddingsServablesCleaningGuard(embeddingsServableMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node missing options in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_OPTIONS; - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node name is missing in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (embeddingsServableMap.find(nodeName) != embeddingsServableMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; - } - mediapipe::EmbeddingsCalculatorOVOptions nodeOptions; - config.node(i).node_options(0).UnpackTo(&nodeOptions); - std::shared_ptr servable = std::make_shared( - nodeOptions.models_path(), - nodeOptions.target_device(), - nodeOptions.plugin_config(), - mgconfig.getBasePath(), - nodeOptions.pooling(), - nodeOptions.normalize_embeddings()); - servable->initialize( - nodeOptions.models_path(), - nodeOptions.target_device(), - nodeOptions.plugin_config(), - mgconfig.getBasePath()); - embeddingsServableMap.insert(std::pair>(nodeName, std::move(servable))); - embeddingsServablesCleaningGuard.disableCleaning(); - } - if (endsWith(config.node(i).calculator(), RERANK_NODE_CALCULATOR_NAME)) { - auto& rerankServableMap = this->sidePacketMaps.rerankServableMap; - ResourcesCleaningGuard rerankServablesCleaningGuard(rerankServableMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node missing options in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_OPTIONS; - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node name is missing in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (rerankServableMap.find(nodeName) != rerankServableMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; - } - mediapipe::RerankCalculatorOVOptions nodeOptions; - config.node(i).node_options(0).UnpackTo(&nodeOptions); - std::shared_ptr servable = std::make_shared(nodeOptions.models_path(), nodeOptions.target_device(), nodeOptions.plugin_config(), mgconfig.getBasePath()); - servable->initialize(nodeOptions.models_path(), nodeOptions.target_device(), nodeOptions.plugin_config(), mgconfig.getBasePath()); - rerankServableMap.insert(std::pair>(nodeName, std::move(servable))); - rerankServablesCleaningGuard.disableCleaning(); - } - if (endsWith(config.node(i).calculator(), STT_NODE_CALCULATOR_NAME)) { - auto& sttServableMap = this->sidePacketMaps.sttServableMap; - ResourcesCleaningGuard sttServablesCleaningGuard(sttServableMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node missing options in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_OPTIONS; - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node name is missing in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (sttServableMap.find(nodeName) != sttServableMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; - } - mediapipe::S2tCalculatorOptions nodeOptions; - auto& calculatorOptions = config.node(i).node_options(0); - if (!calculatorOptions.UnpackTo(&nodeOptions)) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to unpack calculator options"); - return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; - } - std::shared_ptr servable = std::make_shared(nodeOptions, mgconfig.getBasePath()); - sttServableMap.insert(std::pair>(nodeName, std::move(servable))); - sttServablesCleaningGuard.disableCleaning(); - } - if (endsWith(config.node(i).calculator(), TTS_NODE_CALCULATOR_NAME)) { - auto& ttsServableMap = this->sidePacketMaps.ttsServableMap; - ResourcesCleaningGuard ttsServablesCleaningGuard(ttsServableMap); - if (!config.node(i).node_options().size()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node missing options in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_OPTIONS; - } - if (config.node(i).name().empty()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node name is missing in graph: {}. ", this->name); - return StatusCode::LLM_NODE_MISSING_NAME; - } - std::string nodeName = config.node(i).name(); - if (ttsServableMap.find(nodeName) != ttsServableMap.end()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node name: {} already used in graph: {}. ", nodeName, this->name); - return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; - } - mediapipe::T2sCalculatorOptions nodeOptions; - auto& calculatorOptions = config.node(i).node_options(0); - if (!calculatorOptions.UnpackTo(&nodeOptions)) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to unpack calculator options"); - return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; - } - try { - std::shared_ptr servable = std::make_shared(nodeOptions.models_path(), nodeOptions.target_device(), nodeOptions.voices(), nodeOptions.plugin_config(), mgconfig.getBasePath()); - ttsServableMap.insert(std::pair>(nodeName, std::move(servable))); - ttsServablesCleaningGuard.disableCleaning(); - } catch (const std::runtime_error& e) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node name: {} initialization failed: {}. ", nodeName, e.what()); - return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + for (const auto& initializer : registry.all()) { + if (initializer->matches(config.node(i).calculator())) { + Status status = initializer->initialize(config.node(i), getName(), mgconfig.getBasePath(), sidePacketMaps, pythonBackend); + if (!status.ok()) { + return status; + } } } } diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 14c9e0679f..a8fde683c9 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -14,91 +14,41 @@ // limitations under the License. //***************************************************************************** #pragma once -#include #include #include #include -#include #include #include #include #include #include "../dags/pipelinedefinitionstatus.hpp" -#include "../kfs_frontend/kfs_grpc_inference_service.hpp" -#include "../kfs_frontend/kfs_utils.hpp" #include "../metric.hpp" -#include "../tensorinfo.hpp" +#include "../model_metric_reporter.hpp" +#include "../single_version_servable_definition.hpp" +#include "../tensorinfo_fwd.hpp" #pragma warning(push) #pragma warning(disable : 4005 4309 6001 6385 6386 6326 6011 4005 4456 6246) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "mediapipe/framework/calculator_graph.h" -#include "mediapipe/framework/port/parse_text_proto.h" -#include "mediapipe/framework/port/status.h" #pragma GCC diagnostic pop #pragma warning(pop) #include "mediapipegraphconfig.hpp" +#include "graph_side_packets.hpp" #include "packettypes.hpp" -#include "../sidepacket_servable.hpp" -#include "../embeddings/embeddings_servable.hpp" -#include "../rerank/rerank_servable.hpp" -#include "../audio/speech_to_text/s2t_servable.hpp" -#include "../audio/text_to_speech/t2s_servable.hpp" - namespace ovms { -class MediapipeGraphDefinitionUnloadGuard; class MetricConfig; class MetricRegistry; -class MediapipeServableMetricReporter; -class ModelManager; +class ServableNameChecker; class MediapipeGraphExecutor; class Status; class PythonBackend; -class PythonNodeResources; -class GenAiServable; -struct ImageGenerationPipelines; -using PythonNodeResourcesMap = std::unordered_map>; -using GenAiServableMap = std::unordered_map>; -using RerankServableMap = std::unordered_map>; -using SttServableMap = std::unordered_map>; -using TtsServableMap = std::unordered_map>; -using EmbeddingsServableMap = std::unordered_map>; -using ImageGenerationPipelinesMap = std::unordered_map>; - -struct GraphSidePackets { - PythonNodeResourcesMap pythonNodeResourcesMap; - GenAiServableMap genAiServableMap; - ImageGenerationPipelinesMap imageGenPipelinesMap; - EmbeddingsServableMap embeddingsServableMap; - RerankServableMap rerankServableMap; - SttServableMap sttServableMap; - TtsServableMap ttsServableMap; - void clear() { - pythonNodeResourcesMap.clear(); - genAiServableMap.clear(); - imageGenPipelinesMap.clear(); - embeddingsServableMap.clear(); - rerankServableMap.clear(); - sttServableMap.clear(); - ttsServableMap.clear(); - } - bool empty() { - return (pythonNodeResourcesMap.empty() && - genAiServableMap.empty() && - imageGenPipelinesMap.empty() && - embeddingsServableMap.empty() && - rerankServableMap.empty() && - sttServableMap.empty() && - ttsServableMap.empty()); - } -}; -class MediapipeGraphDefinition { - friend MediapipeGraphDefinitionUnloadGuard; +class MediapipeGraphDefinition : public SingleVersionServableDefinition { public: virtual ~MediapipeGraphDefinition(); @@ -108,38 +58,26 @@ class MediapipeGraphDefinition { const MetricConfig* metricConfig = nullptr, PythonBackend* pythonBackend = nullptr); - const std::string& getName() const { return name; } - const PipelineDefinitionStatus& getStatus() const { + const std::string& getName() const override { return SingleVersionServableDefinition::getName(); } + const PipelineDefinitionStatus& getStatus() const override { return this->status; } const PipelineDefinitionStateCode getStateCode() const { return status.getStateCode(); } - const model_version_t getVersion() const { return VERSION; } - const tensor_map_t getInputsInfo() const; - const tensor_map_t getOutputsInfo() const; + bool isAvailable() const override { return status.isAvailable(); } + const tensor_map_t getInputsInfo() const override; + const tensor_map_t getOutputsInfo() const override; const MediapipeGraphConfig& getMediapipeGraphConfig() const { return this->mgconfig; } - MediapipeServableMetricReporter& getMetricReporter() const { return *this->reporter; } + MediapipeServableMetricReporter& getMetricReporter() const override { return *this->reporter; } Status create(std::unique_ptr& pipeline); - Status reload(ModelManager& manager, const MediapipeGraphConfig& config); - Status validate(ModelManager& manager); - void retire(ModelManager& manager); + Status reload(const ServableNameChecker& checker, const MediapipeGraphConfig& config); + Status validate(const ServableNameChecker& checker); + void retire(); Status initializeNodes(); bool isReloadRequired(const MediapipeGraphConfig& config) const; - static constexpr uint64_t WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS = 500000; static const std::string SCHEDULER_CLASS_NAME; - static const std::string PYTHON_NODE_CALCULATOR_NAME; - static const std::string LLM_NODE_CALCULATOR_NAME; - static const std::string IMAGE_GEN_CALCULATOR_NAME; - static const std::string EMBEDDINGS_NODE_CALCULATOR_NAME; - static const std::string RERANK_NODE_CALCULATOR_NAME; - static const std::string STT_NODE_CALCULATOR_NAME; - static const std::string TTS_NODE_CALCULATOR_NAME; - Status waitForLoaded(std::unique_ptr& unloadGuard, const uint32_t waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS); - - // Pipelines are not versioned and any available definition has constant version equal 1. - static constexpr model_version_t VERSION = 1; protected: GraphSidePackets sidePacketMaps; @@ -171,7 +109,6 @@ class MediapipeGraphDefinition { Status dryInitializeTest(); std::string chosenConfig; static MediapipeGraphConfig MGC; - const std::string name; bool passKfsRequestFlag; std::unordered_map inputTypes; @@ -185,17 +122,11 @@ class MediapipeGraphDefinition { Status createOutputsInfo(); Status createInputSidePacketsInfo(); - std::condition_variable loadedNotify; mutable std::shared_mutex metadataMtx; private: - void increaseRequestsHandlesCount() { - ++requestsHandlesCounter; - } - - void decreaseRequestsHandlesCount() { - --requestsHandlesCounter; - } + StatusCode notLoadedYetCode() const override; + StatusCode notLoadedAnymoreCode() const override; tensor_map_t inputsInfo; tensor_map_t outputsInfo; @@ -204,25 +135,8 @@ class MediapipeGraphDefinition { std::vector outputNames; std::vector inputSidePacketNames; - std::atomic requestsHandlesCounter = 0; - PythonBackend* pythonBackend; std::unique_ptr reporter; }; - -class MediapipeGraphDefinitionUnloadGuard { -public: - MediapipeGraphDefinitionUnloadGuard(MediapipeGraphDefinition& definition) : - definition(definition) { - definition.increaseRequestsHandlesCount(); - } - - ~MediapipeGraphDefinitionUnloadGuard() { - definition.decreaseRequestsHandlesCount(); - } - -private: - MediapipeGraphDefinition& definition; -}; } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 93b53fdf8e..3c036a845a 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -31,8 +31,6 @@ #include "../python/python_backend.hpp" #endif -#include "../image_gen/pipelines.hpp" - namespace ovms { MediapipeGraphExecutor::MediapipeGraphExecutor( diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index c165469395..4d0b069f43 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -37,7 +37,7 @@ #pragma GCC diagnostic pop #pragma warning(pop) #include "mediapipe_utils.hpp" -#include "mediapipegraphdefinition.hpp" // for version in response and PythonNodeResourceMap +#include "graph_side_packets.hpp" #include "packettypes.hpp" namespace ovms { diff --git a/src/mediapipe_internal/node_initializer.cpp b/src/mediapipe_internal/node_initializer.cpp new file mode 100644 index 0000000000..c584bc05cf --- /dev/null +++ b/src/mediapipe_internal/node_initializer.cpp @@ -0,0 +1,31 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "node_initializer.hpp" + +namespace ovms { +NodeInitializerRegistry& NodeInitializerRegistry::instance() { + static NodeInitializerRegistry registry; + return registry; +} + +void NodeInitializerRegistry::add(std::unique_ptr initializer) { + initializers_.push_back(std::move(initializer)); +} + +const std::vector>& NodeInitializerRegistry::all() const { + return initializers_; +} +} // namespace ovms diff --git a/src/mediapipe_internal/node_initializer.hpp b/src/mediapipe_internal/node_initializer.hpp new file mode 100644 index 0000000000..a989535746 --- /dev/null +++ b/src/mediapipe_internal/node_initializer.hpp @@ -0,0 +1,53 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once +#include +#include +#include + +#include "../status.hpp" + +namespace mediapipe { +class CalculatorGraphConfig_Node; +} + +namespace ovms { +struct GraphSidePackets; +class PythonBackend; + +class NodeInitializer { +public: + virtual ~NodeInitializer() = default; + virtual bool matches(const std::string& calculatorName) const = 0; + virtual Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* pythonBackend) = 0; +}; + +class NodeInitializerRegistry { +public: + static NodeInitializerRegistry& instance(); + void add(std::unique_ptr initializer); + const std::vector>& all() const; + +private: + NodeInitializerRegistry() = default; + std::vector> initializers_; +}; +} // namespace ovms diff --git a/src/dags/pipelinedefinitionunloadguard.hpp b/src/metric_provider.hpp similarity index 71% rename from src/dags/pipelinedefinitionunloadguard.hpp rename to src/metric_provider.hpp index c36466f21b..36824a8599 100644 --- a/src/dags/pipelinedefinitionunloadguard.hpp +++ b/src/metric_provider.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright 2020 Intel Corporation +// Copyright 2026 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,15 +16,15 @@ #pragma once namespace ovms { -class PipelineDefinition; -class PipelineDefinitionUnloadGuard { -public: - PipelineDefinitionUnloadGuard() = delete; - PipelineDefinitionUnloadGuard(PipelineDefinition& pipelineDefinition); - ~PipelineDefinitionUnloadGuard(); +class MetricConfig; +class MetricRegistry; -private: - PipelineDefinition& pipelineDefinition; +class MetricProvider { +public: + virtual ~MetricProvider() = default; + virtual MetricRegistry* getMetricRegistry() const = 0; + virtual const MetricConfig& getMetricConfig() const = 0; }; + } // namespace ovms diff --git a/src/model.cpp b/src/model.cpp index e5b6e00eb3..c6e05837ac 100644 --- a/src/model.cpp +++ b/src/model.cpp @@ -23,7 +23,6 @@ #include "customloaderinterface.hpp" #include "customloaders.hpp" -#include "dags/pipelinedefinition.hpp" #include "filesystem.hpp" #include "localfilesystem.hpp" #include "logging.hpp" @@ -33,6 +32,10 @@ namespace ovms { +bool Model::isAvailable() const { + return getDefaultModelInstance() != nullptr; +} + static StatusCode downloadModels(std::shared_ptr& fs, ModelConfig& config, std::shared_ptr versions) { if (versions->size() == 0) { return StatusCode::OK; diff --git a/src/model.hpp b/src/model.hpp index b83525c8ed..b86de8b42c 100644 --- a/src/model.hpp +++ b/src/model.hpp @@ -27,6 +27,7 @@ #include "modelchangesubscription.hpp" #include "modelconfig.hpp" #include "modelversion.hpp" +#include "servable_definition.hpp" namespace ov { class Core; @@ -42,7 +43,7 @@ class MetricRegistry; class Status; /* * @brief This class represent inference models */ -class Model { +class Model : public ServableDefinition { private: /** * @brief Mutex for protecting concurrent modifying and accessing modelVersions @@ -136,10 +137,12 @@ class Model { * * @return model name */ - const std::string& getName() const { + const std::string& getName() const override { return name; } + bool isAvailable() const override; + const bool isStateful() const { return stateful; } diff --git a/src/model_metric_reporter.hpp b/src/model_metric_reporter.hpp index f2238d7e7f..7cc4ab804a 100644 --- a/src/model_metric_reporter.hpp +++ b/src/model_metric_reporter.hpp @@ -15,7 +15,6 @@ //***************************************************************************** #pragma once -#include #include #include #include @@ -30,7 +29,15 @@ namespace ovms { class MetricRegistry; class MetricConfig; -class ServableMetricReporter { +class StatusMetricReporter { +public: + virtual ~StatusMetricReporter() = default; + virtual std::unique_ptr& getModelReadyMetric(const ExecutionContext& context, bool success = true) = 0; + virtual std::unique_ptr& getModelMetadataMetric(const ExecutionContext& context, bool success = true) = 0; + virtual std::unique_ptr& getGetModelStatusRequestSuccessMetric(const ExecutionContext& context) = 0; +}; + +class ServableMetricReporter : public StatusMetricReporter { MetricRegistry* registry; protected: @@ -77,7 +84,7 @@ class ServableMetricReporter { std::unique_ptr requestTimeGrpc; std::unique_ptr requestTimeRest; - inline std::unique_ptr& getGetModelStatusRequestSuccessMetric(const ExecutionContext& context) { + inline std::unique_ptr& getGetModelStatusRequestSuccessMetric(const ExecutionContext& context) override { if (context.method != ExecutionContext::Method::GetModelStatus) { static std::unique_ptr empty = nullptr; return empty; // In case something calls it from ConfigReload/ConfigStatus methods @@ -123,7 +130,7 @@ class ServableMetricReporter { } } - inline std::unique_ptr& getModelMetadataMetric(const ExecutionContext& context, bool success = true) { + inline std::unique_ptr& getModelMetadataMetric(const ExecutionContext& context, bool success = true) override { if (context.interface == ExecutionContext::Interface::GRPC) { return success ? this->requestSuccessGrpcModelMetadata : this->requestFailGrpcModelMetadata; } else { @@ -131,7 +138,7 @@ class ServableMetricReporter { } } - inline std::unique_ptr& getModelReadyMetric(const ExecutionContext& context, bool success = true) { + inline std::unique_ptr& getModelReadyMetric(const ExecutionContext& context, bool success = true) override { if (context.interface == ExecutionContext::Interface::GRPC) { return success ? this->requestSuccessGrpcModelReady : this->requestFailGrpcModelReady; } else { @@ -153,7 +160,7 @@ class ModelMetricReporter : public ServableMetricReporter { ModelMetricReporter(const MetricConfig* metricConfig, MetricRegistry* registry, const std::string& modelName, model_version_t modelVersion); }; -class MediapipeServableMetricReporter { +class MediapipeServableMetricReporter : public StatusMetricReporter { MetricRegistry* registry; protected: @@ -300,7 +307,7 @@ class MediapipeServableMetricReporter { return nullptr; } - inline std::unique_ptr& getModelMetadataMetric(const ExecutionContext& context, bool success = true) { + inline std::unique_ptr& getModelMetadataMetric(const ExecutionContext& context, bool success = true) override { if (context.interface == ExecutionContext::Interface::GRPC) { return success ? this->requestSuccessGrpcModelMetadata : this->requestFailGrpcModelMetadata; } else { @@ -308,7 +315,7 @@ class MediapipeServableMetricReporter { } } - inline std::unique_ptr& getModelReadyMetric(const ExecutionContext& context, bool success = true) { + inline std::unique_ptr& getModelReadyMetric(const ExecutionContext& context, bool success = true) override { if (context.interface == ExecutionContext::Interface::GRPC) { return success ? this->requestSuccessGrpcModelReady : this->requestFailGrpcModelReady; } else { @@ -316,6 +323,11 @@ class MediapipeServableMetricReporter { } } + inline std::unique_ptr& getGetModelStatusRequestSuccessMetric(const ExecutionContext& context) override { + static std::unique_ptr empty{nullptr}; + return empty; + } + MediapipeServableMetricReporter(const MetricConfig* metricConfig, MetricRegistry* registry, const std::string& graphName); }; diff --git a/src/model_service.cpp b/src/model_service.cpp index 658c628f76..9d1c6080e2 100644 --- a/src/model_service.cpp +++ b/src/model_service.cpp @@ -33,17 +33,16 @@ #include "tensorflow_serving/apis/model_service.pb.h" #pragma GCC diagnostic pop -#include "dags/pipelinedefinition.hpp" +#include "dags/pipelinedefinitionstatus.hpp" #include "execution_context.hpp" #include "grpc_utils.hpp" -#if (MEDIAPIPE_DISABLE == 0) -#include "mediapipe_internal/mediapipefactory.hpp" -#include "mediapipe_internal/mediapipegraphdefinition.hpp" -#endif +#include "model.hpp" #include "modelinstance.hpp" #include "modelmanager.hpp" +#include "servable_definition.hpp" #include "servablemanagermodule.hpp" #include "server.hpp" +#include "single_version_servable_definition.hpp" #include "status.hpp" using google::protobuf::util::JsonPrintOptions; @@ -110,25 +109,17 @@ Status GetModelStatusImpl::getModelStatus( std::string requested_model_name = request->model_spec().name(); auto model_ptr = manager.findModelByName(requested_model_name); if (!model_ptr) { - SPDLOG_DEBUG("GetModelStatus: Model {} is missing, trying to find pipeline with such name", requested_model_name); - auto pipelineDefinition = manager.getPipelineFactory().findDefinitionByName(requested_model_name); - if (!pipelineDefinition) { -#if (MEDIAPIPE_DISABLE == 0) - auto mediapipeGraphDefinition = manager.getMediapipeFactory().findDefinitionByName(requested_model_name); - if (!mediapipeGraphDefinition) { - return StatusCode::MODEL_NAME_MISSING; - } - addStatusToResponse(response, mediapipeGraphDefinition->getVersion(), mediapipeGraphDefinition->getStatus()); - SPDLOG_DEBUG("model_service: response: {}", response->DebugString()); - SPDLOG_DEBUG("MODEL_STATUS created a response for {} - {}", requested_model_name, requested_version); - return StatusCode::OK; -#else + SPDLOG_DEBUG("GetModelStatus: Model {} is missing, trying to find definition with such name", requested_model_name); + auto* definition = manager.findServableDefinition(requested_model_name); + if (!definition) { return StatusCode::MODEL_NAME_MISSING; -#endif } - INCREMENT_IF_ENABLED(pipelineDefinition->getMetricReporter().getGetModelStatusRequestSuccessMetric(context)); - - addStatusToResponse(response, pipelineDefinition->getVersion(), pipelineDefinition->getStatus()); + auto* svsd = dynamic_cast(definition); + if (!svsd) { + return StatusCode::MODEL_NAME_MISSING; + } + INCREMENT_IF_ENABLED(svsd->getMetricReporter().getGetModelStatusRequestSuccessMetric(context)); + addStatusToResponse(response, svsd->getVersion(), svsd->getStatus()); SPDLOG_DEBUG("model_service: response: {}", response->DebugString()); SPDLOG_DEBUG("MODEL_STATUS created a response for {} - {}", requested_model_name, requested_version); return StatusCode::OK; @@ -186,34 +177,18 @@ Status GetModelStatusImpl::getAllModelsStatuses(std::map& pipelinesNames = manager.getPipelineFactory().getPipelinesNames(); - for (auto const& pipelineName : pipelinesNames) { - std::optional noValueModelVersion; - tensorflow::serving::GetModelStatusRequest request; - GetModelStatusImpl::createGrpcRequest(pipelineName, noValueModelVersion, &request); - tensorflow::serving::GetModelStatusResponse response; - auto status = GetModelStatusImpl::getModelStatus(&request, &response, manager, context); - if (status != StatusCode::OK) { - // Same situation like with models. - continue; - } - modelsStatusesTmp.insert({pipelineName, response}); - } -#if (MEDIAPIPE_DISABLE == 0) - const std::vector& mediapipePipelineNames = manager.getMediapipeFactory().getMediapipePipelinesNames(); - for (auto const& mediapipePipelineName : mediapipePipelineNames) { + const auto servableNames = manager.getServableDefinitionNames(); + for (const auto& servableName : servableNames) { std::optional noValueModelVersion; tensorflow::serving::GetModelStatusRequest request; - GetModelStatusImpl::createGrpcRequest(mediapipePipelineName, noValueModelVersion, &request); + GetModelStatusImpl::createGrpcRequest(servableName, noValueModelVersion, &request); tensorflow::serving::GetModelStatusResponse response; auto status = GetModelStatusImpl::getModelStatus(&request, &response, manager, context); if (status != StatusCode::OK) { - // Same situation like with models. continue; } - modelsStatusesTmp.insert({mediapipePipelineName, response}); + modelsStatusesTmp.insert({servableName, response}); } -#endif modelsStatuses.merge(modelsStatusesTmp); return StatusCode::OK; diff --git a/src/modelconfig.cpp b/src/modelconfig.cpp index d17e6fa6bf..d4666e83d3 100644 --- a/src/modelconfig.cpp +++ b/src/modelconfig.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include "src/port/rapidjson_writer.hpp" #pragma warning(pop) +#include "anonymous_input_name.hpp" #include "filesystem.hpp" #include "json_parser.hpp" #include "logging.hpp" @@ -811,4 +813,31 @@ const std::string ModelConfig::getPath() const { return getLocalPath() + FileSystem::getOsSeparator() + std::to_string(version); } +bool ModelConfig::anyShapeSetToAuto() const { + for (const auto& [name, shapeInfo] : getShapes()) { + if (shapeInfo.shapeMode == AUTO) + return true; + } + return false; +} + +bool ModelConfig::isShapeAuto(const std::string& name) const { + auto it = getShapes().find(name); + if (it == getShapes().end()) { + it = getShapes().find(ANONYMOUS_INPUT_NAME); + } + if (it == getShapes().end()) { + return false; + } + return it->second.shapeMode == Mode::AUTO; +} + +bool ModelConfig::isShapeAnonymous() const { + return getShapes().size() == 1 && getShapes().begin()->first == ANONYMOUS_INPUT_NAME; +} + +bool ModelConfig::isShapeAnonymousFixed() const { + return isShapeAnonymous() && !isShapeAuto(ANONYMOUS_INPUT_NAME); +} + } // namespace ovms diff --git a/src/modelconfig.hpp b/src/modelconfig.hpp index a41463cb1d..1cbb849338 100644 --- a/src/modelconfig.hpp +++ b/src/modelconfig.hpp @@ -15,11 +15,9 @@ //***************************************************************************** #pragma once -#include #include #include #include -#include #include #include #include @@ -30,7 +28,6 @@ #include #pragma warning(pop) -#include "anonymous_input_name.hpp" #include "layout_configuration.hpp" #include "color_format_configuration.hpp" #include "precision_configuration.hpp" @@ -192,7 +189,7 @@ class ModelConfig { /** * @brief Allowed configurable layouts */ - static const std::set configAllowedLayouts; + /** * @brief custom_loader_options config as map @@ -742,13 +739,7 @@ class ModelConfig { * * @return bool */ - bool anyShapeSetToAuto() const { - for (const auto& [name, shapeInfo] : getShapes()) { - if (shapeInfo.shapeMode == AUTO) - return true; - } - return false; - } + bool anyShapeSetToAuto() const; /** * @brief Get the shapes @@ -773,24 +764,9 @@ class ModelConfig { * * @return bool */ - bool isShapeAuto(const std::string& name) const { - auto it = getShapes().find(name); - if (it == getShapes().end()) { - it = getShapes().find(ANONYMOUS_INPUT_NAME); - } - if (it == getShapes().end()) { - return false; - } - return it->second.shapeMode == Mode::AUTO; - } - - bool isShapeAnonymous() const { - return getShapes().size() == 1 && getShapes().begin()->first == ANONYMOUS_INPUT_NAME; - } - - bool isShapeAnonymousFixed() const { - return isShapeAnonymous() && !isShapeAuto(ANONYMOUS_INPUT_NAME); - } + bool isShapeAuto(const std::string& name) const; + bool isShapeAnonymous() const; + bool isShapeAnonymousFixed() const; bool isCloudStored() const { return getLocalPath() != getBasePath(); diff --git a/src/modelinstance.hpp b/src/modelinstance.hpp index a8b4067d3e..a8ce7633ec 100644 --- a/src/modelinstance.hpp +++ b/src/modelinstance.hpp @@ -30,7 +30,6 @@ #include -#include "logging.hpp" #include "model_metric_reporter.hpp" #include "modelchangesubscription.hpp" #include "modelconfig.hpp" diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index e1588e86f7..83d6d36b0c 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -34,6 +34,7 @@ #endif #include +#include #pragma warning(push) #pragma warning(disable : 6313) #include @@ -64,9 +65,11 @@ #endif #include "metric_config.hpp" #include "metric_registry.hpp" +#include "model.hpp" #include "modelinstance.hpp" // for logging #include "ov_utils.hpp" #include "schema.hpp" +#include "servable_definition.hpp" #include "stringutils.hpp" namespace ovms { @@ -79,8 +82,9 @@ const std::string DEFAULT_MODEL_CACHE_DIRECTORY = "/opt/cache"; #endif ModelManager::ModelManager(const std::string& modelCacheDirectory, MetricRegistry* registry, PythonBackend* pythonBackend) : ieCore(std::make_unique()), + pipelineFactory(std::make_unique()), #if (MEDIAPIPE_DISABLE == 0) - mediapipeFactory(pythonBackend), + mediapipeFactory(std::make_unique(pythonBackend)), #endif waitForModelLoadedTimeoutMs(DEFAULT_WAIT_FOR_MODEL_LOADED_TIMEOUT_MS), modelCacheDirectory(modelCacheDirectory), @@ -511,7 +515,7 @@ Status ModelManager::processMediapipeConfig(const MediapipeGraphConfig& config, MediapipeGraphDefinition* mediapipeGraphDefinition = factory.findDefinitionByName(config.getGraphName()); if (mediapipeGraphDefinition == nullptr) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Mediapipe graph:{} was not loaded so far. Triggering load", config.getGraphName()); - auto status = factory.createDefinition(config.getGraphName(), config, *this); + auto status = factory.createDefinition(config.getGraphName(), config, *this, *this); return status; } if (mediapipeGraphDefinition->isReloadRequired(config)) { @@ -692,7 +696,7 @@ Status ModelManager::loadCustomNodeLibrariesConfig(rapidjson::Document& configJs Status ModelManager::loadMediapipeGraphsConfig(std::vector& mediapipesInConfigFile) { if (mediapipesInConfigFile.size() == 0) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Configuration file doesn't have mediapipe property."); - mediapipeFactory.retireOtherThan({}, *this); + mediapipeFactory->retireOtherThan({}); return StatusCode::OK; } std::set mediapipesInConfigFileNames; @@ -701,13 +705,13 @@ Status ModelManager::loadMediapipeGraphsConfig(std::vector for (const auto& mediapipeGraphConfig : mediapipesInConfigFile) { mediapipesInConfigFileNames.insert(mediapipeGraphConfig.getGraphName()); } - mediapipeFactory.retireOtherThan(std::move(mediapipesInConfigFileNames), *this); + mediapipeFactory->retireOtherThan(std::move(mediapipesInConfigFileNames)); std::set mediapipesAlreadyLoaded; for (const auto& mediapipeGraphConfig : mediapipesInConfigFile) { if (spdlog::default_logger_raw()->level() <= spdlog::level::debug) { mediapipeGraphConfig.logGraphConfigContent(); } - auto status = processMediapipeConfig(mediapipeGraphConfig, mediapipesAlreadyLoaded, mediapipeFactory); + auto status = processMediapipeConfig(mediapipeGraphConfig, mediapipesAlreadyLoaded, *mediapipeFactory); if (status != StatusCode::OK) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); } @@ -725,18 +729,18 @@ Status ModelManager::loadPipelinesConfig(rapidjson::Document& configJson) { const auto itrp = configJson.FindMember("pipeline_config_list"); if (itrp == configJson.MemberEnd() || !itrp->value.IsArray()) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Configuration file doesn't have pipelines property."); - pipelineFactory.retireOtherThan({}, *this); + pipelineFactory->retireOtherThan({}, *this); return StatusCode::OK; } std::set pipelinesInConfigFile; Status firstErrorStatus = StatusCode::OK; for (const auto& pipelineConfig : itrp->value.GetArray()) { - auto status = processPipelineConfig(configJson, pipelineConfig, pipelinesInConfigFile, pipelineFactory, *this); + auto status = processPipelineConfig(configJson, pipelineConfig, pipelinesInConfigFile, *pipelineFactory, *this); if (status != StatusCode::OK) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); } } - pipelineFactory.retireOtherThan(std::move(pipelinesInConfigFile), *this); + pipelineFactory->retireOtherThan(std::move(pipelinesInConfigFile), *this); return firstErrorStatus; } @@ -909,18 +913,11 @@ Status ModelManager::loadModels(const rapidjson::Value::MemberIterator& modelsCo modelConfig.setCacheDir(this->modelCacheDirectory); const auto& modelName = modelConfig.getName(); - if (pipelineDefinitionExists(modelName)) { + if (servableExists(modelName, ServableType::Pipeline | ServableType::Mediapipe)) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(StatusCode::MODEL_NAME_OCCUPIED); - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Model name: {} is already occupied by pipeline definition.", modelName); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Model name: {} is already occupied by pipeline or mediapipe graph definition.", modelName); continue; } -#if (MEDIAPIPE_DISABLE == 0) - if (mediapipeFactory.definitionExists(modelName)) { - IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(StatusCode::MODEL_NAME_OCCUPIED); - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Model name: {} is already occupied by mediapipe graph definition.", modelName); - continue; - } -#endif if (modelsInConfigFile.find(modelName) != modelsInConfigFile.end()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(StatusCode::MODEL_NAME_OCCUPIED); SPDLOG_LOGGER_WARN(modelmanager_logger, "Duplicated model names: {} defined in config file. Only first definition will be loaded.", modelName); @@ -1180,7 +1177,7 @@ Status ModelManager::updateConfigurationWithoutConfigFile() { reloadNeeded = true; } } - status = pipelineFactory.revalidatePipelines(*this); + status = pipelineFactory->revalidatePipelines(*this); if (!status.ok()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); } @@ -1421,6 +1418,10 @@ Status ModelManager::checkStatefulFlagChange(const std::string& modelName, bool return StatusCode::OK; } +std::shared_ptr ModelManager::modelFactory(const std::string& name, const bool isStateful) { + return std::make_shared(name, isStateful, &this->globalSequencesViewer); +} + std::shared_ptr ModelManager::getModelIfExistCreateElse(const std::string& modelName, const bool isStateful) { std::unique_lock modelsLock(modelsMtx); auto modelIt = models.find(modelName); @@ -1724,7 +1725,7 @@ const std::vector ModelManager::getNamesOfAvailableModels() const { Status ModelManager::createPipeline(std::unique_ptr& graph, const std::string& name) { #if (MEDIAPIPE_DISABLE == 0) - return this->mediapipeFactory.create(graph, name, *this); + return this->mediapipeFactory->create(graph, name); #else SPDLOG_ERROR("Mediapipe support was disabled during build process..."); return StatusCode::INTERNAL_ERROR; @@ -1734,4 +1735,58 @@ Status ModelManager::createPipeline(std::unique_ptr& gra void ModelManager::setRootDirectoryPath(const std::string& configFileFullPath) { FileSystem::setRootDirectoryPath(this->rootDirectoryPath, configFileFullPath); } + +bool ModelManager::servableExists(const std::string& name, ServableType check) const { + if (hasFlag(check, ServableType::Model) && findModelByName(name) != nullptr) { + return true; + } + if (hasFlag(check, ServableType::Pipeline) && pipelineFactory->definitionExists(name)) { + return true; + } +#if (MEDIAPIPE_DISABLE == 0) + if (hasFlag(check, ServableType::Mediapipe) && mediapipeFactory->definitionExists(name)) { + return true; + } +#endif + return false; +} + +const PipelineFactory& ModelManager::getPipelineFactory() const { + return *pipelineFactory; +} + +// Returns raw pointer - safe because definitions (Model, PipelineDefinition, +// MediapipeGraphDefinition) are never removed from their maps during server +// lifetime. They only transition to RETIRED state. This matches the existing +// contract of PipelineFactory::findDefinitionByName and +// MediapipeFactory::findDefinitionByName which also return raw pointers. +ServableDefinition* ModelManager::findServableDefinition(const std::string& name) const { + auto model = findModelByName(name); + if (model) { + return model.get(); + } + auto* pipelineDefinition = pipelineFactory->findDefinitionByName(name); + if (pipelineDefinition) { + return pipelineDefinition; + } +#if (MEDIAPIPE_DISABLE == 0) + auto* mediapipeDefinition = mediapipeFactory->findDefinitionByName(name); + if (mediapipeDefinition) { + return mediapipeDefinition; + } +#endif + return nullptr; +} + +std::vector ModelManager::getServableDefinitionNames() const { + std::vector names; + auto pipelineNames = pipelineFactory->getPipelinesNames(); + names.insert(names.end(), pipelineNames.begin(), pipelineNames.end()); +#if (MEDIAPIPE_DISABLE == 0) + auto mediapipeNames = mediapipeFactory->getMediapipePipelinesNames(); + names.insert(names.end(), mediapipeNames.begin(), mediapipeNames.end()); +#endif + return names; +} + } // namespace ovms diff --git a/src/modelmanager.hpp b/src/modelmanager.hpp index f7e79c76ed..2389073e76 100644 --- a/src/modelmanager.hpp +++ b/src/modelmanager.hpp @@ -26,23 +26,22 @@ #include #include -#include #pragma warning(push) #pragma warning(disable : 6313) #include #pragma warning(pop) -#include -#include -#include "dags/pipeline_factory.hpp" #include "global_sequences_viewer.hpp" -#if (MEDIAPIPE_DISABLE == 0) -#include "mediapipe_internal/mediapipefactory.hpp" -#endif #include "metric_config.hpp" -#include "model.hpp" +#include "metric_provider.hpp" +#include "modelconfig.hpp" +#include "servable_name_checker.hpp" #include "status.hpp" +namespace ov { +class Core; +} // namespace ov + namespace ovms { const uint32_t DEFAULT_WAIT_FOR_MODEL_LOADED_TIMEOUT_MS = 10000; @@ -54,16 +53,24 @@ struct ModelsSettingsImpl; class CustomLoaderConfig; class CustomNodeLibraryManager; class MetricRegistry; +class Model; class ModelConfig; class FileSystem; +class MediapipeFactory; +class MediapipeGraphConfig; class MediapipeGraphExecutor; +class ModelInstance; +class ServableDefinition; +class ModelInstanceUnloadGuard; +class Pipeline; +class PipelineFactory; struct FunctorSequenceCleaner; struct FunctorResourcesCleaner; class PythonBackend; /** * @brief Model manager is managing the list of model topologies enabled for serving and their versions. */ -class ModelManager { +class ModelManager : public ServableNameChecker, public MetricProvider { public: /** * @brief A default constructor is private @@ -84,9 +91,9 @@ class ModelManager { std::map> models; std::unique_ptr ieCore; - PipelineFactory pipelineFactory; + std::unique_ptr pipelineFactory; #if (MEDIAPIPE_DISABLE == 0) - MediapipeFactory mediapipeFactory; + std::unique_ptr mediapipeFactory; #endif std::unique_ptr customNodeLibraryManager; std::vector> resources = {}; @@ -320,13 +327,11 @@ class ModelManager { */ void startCleaner(); - const PipelineFactory& getPipelineFactory() const { - return pipelineFactory; - } + const PipelineFactory& getPipelineFactory() const; #if (MEDIAPIPE_DISABLE == 0) const MediapipeFactory& getMediapipeFactory() const { - return mediapipeFactory; + return *mediapipeFactory; } #endif @@ -363,20 +368,9 @@ class ModelManager { */ const std::shared_ptr findModelInstance(const std::string& name, model_version_t version = 0) const; - template - Status createPipeline(std::unique_ptr& pipeline, - const std::string& name, - const RequestType* request, - ResponseType* response) { - return pipelineFactory.create(pipeline, name, request, response, *this); - } Status createPipeline(std::unique_ptr& graph, const std::string& name); - const bool pipelineDefinitionExists(const std::string& name) const { - return pipelineFactory.definitionExists(name); - } - /** * @brief Starts model manager using provided config file * @@ -397,7 +391,7 @@ class ModelManager { * * @return const std::string& */ - const MetricConfig& getMetricConfig() const { + const MetricConfig& getMetricConfig() const override { return this->metricConfig; } @@ -445,9 +439,7 @@ class ModelManager { * * @return std::shared_ptr */ - virtual std::shared_ptr modelFactory(const std::string& name, const bool isStateful) { - return std::make_shared(name, isStateful, &this->globalSequencesViewer); - } + virtual std::shared_ptr modelFactory(const std::string& name, const bool isStateful); /** * @brief Reads available versions from given filesystem @@ -504,7 +496,13 @@ class ModelManager { */ void cleanupResources(); - MetricRegistry* getMetricRegistry() const { return this->metricRegistry; } + bool servableExists(const std::string& name, ServableType check = ServableType::All) const override; + + ServableDefinition* findServableDefinition(const std::string& name) const; + + std::vector getServableDefinitionNames() const; + + MetricRegistry* getMetricRegistry() const override { return this->metricRegistry; } }; void cleanerRoutine(uint32_t resourcesCleanupInterval, FunctorResourcesCleaner& functorResourcesCleaner, uint32_t sequenceCleanerInterval, FunctorSequenceCleaner& functorSequenceCleaner, std::future& cleanerExitSignal); diff --git a/src/modelversionstatus.hpp b/src/modelversionstatus.hpp index e46fe4be96..87dded0350 100644 --- a/src/modelversionstatus.hpp +++ b/src/modelversionstatus.hpp @@ -19,8 +19,9 @@ #include #include +#include + #include "modelversion.hpp" -#include "logging.hpp" // note: think about using https://github.com/Neargye/magic_enum when compatible compiler is supported. diff --git a/src/precision_configuration.cpp b/src/precision_configuration.cpp index 58b0dbcd9d..83365f9130 100644 --- a/src/precision_configuration.cpp +++ b/src/precision_configuration.cpp @@ -18,6 +18,8 @@ #include #include +#include "logging.hpp" + namespace ovms { const char PrecisionConfiguration::PRECISION_DELIMITER = ':'; diff --git a/src/predict_request_validation_utils_impl.hpp b/src/predict_request_validation_utils_impl.hpp index 57707ffc7c..14eff89771 100644 --- a/src/predict_request_validation_utils_impl.hpp +++ b/src/predict_request_validation_utils_impl.hpp @@ -22,7 +22,6 @@ #include #include -#include "logging.hpp" #include "shape.hpp" #include "anonymous_input_name.hpp" #include "status.hpp" diff --git a/src/prediction_service.cpp b/src/prediction_service.cpp index 74e23583d1..e211ab1ccc 100644 --- a/src/prediction_service.cpp +++ b/src/prediction_service.cpp @@ -33,6 +33,7 @@ #include "tfs_frontend/tfs_request_utils.hpp" #include "dags/pipeline.hpp" +#include "dags/pipeline_factory.hpp" #include "execution_context.hpp" #include "get_model_metadata_impl.hpp" #include "grpc_utils.hpp" @@ -88,7 +89,7 @@ Status PredictionServiceImpl::getPipeline(const PredictRequest* request, PredictResponse* response, std::unique_ptr& pipelinePtr) { OVMS_PROFILE_FUNCTION(); - return this->modelManager.createPipeline(pipelinePtr, request->model_spec().name(), request, response); + return this->modelManager.getPipelineFactory().create(pipelinePtr, request->model_spec().name(), request, response, this->modelManager); } grpc::Status ovms::PredictionServiceImpl::Predict( diff --git a/src/prediction_service_utils.hpp b/src/prediction_service_utils.hpp index 788141d0a4..21769d08c1 100644 --- a/src/prediction_service_utils.hpp +++ b/src/prediction_service_utils.hpp @@ -27,7 +27,6 @@ #include "kfs_frontend/kfs_grpc_inference_service.hpp" #include "extractchoice.hpp" #include "requesttensorextractor.hpp" -#include "logging.hpp" #include "shape.hpp" #include "status.hpp" diff --git a/src/profilermodule.hpp b/src/profilermodule.hpp index 9927b75ab8..de4eaa86a8 100644 --- a/src/profilermodule.hpp +++ b/src/profilermodule.hpp @@ -17,7 +17,6 @@ #include #include -#include "logging.hpp" #include "module.hpp" namespace ovms { diff --git a/src/python/BUILD b/src/python/BUILD index f4fd4c571e..7c0742efdf 100644 --- a/src/python/BUILD +++ b/src/python/BUILD @@ -41,7 +41,7 @@ mediapipe_proto_library( ovms_cc_library( name = "utils", hdrs = ["utils.hpp",], - srcs = [], + srcs = [], deps = PYBIND_DEPS + [ "//src:libovmslogging", ], @@ -98,7 +98,8 @@ ovms_cc_library( ovms_cc_library( name = "pythonexecutorcalculator", - srcs = ["python_executor_calculator.cc",], + srcs = ["python_executor_calculator.cc", + "python_node_initializer.cpp",], deps = PYBIND_DEPS + [ "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", @@ -106,6 +107,8 @@ ovms_cc_library( "pythonexecutorcalculator_cc_proto", "pythonbackend", "pythonnoderesources", + "//src:node_initializer", + "//src:libovmslogging", ], visibility = ["//visibility:private"], alwayslink = 1, diff --git a/src/python/python_node_initializer.cpp b/src/python/python_node_initializer.cpp new file mode 100644 index 0000000000..462d3586d1 --- /dev/null +++ b/src/python/python_node_initializer.cpp @@ -0,0 +1,70 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include "../mediapipe_internal/graph_side_packets.hpp" +#include "../mediapipe_internal/node_initializer.hpp" +#include "pythonnoderesources.hpp" +#include "mediapipe/framework/calculator.pb.h" + +#include "../logging.hpp" + +namespace ovms { +class PythonNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "PythonExecutorCalculator"; + +public: + bool matches(const std::string& calculatorName) const override { + return calculatorName == CALCULATOR_NAME; + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* pythonBackend) override { + auto& pythonNodeResourcesMap = sidePackets.pythonNodeResourcesMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("Python node missing options in graph: {}. ", graphName); + return StatusCode::PYTHON_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("Python node name is missing in graph: {}. ", graphName); + return StatusCode::PYTHON_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (pythonNodeResourcesMap.find(nodeName) != pythonNodeResourcesMap.end()) { + SPDLOG_ERROR("Python node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::PYTHON_NODE_NAME_ALREADY_EXISTS; + } + std::shared_ptr nodeResources = nullptr; + Status status = PythonNodeResources::createPythonNodeResources(nodeResources, nodeConfig, pythonBackend, basePath); + if (nodeResources == nullptr || !status.ok()) { + SPDLOG_ERROR("Failed to process python node graph {}", graphName); + return status; + } + pythonNodeResourcesMap.insert(std::pair>(nodeName, std::move(nodeResources))); + return StatusCode::OK; + } +}; + +static bool pythonNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/python/utils.hpp b/src/python/utils.hpp index 13708be639..6e8b4f511d 100644 --- a/src/python/utils.hpp +++ b/src/python/utils.hpp @@ -18,6 +18,9 @@ #include #include + +#include "src/logging.hpp" + #pragma warning(push) #pragma warning(disable : 6326 28182 6011 28020) #include @@ -25,7 +28,6 @@ #include #pragma warning(pop) -#include "../logging.hpp" namespace py = pybind11; using namespace py::literals; diff --git a/src/rerank/BUILD b/src/rerank/BUILD index 7f3b1a6ec9..ae9668faeb 100644 --- a/src/rerank/BUILD +++ b/src/rerank/BUILD @@ -30,7 +30,8 @@ mediapipe_proto_library( ovms_cc_library( name = "rerank_servable", hdrs = ["rerank_servable.hpp"], - deps = ["//src:sidepacket_servable",], + deps = ["//src:sidepacket_servable", + "//src/port:rapidjson_document",], visibility = ["//visibility:public"], alwayslink = 1, ) @@ -65,7 +66,8 @@ ovms_cc_library( ovms_cc_library( name = "rerankcalculator_ov", - srcs = ["rerank_calculator_ov.cc"], + srcs = ["rerank_calculator_ov.cc", + "rerank_node_initializer.cpp"], deps = [ "@mediapipe//mediapipe/framework:calculator_framework", "@com_github_tencent_rapidjson//:rapidjson", @@ -81,6 +83,8 @@ ovms_cc_library( "//src:executingstreamidguard", "//src:libovms_execution_context", "//src/tokenize:tokenize_parser", + "//src:node_initializer", + "//src:libovmsstring_utils", ], visibility = ["//visibility:public"], alwayslink = 1, diff --git a/src/rerank/rerank_node_initializer.cpp b/src/rerank/rerank_node_initializer.cpp new file mode 100644 index 0000000000..fd2dade793 --- /dev/null +++ b/src/rerank/rerank_node_initializer.cpp @@ -0,0 +1,70 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include "../mediapipe_internal/graph_side_packets.hpp" +#include "../mediapipe_internal/node_initializer.hpp" +#include "../stringutils.hpp" +#include "rerank_servable.hpp" +#include "mediapipe/framework/calculator.pb.h" +#include "src/rerank/rerank_calculator_ov.pb.h" + +#include "../logging.hpp" + +namespace ovms { +class RerankNodeInitializer : public NodeInitializer { + static constexpr const char* CALCULATOR_NAME = "RerankCalculatorOV"; + +public: + bool matches(const std::string& calculatorName) const override { + return endsWith(calculatorName, CALCULATOR_NAME); + } + Status initialize( + const ::mediapipe::CalculatorGraphConfig_Node& nodeConfig, + const std::string& graphName, + const std::string& basePath, + GraphSidePackets& sidePackets, + PythonBackend* /*pythonBackend*/) override { + auto& rerankServableMap = sidePackets.rerankServableMap; + if (!nodeConfig.node_options().size()) { + SPDLOG_ERROR("Rerank node missing options in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_OPTIONS; + } + if (nodeConfig.name().empty()) { + SPDLOG_ERROR("Rerank node name is missing in graph: {}. ", graphName); + return StatusCode::LLM_NODE_MISSING_NAME; + } + std::string nodeName = nodeConfig.name(); + if (rerankServableMap.find(nodeName) != rerankServableMap.end()) { + SPDLOG_ERROR("Rerank node name: {} already used in graph: {}. ", nodeName, graphName); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } + mediapipe::RerankCalculatorOVOptions nodeOptions; + nodeConfig.node_options(0).UnpackTo(&nodeOptions); + auto servable = std::make_shared(nodeOptions.models_path(), nodeOptions.target_device(), nodeOptions.plugin_config(), basePath); + servable->initialize(nodeOptions.models_path(), nodeOptions.target_device(), nodeOptions.plugin_config(), basePath); + rerankServableMap.insert(std::pair>(nodeName, std::move(servable))); + return StatusCode::OK; + } +}; + +static bool rerankNodeInitializerRegistered = []() { + NodeInitializerRegistry::instance().add(std::make_unique()); + return true; +}(); +} // namespace ovms diff --git a/src/rerank/rerank_servable.hpp b/src/rerank/rerank_servable.hpp index 15e23983c4..7a626744eb 100644 --- a/src/rerank/rerank_servable.hpp +++ b/src/rerank/rerank_servable.hpp @@ -17,6 +17,7 @@ #include "../sidepacket_servable.hpp" #include "../filesystem.hpp" +#include #include #include #include diff --git a/src/servable.hpp b/src/servable.hpp index 7af7f1c9af..6c6e5858d2 100644 --- a/src/servable.hpp +++ b/src/servable.hpp @@ -18,7 +18,6 @@ #include #include "modelversion.hpp" -#include "tensorinfo.hpp" namespace ovms { class Servable { diff --git a/src/servable_definition.hpp b/src/servable_definition.hpp new file mode 100644 index 0000000000..8586bae346 --- /dev/null +++ b/src/servable_definition.hpp @@ -0,0 +1,29 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include + +namespace ovms { + +class ServableDefinition { +public: + virtual ~ServableDefinition() = default; + virtual const std::string& getName() const = 0; + virtual bool isAvailable() const = 0; +}; + +} // namespace ovms diff --git a/src/dags/pipelinedefinitionunloadguard.cpp b/src/servable_definition_unload_guard.cpp similarity index 63% rename from src/dags/pipelinedefinitionunloadguard.cpp rename to src/servable_definition_unload_guard.cpp index fc4c6964fd..5911c2f01f 100644 --- a/src/dags/pipelinedefinitionunloadguard.cpp +++ b/src/servable_definition_unload_guard.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright 2020 Intel Corporation +// Copyright 2026 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,17 +13,19 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** -#include "pipelinedefinitionunloadguard.hpp" +#include "servable_definition_unload_guard.hpp" -#include "pipelinedefinition.hpp" +#include "single_version_servable_definition.hpp" namespace ovms { -PipelineDefinitionUnloadGuard::PipelineDefinitionUnloadGuard(PipelineDefinition& pipelineDefinition) : - pipelineDefinition(pipelineDefinition) { - pipelineDefinition.increaseRequestsHandlesCount(); + +ServableDefinitionUnloadGuard::ServableDefinitionUnloadGuard(SingleVersionServableDefinition& definition) : + definition(definition) { + definition.increaseRequestsHandlesCount(); } -PipelineDefinitionUnloadGuard::~PipelineDefinitionUnloadGuard() { - pipelineDefinition.decreaseRequestsHandlesCount(); +ServableDefinitionUnloadGuard::~ServableDefinitionUnloadGuard() { + definition.decreaseRequestsHandlesCount(); } + } // namespace ovms diff --git a/src/servable_definition_unload_guard.hpp b/src/servable_definition_unload_guard.hpp new file mode 100644 index 0000000000..2caf00ebf4 --- /dev/null +++ b/src/servable_definition_unload_guard.hpp @@ -0,0 +1,32 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +namespace ovms { + +class SingleVersionServableDefinition; + +class ServableDefinitionUnloadGuard { +public: + ServableDefinitionUnloadGuard() = delete; + ServableDefinitionUnloadGuard(SingleVersionServableDefinition& definition); + ~ServableDefinitionUnloadGuard(); + +private: + SingleVersionServableDefinition& definition; +}; + +} // namespace ovms diff --git a/src/servable_name_checker.hpp b/src/servable_name_checker.hpp new file mode 100644 index 0000000000..373dd940c2 --- /dev/null +++ b/src/servable_name_checker.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include + +namespace ovms { + +enum class ServableType : uint8_t { + Model = 1 << 0, + Pipeline = 1 << 1, + Mediapipe = 1 << 2, + All = Model | Pipeline | Mediapipe +}; + +inline ServableType operator|(ServableType a, ServableType b) { + using T = std::underlying_type_t; + return static_cast(static_cast(a) | static_cast(b)); +} + +inline bool hasFlag(ServableType value, ServableType flag) { + using T = std::underlying_type_t; + return (static_cast(value) & static_cast(flag)) != 0; +} + +class ServableNameChecker { +public: + virtual ~ServableNameChecker() = default; + virtual bool servableExists(const std::string& name, ServableType check = ServableType::All) const = 0; +}; + +} // namespace ovms diff --git a/src/single_version_servable_definition.cpp b/src/single_version_servable_definition.cpp new file mode 100644 index 0000000000..d1b8109402 --- /dev/null +++ b/src/single_version_servable_definition.cpp @@ -0,0 +1,74 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "single_version_servable_definition.hpp" + +#include +#include + +#include + +#include "dags/pipelinedefinitionstatus.hpp" +#include "servable_definition_unload_guard.hpp" + +namespace ovms { + +Status SingleVersionServableDefinition::waitForLoaded(std::unique_ptr& guard, const uint32_t waitForLoadedTimeoutMicroseconds) { + guard = std::make_unique(*this); + + const uint32_t waitLoadedTimestepMicroseconds = 1000; + const uint32_t waitCheckpoints = waitForLoadedTimeoutMicroseconds / waitLoadedTimestepMicroseconds; + uint32_t waitCheckpointsCounter = waitCheckpoints; + std::mutex cvMtx; + std::unique_lock cvLock(cvMtx); + while (waitCheckpointsCounter-- != 0) { + if (getStatus().isAvailable()) { + SPDLOG_DEBUG("Successfully waited for definition: {}", getName()); + return StatusCode::OK; + } + guard.reset(); + if (!getStatus().canEndLoaded()) { + if (getStatus().getStateCode() != PipelineDefinitionStateCode::RETIRED) { + SPDLOG_DEBUG("Waiting for definition: {} ended due to timeout.", getName()); + return notLoadedYetCode(); + } else { + SPDLOG_DEBUG("Waiting for definition: {} ended since it failed to load.", getName()); + return notLoadedAnymoreCode(); + } + } + SPDLOG_DEBUG("Waiting for available state for definition: {}, with timestep: {}us timeout: {}us check count: {}", + getName(), waitLoadedTimestepMicroseconds, waitForLoadedTimeoutMicroseconds, waitCheckpointsCounter); + loadedNotify.wait_for(cvLock, + std::chrono::microseconds(waitLoadedTimestepMicroseconds), + [this]() { + return this->getStatus().isAvailable() || + !this->getStatus().canEndLoaded(); + }); + guard = std::make_unique(*this); + } + if (!getStatus().isAvailable()) { + if (getStatus().getStateCode() != PipelineDefinitionStateCode::RETIRED) { + SPDLOG_DEBUG("Waiting for definition: {} ended due to timeout.", getName()); + return notLoadedYetCode(); + } else { + SPDLOG_DEBUG("Waiting for definition: {} ended since it failed to load.", getName()); + return notLoadedAnymoreCode(); + } + } + SPDLOG_DEBUG("Successfully waited for definition: {}", getName()); + return StatusCode::OK; +} + +} // namespace ovms diff --git a/src/single_version_servable_definition.hpp b/src/single_version_servable_definition.hpp new file mode 100644 index 0000000000..ff2765b4f3 --- /dev/null +++ b/src/single_version_servable_definition.hpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include +#include + +#include "modelversion.hpp" +#include "servable.hpp" +#include "servable_definition.hpp" +#include "status.hpp" +#include "tensorinfo_fwd.hpp" + +namespace ovms { + +class PipelineDefinitionStatus; +class ServableDefinitionUnloadGuard; +class StatusMetricReporter; +enum class PipelineDefinitionStateCode; + +class SingleVersionServableDefinition : public ServableDefinition, public Servable { + friend class ServableDefinitionUnloadGuard; + +public: + static constexpr model_version_t VERSION = 1; + static constexpr uint64_t WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS = 500000; + + SingleVersionServableDefinition(const std::string& name) : + Servable(name, VERSION) {} + + const std::string& getName() const override { return Servable::getName(); } + model_version_t getVersion() const override { return Servable::getVersion(); } + + virtual const PipelineDefinitionStatus& getStatus() const = 0; + virtual const tensor_map_t getInputsInfo() const = 0; + virtual const tensor_map_t getOutputsInfo() const = 0; + virtual StatusMetricReporter& getMetricReporter() const = 0; + Status waitForLoaded(std::unique_ptr& guard, + uint32_t waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS); + +protected: + std::atomic requestsHandlesCounter = 0; + std::condition_variable loadedNotify; + + void increaseRequestsHandlesCount() { ++requestsHandlesCounter; } + void decreaseRequestsHandlesCount() { --requestsHandlesCounter; } + + virtual StatusCode notLoadedYetCode() const = 0; + virtual StatusCode notLoadedAnymoreCode() const = 0; +}; + +} // namespace ovms diff --git a/src/status.hpp b/src/status.hpp index 18a2b093b5..b6447a9066 100644 --- a/src/status.hpp +++ b/src/status.hpp @@ -21,7 +21,7 @@ #include #include -#include "logging.hpp" +#include namespace ovms { diff --git a/src/tensor_conversion.hpp b/src/tensor_conversion.hpp index 4b23a734b1..c5eb537062 100644 --- a/src/tensor_conversion.hpp +++ b/src/tensor_conversion.hpp @@ -21,6 +21,7 @@ #include #include "deps/opencv.hpp" +#include "logging.hpp" #include "precision.hpp" #include "predict_request_validation_utils_impl.hpp" #include "profiler.hpp" diff --git a/src/tensor_conversion_common.cpp b/src/tensor_conversion_common.cpp index ad073b1740..328705baa4 100644 --- a/src/tensor_conversion_common.cpp +++ b/src/tensor_conversion_common.cpp @@ -20,6 +20,7 @@ #include #include "deps/opencv.hpp" +#include "logging.hpp" #include "precision.hpp" #include "predict_request_validation_utils_impl.hpp" #include "profiler.hpp" diff --git a/src/tensorinfo_fwd.hpp b/src/tensorinfo_fwd.hpp new file mode 100644 index 0000000000..6d261eb845 --- /dev/null +++ b/src/tensorinfo_fwd.hpp @@ -0,0 +1,27 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include +#include + +namespace ovms { + +class TensorInfo; +using tensor_map_t = std::map>; + +} // namespace ovms diff --git a/src/test/embeddingsnode_test.cpp b/src/test/embeddingsnode_test.cpp index 92a852a502..6443bba525 100644 --- a/src/test/embeddingsnode_test.cpp +++ b/src/test/embeddingsnode_test.cpp @@ -19,6 +19,7 @@ #include #include "../http_rest_api_handler.hpp" +#include "../mediapipe_internal/mediapipefactory.hpp" #include "../servablemanagermodule.hpp" #include "../server.hpp" #include "rapidjson/document.h" diff --git a/src/test/ensemble_flow_custom_node_tests.cpp b/src/test/ensemble_flow_custom_node_tests.cpp index 01a81d827e..b95241c066 100644 --- a/src/test/ensemble_flow_custom_node_tests.cpp +++ b/src/test/ensemble_flow_custom_node_tests.cpp @@ -45,9 +45,11 @@ #include "../dags/node_library_utils.hpp" #include "../dags/nodestreamidguard.hpp" #include "../dags/pipeline.hpp" +#include "../dags/pipeline_factory.hpp" #include "../dags/pipelinedefinition.hpp" #include "../execution_context.hpp" #include "../metric_registry.hpp" +#include "../model.hpp" #include "../model_metric_reporter.hpp" #include "../modelinstance.hpp" #include "../modelinstanceunloadguard.hpp" @@ -1520,7 +1522,7 @@ TEST_F(EnsembleFlowCustomNodeLoadConfigThenExecuteTest, AddSubCustomNode) { std::unique_ptr pipeline; this->prepareRequest(inputValues); this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); } @@ -1573,17 +1575,17 @@ TEST_F(EnsembleFlowCustomNodeLoadConfigThenExecuteTest, ReferenceMissingLibraryT // Loading correct configuration is required for test to pass. // This is due to fact that when OVMS loads pipeline definition for the first time and fails, its status is RETIRED. this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); response.Clear(); this->loadConfiguration(pipelineCustomNodeReferenceMissingLibraryConfig, StatusCode::PIPELINE_DEFINITION_INVALID_NODE_LIBRARY); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET); response.Clear(); this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); } @@ -1634,18 +1636,18 @@ TEST_F(EnsembleFlowCustomNodeLoadConfigThenExecuteTest, ReferenceLibraryWithExec // Loading correct configuration is required for test to pass. // This is due to fact that when OVMS loads pipeline definition for the first time and fails, its status is RETIRED. this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); response.Clear(); this->loadConfiguration(pipelineCustomNodeReferenceLibraryWithExecutionErrorMissingParamsLibraryConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::NODE_LIBRARY_EXECUTION_FAILED); response.Clear(); this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); } @@ -1697,18 +1699,18 @@ TEST_F(EnsembleFlowCustomNodeLoadConfigThenExecuteTest, MissingRequiredNodeParam // Loading correct configuration is required for test to pass. // This is due to fact that when OVMS loads pipeline definition for the first time and fails, its status is RETIRED. this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); response.Clear(); this->loadConfiguration(pipelineCustomNodeMissingParametersConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::NODE_LIBRARY_EXECUTION_FAILED); response.Clear(); this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); } @@ -1761,17 +1763,17 @@ TEST_F(EnsembleFlowCustomNodeLoadConfigThenExecuteTest, ReferenceLibraryWithRest // Loading correct configuration is required for test to pass. // This is due to fact that when OVMS loads pipeline definition for the first time and fails, its status is RETIRED. this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); response.Clear(); this->loadConfiguration(pipelineCustomNodeLibraryNotEscapedPathConfig, StatusCode::PIPELINE_DEFINITION_INVALID_NODE_LIBRARY); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET); response.Clear(); this->loadCorrectConfiguration(); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); this->checkResponseForCorrectConfiguration(); } @@ -1920,7 +1922,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, JustDiff this->prepareRequest(request, input, differentOpsInputName); this->prepareRequest(request, factors, differentOpsFactorsName); this->loadConfiguration(pipelineCustomNodeDifferentOperationsConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(4 * DUMMY_MODEL_OUTPUT_SIZE); @@ -2011,7 +2013,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, Differen this->prepareRequest(request, input, differentOpsInputName); this->prepareRequest(request, factors, differentOpsFactorsName); this->loadConfiguration(pipelineCustomNodeDifferentOperationsThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(4 * DUMMY_MODEL_OUTPUT_SIZE); prepareDifferentOpsExpectedOutput(expectedOutput, input, factors); @@ -2093,7 +2095,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, Differen this->prepareRequest(request, input, differentOpsInputName); this->prepareRequest(request, factors, differentOpsFactorsName); this->loadConfiguration(pipelineCustomNodeDifferentOperations2OutputsConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(4 * DUMMY_MODEL_OUTPUT_SIZE); @@ -2207,7 +2209,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, Differen this->prepareRequest(request, input, differentOpsInputName); this->prepareRequest(request, factors, differentOpsFactorsName); this->loadConfiguration(pipelineCustomNodeDifferentOperationsThenDummyThenChooseMaximumConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(4 * DUMMY_MODEL_OUTPUT_SIZE); @@ -2322,7 +2324,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, Differen this->prepareRequest(request, input, differentOpsInputName); this->prepareRequest(request, factors, differentOpsFactorsName); this->loadConfiguration(pipelineCustomNodeDifferentOperationsThenDummyThenChooseMaximumThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(4 * DUMMY_MODEL_OUTPUT_SIZE); @@ -2415,7 +2417,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, Demultip this->prepareRequest(request, input, differentOpsInputName, {4, 1, 10}); this->loadConfiguration(demultiplyThenDummyThenChooseMaximumConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); auto status = pipeline->execute(DEFAULT_TEST_CONTEXT); ASSERT_EQ(status, StatusCode::OK) << status.string(); @@ -4474,7 +4476,7 @@ TEST_F(EnsembleFlowCustomNodeAndDynamicDemultiplexerLoadConfigThenExecuteTest, J std::vector input{static_cast(dynamicDemultiplyCount), 1, 2, 3, 4, 5, 6, 7, 8, 9}; this->prepareRequest(request, input, differentOpsInputName); this->loadConfiguration(pipelineCustomNodeDynamicDemultiplexThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(dynamicDemultiplyCount * DUMMY_MODEL_OUTPUT_SIZE); @@ -4667,7 +4669,7 @@ TEST_F(EnsembleFlowCustomNodeAndDynamicDemultiplexerLoadConfigThenExecuteTest, D std::iota(input.begin(), input.end(), 42); this->prepareRequest(request, input, differentOpsInputName, {dynamicDemultiplyCount, 1, 10}); this->loadConfiguration(pipelineEntryNodeDynamicDemultiplexThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput = input; @@ -4741,7 +4743,7 @@ TEST_F(EnsembleFlowCustomNodeAndDynamicDemultiplexerLoadConfigThenExecuteTest, D std::iota(input.begin(), input.end(), 42); this->prepareRequest(request, input, pipelineInputName, {3, 5, DUMMY_MODEL_INPUT_SIZE}); this->loadConfiguration(pipelineEntryNodeDemultiplexThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput = input; @@ -4774,7 +4776,7 @@ TEST_F(EnsembleFlowCustomNodeAndDynamicDemultiplexerLoadConfigThenExecuteTest, D std::vector input{static_cast(dynamicDemultiplyCount), 1, 2, 3, 4, 5, 6, 7, 8, 9}; this->prepareRequest(request, input, differentOpsInputName); this->loadConfiguration(pipelineCustomNodeDynamicDemultiplexThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); auto status = pipeline->execute(DEFAULT_TEST_CONTEXT); ASSERT_EQ(status, StatusCode::PIPELINE_TOO_LARGE_DIMENSION_SIZE_TO_DEMULTIPLY) << status.string(); } @@ -4870,7 +4872,7 @@ TEST_F(EnsembleFlowCustomNodeAndDemultiplexerLoadConfigThenExecuteTest, Differen this->prepareRequest(request, input, differentOpsInputName); this->prepareRequest(request, factors, differentOpsFactorsName); this->loadConfiguration(pipelineCustomNodeDifferentOperationsThenDummyThenChooseMaximumNotInOrderConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(4 * DUMMY_MODEL_OUTPUT_SIZE); @@ -4887,7 +4889,7 @@ TEST_F(EnsembleFlowCustomNodeAndDynamicDemultiplexerLoadConfigThenExecuteTest, D std::vector input{static_cast(dynamicDemultiplyCount), 1, 2, 3, 4, 5, 6, 7, 8, 9}; this->prepareRequest(request, input, differentOpsInputName); this->loadConfiguration(pipelineCustomNodeDynamicDemultiplexThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::PIPELINE_DEMULTIPLEXER_NO_RESULTS); } @@ -4897,7 +4899,7 @@ TEST_F(EnsembleFlowCustomNodeAndDynamicDemultiplexerLoadConfigThenExecuteTest, D std::vector input{static_cast(dynamicDemultiplyCount), 1, 2, 3, 4, 5, 6, 7, 8, 9}; this->prepareRequest(request, input, differentOpsInputName); this->loadConfiguration(pipelineCustomNodeDynamicDemultiplexThenDummyConfig); - ASSERT_EQ(manager.createPipeline(pipeline, pipelineName, &request, &response), StatusCode::OK); + ASSERT_EQ(manager.getPipelineFactory().create(pipeline, pipelineName, &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); std::vector expectedOutput(dynamicDemultiplyCount * DUMMY_MODEL_OUTPUT_SIZE); diff --git a/src/test/ensemble_mapping_config_tests.cpp b/src/test/ensemble_mapping_config_tests.cpp index 23e6a84a4c..5c34f11f0c 100644 --- a/src/test/ensemble_mapping_config_tests.cpp +++ b/src/test/ensemble_mapping_config_tests.cpp @@ -26,6 +26,7 @@ #include "../execution_context.hpp" #include "../get_model_metadata_impl.hpp" #include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../model.hpp" #include "../model_metric_reporter.hpp" #include "../modelconfig.hpp" #include "../modelmanager.hpp" diff --git a/src/test/ensemble_tests.cpp b/src/test/ensemble_tests.cpp index 1eb83595d4..c0d4794b8c 100644 --- a/src/test/ensemble_tests.cpp +++ b/src/test/ensemble_tests.cpp @@ -39,7 +39,9 @@ #include "../inference_executor.hpp" #include "../localfilesystem.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/mediapipefactory.hpp" #include "../metric_registry.hpp" +#include "../model.hpp" #include "../model_metric_reporter.hpp" #include "../modelconfig.hpp" #include "../modelinstance.hpp" @@ -225,10 +227,11 @@ class EnsembleFlowTest : public TestWithTempDir { ConstructorEnabledModelManager managerWithDummyModel; managerWithDummyModel.loadConfig(fileToReload); std::unique_ptr pipeline; - auto status = managerWithDummyModel.createPipeline(pipeline, + auto status = managerWithDummyModel.getPipelineFactory().create(pipeline, "pipeline1Dummy", &request, - &response); + &response, + managerWithDummyModel); ASSERT_EQ(status, ovms::StatusCode::PIPELINE_DEFINITION_NAME_MISSING) << status.string(); } @@ -2886,10 +2889,11 @@ TEST_F(EnsembleFlowTest, PipelineFactoryCreationWithInputOutputsMappings) { ConstructorEnabledModelManager managerWithDummyModel; managerWithDummyModel.loadConfig(fileToReload); std::unique_ptr pipeline; - auto status = managerWithDummyModel.createPipeline(pipeline, + auto status = managerWithDummyModel.getPipelineFactory().create(pipeline, "pipeline1Dummy", &request, - &response); + &response, + managerWithDummyModel); ASSERT_EQ(status, ovms::StatusCode::OK) << status.string(); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); const int dummySeriallyConnectedCount = 1; @@ -2959,10 +2963,11 @@ TEST_F(EnsembleFlowTest, PipelineFactoryCreationWithInputOutputsMappings2Paralle ConstructorEnabledModelManager managerWithDummyModel; managerWithDummyModel.loadConfig(fileToReload); std::unique_ptr pipeline; - auto status = managerWithDummyModel.createPipeline(pipeline, + auto status = managerWithDummyModel.getPipelineFactory().create(pipeline, "pipeline1Dummy", &request, - &response); + &response, + managerWithDummyModel); ASSERT_EQ(status, ovms::StatusCode::OK) << status.string(); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); ASSERT_EQ(response.outputs().count(customPipelineOutputName), 1); @@ -4362,7 +4367,7 @@ TEST_F(EnsembleFlowTest, MediapipeConfigModelWithSameNamePipeline) { ASSERT_FALSE(manager.getMediapipeFactory().definitionExists(MEDIAPIPE_DUMMY_NAME)); - ASSERT_TRUE(manager.pipelineDefinitionExists(MEDIAPIPE_DUMMY_NAME)); + ASSERT_TRUE(manager.servableExists(MEDIAPIPE_DUMMY_NAME, ServableType::Pipeline)); } #endif TEST_F(EnsembleFlowTest, PipelineConfigModelWithSameName) { diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp index 94648d0e68..73b1c37b25 100644 --- a/src/test/http_openai_handler_test.cpp +++ b/src/test/http_openai_handler_test.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #include #include diff --git a/src/test/llm/assisted_decoding_test.cpp b/src/test/llm/assisted_decoding_test.cpp index e265cd6b68..9c870a1f63 100644 --- a/src/test/llm/assisted_decoding_test.cpp +++ b/src/test/llm/assisted_decoding_test.cpp @@ -22,6 +22,8 @@ #include #include +#include + #include #include #include diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index 55f87fa720..0cbcfa2f2d 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -22,6 +22,8 @@ #include #include +#include + #include #include #include diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index 55b6ab96ed..dd3358858d 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -36,6 +36,7 @@ #pragma GCC diagnostic pop #include "../config.hpp" +#include "../dags/pipeline_factory.hpp" #include "../dags/pipelinedefinition.hpp" #include "../grpcservermodule.hpp" #include "../http_rest_api_handler.hpp" @@ -47,6 +48,7 @@ #include "../mediapipe_internal/mediapipegraphexecutor.hpp" #include "../metric_config.hpp" #include "../metric_module.hpp" +#include "../model.hpp" #include "../model_service.hpp" #include "../ovms_exit_codes.hpp" #include "../precision.hpp" @@ -1487,8 +1489,6 @@ TEST_F(MediapipeStreamFlowAddTest, Infer) { TEST_F(MediapipeStreamFlowAddTest, InferOnUnloadedGraph) { const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); - const ServableManagerModule* smm = dynamic_cast(server.getModule(SERVABLE_MANAGER_MODULE_NAME)); - ModelManager& modelManager = smm->getServableManager(); auto* definition = this->getMPDefinitionByName(this->modelName); ASSERT_NE(definition, nullptr); @@ -1532,10 +1532,10 @@ TEST_F(MediapipeStreamFlowAddTest, InferOnUnloadedGraph) { checkAddResponse("out", this->requestData1[2], this->requestData1[2], this->request[2], msg.infer_response(), 1, 1, this->modelName); return true; }); - std::thread unloader([&startUnloading, &finishedUnloading, &definition, &modelManager]() { + std::thread unloader([&startUnloading, &finishedUnloading, &definition]() { // Wait till first response notifies that we should start unloading startUnloading.get_future().get(); - definition->retire(modelManager); + definition->retire(); // Notify second request to arrive because we unloaded the graph finishedUnloading.set_value(); }); @@ -1654,11 +1654,9 @@ TEST_F(MediapipeStreamFlowAddTest, InferOnReloadedGraph) { TEST_F(MediapipeStreamFlowAddTest, NegativeShouldNotReachInferDueToRetiredGraph) { const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); - const ServableManagerModule* smm = dynamic_cast(server.getModule(SERVABLE_MANAGER_MODULE_NAME)); - ModelManager& modelManager = smm->getServableManager(); auto* definition = this->getMPDefinitionByName(this->modelName); ASSERT_NE(definition, nullptr); - definition->retire(modelManager); + definition->retire(); // Opening new stream, expect graph to be unavailable MockedServerReaderWriter<::inference::ModelStreamInferResponse, ::inference::ModelInferRequest> stream; diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 02e7c4178a..5af193b6c3 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -29,6 +29,7 @@ #include "../status.hpp" #include "../stringutils.hpp" #include "mediapipe/framework/port/integral_types.h" +#include "../mediapipe_internal/mediapipefactory.hpp" #include "constructor_enabled_model_manager.hpp" #include "platform_utils.hpp" #include "test_utils.hpp" diff --git a/src/test/stress_test_utils.hpp b/src/test/stress_test_utils.hpp index ccbdd60758..0cf85e1f2f 100644 --- a/src/test/stress_test_utils.hpp +++ b/src/test/stress_test_utils.hpp @@ -1726,7 +1726,7 @@ class ConfigChangeStressTest : public TestWithTempDir { RequestType request = preparePipelinePredictRequest(request2); ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR; if (typeid(ServableType) == typeid(ovms::Pipeline)) { - createPipelineStatus = this->manager->createPipeline(pipelinePtr, pipelineName, &request, &response); + createPipelineStatus = this->manager->getPipelineFactory().create(pipelinePtr, pipelineName, &request, &response, *(this->manager)); #if (MEDIAPIPE_DISABLE == 0) } else if (typeid(ServableType) == typeid(ovms::MediapipeGraphExecutor)) { mediacreate(executorPtr, *(this->manager), request, response, createPipelineStatus); diff --git a/src/test/test_utils.cpp b/src/test/test_utils.cpp index a80f924f9f..5c21e15158 100644 --- a/src/test/test_utils.cpp +++ b/src/test/test_utils.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/src/tfs_frontend/serialization.hpp b/src/tfs_frontend/serialization.hpp index f6925206d1..1d227bc976 100644 --- a/src/tfs_frontend/serialization.hpp +++ b/src/tfs_frontend/serialization.hpp @@ -29,7 +29,6 @@ #pragma GCC diagnostic pop #include "../profiler.hpp" -#include "../logging.hpp" #include "../status.hpp" #include "../serialization_common.hpp" #include "../tensorinfo.hpp" diff --git a/src/tfs_frontend/tfs_request_utils.hpp b/src/tfs_frontend/tfs_request_utils.hpp index d040e7a0a9..e6f55d451d 100644 --- a/src/tfs_frontend/tfs_request_utils.hpp +++ b/src/tfs_frontend/tfs_request_utils.hpp @@ -26,7 +26,6 @@ #include "../extractchoice.hpp" #include "../requesttensorextractor.hpp" #include "../statefulrequestprocessor.hpp" -#include "../logging.hpp" #include "../profiler.hpp" #include "../shape.hpp" #include "../status.hpp"