pytorch · chenweng-quic · Apr 22, 2026
@@ -306,7 +306,6 @@ def __init__(
             traditional_general_artifacts = [
                 f"{self.qnn_sdk}/lib/{self.target}/libQnnSystem.so",
                 f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so",
-                f"{self.qnn_sdk}/lib/{self.target}/libQnnModelDlc.so",
             ]
             self.backend_library_paths.update(
                 {
@@ -352,9 +351,11 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None):
             )
             output_callback(result)
         else:
-            subprocess.run(
+            result = subprocess.run(
                 cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout
             )
+        if result.returncode != 0:
+            raise RuntimeError(f"adb command failed: {cmds}")
 
     def push(  # noqa: C901
         self,

@@ -246,6 +246,7 @@ Error QnnManager::InitContext(
         options_->backend_options()->backend_type());
     backend_params_ptr_ = QnnBackendFactory().Create(
         backend_bundle_ptr_->implementation.get(),
+        backend_bundle_ptr_->system_implementation.get(),
         backend_bundle_ptr_->qnn_backend_ptr.get(),
         backend_bundle_ptr_->qnn_device_ptr.get(),
         qnn_context_blob_,
@@ -279,7 +280,10 @@ Error QnnManager::InitContext(
         BackendInitializeState::INITIALIZED;
   }
 
-  if (IsOnlinePrepare()) {
+  if (IsOnlinePrepare() &&
+      backend_params_ptr_->qnn_backend_cache_ptr_->GetCacheState() ==
+          QnnBackendCache::SERIALIZE) {
+    // AOT only: online prepare is on and the cache state is SERIALIZE.
     // Check whether the QNN version supports the DLC format.
     Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT};
     backend_bundle_ptr_->implementation->GetQnnInterface()
@@ -304,6 +308,7 @@ Error QnnManager::InitContextCache() {
         options_->backend_options()->backend_type());
     backend_params_ptr_ = QnnBackendFactory().Create(
         backend_bundle_ptr_->implementation.get(),
+        backend_bundle_ptr_->system_implementation.get(),
         backend_bundle_ptr_->qnn_backend_ptr.get(),
         backend_bundle_ptr_->qnn_device_ptr.get(),
         qnn_context_blob_,
@@ -477,9 +482,9 @@ Error QnnManager::ProfileExecuteData(
 }
 
 void QnnManager::Destroy() {
+  qnn_dlc_manager_->Destroy();
   backend_params_ptr_.reset(new BackendConfigParameters());
   backend_bundle_ptr_.reset(new QnnBackendBundle());
-  qnn_dlc_manager_->Destroy();
 }
 
 void QnnManager::DestroyContext() {
@@ -540,12 +545,17 @@ Error QnnManager::GetContextBinary(
 
 Error QnnManager::CompileDlc() {
   Qnn_ErrorHandle_t error;
-  auto qnn_dlc_graph_info = qnn_dlc_manager_->GetQnnDlcGraphInfoPtr();
-  uint32_t qnn_dlc_graph_info_num = qnn_dlc_manager_->GetQnnDlcGraphInfoNum();
-  for (uint32_t i = 0; i < qnn_dlc_graph_info_num; ++i) {
-    auto& graphInfo = (*qnn_dlc_graph_info)[i];
+  auto graphs = qnn_dlc_manager_->GetQnnDlcGraphInfoPtr();
+  uint32_t num_graphs = qnn_dlc_manager_->GetQnnDlcGraphInfoNum();
+  for (uint32_t i = 0; i < num_graphs; ++i) {
+    auto& graphInfo = graphs[i].graphInfoV1;
+    Qnn_GraphHandle_t graphHandle;
+    backend_bundle_ptr_->implementation->GetQnnInterface().qnn_graph_retrieve(
+        backend_params_ptr_->qnn_context_ptr_->GetHandle(),
+        graphInfo.graphName,
+        &graphHandle);
     backend_params_ptr_->qnn_graph_ptr_->SetGraphHandle(
-        graphInfo.graphName, graphInfo.graph);
+        graphInfo.graphName, graphHandle);
     error =
         backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graphInfo.graphName);
     if (error != QNN_SUCCESS) {
@@ -560,9 +570,9 @@ Error QnnManager::CompileDlc() {
 
     // Mapping memory address for the input and output of mutable buffer
     std::unordered_map<int, const void*> mutable_buffer_id_to_memory_map;
-    for (uint32_t i = 0; i < graphInfo.numInputTensors; ++i) {
-      auto tw = CreateTensorWrapper(graphInfo.inputTensors[i]);
-      tw->UpdateQnnTensorMeta(graphInfo.inputTensors[i]);
+    for (uint32_t i = 0; i < graphInfo.numGraphInputs; ++i) {
+      auto tw = CreateTensorWrapper(graphInfo.graphInputs[i]);
+      tw->UpdateQnnTensorMeta(graphInfo.graphInputs[i]);
 
       int mutable_buffer_id = ExtractMutableBufferNumber(tw->GetName());
       if (mutable_buffer_id != -1) {
@@ -573,9 +583,9 @@ Error QnnManager::CompileDlc() {
       }
       graph_inputs.push_back(tw);
     }
-    for (uint32_t i = 0; i < graphInfo.numOutputTensors; ++i) {
-      auto tw = CreateTensorWrapper(graphInfo.outputTensors[i]);
-      tw->UpdateQnnTensorMeta(graphInfo.outputTensors[i]);
+    for (uint32_t i = 0; i < graphInfo.numGraphOutputs; ++i) {
+      auto tw = CreateTensorWrapper(graphInfo.graphOutputs[i]);
+      tw->UpdateQnnTensorMeta(graphInfo.graphOutputs[i]);
       int mutable_buffer_id = ExtractMutableBufferNumber(tw->GetName());
       if (mutable_buffer_id != -1 &&
           mutable_buffer_id_to_memory_map.find(mutable_buffer_id) !=

@@ -17,6 +17,7 @@ using executorch::runtime::Error;
 
 std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
     QnnImplementation* implementation_ptr,
+    QnnSystemImplementation* system_implementation_ptr,
     QnnBackend* qnn_backend_ptr,
     QnnDevice* qnn_device_ptr,
     const QnnExecuTorchContextBinary& qnn_context_blob,
@@ -67,6 +68,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
 
       backend_params->qnn_context_ptr_ = std::make_unique<HtpContext>(
           implementation_ptr,
+          system_implementation_ptr,
           qnn_backend_ptr,
           qnn_device_ptr,
           backend_params->qnn_backend_cache_ptr_.get(),
@@ -110,6 +112,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
 
       backend_params->qnn_context_ptr_ = std::make_unique<GpuContext>(
           implementation_ptr,
+          system_implementation_ptr,
           qnn_backend_ptr,
           qnn_device_ptr,
           backend_params->qnn_backend_cache_ptr_.get(),
@@ -154,6 +157,7 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
 
       backend_params->qnn_context_ptr_ = std::make_unique<LpaiContext>(
           implementation_ptr,
+          system_implementation_ptr,
           qnn_backend_ptr,
           qnn_device_ptr,
           backend_params->qnn_backend_cache_ptr_.get(),

@@ -63,6 +63,7 @@ class QnnBackendFactory {
  public:
   std::unique_ptr<BackendConfigParameters> Create(
       QnnImplementation* implementation,
+      QnnSystemImplementation* system_implementation,
       QnnBackend* qnn_backend_ptr,
       QnnDevice* qnn_device_ptr,
       const QnnExecuTorchContextBinary& qnn_context_blob,

@@ -152,8 +152,15 @@ Error QnnBackendUnifiedRegistry::GetOrCreateBackendBundle(
   if (backend->VerifyQNNSDKVersion() != Error::Ok) {
     return Error::Internal;
   }
+  // 5. Create QnnSystemImplementation and load the QNN system library
+  std::unique_ptr<QnnSystemImplementation> system_implementation =
+      std::make_unique<QnnSystemImplementation>("libQnnSystem.so");
+  ret = system_implementation->Load();
+  ET_CHECK_OR_RETURN_ERROR(
+      ret == Error::Ok, Internal, "Fail to load Qnn system library");
 
   bundle->implementation = std::move(implementation);
+  bundle->system_implementation = std::move(system_implementation);
   bundle->qnn_logger_ptr = std::move(logger);
   bundle->qnn_backend_ptr = std::move(backend);
   bundle->qnn_device_ptr = std::move(device);

@@ -12,6 +12,7 @@
 #include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnLogger.h>
+#include <executorch/backends/qualcomm/runtime/backends/QnnSysImplementation.h>
 #include <executorch/runtime/core/error.h>
 
 #include <memory>
@@ -28,19 +29,22 @@ struct QnnBackendBundle {
   std::unique_ptr<QnnLogger> qnn_logger_ptr;
   std::unique_ptr<QnnBackend> qnn_backend_ptr;
   std::unique_ptr<QnnDevice> qnn_device_ptr;
+  std::unique_ptr<QnnSystemImplementation> system_implementation;
 
   // Default ctor
   QnnBackendBundle()
       : implementation(nullptr),
         qnn_logger_ptr(nullptr),
         qnn_backend_ptr(nullptr),
-        qnn_device_ptr(nullptr) {}
+        qnn_device_ptr(nullptr),
+        system_implementation{nullptr} {}
   // Default dtor
   ~QnnBackendBundle() {
     qnn_device_ptr.reset();
     qnn_backend_ptr.reset();
     qnn_logger_ptr.reset();
     implementation.reset();
+    system_implementation.reset();
   }
 };
 

@@ -86,9 +86,9 @@ Error QnnContext::Configure() {
     return Error::Internal;
   }
   if (cache_->GetCacheState() == QnnBackendCache::ONLINE_PREPARE) {
-    // Register graphs from DLC during online prepare for HTP/GPU/DSP backends
+    // Runtime (ONLINE_PREPARE) path: register the graphs from the DLC.
     return qnn_dlc_manager_->RegisterGraphsFromDLC(
-        implementation_, backend_, this, cache_);
+        implementation_, system_implementation_, backend_, this, cache_);
   }
   return Error::Ok;
 }

@@ -25,12 +25,14 @@ class QnnContext {
  public:
   explicit QnnContext(
       QnnImplementation* implementation,
+      QnnSystemImplementation* system_implementation,
       QnnBackend* backend,
       QnnDevice* device,
       QnnBackendCache* cache,
       QnnDlcManager* qnn_dlc_manager)
       : handle_(nullptr),
         implementation_(implementation),
+        system_implementation_(system_implementation),
         backend_(backend),
         device_(device),
         cache_(cache),
@@ -75,6 +77,7 @@ class QnnContext {
  private:
   Qnn_ContextHandle_t handle_;
   QnnImplementation* implementation_;
+  QnnSystemImplementation* system_implementation_;
   QnnBackend* backend_;
   QnnDevice* device_;
   QnnBackendCache* cache_;

@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 #pragma once
+#include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 
 #include <QnnTypes.h>
@@ -26,12 +27,12 @@ class QnnDlcManager {
       const QnnExecuTorchContextBinary& qnn_context_blob,
       const QnnExecuTorchOptions* options);
 
-  qnn_wrapper_api::GraphInfoPtr_t* GetQnnDlcGraphInfoPtr() {
-    return qnn_dlc_graph_info_;
+  QnnSystemContext_GraphInfo_t* GetQnnDlcGraphInfoPtr() {
+    return graphs_;
   }
 
   uint32_t GetQnnDlcGraphInfoNum() {
-    return qnn_dlc_graph_info_num_;
+    return num_graphs_;
   }
 
   std::unique_ptr<BackendConfigParameters> backend_params_ptr_ =
@@ -47,19 +48,69 @@ class QnnDlcManager {
 
   Error RegisterGraphsFromDLC(
       QnnImplementation* implementation,
+      QnnSystemImplementation* system_implementation,
       QnnBackend* backend,
       QnnContext* context,
-      QnnBackendCache* cache);
+      QnnBackendCache* cache) {
+    const QnnSystemInterface& system_interface =
+        system_implementation->GetQnnSystemInterface();
+
+    // Create a logger, then build a DLC handle from the context blob.
+    QnnSystemDlc_Handle_t dlc_handle = nullptr;
+    backend_bundle_ptr_->qnn_logger_ptr = std::make_unique<QnnLogger>(
+        implementation,
+        LoggingCallback,
+        get_option(options_->log_level(), QNN_RUNTIME_LOG_LEVEL));
+
+    Qnn_ErrorHandle_t error =
+        system_interface.qnn_system_dlc_create_from_binary(
+            /*logger=*/backend_bundle_ptr_->qnn_logger_ptr->GetHandle(),
+            /*buffer=*/(const uint8_t*)qnn_context_blob_.buffer,
+            /*bufferSize=*/qnn_context_blob_.nbytes,
+            /*dlcHandle=*/&dlc_handle);
+    if (error != QNN_SUCCESS) {
+      QNN_EXECUTORCH_LOG_ERROR(
+          "Can't create dlc from binary. Error %d.", QNN_GET_ERROR_CODE(error));
+      return Error::Internal;
+    }
+
+    // Compose graphs from the DLC using the given backend and context handles.
+    const QnnInterface_t* interface =
+        implementation->GetQnnInterface().GetInterface();
+    // The composed graph array and its length are stored in the members
+    // graphs_ and num_graphs_, which the Get* accessors above return.
+    error = system_interface.qnn_system_dlc_compose_graphs(
+        /*dlcHandle=*/dlc_handle,
+        /*graphConfigs=*/nullptr,
+        /*numGraphConfigs=*/0,
+        /*backend=*/backend->GetHandle(),
+        /*context=*/context->GetHandle(),
+        /*backendInterface=*/*interface,
+        /*graphVersion=*/QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1,
+        /*graphs=*/&graphs_,
+        /*numGraphs=*/&num_graphs_);
+    if (error != QNN_SUCCESS) {
+      QNN_EXECUTORCH_LOG_ERROR(
+          "Can't compose graph from dlc. Error %d.", QNN_GET_ERROR_CODE(error));
+      return Error::Internal;
+    }
+
+    for (uint32_t i = 0; i < num_graphs_; ++i) {
+      auto& graphInfo = graphs_[i].graphInfoV1;
+      cache->SetGraphNames(graphInfo.graphName);
+    }
+
+    return Error::Ok;
+  }
 
  private:
   static constexpr const char* library_name_ = "libQnnIr.so";
 
   const QnnExecuTorchContextBinary& qnn_context_blob_;
   const QnnExecuTorchOptions* options_;
 
-  static constexpr const char* dlc_lib_ = "libQnnModelDlc.so";
-  qnn_wrapper_api::GraphInfoPtr_t* qnn_dlc_graph_info_ = nullptr;
-  uint32_t qnn_dlc_graph_info_num_ = 0;
+  QnnSystemContext_GraphInfo_t* graphs_ = nullptr;
+  uint32_t num_graphs_ = 0;
 
   Error LoadQnnIrLibrary();
 

@@ -106,6 +106,9 @@ class QnnInterface {
   const QNN_INTERFACE_VER_TYPE& GetInterfaceVer() const {
     return qnn_interface_->QNN_INTERFACE_VER_NAME;
   }
+  const QnnInterface_t* GetInterface() const {
+    return qnn_interface_;
+  }
   void Unload() {
     qnn_interface_ = nullptr;
   }

@@ -42,6 +42,12 @@ class QnnSystemInterface {
       system_context_get_binary_info,
       systemContextGetBinaryInfo);
   DEFINE_SHIM_FUNCTION_SYS_INTERFACE(system_context_free, systemContextFree);
+  DEFINE_SHIM_FUNCTION_SYS_INTERFACE(
+      system_dlc_compose_graphs,
+      systemDlcComposeGraphs);
+  DEFINE_SHIM_FUNCTION_SYS_INTERFACE(
+      system_dlc_create_from_binary,
+      systemDlcCreateFromBinary);
 
  private:
   const QnnSystemInterface_t* qnn_sys_interface_{nullptr};

@@ -16,12 +16,19 @@ using executorch::runtime::Error;
 
 GpuContext::GpuContext(
     QnnImplementation* implementation,
+    QnnSystemImplementation* system_implementation,
     QnnBackend* backend,
     QnnDevice* device,
     QnnBackendCache* cache,
     QnnDlcManager* qnn_dlc_manager,
     const QnnExecuTorchGpuBackendOptions* gpu_options)
-    : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) {
+    : QnnContext(
+          implementation,
+          system_implementation,
+          backend,
+          device,
+          cache,
+          qnn_dlc_manager) {
   gpu_context_custom_config_ =
       std::make_unique<GpuContextCustomConfig>(gpu_options);
 }

@@ -19,6 +19,7 @@ class GpuContext : public QnnContext {
  public:
   GpuContext(
       QnnImplementation* implementation,
+      QnnSystemImplementation* system_implementation,
       QnnBackend* backend,
       QnnDevice* device,
       QnnBackendCache* cache,