Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions backends/xnnpack/runtime/XNNPACKBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,14 @@ class XnnpackBackend final
// concurrent inits from resetting is_finalized_ or overwriting
// named_data_map_ while compileModel is using the shared weights cache.
std::unique_lock<std::mutex> lock_weights_cache(
weights_cache_mutex_, std::defer_lock);
options_.weights_cache_mutex(), std::defer_lock);
if (use_weight_cache) {
lock_weights_cache.lock();

const auto& cache_path = options_.get_packed_cache_path();
if (!cache_path.empty()) {
weights_cache_->set_packed_cache_path(cache_path);
}
options_.weights_cache().set_packed_cache_path(cache_path);

weights_cache_->initialize_for_runtime(
options_.weights_cache().initialize_for_runtime(
context.get_runtime_allocator(), named_data_map);
workspace->set_uses_weight_cache();
}
Expand All @@ -120,7 +118,7 @@ class XnnpackBackend final
processed->data(),
processed->size(),
executor,
weights_cache_.get(),
&options_.weights_cache(),
workspace_ptr,
named_data_map,
use_weight_cache);
Expand Down Expand Up @@ -149,7 +147,7 @@ class XnnpackBackend final
auto workspace = executor->get_workspace();

std::unique_lock<std::mutex> lock_weights_cache(
weights_cache_mutex_, std::defer_lock);
options_.weights_cache_mutex(), std::defer_lock);
if (executor->uses_weight_cache() || workspace->uses_weight_cache()) {
lock_weights_cache.lock();
}
Expand Down Expand Up @@ -180,14 +178,15 @@ class XnnpackBackend final
auto workspace = executor->get_workspace();

const std::lock_guard<std::mutex> lock_weights_cache(
weights_cache_mutex_);
options_.weights_cache_mutex());

#ifdef ENABLE_XNNPACK_PROFILING
executor->print_avg_op_timings();
#endif

if (executor->uses_weight_cache()) {
weights_cache_->delete_packed_data(executor->get_packed_data_names());
options_.weights_cache().delete_packed_data(
executor->get_packed_data_names());
}

// This is needed to serialize access to xnn_delete_runtime which is not
Expand Down Expand Up @@ -218,27 +217,29 @@ class XnnpackBackend final
Error set_option(
BackendOptionContext& context,
const Span<BackendOption>& backend_options) override {
// Process every option even if one fails — applying a `packed_cache_path`
// and triggering `save_weight_cache_on_disk` in the same array must not
// depend on declaration order. Capture the first error and report it
// after the loop. All option-key dispatch — including the disk-save
// side effect — lives inside XnnpackBackendOptions::set_option, which
// owns the weights-cache instance and its mutex.
Error first_err = Error::Ok;
for (const auto& option : backend_options) {
Error err = options_.set_option(option);
if (err != Error::Ok) {
return err;
if (err != Error::Ok && first_err == Error::Ok) {
first_err = err;
}
}
return Error::Ok;
return first_err;
}

private:
mutable xnnpack::XnnpackBackendOptions options_;

// Weights cache is global to all delegate instances.
mutable std::mutex weights_cache_mutex_;
std::unique_ptr<XNNWeightsCache> weights_cache_ =
std::make_unique<XNNWeightsCache>();

// Lock Hiearchy for Mutexes:
// weights_cache_mutex_
// workspace_meta_mutex_
// workspace_mutex_ (owned by executor)
// Lock hierarchy for mutexes:
// options_.weights_cache_mutex()
// workspace_meta_mutex_
// workspace_mutex_ (owned by executor)
};

namespace {
Expand Down
11 changes: 11 additions & 0 deletions backends/xnnpack/runtime/XNNPACKBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ const char weight_cache_option_key[] = "weight_cache_enabled";
// @lint-ignore CLANGTIDY facebook-hte-CArray
const char packed_cache_path_option_key[] = "packed_cache_path";

/// Backend option key that requests a save of the packed weight cache to disk.
///
/// EXPERIMENTAL — option name and semantics may change without notice.
///
/// Setting this option to `true` triggers persisting the packed weight cache
/// to disk so that a subsequent process load can mmap the same file and skip
/// XNNPACK weight repacking. The on-disk path is configured separately via
/// `packed_cache_path_option_key`.
///
/// The option acts as a trigger rather than a stored setting: the value is
/// not retained, and each time it is set to `true` another save is performed.
// Must remain a C array for the BackendOptions template overloads.
// @lint-ignore CLANGTIDY facebook-hte-CArray
const char save_weight_cache_on_disk_option_key[] = "save_weight_cache_on_disk";

/// Workspace sharing mode. This is a backend option that can be set via the
/// set_option API to control memory sharing between CALL_DELEGATE instances.
/// This is useful for reducing memory consumption.
Expand Down
Loading
Loading