diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index 8375e69a124..3a5d6ab7958 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -95,14 +95,16 @@ class XnnpackBackend final // concurrent inits from resetting is_finalized_ or overwriting // named_data_map_ while compileModel is using the shared weights cache. std::unique_lock lock_weights_cache( - options_.weights_cache_mutex(), std::defer_lock); + weights_cache_mutex_, std::defer_lock); if (use_weight_cache) { lock_weights_cache.lock(); const auto& cache_path = options_.get_packed_cache_path(); - options_.weights_cache().set_packed_cache_path(cache_path); + if (!cache_path.empty()) { + weights_cache_->set_packed_cache_path(cache_path); + } - options_.weights_cache().initialize_for_runtime( + weights_cache_->initialize_for_runtime( context.get_runtime_allocator(), named_data_map); workspace->set_uses_weight_cache(); } @@ -118,7 +120,7 @@ class XnnpackBackend final processed->data(), processed->size(), executor, - &options_.weights_cache(), + weights_cache_.get(), workspace_ptr, named_data_map, use_weight_cache); @@ -147,7 +149,7 @@ class XnnpackBackend final auto workspace = executor->get_workspace(); std::unique_lock lock_weights_cache( - options_.weights_cache_mutex(), std::defer_lock); + weights_cache_mutex_, std::defer_lock); if (executor->uses_weight_cache() || workspace->uses_weight_cache()) { lock_weights_cache.lock(); } @@ -178,15 +180,14 @@ class XnnpackBackend final auto workspace = executor->get_workspace(); const std::lock_guard lock_weights_cache( - options_.weights_cache_mutex()); + weights_cache_mutex_); #ifdef ENABLE_XNNPACK_PROFILING executor->print_avg_op_timings(); #endif if (executor->uses_weight_cache()) { - options_.weights_cache().delete_packed_data( - executor->get_packed_data_names()); + weights_cache_->delete_packed_data(executor->get_packed_data_names()); } // This is needed to serialize access to xnn_delete_runtime which is not @@ -217,29 +218,27 @@ class XnnpackBackend final Error set_option( BackendOptionContext& context, const Span& backend_options) override { - // Process every option even if one fails — applying a `packed_cache_path` - // and triggering `save_weight_cache_on_disk` in the same array must not - // depend on declaration order. Capture the first error and report it - // after the loop. All option-key dispatch — including the disk-save - // side effect — lives inside XnnpackBackendOptions::set_option, which - // owns the weights-cache instance and its mutex. - Error first_err = Error::Ok; for (const auto& option : backend_options) { Error err = options_.set_option(option); - if (err != Error::Ok && first_err == Error::Ok) { - first_err = err; + if (err != Error::Ok) { + return err; } } - return first_err; + return Error::Ok; } private: mutable xnnpack::XnnpackBackendOptions options_; - // Lock hierarchy for mutexes: - // options_.weights_cache_mutex() - // workspace_meta_mutex_ - // workspace_mutex_ (owned by executor) + // Weights cache is global to all delegate instances. + mutable std::mutex weights_cache_mutex_; + std::unique_ptr weights_cache_ = + std::make_unique(); + + // Lock Hiearchy for Mutexes: + // weights_cache_mutex_ + // workspace_meta_mutex_ + // workspace_mutex_ (owned by executor) }; namespace { diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h index 1053a206360..e3492c3f5f3 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.h +++ b/backends/xnnpack/runtime/XNNPACKBackend.h @@ -20,17 +20,6 @@ const char weight_cache_option_key[] = "weight_cache_enabled"; // @lint-ignore CLANGTIDY facebook-hte-CArray const char packed_cache_path_option_key[] = "packed_cache_path"; -/// EXPERIMENTAL — option name and semantics may change without notice. -/// -/// Setting this to `true` triggers persisting the packed weight cache to disk -/// so a subsequent process load can mmap the same file and skip XNNPACK weight -/// repacking. The on-disk path is configured via -/// `packed_cache_path_option_key`. The disk write is a one-shot side effect -/// (the value is not stored): every `true` set fires another save. -// Must remain a C array for the BackendOptions template overloads. -// @lint-ignore CLANGTIDY facebook-hte-CArray -const char save_weight_cache_on_disk_option_key[] = "save_weight_cache_on_disk"; - /// Workspace sharing mode. This is a backend option that can be set via the /// set_option API to control memory sharing between CALL_DELEGATE instances. /// This is useful for reducing memory consumption. diff --git a/backends/xnnpack/runtime/XNNWeightsCache.cpp b/backends/xnnpack/runtime/XNNWeightsCache.cpp index 005169249bc..70c410e5729 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.cpp +++ b/backends/xnnpack/runtime/XNNWeightsCache.cpp @@ -63,72 +63,6 @@ XNNWeightsCache::~XNNWeightsCache() { #endif } -// Trivial helpers for little-endian byte serialization of the trailer. -template -static void append_le(std::vector& buf, T value) { - const auto* p = reinterpret_cast(&value); - buf.insert(buf.end(), p, p + sizeof(T)); -} - -template -static T read_le(const uint8_t* src) { - T value; - memcpy(&value, src, sizeof(T)); - return value; -} - -#ifndef _WIN32 -// Open the cache file and take an advisory exclusive lock. Returns the -// fd, or -1 if open/flock failed (logs the failure). The caller decides -// how to recover (typically: skip the mmap path for this init). -static int open_locked(const std::string& path, int flags) { - int fd = open(path.c_str(), flags, 0600); - if (fd < 0) { - ET_LOG(Error, "open(%s) failed (errno=%d)", path.c_str(), errno); - return -1; - } - if (flock(fd, LOCK_EX | LOCK_NB) != 0) { - ET_LOG(Error, "flock(%s) failed (errno=%d)", path.c_str(), errno); - close(fd); - return -1; - } - return fd; -} - -// Drop in-memory state that referenced a now-truncated cache file. -// Heap-backed entries (live in packed_pointer_to_container_) stay; their -// packed_data_ptrs_ slots remain valid so existing offsets don't shift. -void XNNWeightsCache::reset_for_fresh_write() { - for (auto& region : mmap_regions_) { - if (region.addr != nullptr && region.addr != MAP_FAILED) { - munmap(region.addr, region.size); - } - } - mmap_regions_.clear(); - mmap_regions_synced_ = 0; - packed_file_used_ = 0; - ptr_to_file_offset_.clear(); - file_ptr_to_region_index_.clear(); - for (auto it = name_to_packed_data_metadata_.begin(); - it != name_to_packed_data_metadata_.end();) { - bool is_heap_backed = false; - if (it->second.offset < packed_data_ptrs_.size()) { - void* ptr = packed_data_ptrs_[it->second.offset]; - if (ptr != nullptr && - packed_pointer_to_container_.find(ptr) != - packed_pointer_to_container_.end()) { - is_heap_backed = true; - } - } - if (is_heap_backed) { - ++it; - } else { - it = name_to_packed_data_metadata_.erase(it); - } - } -} -#endif - Error XNNWeightsCache::initialize_for_runtime( MemoryAllocator* runtime_allocator, const NamedDataMap* named_data_map) { @@ -137,52 +71,38 @@ Error XNNWeightsCache::initialize_for_runtime( is_finalized_ = false; #ifndef _WIN32 - if (packed_cache_path_.empty() || packed_file_fd_ >= 0) { - return Error::Ok; - } - - // Already loaded earlier this session; just reopen the write fd that - // save_packed_index() closed. Subsequent reserve_space can extend the - // file for any entries not in the saved trailer. - if (cache_loaded_) { - packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR); - return Error::Ok; - } - - // First init for this path: try to load the saved trailer; on success - // open a write fd for any new entries. If load fails, fall through to - // fresh-write below. - if (load_packed_cache()) { - ET_LOG( - Info, - "Loaded packed weight cache: %s (%zu entries)", - packed_cache_path_.c_str(), - name_to_packed_data_metadata_.size()); - packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR); - return Error::Ok; - } - - // Fresh write. Skip O_TRUNC in open_locked so a concurrent holder's - // mmap stays valid; truncate explicitly only after we hold the lock. - packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR | O_CREAT); - if (packed_file_fd_ < 0) { - return Error::Ok; - } - if (ftruncate(packed_file_fd_, 0) != 0) { - ET_LOG( - Error, - "ftruncate(0) failed for %s (errno=%d); heap fallback this init", - packed_cache_path_.c_str(), - errno); - close(packed_file_fd_); - packed_file_fd_ = -1; - return Error::Ok; + // Open the file for packed weights. Each reserve_space() call + // independently mmaps a region of the file. Once packed_file_disabled_ + // is set we never re-open — re-opening with O_TRUNC would corrupt any + // still-live mappings into the same path and cause SIGBUS on access. + if (!packed_cache_path_.empty() && packed_file_fd_ < 0 && + !packed_file_disabled_) { + packed_file_fd_ = + open(packed_cache_path_.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600); + if (packed_file_fd_ < 0) { + ET_LOG( + Error, + "Failed to open packed weight file: %s (errno=%d)", + packed_cache_path_.c_str(), + errno); + } else if (flock(packed_file_fd_, LOCK_EX | LOCK_NB) != 0) { + // Another XNNWeightsCache instance (this process or another) is + // already using this path. O_TRUNC above would corrupt its mappings. + // Disable mmap for this instance to prevent collision; fall back to + // heap allocation for the remainder of this cache's lifetime. + ET_LOG( + Error, + "Another instance is using packed weight cache file %s (errno=%d); " + "disabling mmap path", + packed_cache_path_.c_str(), + errno); + close(packed_file_fd_); + packed_file_fd_ = -1; + packed_file_disabled_ = true; + } else { + ET_LOG(Info, "Opened packed weight file: %s", packed_cache_path_.c_str()); + } } - reset_for_fresh_write(); - ET_LOG( - Info, - "Opened packed weight file for writing: %s", - packed_cache_path_.c_str()); #endif return Error::Ok; @@ -244,84 +164,59 @@ Result XNNWeightsCache::load_unpacked_data( static_cast(named_data.get().data()); unpacked_data_.push_back(std::move(named_data.get())); unpacked_data_to_name_[data_pointer] = name; - return data_pointer; -} - -void XNNWeightsCache::release_entry(void* packed_data_ptr) { - packed_pointer_to_container_.erase(packed_data_ptr); -#ifndef _WIN32 - // Per-entry file-backed mmap region: munmap to release VM. The - // packed_data_ptrs_ slot is nulled by the caller so existing offsets - // stay valid. - auto region_it = file_ptr_to_region_index_.find(packed_data_ptr); - if (region_it != file_ptr_to_region_index_.end()) { - MmapRegion& region = mmap_regions_[region_it->second]; - if (region.addr != nullptr && region.addr != MAP_FAILED) { - munmap(region.addr, region.size); - region.addr = nullptr; - region.size = 0; - } - file_ptr_to_region_index_.erase(region_it); - } -#endif -} -void XNNWeightsCache::full_unload() { -#ifndef _WIN32 - for (auto& region : mmap_regions_) { - if (region.addr != nullptr && region.addr != MAP_FAILED) { - munmap(region.addr, region.size); - region.addr = nullptr; - region.size = 0; - } - } - mmap_regions_.clear(); - mmap_regions_synced_ = 0; - packed_data_ptrs_.clear(); - ptr_to_file_offset_.clear(); - file_ptr_to_region_index_.clear(); - cache_loaded_ = false; - if (packed_file_fd_ >= 0) { - close(packed_file_fd_); - packed_file_fd_ = -1; - } -#endif + return data_pointer; } Error XNNWeightsCache::delete_packed_data( const std::vector& packed_data_names) { if (!is_finalized_) { - ET_LOG(Error, "delete_packed_data called before finalize_for_runtime"); + ET_LOG( + Error, + "Error, attempted to delete packed data from the cache but the cache is not finalized"); return Error::InvalidArgument; } for (const std::string& name : packed_data_names) { auto entry = name_to_packed_data_metadata_.find(name); if (entry == name_to_packed_data_metadata_.end()) { - ET_LOG(Error, "delete_packed_data: '%s' not found", name.c_str()); + ET_LOG( + Error, + "Error, attempted to deleted packed data: %s, from the cache but it wasn't found", + name.c_str()); return Error::InvalidArgument; + } else { + entry->second.ref_count--; + if (entry->second.ref_count == 0) { + void* packed_data_ptr = packed_data_ptrs_[entry->second.offset]; + // Erase the key/value from the map frees the pointer holding the + // packed data. No-op on the file-backed mmap path, where the + // container is not populated. + packed_pointer_to_container_.erase(packed_data_ptr); +#ifndef _WIN32 + // File-backed mmap path: munmap the region so VM and page-cache + // usage is released, not just retained until cache destruction. + // The vector slot is set to nullptr below so existing offsets remain + // valid for any concurrent lookups. + auto region_it = file_ptr_to_region_index_.find(packed_data_ptr); + if (region_it != file_ptr_to_region_index_.end()) { + size_t idx = region_it->second; + MmapRegion& region = mmap_regions_[idx]; + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + region.addr = nullptr; + region.size = 0; + } + file_ptr_to_region_index_.erase(region_it); + } +#endif + // Remove the pointer from packed_data_ptrs_. + packed_data_ptrs_[entry->second.offset] = nullptr; + // Erase the name to packed metadata entry. + name_to_packed_data_metadata_.erase(entry->first); + } } - if (--entry->second.ref_count > 0) { - continue; - } - // Keep from_load entries: their packed bytes live in the cache file - // and stay valid until full unload. Erasing them would force the - // next init to re-pack and append ~450 MB to the file per cycle. - if (entry->second.from_load) { - entry->second.in_current_runtime = false; - continue; - } - release_entry(packed_data_ptrs_[entry->second.offset]); - packed_data_ptrs_[entry->second.offset] = nullptr; - name_to_packed_data_metadata_.erase(entry); } - // Last entry gone: drop all in-memory state. File on disk is preserved - // so the next process can load_packed_cache and skip re-packing. If - // reserve_space after the last save corrupted the trailer, load will - // fall through to fresh-write — same outcome as truncating here. - if (name_to_packed_data_metadata_.empty()) { - full_unload(); - } return Error::Ok; } @@ -331,11 +226,15 @@ size_t XNNWeightsCache::look_up( const void* unpacked_weights_ptr = cache_key->kernel; const void* unpacked_bias_ptr = cache_key->bias; auto entry = context->unpacked_data_to_name_.find(unpacked_weights_ptr); + + // Check if weight_pointer has been cached if (entry == context->unpacked_data_to_name_.end()) { return SIZE_MAX; } + std::string weight_bias_name = entry->second; + // Check if bias_pointer has been cached if (unpacked_bias_ptr != nullptr) { auto bias_entry = context->unpacked_data_to_name_.find(unpacked_bias_ptr); if (bias_entry != context->unpacked_data_to_name_.end()) { @@ -343,12 +242,14 @@ size_t XNNWeightsCache::look_up( } } + // check if weight_bias_name has been packed already auto packed_weight_entry = context->name_to_packed_data_metadata_.find(weight_bias_name); if (packed_weight_entry == context->name_to_packed_data_metadata_.end()) { return SIZE_MAX; } packed_weight_entry->second.in_current_runtime = true; + return packed_weight_entry->second.offset; } @@ -363,11 +264,16 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { if (ftruncate(context->packed_file_fd_, file_offset + map_size) != 0) { ET_LOG( Error, - "reserve_space ftruncate to %zu failed (errno=%d)", + "ftruncate to %zu failed (errno=%d)", file_offset + map_size, errno); close(context->packed_file_fd_); context->packed_file_fd_ = -1; + // Existing mmap_regions_ still reference this inode. Disable the + // file-backed path permanently so a future initialize_for_runtime + // doesn't re-open + O_TRUNC the same path and trigger SIGBUS on the + // stale mappings. + context->packed_file_disabled_ = true; return context->reserve_space_heap(n); } @@ -379,18 +285,15 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { context->packed_file_fd_, file_offset); if (ptr == MAP_FAILED) { - ET_LOG( - Error, - "reserve_space mmap %zu bytes failed (errno=%d)", - map_size, - errno); + ET_LOG(Error, "mmap %zu bytes failed (errno=%d)", map_size, errno); close(context->packed_file_fd_); context->packed_file_fd_ = -1; + context->packed_file_disabled_ = true; return context->reserve_space_heap(n); } // mmap returns page-aligned (>= 4 KiB), which trivially satisfies the - // 64-byte kPackedAllocationAlignment XNNPACK expects. + // 64-byte kPackedAllocationAlignment XNNPACK expects. Assert defensively. ET_DCHECK_MSG( (reinterpret_cast(ptr) % kPackedAllocationAlignment) == 0, "mmap returned ptr not aligned to %zu bytes", @@ -399,10 +302,10 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { context->packed_file_used_ = file_offset + map_size; context->file_ptr_to_region_index_[ptr] = context->mmap_regions_.size(); context->mmap_regions_.push_back({ptr, map_size}); - context->ptr_to_file_offset_[ptr] = file_offset; return ptr; } #endif + return context->reserve_space_heap(n); } @@ -440,8 +343,11 @@ size_t XNNWeightsCache::look_up_or_insert( size_t size) { size_t offset = context->look_up(context, cache_key); - // XNNPACK calls with ptr==nullptr after a cache hit (no packing - // happened, nothing to validate against). Return the offset as-is. + // XNNPACK can call this with ptr==nullptr when it previously hit the cache + // and skipped packing. We can't validate against the ptr contents in this + // case, so just return the offset. This might actually be a bug in XNNPACK + // since calling look_up_or_insert with ptr==nullptr doesn't really make + // sense... if (ptr == nullptr) { return offset; } @@ -451,7 +357,7 @@ size_t XNNWeightsCache::look_up_or_insert( if (saved_ptr != nullptr && 0 == memcmp(ptr, saved_ptr, size)) { return offset; } - // Cache out of date: name hits but packed bytes differ. + // Failure, cache is out of date return SIZE_MAX; } @@ -470,7 +376,6 @@ size_t XNNWeightsCache::look_up_or_insert( } PackedDataMeta packed_data_metadata; packed_data_metadata.offset = next_offset; - packed_data_metadata.data_size = size; packed_data_metadata.ref_count = 0; // ref_count is only incremented after finalizing for runtime packed_data_metadata.in_current_runtime = true; @@ -503,209 +408,6 @@ void XNNWeightsCache::set_packed_cache_path(const std::string& path) { packed_cache_path_ = path; } -Error XNNWeightsCache::save_packed_index() { -#ifndef _WIN32 - if (packed_file_fd_ < 0) { - return Error::Ok; - } - // Skip no-op saves: identical bytes would still bump mtime via - // pwrite/fsync, making the cache file appear modified on every load. - // The `mmap_regions_at_last_save_ > 0` guard is sufficient because a - // successful save closes packed_file_fd_ before returning, so re-entry - // past the `fd < 0` early-return above requires initialize_for_runtime - // to reopen the fd, which only happens via load_packed_cache (or the - // fresh-write path) that always populates at least one mmap region. - if (mmap_regions_.size() == mmap_regions_at_last_save_ && - mmap_regions_at_last_save_ > 0) { - return Error::Ok; - } - - size_t index_start = packed_file_used_; - std::vector buf; - uint32_t entry_count = 0; - - // Index entry: [name_len:u32][name][file_offset:u64][data_size:u64] - for (const auto& [name, meta] : name_to_packed_data_metadata_) { - void* ptr = packed_data_ptrs_[meta.offset]; - auto it = ptr_to_file_offset_.find(ptr); - if (it == ptr_to_file_offset_.end()) { - continue; - } - entry_count++; - append_le(buf, static_cast(name.size())); - buf.insert(buf.end(), name.begin(), name.end()); - append_le(buf, static_cast(it->second)); - append_le(buf, static_cast(meta.data_size)); - } - - // Footer: [index_start:u64][entry_count:u32][magic:u32][version:u32] - append_le(buf, static_cast(index_start)); - append_le(buf, entry_count); - append_le(buf, kCacheMagic); - append_le(buf, kCacheVersion); - - if (ftruncate(packed_file_fd_, index_start + buf.size()) != 0) { - ET_LOG(Error, "Failed to extend file for index (errno=%d)", errno); - return Error::Internal; - } - ssize_t written = - pwrite(packed_file_fd_, buf.data(), buf.size(), index_start); - if (written != static_cast(buf.size())) { - ET_LOG(Error, "Failed to write index (errno=%d)", errno); - return Error::Internal; - } - // Ensure trailer is on disk before we declare success. - if (fsync(packed_file_fd_) != 0) { - ET_LOG(Error, "fsync of packed cache failed (errno=%d)", errno); - // Continue — data is in page cache; durability is best-effort. - } - ET_LOG( - Info, - "Saved packed weight index: %u entries at offset %zu", - entry_count, - index_start); - - // Promote freshly-packed entries to from_load now that they're durable - // on disk, so delete_packed_data preserves them across unload/reload. - for (auto& [name, meta] : name_to_packed_data_metadata_) { - if (!meta.from_load && - ptr_to_file_offset_.find(packed_data_ptrs_[meta.offset]) != - ptr_to_file_offset_.end()) { - meta.from_load = true; - } - } - - mmap_regions_at_last_save_ = mmap_regions_.size(); - - // Close the fd so the next init re-enters load_packed_cache and reads - // the trailer we just wrote. - if (close(packed_file_fd_) != 0) { - ET_LOG(Error, "close of packed cache fd failed (errno=%d)", errno); - } - packed_file_fd_ = -1; -#endif - return Error::Ok; -} - -bool XNNWeightsCache::load_packed_cache() { -#ifndef _WIN32 - int fd = open(packed_cache_path_.c_str(), O_RDONLY); - if (fd < 0) { - return false; - } - // Prevent racing with a concurrent writer - if (flock(fd, LOCK_SH | LOCK_NB) != 0) { - close(fd); - return false; - } - struct stat st {}; - if (fstat(fd, &st) != 0 || st.st_size < 20) { - close(fd); - return false; - } - size_t file_size = static_cast(st.st_size); - - uint8_t footer[20]; - if (pread(fd, footer, 20, file_size - 20) != 20) { - close(fd); - return false; - } - uint64_t index_start = read_le(footer); - uint32_t entry_count = read_le(footer + 8); - uint32_t magic = read_le(footer + 12); - uint32_t version = read_le(footer + 16); - - if (magic != kCacheMagic || version != kCacheVersion || - index_start >= file_size - 20) { - close(fd); - return false; - } - const size_t index_region_end = file_size - 20; - - void* map = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); - close(fd); - if (map == MAP_FAILED) { - return false; - } - mmap_regions_.push_back({map, file_size}); - - const uint8_t* cursor = static_cast(map) + index_start; - const uint8_t* end = static_cast(map) + index_region_end; - - for (uint32_t i = 0; i < entry_count && cursor + 4 <= end; ++i) { - uint32_t name_len = read_le(cursor); - cursor += 4; - if (cursor + name_len + 16 > end) { - // Truncated entry header: trailer doesn't match the entry_count we - // read from the footer, so the cache is corrupt. Apply the same - // full rollback as the invalid-bounds branch below — otherwise the - // entries inserted so far would be silently accepted as a partial - // cache, and the next save_packed_index would rewrite a trailer - // covering only that subset (permanently dropping the rest). - ET_LOG( - Error, - "load_packed_cache: truncated entry header at index %u (entry_count=%u); aborting load", - i, - entry_count); - munmap(map, file_size); - mmap_regions_.pop_back(); - name_to_packed_data_metadata_.clear(); - packed_data_ptrs_.clear(); - ptr_to_file_offset_.clear(); - return false; - } - std::string name(reinterpret_cast(cursor), name_len); - cursor += name_len; - uint64_t file_offset = read_le(cursor); - cursor += 8; - uint64_t data_size = read_le(cursor); - cursor += 8; - - // Bounds check: the entry's bytes must lie entirely inside the - // packed-data region. - if (file_offset >= index_start || data_size > index_start - file_offset) { - ET_LOG( - Error, - "load_packed_cache: entry '%s' has invalid bounds (file_offset=%llu, data_size=%llu, index_start=%llu); aborting load", - name.c_str(), - static_cast(file_offset), - static_cast(data_size), - static_cast(index_start)); - // Roll back any partial state. - munmap(map, file_size); - mmap_regions_.pop_back(); - name_to_packed_data_metadata_.clear(); - packed_data_ptrs_.clear(); - ptr_to_file_offset_.clear(); - return false; - } - - size_t ptr_index = packed_data_ptrs_.size(); - void* entry_ptr = static_cast(map) + file_offset; - packed_data_ptrs_.push_back(entry_ptr); - // Tracked so a subsequent save_packed_index can rewrite the trailer - // covering both loaded and newly-packed entries. - ptr_to_file_offset_[entry_ptr] = file_offset; - PackedDataMeta meta; - meta.offset = ptr_index; - meta.data_size = data_size; - meta.ref_count = 0; - meta.in_current_runtime = false; - meta.from_load = true; - name_to_packed_data_metadata_[name] = meta; - } - - cache_loaded_ = true; - packed_file_used_ = index_start; - // In-memory state matches the on-disk trailer; the next save would be - // a no-op. Initialize watermark so save_packed_index short-circuits. - mmap_regions_at_last_save_ = mmap_regions_.size(); - return true; -#else - return false; -#endif -} - } // namespace delegate } // namespace xnnpack } // namespace backends diff --git a/backends/xnnpack/runtime/XNNWeightsCache.h b/backends/xnnpack/runtime/XNNWeightsCache.h index 4bfa916d289..a41fed49fd1 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.h +++ b/backends/xnnpack/runtime/XNNWeightsCache.h @@ -30,20 +30,12 @@ using executorch::runtime::MemoryAllocator; using executorch::runtime::Result; struct PackedDataMeta { - size_t offset{}; - size_t data_size{0}; + size_t offset; // Count number of xnn_runtime_t this packed data is used in - size_t ref_count{}; + size_t ref_count; // true if this packed data was inserted or looked up for the // current runtime being created - bool in_current_runtime{}; - // True if this entry's bytes are persisted in the on-disk cache file - // (either originally loaded via load_packed_cache, or freshly packed - // and then save_packed_index-ed). Used by delete_packed_data to - // detect when all persistent entries are gone, at which point - // cache_loaded_ is auto-invalidated so the next init re-enters - // load_packed_cache and reuses the saved file instead of re-packing. - bool from_load{false}; + bool in_current_runtime; }; class XNNWeightsCache { @@ -146,16 +138,7 @@ class XNNWeightsCache { */ void set_packed_cache_path(const std::string& path); - /** Save packed weight index so subsequent loads skip packing. */ - Error save_packed_index(); - private: - static constexpr uint32_t kCacheMagic = 0x58505743; // "XPWC" - static constexpr uint32_t kCacheVersion = 1; - bool load_packed_cache(); - void reset_for_fresh_write(); - void release_entry(void* packed_data_ptr); - void full_unload(); // Runtime Allocator used to reserve memory for packed weights MemoryAllocator* runtime_allocator_; @@ -184,28 +167,18 @@ class XNNWeightsCache { std::string packed_cache_path_; int packed_file_fd_{-1}; size_t packed_file_used_{0}; - // True once load_packed_cache() has populated metadata from a saved - // index, OR once a fresh-write session has been persisted to disk via - // save_packed_index() (so subsequent inits can load from it). - bool cache_loaded_{false}; - // Tracks file offset of each file-backed allocation. Used by - // save_packed_index() to serialize (name → offset, size) index. - std::unordered_map ptr_to_file_offset_; + // Set after an unrecoverable mmap/ftruncate failure. Prevents re-opening + // the cache file on subsequent initialize_for_runtime() calls — re-opening + // with O_TRUNC would truncate the inode beneath any still-live mmap pages + // and the next access would raise SIGBUS. Once disabled, all reserve_space + // calls fall back to heap allocation for the lifetime of this cache. + bool packed_file_disabled_{false}; struct MmapRegion { void* addr; size_t size; }; std::vector mmap_regions_; size_t mmap_regions_synced_{0}; - // Number of regions present at the time of the most recent successful - // save_packed_index. Used to skip no-op saves: identical bytes would - // still bump mtime via pwrite/fsync, making the cache file appear - // modified on every load when nothing has actually changed. A successful - // save closes packed_file_fd_ before returning, so the no-op check is - // unreachable except after a load_packed_cache (or fresh-write path) - // re-opens the fd — both paths populate at least one mmap region, so - // the "zero regions saved" edge case never lives long enough to matter. - size_t mmap_regions_at_last_save_{0}; // For file-backed packed allocations, maps the returned ptr to its index // in mmap_regions_, so delete_packed_data() can munmap when ref_count==0. std::unordered_map file_ptr_to_region_index_; diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp index 25289027a62..ffaba9508d8 100644 --- a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp +++ b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp @@ -79,41 +79,15 @@ Error XnnpackBackendOptions::set_option(const BackendOption& option) { ET_LOG(Error, "XNNPACK packed cache path must be a string."); return Error::InvalidArgument; } - // Same lock as weights_cache_: init() reads packed_cache_path_ under - // weights_cache_mutex_ and feeds it straight into the cache, so an - // unprotected write here would race with that read. - const std::lock_guard lock(weights_cache_mutex_); packed_cache_path_ = std::string(val->data()); ET_LOG( Debug, "Setting XNNPACK packed cache path to %s.", packed_cache_path_.c_str()); - } else if (strcmp(option.key, save_weight_cache_on_disk_option_key) == 0) { - auto* val = std::get_if(&option.value); - if (!val) { - ET_LOG(Error, "XNNPACK save_weight_cache_on_disk must be a bool."); - return Error::InvalidArgument; - } - if (*val) { - return save_weights_cache_locked(); - } } return Error::Ok; } -delegate::XNNWeightsCache& XnnpackBackendOptions::weights_cache() { - return weights_cache_; -} - -std::mutex& XnnpackBackendOptions::weights_cache_mutex() { - return weights_cache_mutex_; -} - -Error XnnpackBackendOptions::save_weights_cache_locked() { - const std::lock_guard lock(weights_cache_mutex_); - return weights_cache_.save_packed_index(); -} - bool XnnpackBackendOptions::resolve_weight_cache( const ET_RUNTIME_NAMESPACE::BackendInitContext& context) const { return resolve_option( diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.h b/backends/xnnpack/runtime/XnnpackBackendOptions.h index 3b4d5fcd211..aed037ac835 100644 --- a/backends/xnnpack/runtime/XnnpackBackendOptions.h +++ b/backends/xnnpack/runtime/XnnpackBackendOptions.h @@ -9,7 +9,6 @@ #pragma once #include -#include #include #include #include @@ -17,7 +16,6 @@ #include #include -#include namespace executorch::backends::xnnpack { @@ -46,44 +44,9 @@ class XnnpackBackendOptions { const std::string& get_packed_cache_path() const; void set_packed_cache_path(const std::string& path); - // Shared XNNWeightsCache (one instance per backend, like the workspace - // manager). The cache itself is not internally synchronized; callers - // MUST hold weights_cache_mutex() around every weights_cache() call — - // including reading the reference and calling any method on it. The - // same mutex also protects packed_cache_path_, so a typical - // load/init/compile sequence holds one lock for the whole block: - // - // std::lock_guard lock(options.weights_cache_mutex()); - // options.weights_cache().set_packed_cache_path( - // options.get_packed_cache_path()); - // options.weights_cache().initialize_for_runtime(...); - // XNNCompiler::compileModel(..., &options.weights_cache(), ...); - // - // The mutex is intentionally exposed (rather than wrapping every - // method) because XNNCompiler needs a raw cache pointer to pass into - // XNNPACK callbacks that fire during xnn_create_runtime; those - // callbacks must run under the same lock as the surrounding init. - delegate::XNNWeightsCache& weights_cache(); - std::mutex& weights_cache_mutex(); - - // Invokes save_packed_index() on the cache while holding the cache - // mutex. Returns the cache's error code; the caller does not need to - // grab the mutex itself. This is the entry point used by set_option() - // when `save_weight_cache_on_disk_option_key` is requested. - runtime::Error save_weights_cache_locked(); - private: XNNWorkspaceManager workspace_manager_; - // Weights cache is shared across all delegate instances. Owned here so - // that all backend-option-keyed state (workspace manager, weights cache, - // packed-cache path) lives in a single place; XnnpackBackend holds an - // XnnpackBackendOptions and delegates synchronization to its mutex. - // Protects weights_cache_ AND packed_cache_path_ (init reads the path - // while holding this lock and hands it to the cache). - std::mutex weights_cache_mutex_; - delegate::XNNWeightsCache weights_cache_; - #ifdef ENABLE_XNNPACK_SHARED_WORKSPACE std::atomic sharing_mode_{WorkspaceSharingMode::Global}; #else diff --git a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp index 59cfbcbdb5d..83937887e25 100644 --- a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp +++ b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp @@ -16,15 +16,8 @@ #include #include #include -#include #include -#include -#include #include -#include -#include -#include -#include using executorch::backends::xnnpack::delegate::XNNWeightsCache; using executorch::extension::FileDataLoader; @@ -359,475 +352,4 @@ TEST_F(XNNWeightsCacheTest, PackedWeightsMmapPathLockCollision) { ::unlink(cache_path.c_str()); } - -// Verify load_packed_cache produces byte-identical inference results to -// a fresh build of the same graph. Guards against weight pointers being -// mis-mapped after cache load. -TEST_F(XNNWeightsCacheTest, SaveAndLoad_PreservesInferenceOutput) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_output_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input_tensor(num_batches * input_channels + padding, 1.0f); - - // Run 1: no cache file (pure heap pack). - std::vector output_baseline(num_batches * output_channels, 0.0f); - { - XNNWeightsCache cache; - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input_tensor.data(), - output_baseline.data()); - } - - // Run 2: file-backed mmap path, save trailer. - { - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - std::vector output_write(num_batches * output_channels, 0.0f); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input_tensor.data(), - output_write.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - EXPECT_EQ(output_write, output_baseline); - } - - // Run 3: fresh instance loads from disk; output must match. - { - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - ASSERT_GT(cache.get_packed_data_names().size(), 0u); - std::vector output_load(num_batches * output_channels, 0.0f); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input_tensor.data(), - output_load.data()); - EXPECT_EQ(output_load, output_baseline); - } - - ::unlink(cache_path.c_str()); -} - -// Corrupted cache file must not crash; load_packed_cache returns false and -// the next init falls through to the fresh-build path that overwrites it. -TEST_F(XNNWeightsCacheTest, LoadPackedCache_RejectsCorruptTrailer) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_corrupt_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - // Write a file with valid size but garbage trailer. - { - std::ofstream f(cache_path, std::ios::binary); - std::vector garbage(1024, '\xCC'); - f.write(garbage.data(), garbage.size()); - } - - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - // Must not crash; load returns false → falls through to fresh build. - Error err = - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - ASSERT_EQ(err, Error::Ok); - - // Fresh build still works. - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input(num_batches * input_channels + padding, 1.0f); - std::vector output(num_batches * output_channels, 0.0f); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - - ::unlink(cache_path.c_str()); -} - -// Repeated init+run+save cycles on the same file must not grow the cache -// file. Guards against the regression where each PTE init re-packed weights -// and appended a fresh copy (+500 MB per inference observed in production). -TEST_F(XNNWeightsCacheTest, MultiSessionLoad_DoesNotGrowCacheFile) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_nogrow_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input(num_batches * input_channels + padding, 1.0f); - std::vector output(num_batches * output_channels, 0.0f); - - // Cycle 1: fresh write of cache. - off_t size_after_first_save = 0; - { - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - struct stat st {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); - size_after_first_save = st.st_size; - ASSERT_GT(size_after_first_save, 0); - } - - // Cycle 2: fresh instance loads from disk, runs, saves. No new weights - // were packed → file must be byte-for-byte identical in length. - { - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - ASSERT_GT(cache.get_packed_data_names().size(), 0u); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - } - { - struct stat st {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); - EXPECT_EQ(st.st_size, size_after_first_save); - } - - // Cycle 3: simulate PTE destroy + recreate inside the same instance. - // delete_packed_data on from_load entries must not erase metadata, so - // the second init's look_up still hits → no new file append. - { - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - cache.delete_packed_data(cache.get_packed_data_names()); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - } - { - struct stat st {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); - EXPECT_EQ(st.st_size, size_after_first_save); - } - - ::unlink(cache_path.c_str()); -} - -// After loading from disk, delete_packed_data must skip from_load entries -// so the next init still hits the cache. Bug would re-pack weights from -// scratch each time the backend destroys + recreates a delegate. -TEST_F( - XNNWeightsCacheTest, - DeletePackedData_OnFromLoadEntries_PreservesMetadata) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_fromload_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input(num_batches * input_channels + padding, 1.0f); - std::vector output(num_batches * output_channels, 0.0f); - - // Seed the cache file. - { - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - } - - // Fresh instance: all populated entries are from_load=true. - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - size_t loaded_count = cache.get_packed_data_names().size(); - ASSERT_GT(loaded_count, 0u); - - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - - // Repeated delete must never erase from_load entries — contrast with - // ReusePackedWeights where two delete calls drop the count to 0. - for (int i = 0; i < 5; ++i) { - cache.delete_packed_data(cache.get_packed_data_names()); - EXPECT_EQ(cache.get_packed_data_names().size(), loaded_count) - << "from_load entries should survive delete; iteration " << i; - } - - ::unlink(cache_path.c_str()); -} - -// A model with multiple PTE/method delegates initializes the cache -// sequentially before any one is destroyed. The second PTE's init must -// see the first PTE's packed entries already in the map → look_up hits, -// no new reserve_space, file does not grow per PTE. -TEST_F(XNNWeightsCacheTest, MultiplePTEsInSameInstance_NoFileGrowth) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_multipte_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input(num_batches * input_channels + padding, 1.0f); - std::vector out_pte1(num_batches * output_channels, 0.0f); - std::vector out_pte2(num_batches * output_channels, 0.0f); - - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - - // PTE 1: fresh pack + save. - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - out_pte1.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - - off_t size_after_pte1 = 0; - { - struct stat st {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); - size_after_pte1 = st.st_size; - ASSERT_GT(size_after_pte1, 0); - } - size_t names_after_pte1 = cache.get_packed_data_names().size(); - ASSERT_GT(names_after_pte1, 0u); - - // PTE 2: sibling delegate, NO destroy between. look_up must hit the - // entry from PTE 1 → no new reserve_space → file size unchanged after - // save. - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - out_pte2.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - - { - struct stat st {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); - EXPECT_EQ(st.st_size, size_after_pte1) - << "PTE 2 with same weights must not append to the cache file"; - } - EXPECT_EQ(cache.get_packed_data_names().size(), names_after_pte1); - - // Both PTEs produced the same output for the same input (correctness). - EXPECT_EQ(out_pte1, out_pte2); - - // PTE 3: third sibling. Still no growth. - std::vector out_pte3(num_batches * output_channels, 0.0f); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - out_pte3.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - { - struct stat st {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); - EXPECT_EQ(st.st_size, size_after_pte1); - } - EXPECT_EQ(out_pte3, out_pte1); - - ::unlink(cache_path.c_str()); -} - -// save_packed_index must be a true no-op when no new reserve_space happened -// since the last save — same content but writing would still bump mtime, -// making the cache file look modified on every model load. -TEST_F(XNNWeightsCacheTest, SavePackedIndex_NoNewReserves_IsNoOp) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_noop_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input(num_batches * input_channels + padding, 1.0f); - std::vector output(num_batches * output_channels, 0.0f); - - // Seed cache + first save. - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - - // Force an old mtime so any real write is detectable as a forward jump, - // without relying on wall-clock granularity / sleep (sleeps are flaky and - // forbidden by lint). - const struct timespec old_times[2] = { - {1000000, 0}, // atime - {1000000, 0}, // mtime - }; - ASSERT_EQ(::utimensat(AT_FDCWD, cache_path.c_str(), old_times, 0), 0); - - struct stat st_before {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st_before), 0); - - // Second save with no intervening reserve_space → no-op short-circuit. - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - - struct stat st_after {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st_after), 0); - EXPECT_EQ(st_before.st_size, st_after.st_size); - EXPECT_EQ(st_before.st_mtime, st_after.st_mtime); - - ::unlink(cache_path.c_str()); -} - -// Stress test for gjcomer's V6 review concern: concurrent -// `set_packed_cache_path` + `save_packed_index` against the shared cache -// must not crash or leave the on-disk file inconsistent under the lock -// discipline that XNNPACKBackend uses (single mutex around the cache). -// This does NOT exercise concurrent runtime creation — XNNPACK's runtime -// init itself is not thread-safe and would require XNNPACKBackend -// machinery to test properly. -TEST_F(XNNWeightsCacheTest, ConcurrentOptionsAndSave_NoCrash_FileStable) { - std::string cache_path = std::string("/tmp/xnn_weights_cache_concurrent_") + - std::to_string(::getpid()) + ".packed_cache"; - ::unlink(cache_path.c_str()); - - // Seed a populated cache + initial save so subsequent save_packed_index - // calls hit the no-op short-circuit path (the case most prone to race). - std::vector batches{1, 2, 3}; - size_t input_channels = 3; - size_t output_channels = 4; - size_t num_batches = 1 * 2 * 3; - size_t padding = 32; - std::vector input(num_batches * input_channels + padding, 1.0f); - std::vector output(num_batches * output_channels, 0.0f); - XNNWeightsCache cache; - cache.set_packed_cache_path(cache_path); - cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); - BuildAndRunGraphWithWeightsCache( - cache, - batches, - input_channels, - output_channels, - input.data(), - output.data()); - ASSERT_EQ(cache.save_packed_index(), Error::Ok); - - struct stat st_baseline {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st_baseline), 0); - - // Lock discipline matches XNNPACKBackend's `weights_cache_mutex_`: every - // cache mutation is serialized. Threads spam set_packed_cache_path and - // save_packed_index under the shared lock for ~25 iterations each. - std::mutex cache_mu; - constexpr int kThreads = 4; - constexpr int kIters = 25; - std::atomic failure_count{0}; - std::vector threads; - threads.reserve(kThreads); - for (int t = 0; t < kThreads; ++t) { - threads.emplace_back([&]() { - for (int i = 0; i < kIters; ++i) { - try { - const std::lock_guard lock(cache_mu); - // Re-set the same path — should be benign / a stable no-op. - cache.set_packed_cache_path(cache_path); - // No new reserves between calls → save short-circuits. - (void)cache.save_packed_index(); - } catch (const std::exception&) { - failure_count.fetch_add(1); - } - } - }); - } - for (auto& th : threads) { - th.join(); - } - - EXPECT_EQ(failure_count.load(), 0); - - // File must not balloon: every iteration's save is a no-op. - struct stat st_after {}; - ASSERT_EQ(::stat(cache_path.c_str(), &st_after), 0); - EXPECT_EQ(st_after.st_size, st_baseline.st_size); - - ::unlink(cache_path.c_str()); -} - #endif