diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
index 8375e69a124..3a5d6ab7958 100644
--- a/backends/xnnpack/runtime/XNNPACKBackend.cpp
+++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp
@@ -95,14 +95,16 @@ class XnnpackBackend final
     // concurrent inits from resetting is_finalized_ or overwriting
     // named_data_map_ while compileModel is using the shared weights cache.
     std::unique_lock<std::mutex> lock_weights_cache(
-        options_.weights_cache_mutex(), std::defer_lock);
+        weights_cache_mutex_, std::defer_lock);
     if (use_weight_cache) {
       lock_weights_cache.lock();
 
       const auto& cache_path = options_.get_packed_cache_path();
-      options_.weights_cache().set_packed_cache_path(cache_path);
+      if (!cache_path.empty()) {
+        weights_cache_->set_packed_cache_path(cache_path);
+      }
 
-      options_.weights_cache().initialize_for_runtime(
+      weights_cache_->initialize_for_runtime(
           context.get_runtime_allocator(), named_data_map);
       workspace->set_uses_weight_cache();
     }
@@ -118,7 +120,7 @@ class XnnpackBackend final
         processed->data(),
         processed->size(),
         executor,
-        &options_.weights_cache(),
+        weights_cache_.get(),
         workspace_ptr,
         named_data_map,
         use_weight_cache);
@@ -147,7 +149,7 @@ class XnnpackBackend final
     auto workspace = executor->get_workspace();
 
     std::unique_lock<std::mutex> lock_weights_cache(
-        options_.weights_cache_mutex(), std::defer_lock);
+        weights_cache_mutex_, std::defer_lock);
     if (executor->uses_weight_cache() || workspace->uses_weight_cache()) {
       lock_weights_cache.lock();
     }
@@ -178,15 +180,14 @@ class XnnpackBackend final
       auto workspace = executor->get_workspace();
 
       const std::lock_guard<std::mutex> lock_weights_cache(
-          options_.weights_cache_mutex());
+          weights_cache_mutex_);
 
 #ifdef ENABLE_XNNPACK_PROFILING
       executor->print_avg_op_timings();
 #endif
 
       if (executor->uses_weight_cache()) {
-        options_.weights_cache().delete_packed_data(
-            executor->get_packed_data_names());
+        weights_cache_->delete_packed_data(executor->get_packed_data_names());
       }
 
       // This is needed to serialize access to xnn_delete_runtime which is not
@@ -217,29 +218,27 @@ class XnnpackBackend final
   Error set_option(
       BackendOptionContext& context,
       const Span<BackendOption>& backend_options) override {
-    // Process every option even if one fails — applying a `packed_cache_path`
-    // and triggering `save_weight_cache_on_disk` in the same array must not
-    // depend on declaration order. Capture the first error and report it
-    // after the loop. All option-key dispatch — including the disk-save
-    // side effect — lives inside XnnpackBackendOptions::set_option, which
-    // owns the weights-cache instance and its mutex.
-    Error first_err = Error::Ok;
     for (const auto& option : backend_options) {
       Error err = options_.set_option(option);
-      if (err != Error::Ok && first_err == Error::Ok) {
-        first_err = err;
+      if (err != Error::Ok) {
+        return err;
       }
     }
-    return first_err;
+    return Error::Ok;
   }
 
  private:
   mutable xnnpack::XnnpackBackendOptions options_;
 
-  // Lock hierarchy for mutexes:
-  //   options_.weights_cache_mutex()
-  //   workspace_meta_mutex_
-  //   workspace_mutex_ (owned by executor)
+  // Weights cache is global to all delegate instances.
+  mutable std::mutex weights_cache_mutex_;
+  std::unique_ptr<XNNWeightsCache> weights_cache_ =
+      std::make_unique<XNNWeightsCache>();
+
+  // Lock Hiearchy for Mutexes:
+  // weights_cache_mutex_
+  // workspace_meta_mutex_
+  // workspace_mutex_ (owned by executor)
 };
 
 namespace {
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h
index 1053a206360..e3492c3f5f3 100644
--- a/backends/xnnpack/runtime/XNNPACKBackend.h
+++ b/backends/xnnpack/runtime/XNNPACKBackend.h
@@ -20,17 +20,6 @@ const char weight_cache_option_key[] = "weight_cache_enabled";
 // @lint-ignore CLANGTIDY facebook-hte-CArray
 const char packed_cache_path_option_key[] = "packed_cache_path";
 
-/// EXPERIMENTAL — option name and semantics may change without notice.
-///
-/// Setting this to `true` triggers persisting the packed weight cache to disk
-/// so a subsequent process load can mmap the same file and skip XNNPACK weight
-/// repacking. The on-disk path is configured via
-/// `packed_cache_path_option_key`. The disk write is a one-shot side effect
-/// (the value is not stored): every `true` set fires another save.
-// Must remain a C array for the BackendOptions template overloads.
-// @lint-ignore CLANGTIDY facebook-hte-CArray
-const char save_weight_cache_on_disk_option_key[] = "save_weight_cache_on_disk";
-
 /// Workspace sharing mode. This is a backend option that can be set via the
 /// set_option API to control memory sharing between CALL_DELEGATE instances.
 /// This is useful for reducing memory consumption.
diff --git a/backends/xnnpack/runtime/XNNWeightsCache.cpp b/backends/xnnpack/runtime/XNNWeightsCache.cpp
index 005169249bc..70c410e5729 100644
--- a/backends/xnnpack/runtime/XNNWeightsCache.cpp
+++ b/backends/xnnpack/runtime/XNNWeightsCache.cpp
@@ -63,72 +63,6 @@ XNNWeightsCache::~XNNWeightsCache() {
 #endif
 }
 
-// Trivial helpers for little-endian byte serialization of the trailer.
-template <typename T>
-static void append_le(std::vector<uint8_t>& buf, T value) {
-  const auto* p = reinterpret_cast<const uint8_t*>(&value);
-  buf.insert(buf.end(), p, p + sizeof(T));
-}
-
-template <typename T>
-static T read_le(const uint8_t* src) {
-  T value;
-  memcpy(&value, src, sizeof(T));
-  return value;
-}
-
-#ifndef _WIN32
-// Open the cache file and take an advisory exclusive lock. Returns the
-// fd, or -1 if open/flock failed (logs the failure). The caller decides
-// how to recover (typically: skip the mmap path for this init).
-static int open_locked(const std::string& path, int flags) {
-  int fd = open(path.c_str(), flags, 0600);
-  if (fd < 0) {
-    ET_LOG(Error, "open(%s) failed (errno=%d)", path.c_str(), errno);
-    return -1;
-  }
-  if (flock(fd, LOCK_EX | LOCK_NB) != 0) {
-    ET_LOG(Error, "flock(%s) failed (errno=%d)", path.c_str(), errno);
-    close(fd);
-    return -1;
-  }
-  return fd;
-}
-
-// Drop in-memory state that referenced a now-truncated cache file.
-// Heap-backed entries (live in packed_pointer_to_container_) stay; their
-// packed_data_ptrs_ slots remain valid so existing offsets don't shift.
-void XNNWeightsCache::reset_for_fresh_write() {
-  for (auto& region : mmap_regions_) {
-    if (region.addr != nullptr && region.addr != MAP_FAILED) {
-      munmap(region.addr, region.size);
-    }
-  }
-  mmap_regions_.clear();
-  mmap_regions_synced_ = 0;
-  packed_file_used_ = 0;
-  ptr_to_file_offset_.clear();
-  file_ptr_to_region_index_.clear();
-  for (auto it = name_to_packed_data_metadata_.begin();
-       it != name_to_packed_data_metadata_.end();) {
-    bool is_heap_backed = false;
-    if (it->second.offset < packed_data_ptrs_.size()) {
-      void* ptr = packed_data_ptrs_[it->second.offset];
-      if (ptr != nullptr &&
-          packed_pointer_to_container_.find(ptr) !=
-              packed_pointer_to_container_.end()) {
-        is_heap_backed = true;
-      }
-    }
-    if (is_heap_backed) {
-      ++it;
-    } else {
-      it = name_to_packed_data_metadata_.erase(it);
-    }
-  }
-}
-#endif
-
 Error XNNWeightsCache::initialize_for_runtime(
     MemoryAllocator* runtime_allocator,
     const NamedDataMap* named_data_map) {
@@ -137,52 +71,38 @@ Error XNNWeightsCache::initialize_for_runtime(
   is_finalized_ = false;
 
 #ifndef _WIN32
-  if (packed_cache_path_.empty() || packed_file_fd_ >= 0) {
-    return Error::Ok;
-  }
-
-  // Already loaded earlier this session; just reopen the write fd that
-  // save_packed_index() closed. Subsequent reserve_space can extend the
-  // file for any entries not in the saved trailer.
-  if (cache_loaded_) {
-    packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR);
-    return Error::Ok;
-  }
-
-  // First init for this path: try to load the saved trailer; on success
-  // open a write fd for any new entries. If load fails, fall through to
-  // fresh-write below.
-  if (load_packed_cache()) {
-    ET_LOG(
-        Info,
-        "Loaded packed weight cache: %s (%zu entries)",
-        packed_cache_path_.c_str(),
-        name_to_packed_data_metadata_.size());
-    packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR);
-    return Error::Ok;
-  }
-
-  // Fresh write. Skip O_TRUNC in open_locked so a concurrent holder's
-  // mmap stays valid; truncate explicitly only after we hold the lock.
-  packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR | O_CREAT);
-  if (packed_file_fd_ < 0) {
-    return Error::Ok;
-  }
-  if (ftruncate(packed_file_fd_, 0) != 0) {
-    ET_LOG(
-        Error,
-        "ftruncate(0) failed for %s (errno=%d); heap fallback this init",
-        packed_cache_path_.c_str(),
-        errno);
-    close(packed_file_fd_);
-    packed_file_fd_ = -1;
-    return Error::Ok;
+  // Open the file for packed weights. Each reserve_space() call
+  // independently mmaps a region of the file. Once packed_file_disabled_
+  // is set we never re-open — re-opening with O_TRUNC would corrupt any
+  // still-live mappings into the same path and cause SIGBUS on access.
+  if (!packed_cache_path_.empty() && packed_file_fd_ < 0 &&
+      !packed_file_disabled_) {
+    packed_file_fd_ =
+        open(packed_cache_path_.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
+    if (packed_file_fd_ < 0) {
+      ET_LOG(
+          Error,
+          "Failed to open packed weight file: %s (errno=%d)",
+          packed_cache_path_.c_str(),
+          errno);
+    } else if (flock(packed_file_fd_, LOCK_EX | LOCK_NB) != 0) {
+      // Another XNNWeightsCache instance (this process or another) is
+      // already using this path. O_TRUNC above would corrupt its mappings.
+      // Disable mmap for this instance to prevent collision; fall back to
+      // heap allocation for the remainder of this cache's lifetime.
+      ET_LOG(
+          Error,
+          "Another instance is using packed weight cache file %s (errno=%d); "
+          "disabling mmap path",
+          packed_cache_path_.c_str(),
+          errno);
+      close(packed_file_fd_);
+      packed_file_fd_ = -1;
+      packed_file_disabled_ = true;
+    } else {
+      ET_LOG(Info, "Opened packed weight file: %s", packed_cache_path_.c_str());
+    }
   }
-  reset_for_fresh_write();
-  ET_LOG(
-      Info,
-      "Opened packed weight file for writing: %s",
-      packed_cache_path_.c_str());
 #endif
 
   return Error::Ok;
@@ -244,84 +164,59 @@ Result<const uint8_t*> XNNWeightsCache::load_unpacked_data(
       static_cast<const uint8_t*>(named_data.get().data());
   unpacked_data_.push_back(std::move(named_data.get()));
   unpacked_data_to_name_[data_pointer] = name;
-  return data_pointer;
-}
-
-void XNNWeightsCache::release_entry(void* packed_data_ptr) {
-  packed_pointer_to_container_.erase(packed_data_ptr);
-#ifndef _WIN32
-  // Per-entry file-backed mmap region: munmap to release VM. The
-  // packed_data_ptrs_ slot is nulled by the caller so existing offsets
-  // stay valid.
-  auto region_it = file_ptr_to_region_index_.find(packed_data_ptr);
-  if (region_it != file_ptr_to_region_index_.end()) {
-    MmapRegion& region = mmap_regions_[region_it->second];
-    if (region.addr != nullptr && region.addr != MAP_FAILED) {
-      munmap(region.addr, region.size);
-      region.addr = nullptr;
-      region.size = 0;
-    }
-    file_ptr_to_region_index_.erase(region_it);
-  }
-#endif
-}
 
-void XNNWeightsCache::full_unload() {
-#ifndef _WIN32
-  for (auto& region : mmap_regions_) {
-    if (region.addr != nullptr && region.addr != MAP_FAILED) {
-      munmap(region.addr, region.size);
-      region.addr = nullptr;
-      region.size = 0;
-    }
-  }
-  mmap_regions_.clear();
-  mmap_regions_synced_ = 0;
-  packed_data_ptrs_.clear();
-  ptr_to_file_offset_.clear();
-  file_ptr_to_region_index_.clear();
-  cache_loaded_ = false;
-  if (packed_file_fd_ >= 0) {
-    close(packed_file_fd_);
-    packed_file_fd_ = -1;
-  }
-#endif
+  return data_pointer;
 }
 
 Error XNNWeightsCache::delete_packed_data(
     const std::vector<std::string>& packed_data_names) {
   if (!is_finalized_) {
-    ET_LOG(Error, "delete_packed_data called before finalize_for_runtime");
+    ET_LOG(
+        Error,
+        "Error, attempted to delete packed data from the cache but the cache is not finalized");
     return Error::InvalidArgument;
   }
   for (const std::string& name : packed_data_names) {
     auto entry = name_to_packed_data_metadata_.find(name);
     if (entry == name_to_packed_data_metadata_.end()) {
-      ET_LOG(Error, "delete_packed_data: '%s' not found", name.c_str());
+      ET_LOG(
+          Error,
+          "Error, attempted to deleted packed data: %s, from the cache but it wasn't found",
+          name.c_str());
       return Error::InvalidArgument;
+    } else {
+      entry->second.ref_count--;
+      if (entry->second.ref_count == 0) {
+        void* packed_data_ptr = packed_data_ptrs_[entry->second.offset];
+        // Erase the key/value from the map frees the pointer holding the
+        // packed data. No-op on the file-backed mmap path, where the
+        // container is not populated.
+        packed_pointer_to_container_.erase(packed_data_ptr);
+#ifndef _WIN32
+        // File-backed mmap path: munmap the region so VM and page-cache
+        // usage is released, not just retained until cache destruction.
+        // The vector slot is set to nullptr below so existing offsets remain
+        // valid for any concurrent lookups.
+        auto region_it = file_ptr_to_region_index_.find(packed_data_ptr);
+        if (region_it != file_ptr_to_region_index_.end()) {
+          size_t idx = region_it->second;
+          MmapRegion& region = mmap_regions_[idx];
+          if (region.addr != nullptr && region.addr != MAP_FAILED) {
+            munmap(region.addr, region.size);
+            region.addr = nullptr;
+            region.size = 0;
+          }
+          file_ptr_to_region_index_.erase(region_it);
+        }
+#endif
+        // Remove the pointer from packed_data_ptrs_.
+        packed_data_ptrs_[entry->second.offset] = nullptr;
+        // Erase the name to packed metadata entry.
+        name_to_packed_data_metadata_.erase(entry->first);
+      }
     }
-    if (--entry->second.ref_count > 0) {
-      continue;
-    }
-    // Keep from_load entries: their packed bytes live in the cache file
-    // and stay valid until full unload. Erasing them would force the
-    // next init to re-pack and append ~450 MB to the file per cycle.
-    if (entry->second.from_load) {
-      entry->second.in_current_runtime = false;
-      continue;
-    }
-    release_entry(packed_data_ptrs_[entry->second.offset]);
-    packed_data_ptrs_[entry->second.offset] = nullptr;
-    name_to_packed_data_metadata_.erase(entry);
   }
 
-  // Last entry gone: drop all in-memory state. File on disk is preserved
-  // so the next process can load_packed_cache and skip re-packing. If
-  // reserve_space after the last save corrupted the trailer, load will
-  // fall through to fresh-write — same outcome as truncating here.
-  if (name_to_packed_data_metadata_.empty()) {
-    full_unload();
-  }
   return Error::Ok;
 }
 
@@ -331,11 +226,15 @@ size_t XNNWeightsCache::look_up(
   const void* unpacked_weights_ptr = cache_key->kernel;
   const void* unpacked_bias_ptr = cache_key->bias;
   auto entry = context->unpacked_data_to_name_.find(unpacked_weights_ptr);
+
+  // Check if weight_pointer has been cached
   if (entry == context->unpacked_data_to_name_.end()) {
     return SIZE_MAX;
   }
+
   std::string weight_bias_name = entry->second;
 
+  // Check if bias_pointer has been cached
   if (unpacked_bias_ptr != nullptr) {
     auto bias_entry = context->unpacked_data_to_name_.find(unpacked_bias_ptr);
     if (bias_entry != context->unpacked_data_to_name_.end()) {
@@ -343,12 +242,14 @@ size_t XNNWeightsCache::look_up(
     }
   }
 
+  // check if weight_bias_name has been packed already
   auto packed_weight_entry =
       context->name_to_packed_data_metadata_.find(weight_bias_name);
   if (packed_weight_entry == context->name_to_packed_data_metadata_.end()) {
     return SIZE_MAX;
   }
   packed_weight_entry->second.in_current_runtime = true;
+
   return packed_weight_entry->second.offset;
 }
 
@@ -363,11 +264,16 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
     if (ftruncate(context->packed_file_fd_, file_offset + map_size) != 0) {
       ET_LOG(
           Error,
-          "reserve_space ftruncate to %zu failed (errno=%d)",
+          "ftruncate to %zu failed (errno=%d)",
           file_offset + map_size,
           errno);
       close(context->packed_file_fd_);
       context->packed_file_fd_ = -1;
+      // Existing mmap_regions_ still reference this inode. Disable the
+      // file-backed path permanently so a future initialize_for_runtime
+      // doesn't re-open + O_TRUNC the same path and trigger SIGBUS on the
+      // stale mappings.
+      context->packed_file_disabled_ = true;
       return context->reserve_space_heap(n);
     }
 
@@ -379,18 +285,15 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
         context->packed_file_fd_,
         file_offset);
     if (ptr == MAP_FAILED) {
-      ET_LOG(
-          Error,
-          "reserve_space mmap %zu bytes failed (errno=%d)",
-          map_size,
-          errno);
+      ET_LOG(Error, "mmap %zu bytes failed (errno=%d)", map_size, errno);
       close(context->packed_file_fd_);
       context->packed_file_fd_ = -1;
+      context->packed_file_disabled_ = true;
       return context->reserve_space_heap(n);
     }
 
     // mmap returns page-aligned (>= 4 KiB), which trivially satisfies the
-    // 64-byte kPackedAllocationAlignment XNNPACK expects.
+    // 64-byte kPackedAllocationAlignment XNNPACK expects. Assert defensively.
     ET_DCHECK_MSG(
         (reinterpret_cast<uintptr_t>(ptr) % kPackedAllocationAlignment) == 0,
         "mmap returned ptr not aligned to %zu bytes",
@@ -399,10 +302,10 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
     context->packed_file_used_ = file_offset + map_size;
     context->file_ptr_to_region_index_[ptr] = context->mmap_regions_.size();
     context->mmap_regions_.push_back({ptr, map_size});
-    context->ptr_to_file_offset_[ptr] = file_offset;
     return ptr;
   }
 #endif
+
   return context->reserve_space_heap(n);
 }
 
@@ -440,8 +343,11 @@ size_t XNNWeightsCache::look_up_or_insert(
     size_t size) {
   size_t offset = context->look_up(context, cache_key);
 
-  // XNNPACK calls with ptr==nullptr after a cache hit (no packing
-  // happened, nothing to validate against). Return the offset as-is.
+  // XNNPACK can call this with ptr==nullptr when it previously hit the cache
+  // and skipped packing. We can't validate against the ptr contents in this
+  // case, so just return the offset. This might actually be a bug in XNNPACK
+  // since calling look_up_or_insert with ptr==nullptr doesn't really make
+  // sense...
   if (ptr == nullptr) {
     return offset;
   }
@@ -451,7 +357,7 @@ size_t XNNWeightsCache::look_up_or_insert(
     if (saved_ptr != nullptr && 0 == memcmp(ptr, saved_ptr, size)) {
       return offset;
     }
-    // Cache out of date: name hits but packed bytes differ.
+    // Failure, cache is out of date
     return SIZE_MAX;
   }
 
@@ -470,7 +376,6 @@ size_t XNNWeightsCache::look_up_or_insert(
     }
     PackedDataMeta packed_data_metadata;
     packed_data_metadata.offset = next_offset;
-    packed_data_metadata.data_size = size;
     packed_data_metadata.ref_count =
         0; // ref_count is only incremented after finalizing for runtime
     packed_data_metadata.in_current_runtime = true;
@@ -503,209 +408,6 @@ void XNNWeightsCache::set_packed_cache_path(const std::string& path) {
   packed_cache_path_ = path;
 }
 
-Error XNNWeightsCache::save_packed_index() {
-#ifndef _WIN32
-  if (packed_file_fd_ < 0) {
-    return Error::Ok;
-  }
-  // Skip no-op saves: identical bytes would still bump mtime via
-  // pwrite/fsync, making the cache file appear modified on every load.
-  // The `mmap_regions_at_last_save_ > 0` guard is sufficient because a
-  // successful save closes packed_file_fd_ before returning, so re-entry
-  // past the `fd < 0` early-return above requires initialize_for_runtime
-  // to reopen the fd, which only happens via load_packed_cache (or the
-  // fresh-write path) that always populates at least one mmap region.
-  if (mmap_regions_.size() == mmap_regions_at_last_save_ &&
-      mmap_regions_at_last_save_ > 0) {
-    return Error::Ok;
-  }
-
-  size_t index_start = packed_file_used_;
-  std::vector<uint8_t> buf;
-  uint32_t entry_count = 0;
-
-  // Index entry: [name_len:u32][name][file_offset:u64][data_size:u64]
-  for (const auto& [name, meta] : name_to_packed_data_metadata_) {
-    void* ptr = packed_data_ptrs_[meta.offset];
-    auto it = ptr_to_file_offset_.find(ptr);
-    if (it == ptr_to_file_offset_.end()) {
-      continue;
-    }
-    entry_count++;
-    append_le(buf, static_cast<uint32_t>(name.size()));
-    buf.insert(buf.end(), name.begin(), name.end());
-    append_le(buf, static_cast<uint64_t>(it->second));
-    append_le(buf, static_cast<uint64_t>(meta.data_size));
-  }
-
-  // Footer: [index_start:u64][entry_count:u32][magic:u32][version:u32]
-  append_le(buf, static_cast<uint64_t>(index_start));
-  append_le(buf, entry_count);
-  append_le(buf, kCacheMagic);
-  append_le(buf, kCacheVersion);
-
-  if (ftruncate(packed_file_fd_, index_start + buf.size()) != 0) {
-    ET_LOG(Error, "Failed to extend file for index (errno=%d)", errno);
-    return Error::Internal;
-  }
-  ssize_t written =
-      pwrite(packed_file_fd_, buf.data(), buf.size(), index_start);
-  if (written != static_cast<ssize_t>(buf.size())) {
-    ET_LOG(Error, "Failed to write index (errno=%d)", errno);
-    return Error::Internal;
-  }
-  // Ensure trailer is on disk before we declare success.
-  if (fsync(packed_file_fd_) != 0) {
-    ET_LOG(Error, "fsync of packed cache failed (errno=%d)", errno);
-    // Continue — data is in page cache; durability is best-effort.
-  }
-  ET_LOG(
-      Info,
-      "Saved packed weight index: %u entries at offset %zu",
-      entry_count,
-      index_start);
-
-  // Promote freshly-packed entries to from_load now that they're durable
-  // on disk, so delete_packed_data preserves them across unload/reload.
-  for (auto& [name, meta] : name_to_packed_data_metadata_) {
-    if (!meta.from_load &&
-        ptr_to_file_offset_.find(packed_data_ptrs_[meta.offset]) !=
-            ptr_to_file_offset_.end()) {
-      meta.from_load = true;
-    }
-  }
-
-  mmap_regions_at_last_save_ = mmap_regions_.size();
-
-  // Close the fd so the next init re-enters load_packed_cache and reads
-  // the trailer we just wrote.
-  if (close(packed_file_fd_) != 0) {
-    ET_LOG(Error, "close of packed cache fd failed (errno=%d)", errno);
-  }
-  packed_file_fd_ = -1;
-#endif
-  return Error::Ok;
-}
-
-bool XNNWeightsCache::load_packed_cache() {
-#ifndef _WIN32
-  int fd = open(packed_cache_path_.c_str(), O_RDONLY);
-  if (fd < 0) {
-    return false;
-  }
-  // Prevent racing with a concurrent writer
-  if (flock(fd, LOCK_SH | LOCK_NB) != 0) {
-    close(fd);
-    return false;
-  }
-  struct stat st {};
-  if (fstat(fd, &st) != 0 || st.st_size < 20) {
-    close(fd);
-    return false;
-  }
-  size_t file_size = static_cast<size_t>(st.st_size);
-
-  uint8_t footer[20];
-  if (pread(fd, footer, 20, file_size - 20) != 20) {
-    close(fd);
-    return false;
-  }
-  uint64_t index_start = read_le<uint64_t>(footer);
-  uint32_t entry_count = read_le<uint32_t>(footer + 8);
-  uint32_t magic = read_le<uint32_t>(footer + 12);
-  uint32_t version = read_le<uint32_t>(footer + 16);
-
-  if (magic != kCacheMagic || version != kCacheVersion ||
-      index_start >= file_size - 20) {
-    close(fd);
-    return false;
-  }
-  const size_t index_region_end = file_size - 20;
-
-  void* map = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
-  close(fd);
-  if (map == MAP_FAILED) {
-    return false;
-  }
-  mmap_regions_.push_back({map, file_size});
-
-  const uint8_t* cursor = static_cast<const uint8_t*>(map) + index_start;
-  const uint8_t* end = static_cast<const uint8_t*>(map) + index_region_end;
-
-  for (uint32_t i = 0; i < entry_count && cursor + 4 <= end; ++i) {
-    uint32_t name_len = read_le<uint32_t>(cursor);
-    cursor += 4;
-    if (cursor + name_len + 16 > end) {
-      // Truncated entry header: trailer doesn't match the entry_count we
-      // read from the footer, so the cache is corrupt. Apply the same
-      // full rollback as the invalid-bounds branch below — otherwise the
-      // entries inserted so far would be silently accepted as a partial
-      // cache, and the next save_packed_index would rewrite a trailer
-      // covering only that subset (permanently dropping the rest).
-      ET_LOG(
-          Error,
-          "load_packed_cache: truncated entry header at index %u (entry_count=%u); aborting load",
-          i,
-          entry_count);
-      munmap(map, file_size);
-      mmap_regions_.pop_back();
-      name_to_packed_data_metadata_.clear();
-      packed_data_ptrs_.clear();
-      ptr_to_file_offset_.clear();
-      return false;
-    }
-    std::string name(reinterpret_cast<const char*>(cursor), name_len);
-    cursor += name_len;
-    uint64_t file_offset = read_le<uint64_t>(cursor);
-    cursor += 8;
-    uint64_t data_size = read_le<uint64_t>(cursor);
-    cursor += 8;
-
-    // Bounds check: the entry's bytes must lie entirely inside the
-    // packed-data region.
-    if (file_offset >= index_start || data_size > index_start - file_offset) {
-      ET_LOG(
-          Error,
-          "load_packed_cache: entry '%s' has invalid bounds (file_offset=%llu, data_size=%llu, index_start=%llu); aborting load",
-          name.c_str(),
-          static_cast<unsigned long long>(file_offset),
-          static_cast<unsigned long long>(data_size),
-          static_cast<unsigned long long>(index_start));
-      // Roll back any partial state.
-      munmap(map, file_size);
-      mmap_regions_.pop_back();
-      name_to_packed_data_metadata_.clear();
-      packed_data_ptrs_.clear();
-      ptr_to_file_offset_.clear();
-      return false;
-    }
-
-    size_t ptr_index = packed_data_ptrs_.size();
-    void* entry_ptr = static_cast<char*>(map) + file_offset;
-    packed_data_ptrs_.push_back(entry_ptr);
-    // Tracked so a subsequent save_packed_index can rewrite the trailer
-    // covering both loaded and newly-packed entries.
-    ptr_to_file_offset_[entry_ptr] = file_offset;
-    PackedDataMeta meta;
-    meta.offset = ptr_index;
-    meta.data_size = data_size;
-    meta.ref_count = 0;
-    meta.in_current_runtime = false;
-    meta.from_load = true;
-    name_to_packed_data_metadata_[name] = meta;
-  }
-
-  cache_loaded_ = true;
-  packed_file_used_ = index_start;
-  // In-memory state matches the on-disk trailer; the next save would be
-  // a no-op. Initialize watermark so save_packed_index short-circuits.
-  mmap_regions_at_last_save_ = mmap_regions_.size();
-  return true;
-#else
-  return false;
-#endif
-}
-
 } // namespace delegate
 } // namespace xnnpack
 } // namespace backends
diff --git a/backends/xnnpack/runtime/XNNWeightsCache.h b/backends/xnnpack/runtime/XNNWeightsCache.h
index 4bfa916d289..a41fed49fd1 100644
--- a/backends/xnnpack/runtime/XNNWeightsCache.h
+++ b/backends/xnnpack/runtime/XNNWeightsCache.h
@@ -30,20 +30,12 @@ using executorch::runtime::MemoryAllocator;
 using executorch::runtime::Result;
 
 struct PackedDataMeta {
-  size_t offset{};
-  size_t data_size{0};
+  size_t offset;
   // Count number of xnn_runtime_t this packed data is used in
-  size_t ref_count{};
+  size_t ref_count;
   // true if this packed data was inserted or looked up for the
   // current runtime being created
-  bool in_current_runtime{};
-  // True if this entry's bytes are persisted in the on-disk cache file
-  // (either originally loaded via load_packed_cache, or freshly packed
-  // and then save_packed_index-ed). Used by delete_packed_data to
-  // detect when all persistent entries are gone, at which point
-  // cache_loaded_ is auto-invalidated so the next init re-enters
-  // load_packed_cache and reuses the saved file instead of re-packing.
-  bool from_load{false};
+  bool in_current_runtime;
 };
 
 class XNNWeightsCache {
@@ -146,16 +138,7 @@ class XNNWeightsCache {
    */
   void set_packed_cache_path(const std::string& path);
 
-  /** Save packed weight index so subsequent loads skip packing. */
-  Error save_packed_index();
-
  private:
-  static constexpr uint32_t kCacheMagic = 0x58505743; // "XPWC"
-  static constexpr uint32_t kCacheVersion = 1;
-  bool load_packed_cache();
-  void reset_for_fresh_write();
-  void release_entry(void* packed_data_ptr);
-  void full_unload();
   // Runtime Allocator used to reserve memory for packed weights
   MemoryAllocator* runtime_allocator_;
 
@@ -184,28 +167,18 @@ class XNNWeightsCache {
   std::string packed_cache_path_;
   int packed_file_fd_{-1};
   size_t packed_file_used_{0};
-  // True once load_packed_cache() has populated metadata from a saved
-  // index, OR once a fresh-write session has been persisted to disk via
-  // save_packed_index() (so subsequent inits can load from it).
-  bool cache_loaded_{false};
-  // Tracks file offset of each file-backed allocation. Used by
-  // save_packed_index() to serialize (name → offset, size) index.
-  std::unordered_map<void*, size_t> ptr_to_file_offset_;
+  // Set after an unrecoverable mmap/ftruncate failure. Prevents re-opening
+  // the cache file on subsequent initialize_for_runtime() calls — re-opening
+  // with O_TRUNC would truncate the inode beneath any still-live mmap pages
+  // and the next access would raise SIGBUS. Once disabled, all reserve_space
+  // calls fall back to heap allocation for the lifetime of this cache.
+  bool packed_file_disabled_{false};
   struct MmapRegion {
     void* addr;
     size_t size;
   };
   std::vector<MmapRegion> mmap_regions_;
   size_t mmap_regions_synced_{0};
-  // Number of regions present at the time of the most recent successful
-  // save_packed_index. Used to skip no-op saves: identical bytes would
-  // still bump mtime via pwrite/fsync, making the cache file appear
-  // modified on every load when nothing has actually changed. A successful
-  // save closes packed_file_fd_ before returning, so the no-op check is
-  // unreachable except after a load_packed_cache (or fresh-write path)
-  // re-opens the fd — both paths populate at least one mmap region, so
-  // the "zero regions saved" edge case never lives long enough to matter.
-  size_t mmap_regions_at_last_save_{0};
   // For file-backed packed allocations, maps the returned ptr to its index
   // in mmap_regions_, so delete_packed_data() can munmap when ref_count==0.
   std::unordered_map<void*, size_t> file_ptr_to_region_index_;
diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp
index 25289027a62..ffaba9508d8 100644
--- a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp
+++ b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp
@@ -79,41 +79,15 @@ Error XnnpackBackendOptions::set_option(const BackendOption& option) {
       ET_LOG(Error, "XNNPACK packed cache path must be a string.");
       return Error::InvalidArgument;
     }
-    // Same lock as weights_cache_: init() reads packed_cache_path_ under
-    // weights_cache_mutex_ and feeds it straight into the cache, so an
-    // unprotected write here would race with that read.
-    const std::lock_guard<std::mutex> lock(weights_cache_mutex_);
     packed_cache_path_ = std::string(val->data());
     ET_LOG(
         Debug,
         "Setting XNNPACK packed cache path to %s.",
         packed_cache_path_.c_str());
-  } else if (strcmp(option.key, save_weight_cache_on_disk_option_key) == 0) {
-    auto* val = std::get_if<bool>(&option.value);
-    if (!val) {
-      ET_LOG(Error, "XNNPACK save_weight_cache_on_disk must be a bool.");
-      return Error::InvalidArgument;
-    }
-    if (*val) {
-      return save_weights_cache_locked();
-    }
   }
   return Error::Ok;
 }
 
-delegate::XNNWeightsCache& XnnpackBackendOptions::weights_cache() {
-  return weights_cache_;
-}
-
-std::mutex& XnnpackBackendOptions::weights_cache_mutex() {
-  return weights_cache_mutex_;
-}
-
-Error XnnpackBackendOptions::save_weights_cache_locked() {
-  const std::lock_guard<std::mutex> lock(weights_cache_mutex_);
-  return weights_cache_.save_packed_index();
-}
-
 bool XnnpackBackendOptions::resolve_weight_cache(
     const ET_RUNTIME_NAMESPACE::BackendInitContext& context) const {
   return resolve_option<bool>(
diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.h b/backends/xnnpack/runtime/XnnpackBackendOptions.h
index 3b4d5fcd211..aed037ac835 100644
--- a/backends/xnnpack/runtime/XnnpackBackendOptions.h
+++ b/backends/xnnpack/runtime/XnnpackBackendOptions.h
@@ -9,7 +9,6 @@
 #pragma once
 
 #include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>
-#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
 #include <executorch/backends/xnnpack/runtime/XNNWorkspaceManager.h>
 #include <executorch/runtime/backend/backend_init_context.h>
 #include <executorch/runtime/backend/options.h>
@@ -17,7 +16,6 @@
 #include <executorch/runtime/core/result.h>
 
 #include <atomic>
-#include <mutex>
 
 namespace executorch::backends::xnnpack {
 
@@ -46,44 +44,9 @@ class XnnpackBackendOptions {
   const std::string& get_packed_cache_path() const;
   void set_packed_cache_path(const std::string& path);
 
-  // Shared XNNWeightsCache (one instance per backend, like the workspace
-  // manager). The cache itself is not internally synchronized; callers
-  // MUST hold weights_cache_mutex() around every weights_cache() call —
-  // including reading the reference and calling any method on it. The
-  // same mutex also protects packed_cache_path_, so a typical
-  // load/init/compile sequence holds one lock for the whole block:
-  //
-  //   std::lock_guard lock(options.weights_cache_mutex());
-  //   options.weights_cache().set_packed_cache_path(
-  //       options.get_packed_cache_path());
-  //   options.weights_cache().initialize_for_runtime(...);
-  //   XNNCompiler::compileModel(..., &options.weights_cache(), ...);
-  //
-  // The mutex is intentionally exposed (rather than wrapping every
-  // method) because XNNCompiler needs a raw cache pointer to pass into
-  // XNNPACK callbacks that fire during xnn_create_runtime; those
-  // callbacks must run under the same lock as the surrounding init.
-  delegate::XNNWeightsCache& weights_cache();
-  std::mutex& weights_cache_mutex();
-
-  // Invokes save_packed_index() on the cache while holding the cache
-  // mutex. Returns the cache's error code; the caller does not need to
-  // grab the mutex itself. This is the entry point used by set_option()
-  // when `save_weight_cache_on_disk_option_key` is requested.
-  runtime::Error save_weights_cache_locked();
-
  private:
   XNNWorkspaceManager workspace_manager_;
 
-  // Weights cache is shared across all delegate instances. Owned here so
-  // that all backend-option-keyed state (workspace manager, weights cache,
-  // packed-cache path) lives in a single place; XnnpackBackend holds an
-  // XnnpackBackendOptions and delegates synchronization to its mutex.
-  // Protects weights_cache_ AND packed_cache_path_ (init reads the path
-  // while holding this lock and hands it to the cache).
-  std::mutex weights_cache_mutex_;
-  delegate::XNNWeightsCache weights_cache_;
-
 #ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
   std::atomic<WorkspaceSharingMode> sharing_mode_{WorkspaceSharingMode::Global};
 #else
diff --git a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp
index 59cfbcbdb5d..83937887e25 100644
--- a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp
+++ b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp
@@ -16,15 +16,8 @@
 #include <executorch/runtime/core/result.h>
 #include <executorch/runtime/platform/runtime.h>
 #include <executorch/schema/program_generated.h>
-#include <fcntl.h>
 #include <gtest/gtest.h>
-#include <sys/stat.h>
-#include <unistd.h>
 #include <xnnpack.h>
-#include <atomic>
-#include <fstream>
-#include <mutex>
-#include <thread>
 
 using executorch::backends::xnnpack::delegate::XNNWeightsCache;
 using executorch::extension::FileDataLoader;
@@ -359,475 +352,4 @@ TEST_F(XNNWeightsCacheTest, PackedWeightsMmapPathLockCollision) {
 
   ::unlink(cache_path.c_str());
 }
-
-// Verify load_packed_cache produces byte-identical inference results to
-// a fresh build of the same graph. Guards against weight pointers being
-// mis-mapped after cache load.
-TEST_F(XNNWeightsCacheTest, SaveAndLoad_PreservesInferenceOutput) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_output_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input_tensor(num_batches * input_channels + padding, 1.0f);
-
-  // Run 1: no cache file (pure heap pack).
-  std::vector<float> output_baseline(num_batches * output_channels, 0.0f);
-  {
-    XNNWeightsCache cache;
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input_tensor.data(),
-        output_baseline.data());
-  }
-
-  // Run 2: file-backed mmap path, save trailer.
-  {
-    XNNWeightsCache cache;
-    cache.set_packed_cache_path(cache_path);
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    std::vector<float> output_write(num_batches * output_channels, 0.0f);
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input_tensor.data(),
-        output_write.data());
-    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-    EXPECT_EQ(output_write, output_baseline);
-  }
-
-  // Run 3: fresh instance loads from disk; output must match.
-  {
-    XNNWeightsCache cache;
-    cache.set_packed_cache_path(cache_path);
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    ASSERT_GT(cache.get_packed_data_names().size(), 0u);
-    std::vector<float> output_load(num_batches * output_channels, 0.0f);
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input_tensor.data(),
-        output_load.data());
-    EXPECT_EQ(output_load, output_baseline);
-  }
-
-  ::unlink(cache_path.c_str());
-}
-
-// Corrupted cache file must not crash; load_packed_cache returns false and
-// the next init falls through to the fresh-build path that overwrites it.
-TEST_F(XNNWeightsCacheTest, LoadPackedCache_RejectsCorruptTrailer) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_corrupt_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  // Write a file with valid size but garbage trailer.
-  {
-    std::ofstream f(cache_path, std::ios::binary);
-    std::vector<char> garbage(1024, '\xCC');
-    f.write(garbage.data(), garbage.size());
-  }
-
-  XNNWeightsCache cache;
-  cache.set_packed_cache_path(cache_path);
-  // Must not crash; load returns false → falls through to fresh build.
-  Error err =
-      cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  ASSERT_EQ(err, Error::Ok);
-
-  // Fresh build still works.
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
-  std::vector<float> output(num_batches * output_channels, 0.0f);
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      output.data());
-
-  ::unlink(cache_path.c_str());
-}
-
-// Repeated init+run+save cycles on the same file must not grow the cache
-// file. Guards against the regression where each PTE init re-packed weights
-// and appended a fresh copy (+500 MB per inference observed in production).
-TEST_F(XNNWeightsCacheTest, MultiSessionLoad_DoesNotGrowCacheFile) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_nogrow_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
-  std::vector<float> output(num_batches * output_channels, 0.0f);
-
-  // Cycle 1: fresh write of cache.
-  off_t size_after_first_save = 0;
-  {
-    XNNWeightsCache cache;
-    cache.set_packed_cache_path(cache_path);
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input.data(),
-        output.data());
-    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-    struct stat st {};
-    ASSERT_EQ(::stat(cache_path.c_str(), &st), 0);
-    size_after_first_save = st.st_size;
-    ASSERT_GT(size_after_first_save, 0);
-  }
-
-  // Cycle 2: fresh instance loads from disk, runs, saves. No new weights
-  // were packed → file must be byte-for-byte identical in length.
-  {
-    XNNWeightsCache cache;
-    cache.set_packed_cache_path(cache_path);
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    ASSERT_GT(cache.get_packed_data_names().size(), 0u);
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input.data(),
-        output.data());
-    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-  }
-  {
-    struct stat st {};
-    ASSERT_EQ(::stat(cache_path.c_str(), &st), 0);
-    EXPECT_EQ(st.st_size, size_after_first_save);
-  }
-
-  // Cycle 3: simulate PTE destroy + recreate inside the same instance.
-  // delete_packed_data on from_load entries must not erase metadata, so
-  // the second init's look_up still hits → no new file append.
-  {
-    XNNWeightsCache cache;
-    cache.set_packed_cache_path(cache_path);
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input.data(),
-        output.data());
-    cache.delete_packed_data(cache.get_packed_data_names());
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input.data(),
-        output.data());
-    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-  }
-  {
-    struct stat st {};
-    ASSERT_EQ(::stat(cache_path.c_str(), &st), 0);
-    EXPECT_EQ(st.st_size, size_after_first_save);
-  }
-
-  ::unlink(cache_path.c_str());
-}
-
-// After loading from disk, delete_packed_data must skip from_load entries
-// so the next init still hits the cache. Bug would re-pack weights from
-// scratch each time the backend destroys + recreates a delegate.
-TEST_F(
-    XNNWeightsCacheTest,
-    DeletePackedData_OnFromLoadEntries_PreservesMetadata) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_fromload_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
-  std::vector<float> output(num_batches * output_channels, 0.0f);
-
-  // Seed the cache file.
-  {
-    XNNWeightsCache cache;
-    cache.set_packed_cache_path(cache_path);
-    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-    BuildAndRunGraphWithWeightsCache(
-        cache,
-        batches,
-        input_channels,
-        output_channels,
-        input.data(),
-        output.data());
-    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-  }
-
-  // Fresh instance: all populated entries are from_load=true.
-  XNNWeightsCache cache;
-  cache.set_packed_cache_path(cache_path);
-  cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  size_t loaded_count = cache.get_packed_data_names().size();
-  ASSERT_GT(loaded_count, 0u);
-
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      output.data());
-
-  // Repeated delete must never erase from_load entries — contrast with
-  // ReusePackedWeights where two delete calls drop the count to 0.
-  for (int i = 0; i < 5; ++i) {
-    cache.delete_packed_data(cache.get_packed_data_names());
-    EXPECT_EQ(cache.get_packed_data_names().size(), loaded_count)
-        << "from_load entries should survive delete; iteration " << i;
-  }
-
-  ::unlink(cache_path.c_str());
-}
-
-// A model with multiple PTE/method delegates initializes the cache
-// sequentially before any one is destroyed. The second PTE's init must
-// see the first PTE's packed entries already in the map → look_up hits,
-// no new reserve_space, file does not grow per PTE.
-TEST_F(XNNWeightsCacheTest, MultiplePTEsInSameInstance_NoFileGrowth) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_multipte_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
-  std::vector<float> out_pte1(num_batches * output_channels, 0.0f);
-  std::vector<float> out_pte2(num_batches * output_channels, 0.0f);
-
-  XNNWeightsCache cache;
-  cache.set_packed_cache_path(cache_path);
-
-  // PTE 1: fresh pack + save.
-  cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      out_pte1.data());
-  ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-
-  off_t size_after_pte1 = 0;
-  {
-    struct stat st {};
-    ASSERT_EQ(::stat(cache_path.c_str(), &st), 0);
-    size_after_pte1 = st.st_size;
-    ASSERT_GT(size_after_pte1, 0);
-  }
-  size_t names_after_pte1 = cache.get_packed_data_names().size();
-  ASSERT_GT(names_after_pte1, 0u);
-
-  // PTE 2: sibling delegate, NO destroy between. look_up must hit the
-  // entry from PTE 1 → no new reserve_space → file size unchanged after
-  // save.
-  cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      out_pte2.data());
-  ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-
-  {
-    struct stat st {};
-    ASSERT_EQ(::stat(cache_path.c_str(), &st), 0);
-    EXPECT_EQ(st.st_size, size_after_pte1)
-        << "PTE 2 with same weights must not append to the cache file";
-  }
-  EXPECT_EQ(cache.get_packed_data_names().size(), names_after_pte1);
-
-  // Both PTEs produced the same output for the same input (correctness).
-  EXPECT_EQ(out_pte1, out_pte2);
-
-  // PTE 3: third sibling. Still no growth.
-  std::vector<float> out_pte3(num_batches * output_channels, 0.0f);
-  cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      out_pte3.data());
-  ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-  {
-    struct stat st {};
-    ASSERT_EQ(::stat(cache_path.c_str(), &st), 0);
-    EXPECT_EQ(st.st_size, size_after_pte1);
-  }
-  EXPECT_EQ(out_pte3, out_pte1);
-
-  ::unlink(cache_path.c_str());
-}
-
-// save_packed_index must be a true no-op when no new reserve_space happened
-// since the last save — same content but writing would still bump mtime,
-// making the cache file look modified on every model load.
-TEST_F(XNNWeightsCacheTest, SavePackedIndex_NoNewReserves_IsNoOp) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_noop_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
-  std::vector<float> output(num_batches * output_channels, 0.0f);
-
-  // Seed cache + first save.
-  XNNWeightsCache cache;
-  cache.set_packed_cache_path(cache_path);
-  cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      output.data());
-  ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-
-  // Force an old mtime so any real write is detectable as a forward jump,
-  // without relying on wall-clock granularity / sleep (sleeps are flaky and
-  // forbidden by lint).
-  const struct timespec old_times[2] = {
-      {1000000, 0}, // atime
-      {1000000, 0}, // mtime
-  };
-  ASSERT_EQ(::utimensat(AT_FDCWD, cache_path.c_str(), old_times, 0), 0);
-
-  struct stat st_before {};
-  ASSERT_EQ(::stat(cache_path.c_str(), &st_before), 0);
-
-  // Second save with no intervening reserve_space → no-op short-circuit.
-  ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-
-  struct stat st_after {};
-  ASSERT_EQ(::stat(cache_path.c_str(), &st_after), 0);
-  EXPECT_EQ(st_before.st_size, st_after.st_size);
-  EXPECT_EQ(st_before.st_mtime, st_after.st_mtime);
-
-  ::unlink(cache_path.c_str());
-}
-
-// Stress test for gjcomer's V6 review concern: concurrent
-// `set_packed_cache_path` + `save_packed_index` against the shared cache
-// must not crash or leave the on-disk file inconsistent under the lock
-// discipline that XNNPACKBackend uses (single mutex around the cache).
-// This does NOT exercise concurrent runtime creation — XNNPACK's runtime
-// init itself is not thread-safe and would require XNNPACKBackend
-// machinery to test properly.
-TEST_F(XNNWeightsCacheTest, ConcurrentOptionsAndSave_NoCrash_FileStable) {
-  std::string cache_path = std::string("/tmp/xnn_weights_cache_concurrent_") +
-      std::to_string(::getpid()) + ".packed_cache";
-  ::unlink(cache_path.c_str());
-
-  // Seed a populated cache + initial save so subsequent save_packed_index
-  // calls hit the no-op short-circuit path (the case most prone to race).
-  std::vector<size_t> batches{1, 2, 3};
-  size_t input_channels = 3;
-  size_t output_channels = 4;
-  size_t num_batches = 1 * 2 * 3;
-  size_t padding = 32;
-  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
-  std::vector<float> output(num_batches * output_channels, 0.0f);
-  XNNWeightsCache cache;
-  cache.set_packed_cache_path(cache_path);
-  cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
-  BuildAndRunGraphWithWeightsCache(
-      cache,
-      batches,
-      input_channels,
-      output_channels,
-      input.data(),
-      output.data());
-  ASSERT_EQ(cache.save_packed_index(), Error::Ok);
-
-  struct stat st_baseline {};
-  ASSERT_EQ(::stat(cache_path.c_str(), &st_baseline), 0);
-
-  // Lock discipline matches XNNPACKBackend's `weights_cache_mutex_`: every
-  // cache mutation is serialized. Threads spam set_packed_cache_path and
-  // save_packed_index under the shared lock for ~25 iterations each.
-  std::mutex cache_mu;
-  constexpr int kThreads = 4;
-  constexpr int kIters = 25;
-  std::atomic<int> failure_count{0};
-  std::vector<std::thread> threads;
-  threads.reserve(kThreads);
-  for (int t = 0; t < kThreads; ++t) {
-    threads.emplace_back([&]() {
-      for (int i = 0; i < kIters; ++i) {
-        try {
-          const std::lock_guard<std::mutex> lock(cache_mu);
-          // Re-set the same path — should be benign / a stable no-op.
-          cache.set_packed_cache_path(cache_path);
-          // No new reserves between calls → save short-circuits.
-          (void)cache.save_packed_index();
-        } catch (const std::exception&) {
-          failure_count.fetch_add(1);
-        }
-      }
-    });
-  }
-  for (auto& th : threads) {
-    th.join();
-  }
-
-  EXPECT_EQ(failure_count.load(), 0);
-
-  // File must not balloon: every iteration's save is a no-op.
-  struct stat st_after {};
-  ASSERT_EQ(::stat(cache_path.c_str(), &st_after), 0);
-  EXPECT_EQ(st_after.st_size, st_baseline.st_size);
-
-  ::unlink(cache_path.c_str());
-}
-
 #endif