diff --git a/src/passes/MemoryPacking.cpp b/src/passes/MemoryPacking.cpp
index b43abac2787..c1c0297f6d0 100644
--- a/src/passes/MemoryPacking.cpp
+++ b/src/passes/MemoryPacking.cpp
@@ -105,6 +105,8 @@ struct MemoryPacking : public Pass {
   void run(Module* module) override;
   bool canOptimize(std::vector<std::unique_ptr<Memory>>& memories,
                    std::vector<std::unique_ptr<DataSegment>>& dataSegments);
+  void
+  zeroOutTrampledData(std::vector<std::unique_ptr<DataSegment>>& dataSegments);
   void optimizeSegmentOps(Module* module);
   void getSegmentReferrers(Module* module, ReferrersMap& referrers);
   void dropUnusedSegments(Module* module,
@@ -247,7 +249,6 @@ bool MemoryPacking::canOptimize(
   // All active segments have constant offsets, known at this time, so we may be
   // able to optimize, but must still check for the trampling problem mentioned
   // earlier.
-  // TODO: optimize in the trampling case
   DisjointSpans space;
   for (auto& segment : dataSegments) {
     if (segment->isActive()) {
@@ -255,15 +256,87 @@ bool MemoryPacking::canOptimize(
       Address start = c->value.getUnsigned();
       DisjointSpans::Span span{start, start + segment->data.size()};
       if (space.addAndCheckOverlap(span)) {
-        std::cerr << "warning: active memory segments have overlap, which "
-                  << "prevents some optimizations.\n";
-        return false;
+        // Some segments overlap, that is, a later segment tramples the data of
+        // an earlier one. If the memory is imported then we cannot optimize
+        // here: if a later segment is out of bounds then instantiation traps
+        // partway, leaving the data written so far visible in the imported
+        // memory (which outlives the failed instantiation), so even trampled
+        // data matters.
+        // TODO: We could optimize anyway if we can check that all the segments
+        //       after the trampled segment, up to and including the trampling
+        //       segment, will be in-bounds for the imported memory, as then no
+        //       trap can occur between the trampled write and the trampling
+        //       one.
+        if (memory->imported()) {
+          std::cerr << "warning: active memory segments have overlap, which "
+                    << "prevents some optimizations.\n";
+          return false;
+        }
+        // The memory is defined in this module, so partially-applied segments
+        // can never be observed: either instantiation completes and all the
+        // segments are applied in order, or it traps and the memory is never
+        // exposed. We can therefore zero out the trampled data, which the
+        // normal optimization of zeros will then remove.
+        zeroOutTrampledData(dataSegments);
+        break;
       }
     }
   }
   return true;
 }
 
+void MemoryPacking::zeroOutTrampledData(
+  std::vector<std::unique_ptr<DataSegment>>& dataSegments) {
+  // Instantiation writes the active segments to memory one after another,
+  // before any code can run, so a byte that several segments write only ever
+  // shows the value from the last of them. Replace every earlier write to such
+  // a byte with a zero. This relies on each active segment having a constant
+  // offset, which canOptimize verifies before calling us.
+  //
+  // Walk from the last segment to the first. |later| maps the start of each
+  // disjoint region written by the segments visited so far to its end.
+  std::map<uint64_t, uint64_t> later;
+  for (auto rit = dataSegments.rbegin(); rit != dataSegments.rend(); ++rit) {
+    auto& curr = *rit;
+    if (!curr->isActive() || curr->data.empty()) {
+      continue;
+    }
+    uint64_t lo = curr->offset->cast<Const>()->value.getUnsigned();
+    uint64_t hi = lo + curr->data.size();
+    // Clear the bytes of ours that a later segment rewrites. Only the last
+    // region starting at or before us can reach us from the left.
+    auto region = later.upper_bound(lo);
+    if (region != later.begin()) {
+      region = std::prev(region);
+    }
+    while (region != later.end() && region->first < hi) {
+      uint64_t from = std::max(lo, region->first);
+      uint64_t to = std::min(hi, region->second);
+      if (from < to) {
+        auto bytes = curr->data.begin();
+        std::fill(bytes + (from - lo), bytes + (to - lo), 0);
+      }
+      ++region;
+    }
+    // Record our own span, absorbing every region it overlaps or abuts so
+    // that the map keeps holding disjoint regions.
+    auto after = later.upper_bound(lo);
+    if (after != later.begin()) {
+      auto before = std::prev(after);
+      if (before->second >= lo) {
+        lo = before->first;
+        hi = std::max(hi, before->second);
+        after = before;
+      }
+    }
+    for (; after != later.end() && after->first <= hi;
+         after = later.erase(after)) {
+      hi = std::max(hi, after->second);
+    }
+    later[lo] = hi;
+  }
+}
+
 bool MemoryPacking::canSplit(const std::unique_ptr<DataSegment>& segment,
                              const Referrers& referrers) {
   // Don't mess with segments related to llvm coverage tools such as
diff --git a/test/lit/passes/memory-packing_all-features.wast b/test/lit/passes/memory-packing_all-features.wast
index 8d5089e6fbe..82e4009ffd9 100644
--- a/test/lit/passes/memory-packing_all-features.wast
+++ b/test/lit/passes/memory-packing_all-features.wast
@@ -2193,21 +2193,23 @@
     (data.drop 0)
   )
 )
+
 (module
  ;; CHECK:      (memory $0 1 1)
  (memory $0 1 1)
+ ;; the zero tramples the "x", so the final memory contents are all zeros, and
+ ;; both segments can be removed entirely
  (data (i32.const 1024) "x")
- (data (i32.const 1024) "\00") ;; this tramples the "x", and so must be kept.
+ (data (i32.const 1024) "\00")
 )
-;; CHECK:      (data $0 (i32.const 1024) "x")
 
-;; CHECK:      (data $1 (i32.const 1024) "\00")
 (module
  ;; CHECK:      (memory $0 1 1)
  (memory $0 1 1)
  (data (i32.const 1024) "x")
  (data (i32.const 1025) "\00")
 )
+
 ;; CHECK:      (data $0 (i32.const 1024) "x")
 (module
  ;; CHECK:      (memory $0 1 1)
@@ -2215,19 +2217,112 @@
  (data (i32.const 1024) "x")
  (data (i32.const 1023) "\00")
 )
+
 ;; CHECK:      (data $0 (i32.const 1024) "x")
 (module
  ;; CHECK:      (memory $0 1 1)
  (memory $0 1 1)
+ ;; trampling in one place does not prevent optimizing elsewhere: everything
+ ;; here is zeros in the final memory contents, and can be removed
  (data (i32.const 1024) "x")
- (data (i32.const 1024) "\00") ;; when we see one bad thing, we give up
+ (data (i32.const 1024) "\00")
  (data (i32.const 4096) "\00")
 )
-;; CHECK:      (data $0 (i32.const 1024) "x")
 
-;; CHECK:      (data $1 (i32.const 1024) "\00")
+(module
+ ;; CHECK:      (memory $0 1 1)
+ (memory $0 1 1)
+ ;; the "y" fully tramples the "x", so only the "y" remains
+ (data (i32.const 1024) "x")
+ (data (i32.const 1024) "y")
+)
+
+;; CHECK:      (data $1 (i32.const 1024) "y")
+(module
+ ;; CHECK:      (memory $0 1 1)
+ (memory $0 1 1)
+ ;; partial trampling: the "A" overwrites the "y" in the middle of "xyz". the
+ ;; trampled byte is zeroed out, and as the segments are applied in order, the
+ ;; final memory contents are "x", "A", "z"
+ (data (i32.const 1024) "xyz")
+ (data (i32.const 1025) "A")
+)
+
+;; CHECK:      (data $0 (i32.const 1024) "x\00z")
+
+;; CHECK:      (data $1 (i32.const 1025) "A")
+(module
+ ;; CHECK:      (memory $0 1 1)
+ (memory $0 1 1)
+ ;; chained trampling, where the tramplers are themselves trampled: the final
+ ;; memory contents are "f", "e", "c"
+ (data (i32.const 1024) "abc")
+ (data (i32.const 1024) "de")
+ (data (i32.const 1024) "f")
+)
+
+;; CHECK:      (data $0 (i32.const 1026) "c")
+
+;; CHECK:      (data $1 (i32.const 1025) "e")
+
+;; CHECK:      (data $2 (i32.const 1024) "f")
+(module
+ ;; CHECK:      (memory $0 1 1)
+ (memory $0 1 1)
+ ;; one segment tramples multiple earlier ones: "WXYZ" covers all of "ab" and
+ ;; the "c" of "cd", so only "WXYZ" and the "d" remain
+ (data (i32.const 1024) "ab")
+ (data (i32.const 1026) "cd")
+ (data (i32.const 1023) "WXYZ")
+)
+
+;; CHECK:      (data $1 (i32.const 1027) "d")
+
+;; CHECK:      (data $2 (i32.const 1023) "WXYZ")
+(module
+ ;; CHECK:      (memory $0 1 1)
+ (memory $0 1 1)
+ ;; the regions covered by later segments must be merged as they accumulate:
+ ;; walking the segments in reverse we see "fghij" [1024, 1029), then "B"
+ ;; [1025, 1026), then "abcde" [1027, 1032). if the region for "B" were not
+ ;; merged into the one for "fghij", then looking up the region covering
+ ;; "abcde" would find "B" and miss that "fghij" tramples the "ab"
+ (data (i32.const 1027) "abcde")
+ (data (i32.const 1025) "B")
+ (data (i32.const 1024) "fghij")
+)
+
+;; CHECK:      (data $0 (i32.const 1029) "cde")
+
+;; CHECK:      (data $2 (i32.const 1024) "fghij")
+(module
+ ;; CHECK:      (type $0 (func))
+
+ ;; CHECK:      (memory $0 1 1)
+ (memory $0 1 1)
+ ;; a passive segment is not applied at instantiation, so it neither tramples
+ ;; nor is trampled: the active segments cancel out as usual, and the passive
+ ;; segment is untouched
+ (data (i32.const 1024) "x")
+ ;; CHECK:      (data $passive "ppp")
+ (data $passive "ppp")
+ (data (i32.const 1024) "\00")
+ ;; CHECK:      (func $init (type $0)
+ ;; CHECK-NEXT:  (memory.init $passive
+ ;; CHECK-NEXT:   (i32.const 0)
+ ;; CHECK-NEXT:   (i32.const 0)
+ ;; CHECK-NEXT:   (i32.const 3)
+ ;; CHECK-NEXT:  )
+ ;; CHECK-NEXT: )
+ (func $init
+  (memory.init $passive
+   (i32.const 0)
+   (i32.const 0)
+   (i32.const 3)
+  )
+ )
+)
 
-;; CHECK:      (data $2 (i32.const 4096) "\00")
 (module
  ;; CHECK:      (import "env" "memoryBase" (global $memoryBase i32))
  (import "env" "memoryBase" (global $memoryBase i32))
diff --git a/test/lit/passes/memory-packing_zero-filled-memory.wast b/test/lit/passes/memory-packing_zero-filled-memory.wast
index 58ac799f5c5..c4b9e48f602 100644
--- a/test/lit/passes/memory-packing_zero-filled-memory.wast
+++ b/test/lit/passes/memory-packing_zero-filled-memory.wast
@@ -1,6 +1,6 @@
 ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
 
-;; RUN: wasm-opt %s --memory-packing -all --zero-filled-memory -S -o - | filecheck %s
+;; RUN: foreach %s %t wasm-opt --memory-packing -all --zero-filled-memory -S -o - | filecheck %s
 
 (module
  ;; we can optimize on an imported memory with zeroFilledMemory being set.
@@ -10,4 +10,18 @@
  (data (i32.const 1024) "x")
  (data (i32.const 1023) "\00")
 )
+
 ;; CHECK:      (data $0 (i32.const 1024) "x")
+(module
+ ;; but we cannot optimize trampling on an imported memory: if a later segment
+ ;; were to trap during instantiation, the data written before it remains
+ ;; visible in the imported memory, so even the trampled "x" must be kept
+ ;; CHECK:      (import "env" "memory" (memory $0 1 1))
+ (import "env" "memory" (memory $0 1 1))
+
+ (data (i32.const 1024) "x")
+ (data (i32.const 1024) "\00")
+)
+;; CHECK:      (data $0 (i32.const 1024) "x")
+
+;; CHECK:      (data $1 (i32.const 1024) "\00")