Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 77 additions & 4 deletions src/passes/MemoryPacking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ struct MemoryPacking : public Pass {
void run(Module* module) override;
bool canOptimize(std::vector<std::unique_ptr<Memory>>& memories,
std::vector<std::unique_ptr<DataSegment>>& dataSegments);
void
zeroOutTrampledData(std::vector<std::unique_ptr<DataSegment>>& dataSegments);
void optimizeSegmentOps(Module* module);
void getSegmentReferrers(Module* module, ReferrersMap& referrers);
void dropUnusedSegments(Module* module,
Expand Down Expand Up @@ -247,23 +249,94 @@ bool MemoryPacking::canOptimize(
// All active segments have constant offsets, known at this time, so we may be
// able to optimize, but must still check for the trampling problem mentioned
// earlier.
// TODO: optimize in the trampling case
DisjointSpans space;
for (auto& segment : dataSegments) {
if (segment->isActive()) {
auto* c = segment->offset->cast<Const>();
Address start = c->value.getUnsigned();
DisjointSpans::Span span{start, start + segment->data.size()};
if (space.addAndCheckOverlap(span)) {
std::cerr << "warning: active memory segments have overlap, which "
<< "prevents some optimizations.\n";
return false;
// Some segments overlap, that is, a later segment tramples the data of
// an earlier one. If the memory is imported then we cannot optimize
// here: if a later segment is out of bounds then instantiation traps
// partway, leaving the data written so far visible in the imported
// memory (which outlives the failed instantiation), so even trampled
// data matters.
Comment on lines +259 to +264

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth adding a TODO about optimizing anyway if we can check that all the segments after the trampled segment up to and including the trampling segment will be in-bounds for the imported memory.

@JPL11 JPL11 Jun 12, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — added the TODO in 2738c38.

// TODO: We could optimize anyway if we can check that all the segments
// after the trampled segment, up to and including the trampling
// segment, will be in-bounds for the imported memory, as then no
// trap can occur between the trampled write and the trampling
// one.
if (memory->imported()) {
std::cerr << "warning: active memory segments have overlap, which "
<< "prevents some optimizations.\n";
return false;
}
// The memory is defined in this module, so partially-applied segments
// can never be observed: either instantiation completes and all the
// segments are applied in order, or it traps and the memory is never
// exposed. We can therefore zero out the trampled data, which the
// normal optimization of zeros will then remove.
zeroOutTrampledData(dataSegments);
break;
}
}
}
return true;
}

void MemoryPacking::zeroOutTrampledData(
std::vector<std::unique_ptr<DataSegment>>& dataSegments) {
// Active segments are applied in order at instantiation, before any code can
// run, so when segments overlap only the last write to each byte is ever
// observable. Zero out all bytes that a later segment overwrites. This
// assumes all active segments have constant offsets, which canOptimize
// verifies before calling us.
//
// Iterate in reverse, tracking the disjoint regions of memory covered by the
// segments seen so far as a map from a region's start address to its end.
std::map<uint64_t, uint64_t> covered;
for (auto it = dataSegments.rbegin(); it != dataSegments.rend(); ++it) {
auto& segment = *it;
if (!segment->isActive() || segment->data.empty()) {
continue;
}
uint64_t start = segment->offset->cast<Const>()->value.getUnsigned();
uint64_t end = start + segment->data.size();
// Zero out our bytes that later segments cover. Look for overlapping
// regions starting from the last one beginning at or before us.
auto covering = covered.upper_bound(start);
if (covering != covered.begin()) {
--covering;
}
for (; covering != covered.end() && covering->first < end; ++covering) {
uint64_t overlapStart = std::max(start, covering->first);
uint64_t overlapEnd = std::min(end, covering->second);
if (overlapStart < overlapEnd) {
std::fill(segment->data.begin() + (overlapStart - start),
segment->data.begin() + (overlapEnd - start),
0);
}
}
// Add our span to the covered regions, merging with any regions it
// touches.
auto next = covered.upper_bound(start);
if (next != covered.begin()) {
auto prev = std::prev(next);
if (prev->second >= start) {
start = prev->first;
end = std::max(end, prev->second);
next = prev;
}
}
while (next != covered.end() && next->first <= end) {
end = std::max(end, next->second);
next = covered.erase(next);
}
covered[start] = end;
}
}

bool MemoryPacking::canSplit(const std::unique_ptr<DataSegment>& segment,
const Referrers& referrers) {
// Don't mess with segments related to llvm coverage tools such as
Expand Down
109 changes: 102 additions & 7 deletions test/lit/passes/memory-packing_all-features.wast
Original file line number Diff line number Diff line change
Expand Up @@ -2193,41 +2193,136 @@
(data.drop 0)
)
)

(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; the zero tramples the "x", so the final memory contents are all zeros, and
;; both segments can be removed entirely
(data (i32.const 1024) "x")
(data (i32.const 1024) "\00") ;; this tramples the "x", and so must be kept.
(data (i32.const 1024) "\00")
)
;; CHECK: (data $0 (i32.const 1024) "x")

;; CHECK: (data $1 (i32.const 1024) "\00")
(module

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please leave a blank line between each module to help readability.

@JPL11 JPL11 Jun 12, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — added blank lines between the modules here and in memory-packing_zero-filled-memory.wast (2738c38).

;; CHECK: (memory $0 1 1)
(memory $0 1 1)
(data (i32.const 1024) "x")
(data (i32.const 1025) "\00")
)

;; CHECK: (data $0 (i32.const 1024) "x")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; the zero byte at 1023 is adjacent to, but does not overlap, the "x" at
;; 1024, so nothing is trampled here
(data (i32.const 1024) "x")
(data (i32.const 1023) "\00")
)

;; CHECK: (data $0 (i32.const 1024) "x")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; trampling in one place does not prevent optimizing elsewhere: everything
;; here is zeros in the final memory contents, and can be removed
(data (i32.const 1024) "x")
(data (i32.const 1024) "\00") ;; when we see one bad thing, we give up
(data (i32.const 1024) "\00")
(data (i32.const 4096) "\00")
)
;; CHECK: (data $0 (i32.const 1024) "x")

;; CHECK: (data $1 (i32.const 1024) "\00")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; the "y" fully tramples the "x", so only the "y" remains
(data (i32.const 1024) "x")
(data (i32.const 1024) "y")
)

;; CHECK: (data $1 (i32.const 1024) "y")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; partial trampling: the "A" overwrites the "y" in the middle of "xyz". the
;; trampled byte is zeroed out, and as the segments are applied in order, the
;; final memory contents are "x", "A", "z"
(data (i32.const 1024) "xyz")
(data (i32.const 1025) "A")
)

;; CHECK: (data $0 (i32.const 1024) "x\00z")

;; CHECK: (data $1 (i32.const 1025) "A")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; chained trampling, where the tramplers are themselves trampled: the final
;; memory contents are "f", "e", "c"
(data (i32.const 1024) "abc")
(data (i32.const 1024) "de")
(data (i32.const 1024) "f")
)

;; CHECK: (data $0 (i32.const 1026) "c")

;; CHECK: (data $1 (i32.const 1025) "e")

;; CHECK: (data $2 (i32.const 1024) "f")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; one segment tramples multiple earlier ones: "WXYZ" covers all of "ab" and
;; the "c" of "cd", so only "WXYZ" and the "d" remain
(data (i32.const 1024) "ab")
(data (i32.const 1026) "cd")
(data (i32.const 1023) "WXYZ")
)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also add a test that depends on the merging of the covered regions. For example, with segments covering the following intervals:

A: [3, 8)
B: [1, 2)
C: [0, 5)

When we visit segment A (after visiting C and B), if we didn't correctly merge the covering information for B into the covering information for C, then we would fail to detect the overlap between C and A because the map lookup would find B instead.

@JPL11 JPL11 Jun 12, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea — added in 2738c38, with segments "abcde"@1027, "B"@1025, "fghij"@1024 matching your A: [3, 8), B: [1, 2), C: [0, 5) shape. Without the merge, the lookup when visiting "abcde" would find the "B" region and miss the overlap with "fghij". The output checks confirm the trampled "ab" is dropped ("cde" remains at 1029).


;; CHECK: (data $1 (i32.const 1027) "d")

;; CHECK: (data $2 (i32.const 1023) "WXYZ")
(module
;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; the regions covered by later segments must be merged as they accumulate:
;; walking the segments in reverse we see "fghij" [1024, 1029), then "B"
;; [1025, 1026), then "abcde" [1027, 1032). if the region for "B" were not
;; merged into the one for "fghij", then looking up the region covering
;; "abcde" would find "B" and miss that "fghij" tramples the "ab"
(data (i32.const 1027) "abcde")
(data (i32.const 1025) "B")
(data (i32.const 1024) "fghij")
)

;; CHECK: (data $0 (i32.const 1029) "cde")

;; CHECK: (data $2 (i32.const 1024) "fghij")
(module
;; CHECK: (type $0 (func))

;; CHECK: (memory $0 1 1)
(memory $0 1 1)
;; a passive segment is not applied at instantiation, so it neither tramples
;; nor is trampled: the active segments cancel out as usual, and the passive
;; segment is untouched
(data (i32.const 1024) "x")
;; CHECK: (data $passive "ppp")
(data $passive "ppp")
(data (i32.const 1024) "\00")
;; CHECK: (func $init (type $0)
;; CHECK-NEXT: (memory.init $passive
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $init
(memory.init $passive
(i32.const 0)
(i32.const 0)
(i32.const 3)
)
)
)

;; CHECK: (data $2 (i32.const 4096) "\00")
(module
;; CHECK: (import "env" "memoryBase" (global $memoryBase i32))
(import "env" "memoryBase" (global $memoryBase i32))
Expand Down
16 changes: 15 additions & 1 deletion test/lit/passes/memory-packing_zero-filled-memory.wast
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.

;; RUN: wasm-opt %s --memory-packing -all --zero-filled-memory -S -o - | filecheck %s
;; RUN: foreach %s %t wasm-opt --memory-packing -all --zero-filled-memory -S -o - | filecheck %s

(module
;; we can optimize on an imported memory with zeroFilledMemory being set.
Expand All @@ -10,4 +10,18 @@
(data (i32.const 1024) "x")
(data (i32.const 1023) "\00")
)

;; CHECK: (data $0 (i32.const 1024) "x")
(module
;; but we cannot optimize trampling on an imported memory: if a later segment
;; were to trap during instantiation, the data written before it remains
;; visible in the imported memory, so even the trampled "x" must be kept
;; CHECK: (import "env" "memory" (memory $0 1 1))
(import "env" "memory" (memory $0 1 1))

(data (i32.const 1024) "x")
(data (i32.const 1024) "\00")
)
;; CHECK: (data $0 (i32.const 1024) "x")

;; CHECK: (data $1 (i32.const 1024) "\00")
Loading