From 3d7d1f4bb6724980d6bb542ee681da22a3d89613 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 9 Mar 2026 05:12:43 +0000 Subject: [PATCH] [wasm-split] Split globals' transitive global dependencies When a global is exclusively used by a secondary module, we can move it to the secondary module. If its initializer contains a `global.get` of another global, we previously exported that other global from the primary module to the secondary module, even if it was not used anywhere else. When we split a global out to a secondary module, this PR computes the transitive dependencies of the split global, and if those dependencies are not used by any other module, we move them to the secondary module as well. #8441 and this PR combined reduce the size of the primary module by 29%. The running time of `wasm-split` hasn't really changed with this PR, compared to #8441. --- `wasm-objdump -h` result: - Before (#8441) ``` Type start=0x0000000c end=0x00035d44 (size=0x00035d38) count: 11185 Import start=0x00035d49 end=0x003faf6f (size=0x003c5226) count: 56805 Function start=0x003faf73 end=0x0040de1f (size=0x00012eac) count: 62890 Table start=0x0040de22 end=0x0041195d (size=0x00003b3b) count: 2921 Tag start=0x0041195f end=0x00411963 (size=0x00000004) count: 1 Global start=0x00411967 end=0x005541c5 (size=0x0014285e) count: 47771 Export start=0x005541ca end=0x007c0a00 (size=0x0026c836) count: 59077 Start start=0x007c0a02 end=0x007c0a04 (size=0x00000002) start: 828 Elem start=0x007c0a08 end=0x0082a84b (size=0x00069e43) count: 12303 DataCount start=0x0082a84d end=0x0082a84e (size=0x00000001) count: 1 Code start=0x0082a853 end=0x00a5a159 (size=0x0022f906) count: 62890 Data start=0x00a5a15d end=0x00a79cea (size=0x0001fb8d) count: 1 ``` - After (This PR) ``` Type start=0x0000000c end=0x00035d44 (size=0x00035d38) count: 11185 Import start=0x00035d48 end=0x00132efc (size=0x000fd1b4) count: 32642 Function start=0x00132f00 end=0x00145dac (size=0x00012eac) count: 62890 Table start=0x00145daf 
end=0x001498ea (size=0x00003b3b) count: 2921 Tag start=0x001498ec end=0x001498f0 (size=0x00000004) count: 1 Global start=0x001498f4 end=0x00289e60 (size=0x0014056c) count: 47728 Export start=0x00289e65 end=0x004977fe (size=0x0020d999) count: 35861 Start start=0x00497800 end=0x00497802 (size=0x00000002) start: 828 Elem start=0x00497806 end=0x00501649 (size=0x00069e43) count: 12303 DataCount start=0x0050164b end=0x0050164c (size=0x00000001) count: 1 Code start=0x00501651 end=0x00730f22 (size=0x0022f8d1) count: 62890 Data start=0x00730f26 end=0x00750ab3 (size=0x0001fb8d) count: 1 ``` Note that while the decrease in the global section is small, we have a significant size decrease in the import and the export sections, because we used to import globals and export them just to relay those globals to the secondary modules. --- src/ir/module-splitting.cpp | 57 +++++++++++++++++---- test/lit/wasm-split/transitive-globals.wast | 20 +++----- 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 77ae38fe709..3544610b654 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -583,6 +583,25 @@ Expression* ModuleSplitter::maybeLoadSecondary(Builder& builder, return builder.makeSequence(loadSecondary, callIndirect); } +// Helper to walk expressions in segments but NOT in globals. +template +static void walkSegments(Walker& walker, Module* module) { + walker.setModule(module); + for (auto& curr : module->elementSegments) { + if (curr->offset) { + walker.walk(curr->offset); + } + for (auto* item : curr->data) { + walker.walk(item); + } + } + for (auto& curr : module->dataSegments) { + if (curr->offset) { + walker.walk(curr->offset); + } + } +} + void ModuleSplitter::indirectReferencesToSecondaryFunctions() { // Turn references to secondary functions into references to thunks that // perform a direct call to the original referent. 
The direct calls in the @@ -977,7 +996,19 @@ void ModuleSplitter::shareImportableItems() { } NameCollector collector(used); - collector.walkModuleCode(&module); + // We shouldn't use collector.walkModuleCode here, because we don't want to + // walk on global initializers. At this point, all globals are still in the + // primary module, so if we walk on global initializers here, globals that + // appear in their initializers will all be marked as used in the primary + // module, which is not true. + // + // For example, we have (global $a i32 (global.get $b)). Because $a is at + // this point still in the primary module, $b will be marked as "used" in + // the primary module. But $a can be moved to a secondary module later if it + // is used exclusively by that module. Then $b can also be moved, if it + // doesn't have other uses. But if it is marked as "used" in the primary + // module, it can't. + walkSegments(collector, &module); for (auto& segment : module.dataSegments) { if (segment->memory.is()) { used.memories.insert(segment->memory); @@ -1009,25 +1040,33 @@ void ModuleSplitter::shareImportableItems() { secondaryUsed.push_back(getUsedNames(*secondaryPtr)); } - // Compute globals referenced in other globals' initializers. Since globals - // can reference other globals, we must ensure that if a global is used in a - // module, all its dependencies are also marked as used. - auto computeDependentItems = [&](UsedNames& used) { + // Compute transitive closure of globals referenced in other globals' + // initializers. Since globals can reference other globals, we must ensure + // that if a global is used in a module, all its dependencies are also marked + // as used. 
+ auto computeTransitiveGlobals = [&](UsedNames& used) { std::vector worklist(used.globals.begin(), used.globals.end()); - for (auto name : worklist) { + std::unordered_set visited(used.globals.begin(), used.globals.end()); + while (!worklist.empty()) { + Name currName = worklist.back(); + worklist.pop_back(); // At this point all globals are still in the primary module, so this // exists - auto* global = primary.getGlobal(name); + auto* global = primary.getGlobal(currName); if (!global->imported() && global->init) { for (auto* get : FindAll(global->init).list) { - used.globals.insert(get->name); + if (visited.insert(get->name).second) { + worklist.push_back(get->name); + used.globals.insert(get->name); + } } } } }; + computeTransitiveGlobals(primaryUsed); for (auto& used : secondaryUsed) { - computeDependentItems(used); + computeTransitiveGlobals(used); } // Given a name and module item kind, returns the list of secondary modules diff --git a/test/lit/wasm-split/transitive-globals.wast b/test/lit/wasm-split/transitive-globals.wast index 90740adc3a3..603e0cc4c8a 100644 --- a/test/lit/wasm-split/transitive-globals.wast +++ b/test/lit/wasm-split/transitive-globals.wast @@ -3,26 +3,20 @@ ;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY ;; Check that transitive dependencies in global initializers are correctly -;; analyzed and exported from the primary module to the secondary module. -;; TODO Move $b and $c to the secondary module +;; analyzed and moved to the secondary module. (module - ;; PRIMARY: (global $c i32 (i32.const 42)) + ;; SECONDARY: (global $c i32 (i32.const 42)) (global $c i32 (i32.const 42)) ;; $b depends on $c. 
- ;; PRIMARY: (global $b i32 (global.get $c)) + ;; SECONDARY: (global $b i32 (global.get $c)) (global $b i32 (global.get $c)) - ;; Globals $b is exported to the secondary module - ;; PRIMARY: (export "global" (global $b)) - - ;; Globals $b is imported from the primary module - ;; SECONDARY: (import "primary" "global" (global $b i32)) - - ;; $a depends on $b. Since $a is exclusively used by the secondary module, - ;; it will be moved there. Its dependency $b should be exported from the - ;; primary module and imported into the secondary module. + ;; $a depends on $b. Since $a is exclusively used by the secondary module, + ;; it will be moved there. The transitive dependency analysis must ensure + ;; that $b (and $c) are moved to the secondary module too, because they are + ;; not used in the primary module. ;; SECONDARY: (global $a i32 (global.get $b)) (global $a i32 (global.get $b))