From 0b5218be82e93dc843f908d6d5c6af4948418aa9 Mon Sep 17 00:00:00 2001 From: Guy Bedford Date: Mon, 29 Jun 2026 18:57:25 -0700 Subject: [PATCH] Teach metadce about the WASM_ESM_INTEGRATION module boundary Building with -sWASM_ESM_INTEGRATION at -O2 or above failed in the JS optimizer's metadce pass with 'could not find the assignment to "wasmImports"'. In this mode the wasm<->JS boundary is expressed with native ES import/export syntax rather than the wasmImports object and wasmExports['x'] member uses that metadce pattern-matches. Teach the three metadce-related acorn-optimizer passes about the ES form: - emitDCEGraph reads 'import {..} from "./x.wasm"' as wasm export nodes (collapsing aliases such as memory/wasmMemory to one node) and 'export { js as wasmName }' as wasm import edges, leaving re-exports (export { _main }) to root naturally. - applyDCEGraphRemovals prunes unused specifiers from those statements. - applyImportAndExportNameChanges applies minified names to the wasm-facing side of each specifier. building.py also removes the exports that binaryen dropped (including internal ones like the indirect function table) from the JS import to keep the two interfaces in sync, and link.py keeps import/export name minification on for ESM while disabling the now-pointless import module name minification. 
--- ChangeLog.md | 8 +- .../applyDCEGraphRemovals-esm-output.js | 11 ++ .../js_optimizer/applyDCEGraphRemovals-esm.js | 30 +++++ ...lyImportAndExportNameChanges-esm-output.js | 9 ++ .../applyImportAndExportNameChanges-esm.js | 26 +++++ test/js_optimizer/emitDCEGraph-esm-output.js | 78 +++++++++++++ test/js_optimizer/emitDCEGraph-esm.js | 43 +++++++ test/test_other.py | 26 +++++ tools/acorn-optimizer.mjs | 109 ++++++++++++++++++ tools/building.py | 6 +- tools/link.py | 5 +- 11 files changed, 347 insertions(+), 4 deletions(-) create mode 100644 test/js_optimizer/applyDCEGraphRemovals-esm-output.js create mode 100644 test/js_optimizer/applyDCEGraphRemovals-esm.js create mode 100644 test/js_optimizer/applyImportAndExportNameChanges-esm-output.js create mode 100644 test/js_optimizer/applyImportAndExportNameChanges-esm.js create mode 100644 test/js_optimizer/emitDCEGraph-esm-output.js create mode 100644 test/js_optimizer/emitDCEGraph-esm.js diff --git a/ChangeLog.md b/ChangeLog.md index 10284f6e15d1d..0116a0aac12a3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -44,8 +44,12 @@ See docs/process.md for more on how version tagging works. - INITIAL_MEMORY - wasmMemory - wasmBinary - Anybody using these will see a clear error in their debug builds signaling - that they now need to be explicitly added to `-sINCOMING_MODULE_JS_API`. + Anybody using these will see a clear error in their debug builds signaling + that they now need to be explicitly added to `-sINCOMING_MODULE_JS_API`. +- Fixed `-sWASM_ESM_INTEGRATION` builds at `-O2` and above, which previously + failed in the JS optimizer's metadce (dead code elimination) pass. metadce now + understands the native ES import/export wasm boundary that this mode emits. 
+ (#27217) 6.0.1 - 06/22/26 ---------------- diff --git a/test/js_optimizer/applyDCEGraphRemovals-esm-output.js b/test/js_optimizer/applyDCEGraphRemovals-esm-output.js new file mode 100644 index 0000000000000..e0b0a6d95fe37 --- /dev/null +++ b/test/js_optimizer/applyDCEGraphRemovals-esm-output.js @@ -0,0 +1,11 @@ +function fd_write_impl() {} + +function fd_close_impl() {} + +function unused_import_impl() {} + +import { memory, main as _main, used_export as _used_export, memory as wasmMemory } from "./a.out.wasm"; + +export { fd_write_impl as fd_write, fd_close_impl as fd_close }; + +export { _main }; diff --git a/test/js_optimizer/applyDCEGraphRemovals-esm.js b/test/js_optimizer/applyDCEGraphRemovals-esm.js new file mode 100644 index 0000000000000..c56e0aa1785ba --- /dev/null +++ b/test/js_optimizer/applyDCEGraphRemovals-esm.js @@ -0,0 +1,30 @@ +// WASM_ESM_INTEGRATION: unused wasm imports are dropped from the `export {..}` +// that sends JS functions to the wasm, and unused wasm exports (including +// internal ones like the indirect function table) from the `import {..}` that +// receives them, keeping the two ES module interfaces in sync. 
+ +function fd_write_impl() { +} +function fd_close_impl() { +} +function unused_import_impl() { +} + +import { + memory, + __indirect_function_table, + main as _main, + used_export as _used_export, + unused_export as _unused_export, + memory as wasmMemory, +} from './a.out.wasm'; + +export { + fd_write_impl as fd_write, + fd_close_impl as fd_close, + unused_import_impl as unused_import, +}; + +export { _main }; + +// EXTRA_INFO: { "unusedImports": ["unused_import"], "unusedExports": ["unused_export", "__indirect_function_table"] } diff --git a/test/js_optimizer/applyImportAndExportNameChanges-esm-output.js b/test/js_optimizer/applyImportAndExportNameChanges-esm-output.js new file mode 100644 index 0000000000000..a67e0da65e890 --- /dev/null +++ b/test/js_optimizer/applyImportAndExportNameChanges-esm-output.js @@ -0,0 +1,9 @@ +function fd_write_impl() {} + +function fd_close_impl() {} + +import { memory, b as _main, c as _malloc, memory as wasmMemory } from "./a.out.wasm"; + +export { fd_write_impl as d, fd_close_impl as e }; + +export { _main }; diff --git a/test/js_optimizer/applyImportAndExportNameChanges-esm.js b/test/js_optimizer/applyImportAndExportNameChanges-esm.js new file mode 100644 index 0000000000000..1dcb67f0ac743 --- /dev/null +++ b/test/js_optimizer/applyImportAndExportNameChanges-esm.js @@ -0,0 +1,26 @@ +// WASM_ESM_INTEGRATION: minified wasm import/export names are applied to the +// native ES import/export specifiers. Only the wasm-facing name of each +// specifier is renamed; the JS-local binding name is left intact (including the +// unaliased `memory`, whose local side must survive). + +function fd_write_impl() { +} +function fd_close_impl() { +} + +import { + memory, + main as _main, + malloc as _malloc, + memory as wasmMemory, +} from './a.out.wasm'; + +export { + fd_write_impl as fd_write, + fd_close_impl as fd_close, +}; + +// Re-export of a wasm export: the local name (_main) is never in the mapping. 
+export { _main }; + +// EXTRA_INFO: { "mapping": { "main": "b", "malloc": "c", "fd_write": "d", "fd_close": "e" } } diff --git a/test/js_optimizer/emitDCEGraph-esm-output.js b/test/js_optimizer/emitDCEGraph-esm-output.js new file mode 100644 index 0000000000000..b32c1783b307d --- /dev/null +++ b/test/js_optimizer/emitDCEGraph-esm-output.js @@ -0,0 +1,78 @@ +[ + { + "name": "emcc$defun$fd_close_impl", + "reaches": [ + "emcc$defun$helper" + ] + }, + { + "name": "emcc$defun$fd_write_impl", + "reaches": [] + }, + { + "name": "emcc$defun$helper", + "reaches": [] + }, + { + "name": "emcc$defun$unused_import_impl", + "reaches": [] + }, + { + "name": "emcc$export$__indirect_function_table", + "export": "__indirect_function_table", + "reaches": [] + }, + { + "name": "emcc$export$_main", + "export": "main", + "reaches": [], + "root": true + }, + { + "name": "emcc$export$_unused_export", + "export": "unused_export", + "reaches": [] + }, + { + "name": "emcc$export$_used_export", + "export": "used_export", + "reaches": [], + "root": true + }, + { + "name": "emcc$export$memory", + "export": "memory", + "reaches": [], + "root": true + }, + { + "name": "emcc$import$fd_close_impl", + "import": [ + "env", + "fd_close" + ], + "reaches": [ + "emcc$defun$fd_close_impl" + ] + }, + { + "name": "emcc$import$fd_write_impl", + "import": [ + "env", + "fd_write" + ], + "reaches": [ + "emcc$defun$fd_write_impl" + ] + }, + { + "name": "emcc$import$unused_import_impl", + "import": [ + "env", + "unused_import" + ], + "reaches": [ + "emcc$defun$unused_import_impl" + ] + } +] diff --git a/test/js_optimizer/emitDCEGraph-esm.js b/test/js_optimizer/emitDCEGraph-esm.js new file mode 100644 index 0000000000000..e7261ce902d6c --- /dev/null +++ b/test/js_optimizer/emitDCEGraph-esm.js @@ -0,0 +1,43 @@ +// WASM_ESM_INTEGRATION: the wasm<->JS boundary is expressed with native ES +// import/export syntax rather than the `wasmImports` object and +// `wasmExports['x']` member uses, and emitDCEGraph must build 
the same graph +// from it. + +// JS functions implementing wasm imports. +function fd_write_impl() { +} +function unused_import_impl() { +} + +// A JS function only reachable from a wasm import edge. +function helper() { +} +function fd_close_impl() { + helper(); +} + +// wasm exports received as ES imports. `memory` is imported twice (plain and +// aliased) and must map to a single export node. +import { + memory, + __indirect_function_table, + main as _main, + used_export as _used_export, + unused_export as _unused_export, + memory as wasmMemory, +} from './a.out.wasm'; + +// JS functions exported to the wasm module (the wasm imports). +export { + fd_write_impl as fd_write, + fd_close_impl as fd_close, + unused_import_impl as unused_import, +}; + +// Re-export of a wasm export to the JS entry point: a top-level use that should +// root the underlying `main` export, not be treated as a wasm import. +export { _main }; + +// Top-level uses: root the memory export (via the alias) and one wasm export. 
+wasmMemory.buffer; +_used_export(); diff --git a/test/test_other.py b/test/test_other.py index f8df18e78fd7e..9f0b9a6476822 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -3048,10 +3048,13 @@ def test_extern_prepost(self): 'emitDCEGraph-sig': (['emitDCEGraph', '--no-print'],), 'emitDCEGraph-prefixing': (['emitDCEGraph', '--no-print'],), 'emitDCEGraph-scopes': (['emitDCEGraph', '--no-print'],), + 'emitDCEGraph-esm': (['emitDCEGraph', '--no-print', '--export-es6'],), 'minimal-runtime-applyDCEGraphRemovals': (['applyDCEGraphRemovals'],), 'applyDCEGraphRemovals': (['applyDCEGraphRemovals'],), + 'applyDCEGraphRemovals-esm': (['applyDCEGraphRemovals', '--export-es6'],), 'applyImportAndExportNameChanges': (['applyImportAndExportNameChanges'],), 'applyImportAndExportNameChanges2': (['applyImportAndExportNameChanges'],), + 'applyImportAndExportNameChanges-esm': (['applyImportAndExportNameChanges', '--export-es6'],), 'minimal-runtime-emitDCEGraph': (['emitDCEGraph', '--no-print'],), 'minimal-runtime-2-emitDCEGraph': (['emitDCEGraph', '--no-print'],), 'standalone-emitDCEGraph': (['emitDCEGraph', '--no-print'],), @@ -12237,6 +12240,29 @@ def test_metadce_wasm2js_i64(self): }''') self.do_runf('src.c', cflags=['-O3', '-sWASM=0']) + def test_metadce_esm_integration(self): + # Regression test for https://github.com/emscripten-core/emscripten/issues/27217. + # Under WASM_ESM_INTEGRATION the wasm<->JS boundary is expressed with native + # ES import/export syntax rather than the `wasmImports` object and + # `wasmExports['x']` member uses. metadce (which runs at -O2 and above) must + # understand that form rather than asserting that it cannot find the + # `wasmImports` assignment. 
+ self.run_process([EMCC, test_file('hello_world.c'), '-O3', + '-sWASM_ESM_INTEGRATION', '-sEXPORT_ES6', '-Wno-experimental', + '-o', 'hello.mjs']) + support = read_file('hello.support.mjs') + # An unused wasm export (here the indirect function table) must be removed + # from both the wasm and the ES import that receives it, so the two module + # interfaces stay in sync. + self.assertNotContained('__indirect_function_table', support) + # The import module name is rewritten to the wasm module and is not minified + # (every import resolves through it), and the re-export of the user `main` + # export is preserved. + self.assertContained('from"./hello.wasm"', support) + self.assertContained('export{_main}', support) + if self.try_require_node_version(25, 0, 0): + self.assertContained('Hello, world!', self.run_js('hello.mjs')) + @crossplatform def test_deterministic(self): # test some things that may not be nondeterministic diff --git a/tools/acorn-optimizer.mjs b/tools/acorn-optimizer.mjs index 0f00c63106a0e..8fd7ec4ff84bb 100755 --- a/tools/acorn-optimizer.mjs +++ b/tools/acorn-optimizer.mjs @@ -438,6 +438,25 @@ function getWasmImportsValue(node) { } } +// Under WASM_ESM_INTEGRATION the wasm exports are received as a native ES +// import from the wasm module itself: +// import { malloc as _malloc, memory } from './a.out.wasm'; +function isWasmExportsImport(node) { + return ( + node.type === 'ImportDeclaration' && + isLiteralString(node.source) && + node.source.value.endsWith('.wasm') + ); +} + +// A sourceless `export { a as b, .. };` statement (not an `export` of a declaration and not +// a re-export `export {..} from '..'`). +function isExportSpecifierList(node) { + return ( + node.type === 'ExportNamedDeclaration' && !node.declaration && !node.source + ); +} + function isExportUse(node) { // Match usages of symbols on the `wasmExports` object. 
e.g: // wasmExports['X'] @@ -494,6 +513,29 @@ function applyImportAndExportNameChanges(ast) { if (mapping[name]) { setLiteralValue(prop, mapping[name]); } + } else if (isWasmExportsImport(node)) { + // WASM_ESM_INTEGRATION: rename the wasm-facing name of each received + // export, e.g. `import { malloc as _malloc }` -> `import { a as _malloc }`. + // Replace the imported slot (rather than mutate it in place) since for an + // unaliased specifier acorn shares one node for both sides. + node.specifiers.forEach((spec) => { + const newName = mapping[spec.imported.name]; + if (newName) { + spec.imported = makeIdentifier(newName); + } + }); + } else if (isExportSpecifierList(node)) { + // WASM_ESM_INTEGRATION: rename the wasm-facing name of each JS function + // sent to wasm, e.g. `export { _fd_write as fd_write }` -> + // `export { _fd_write as a }`. Re-exports of wasm exports (`export { + // _main }`) carry a JS-local name that is never in the mapping, so they + // are left untouched. + node.specifiers.forEach((spec) => { + const newName = mapping[spec.exported.name]; + if (newName) { + spec.exported = makeIdentifier(newName); + } + }); } }); } @@ -607,6 +649,10 @@ function emitDCEGraph(ast) { const exportNameToGraphName = {}; // identical to wasmExports['..'] nameToGraphName let foundWasmImportsAssign = false; let foundMinimalRuntimeExports = false; + // Under WASM_ESM_INTEGRATION, JS names bound to wasm exports via an ES import + // from the wasm module. Lets us tell a re-export of a wasm export apart from a + // JS function that is itself exported to wasm (both are `export {..}`). 
+ const wasmExportLocals = new Set(); function saveAsmExport(name, asmName) { // the asmName is what the wasm provides directly; the outside JS @@ -649,6 +695,49 @@ function emitDCEGraph(ast) { }); foundWasmImportsAssign = true; emptyOut(node); // ignore this in the second pass; this does not root + } else if (isWasmExportsImport(node)) { + // WASM_ESM_INTEGRATION: wasm exports received as + // import { malloc as _malloc, memory } from './a.out.wasm'; + // Each binding is a wasm export, exactly like `var _x = wasmExports['x']`. + node.specifiers.forEach((spec) => { + const jsName = spec.local.name; // JS-side name + const asmName = spec.imported.name; // wasm-provided name + if (exportNameToGraphName.hasOwnProperty(asmName)) { + // Another local already binds this wasm export (e.g. both `memory` + // and `memory as wasmMemory`): point this local at the same node so + // a use of either roots the one underlying export. + nameToGraphName[jsName] = exportNameToGraphName[asmName]; + } else { + saveAsmExport(jsName, asmName); + } + wasmExportLocals.add(jsName); + }); + // This ES form stands in for the `wasmImports`/`wasmExports` idioms the + // non-ESM build emits, so it satisfies the sanity check below. + foundWasmImportsAssign = true; + // Drop from the second pass: the local bindings must not be seen as + // top-level uses (that would root every export and defeat DCE). + emptyOut(node); + } else if (isExportSpecifierList(node)) { + // WASM_ESM_INTEGRATION emits two sourceless `export {..}` forms: + // (a) JS functions sent to wasm: export { _fd_write as fd_write }; + // (b) re-exports of wasm exports: export { _main }; + // (a) are the wasm imports; (b) are ordinary top-level uses that should + // root the underlying export (handled in the second pass), so only (a) + // is recorded and removed here. 
+ let isImportEdge = false; + node.specifiers.forEach((spec) => { + if (wasmExportLocals.has(spec.local.name)) { + return; // (b) re-export of a wasm export + } + // (a) `export { jsName as nativeName }` - jsName implements the import. + imports.push([spec.local.name, spec.exported.name]); + isImportEdge = true; + }); + if (isImportEdge) { + foundWasmImportsAssign = true; + emptyOut(node); // does not root; second pass ignores it + } } else if (node.type === 'AssignmentExpression') { const target = node.left; // Ignore assignment to the wasmExports object (as happens in @@ -894,6 +983,26 @@ function applyDCEGraphRemovals(ast) { } return true; }); + } else if (isWasmExportsImport(node)) { + // WASM_ESM_INTEGRATION: drop unused wasm exports from + // import { malloc as _malloc, .. } from './a.out.wasm'; + node.specifiers = node.specifiers.filter((spec) => { + if (unusedExports.has(spec.imported.name)) { + foundUnusedExports.add(spec.imported.name); + return false; + } + return true; + }); + } else if (isExportSpecifierList(node)) { + // WASM_ESM_INTEGRATION: drop unused wasm imports from + // export { _fd_write as fd_write, .. 
}; + node.specifiers = node.specifiers.filter((spec) => { + if (unusedImports.has(spec.exported.name)) { + foundUnusedImports.add(spec.exported.name); + return false; + } + return true; + }); } else if (node.type === 'ExpressionStatement') { let expr = node.expression; // Inside the assignWasmExports function we have diff --git a/tools/building.py b/tools/building.py index ea95f2e7d66ba..cc7fde6b5d973 100644 --- a/tools/building.py +++ b/tools/building.py @@ -883,7 +883,11 @@ def metadce(js_file, wasm_file, debug_info, last): unused_imports.append(native_name) elif name.startswith('emcc$export$') and settings.DECLARE_ASM_MODULE_EXPORTS: native_name = export_name_map[name] - if shared.is_user_export(native_name): + # Under WASM_ESM_INTEGRATION the JS receives every wasm export as an ES + # import, so any export binaryen drops (including internal ones like the + # indirect function table) must also be dropped from the JS import to + # keep the two module interfaces in sync. + if shared.is_user_export(native_name) or settings.WASM_ESM_INTEGRATION: unused_exports.append(native_name) if not unused_exports and not unused_imports: # nothing found to be unused, so we have nothing to remove diff --git a/tools/link.py b/tools/link.py index 5750443ebb33f..67a902de650c6 100644 --- a/tools/link.py +++ b/tools/link.py @@ -1628,7 +1628,10 @@ def limit_incoming_module_api(): not settings.MAIN_MODULE and \ settings.MINIFY_WASM_EXPORT_NAMES: settings.MINIFY_WASM_IMPORTS_AND_EXPORTS = 1 - settings.MINIFY_WASM_IMPORTED_MODULES = 1 + # Under WASM_ESM_INTEGRATION every wasm import is rewritten to come from the + # single support module (see create_esm_wrapper), so minifying the import + # module names buys nothing and would break that rewrite. + settings.MINIFY_WASM_IMPORTED_MODULES = not settings.WASM_ESM_INTEGRATION if settings.WASM_BIGINT: settings.LEGALIZE_JS_FFI = 0