From e18c22aac5c2c478c1fdc727a081827b04496142 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 21 Apr 2026 22:33:31 -0700 Subject: [PATCH 1/4] [wasm-reduce] Empty functions with delta debugging Delta debugging is an algorithm for finding the minimal set of items necessary to preserve a condition. It generally works by using increasingly fine partitions of the original set of items and alternating trying to keep just one of the partitions to make rapid progress and trying to keep the complement of one of the partitions to make smaller changes that are more likely to work. Add a header containing a templatized delta debugging implementation, then use it in wasm-reduce to preserve the minimal number of function bodies necessary to reproduce the reduction condition. This should allow wasm-reduce to make much faster progress on emptying out functions in the common case and leave it much less work to do afterwards. Using delta debugging for deleting functions and performing other reduction operations is left as future work. Deleting functions in particular is challenging because it can involve reloading the module from the working file, potentially changing function names and invalidating the function names that would be stored in the delta debugging partitions. 
--- src/support/delta_debugging.h | 113 ++++++++++++++++++++++++++ src/tools/wasm-reduce/wasm-reduce.cpp | 54 ++++++++++-- test/gtest/CMakeLists.txt | 1 + test/gtest/delta_debugging.cpp | 82 +++++++++++++++++++ 4 files changed, 242 insertions(+), 8 deletions(-) create mode 100644 src/support/delta_debugging.h create mode 100644 test/gtest/delta_debugging.cpp diff --git a/src/support/delta_debugging.h b/src/support/delta_debugging.h new file mode 100644 index 00000000000..508fe96a385 --- /dev/null +++ b/src/support/delta_debugging.h @@ -0,0 +1,113 @@ +/* + * Copyright 2026 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_support_delta_debugging_h +#define wasm_support_delta_debugging_h + +#include +#include +#include + +namespace wasm { + +// Use the delta debugging algorithm (Zeller and Hildebrandt 2002, +// https://dl.acm.org/doi/10.1109/32.988498) to find the minimal set of +// items necessary to preserve some property. Returns that minimal set of +// items, preserving their input order. `tryPartition` should have this +// signature: +// +// bool tryPartition(size_t partitionIndex, +// size_t numPartitions, +// const std::vector& partition) +// +// It should return true iff the property is preserved while keeping only +// `partition` items. +template +std::vector deltaDebugging(std::vector items, F&& tryPartition) { + size_t numPartitions = 2; + while (numPartitions <= items.size()) { + // Partition the items. 
+ std::vector> partitions; + size_t size = items.size(); + size_t basePartitionSize = size / numPartitions; + size_t rem = size % numPartitions; + size_t idx = 0; + for (size_t i = 0; i < numPartitions; ++i) { + size_t partitionSize = basePartitionSize + (i < rem ? 1 : 0); + if (partitionSize > 0) { + std::vector partition; + partition.reserve(partitionSize); + for (size_t j = 0; j < partitionSize; ++j) { + partition.push_back(items[idx++]); + } + partitions.emplace_back(std::move(partition)); + } + } + assert(numPartitions == partitions.size()); + + bool reduced = false; + + // Try keeping only one partition. Try each partition in turn. + for (size_t i = 0; i < numPartitions; ++i) { + if (tryPartition(i, numPartitions, partitions[i])) { + items = std::move(partitions[i]); + numPartitions = 2; + reduced = true; + break; + } + } + if (reduced) { + continue; + } + + // Otherwise, try keeping the complement of a partition. Do not do this with + // only two partitions because that would be no different from what we + // already tried. + if (numPartitions > 2) { + for (size_t i = 0; i < numPartitions; ++i) { + std::vector complement; + complement.reserve(items.size() - partitions[i].size()); + for (size_t j = 0; j < numPartitions; ++j) { + if (j != i) { + complement.insert( + complement.end(), partitions[j].begin(), partitions[j].end()); + } + } + if (tryPartition(i, numPartitions, complement)) { + items = std::move(complement); + numPartitions = std::max(numPartitions - 1, size_t(2)); + reduced = true; + break; + } + } + if (reduced) { + continue; + } + } + + // Otherwise, make the partitions finer grained. 
+ if (numPartitions < items.size()) { + numPartitions = std::min(items.size(), 2 * numPartitions); + } else { + break; + } + } + return items; +} + +} // namespace wasm + +#endif // wasm_support_delta_debugging_h diff --git a/src/tools/wasm-reduce/wasm-reduce.cpp b/src/tools/wasm-reduce/wasm-reduce.cpp index e002a499545..a920e9840a4 100644 --- a/src/tools/wasm-reduce/wasm-reduce.cpp +++ b/src/tools/wasm-reduce/wasm-reduce.cpp @@ -29,12 +29,12 @@ #include "ir/branch-utils.h" #include "ir/iteration.h" -#include "ir/literal-utils.h" #include "ir/properties.h" #include "ir/utils.h" #include "pass.h" #include "support/colors.h" #include "support/command-line.h" +#include "support/delta_debugging.h" #include "support/file.h" #include "support/hash.h" #include "support/path.h" @@ -894,8 +894,45 @@ struct Reducer } } - // Reduces entire functions at a time. Returns whether we did a significant - // amount of reduction that justifies doing even more. + void reduceFunctionBodies() { + std::cerr << "| try to remove function bodies\n"; + // Use function indices to speed up finding the complement of the kept + // partition. 
+ std::vector funcs; + funcs.reserve(module->functions.size()); + for (Index i = 0; i < module->functions.size(); ++i) { + funcs.push_back(i); + } + deltaDebugging( + std::move(funcs), + [&](Index partitionIndex, + Index numPartitions, + const std::vector& partition) { + std::cerr << "| try partition " << partitionIndex + 1 << " / " + << numPartitions << " (size " << partition.size() << ")\n"; + std::vector removed; + removed.reserve(module->functions.size() - partition.size()); + Index i = 0; + for (Index j : partition) { + while (i < j) { + removed.push_back(module->functions[i++]->name); + } + ++i; + } + while (i < module->functions.size()) { + removed.push_back(module->functions[i++]->name); + } + if (tryToEmptyFunctions(removed)) { + // TODO: Consider doing this just once after the delta debugging since + // we never need to restore from the working copy while removing + // function bodies. + noteReduction(removed.size()); + return true; + } + return false; + }); + } + bool reduceFunctions() { // try to remove functions std::vector functionNames; @@ -936,11 +973,9 @@ struct Reducer } std::cerr << "| trying at i=" << i << " of size " << names.size() << "\n"; - // Try to remove functions and/or empty them. Note that - // tryToRemoveFunctions() will reload the module if it fails, which means - // function names may change - for that reason, run it second. - justReduced = tryToEmptyFunctions(names) || tryToRemoveFunctions(names); - if (justReduced) { + // Note that tryToRemoveFunctions() will reload the module if it fails, + // which means function names may change. + if (tryToRemoveFunctions(names)) { noteReduction(names.size()); // Subtract 1 since the loop increments us anyhow by one: we want to // skip over the skipped functions, and not any more. 
@@ -967,8 +1002,11 @@ struct Reducer assert(curr == module.get()); curr = nullptr; + reduceFunctionBodies(); + // Reduction of entire functions at a time is very effective, and we do it // with exponential growth and backoff, so keep doing it while it works. + // TODO: Figure out how to use delta debugging for this as well. while (reduceFunctions()) { } diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 41d16f28e92..3a42fb1de74 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -10,6 +10,7 @@ set(unittest_SOURCES cast-check.cpp cfg.cpp dataflow.cpp + delta_debugging.cpp dfa_minimization.cpp disjoint_sets.cpp leaves.cpp diff --git a/test/gtest/delta_debugging.cpp b/test/gtest/delta_debugging.cpp new file mode 100644 index 00000000000..2c9dc2726ee --- /dev/null +++ b/test/gtest/delta_debugging.cpp @@ -0,0 +1,82 @@ +#include "support/delta_debugging.h" +#include "gtest/gtest.h" +#include +#include +#include + +using namespace wasm; + +TEST(DeltaDebuggingTest, EmptyInput) { + std::vector items; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector&) { return false; }); + EXPECT_TRUE(result.empty()); +} + +TEST(DeltaDebuggingTest, SingleItem) { + std::vector items = {0, 1, 2, 3, 4, 5, 6, 7}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector& partition) { + return std::find(partition.begin(), partition.end(), 3) != + partition.end(); + }); + std::vector expected = {3}; + EXPECT_EQ(result, expected); +} + +TEST(DeltaDebuggingTest, MultipleItemsAdjacent) { + std::vector items = {0, 1, 2, 3, 4, 5, 6, 7}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector& partition) { + bool has2 = + std::find(partition.begin(), partition.end(), 2) != partition.end(); + bool has3 = + std::find(partition.begin(), partition.end(), 3) != partition.end(); + return has2 && has3; + }); + std::vector expected = {2, 3}; + EXPECT_EQ(result, expected); +} + 
+TEST(DeltaDebuggingTest, MultipleItemsNonAdjacent) { + std::vector items = {0, 1, 2, 3, 4, 5, 6, 7}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector& partition) { + bool has2 = + std::find(partition.begin(), partition.end(), 2) != partition.end(); + bool has5 = + std::find(partition.begin(), partition.end(), 5) != partition.end(); + return has2 && has5; + }); + std::vector expected = {2, 5}; + EXPECT_EQ(result, expected); +} + +TEST(DeltaDebuggingTest, OrderMaintained) { + std::vector items = {3, 1, 4, 2}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector& partition) { + bool has3 = + std::find(partition.begin(), partition.end(), 3) != partition.end(); + bool has2 = + std::find(partition.begin(), partition.end(), 2) != partition.end(); + return has3 && has2; + }); + std::vector expected = {3, 2}; + EXPECT_EQ(result, expected); +} + +TEST(DeltaDebuggingTest, DifferentTypes) { + std::vector items = {"apple", "banana", "cherry", "date"}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector& partition) { + bool hasBanana = + std::find(partition.begin(), partition.end(), "banana") != + partition.end(); + bool hasDate = std::find(partition.begin(), partition.end(), "date") != + partition.end(); + return hasBanana && hasDate; + }); + std::vector expected = {"banana", "date"}; + EXPECT_EQ(result, expected); +} From 79582b369537e8fa70b044eb19d6dc0424ab8ffc Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 23 Apr 2026 16:26:34 -0700 Subject: [PATCH 2/4] updates --- src/support/delta_debugging.h | 16 ++- src/tools/wasm-reduce/wasm-reduce.cpp | 165 ++++++++++++++++---------- test/gtest/delta_debugging.cpp | 7 ++ 3 files changed, 119 insertions(+), 69 deletions(-) diff --git a/src/support/delta_debugging.h b/src/support/delta_debugging.h index 508fe96a385..9b45bd6e1ca 100644 --- a/src/support/delta_debugging.h +++ b/src/support/delta_debugging.h @@ -37,6 +37,13 @@ namespace wasm { // 
`partition` items. template std::vector deltaDebugging(std::vector items, F&& tryPartition) { + if (items.empty()) { + return items; + } + // First try removing everything. + if (tryPartition(0, 1, {})) { + return {}; + } size_t numPartitions = 2; while (numPartitions <= items.size()) { // Partition the items. @@ -98,12 +105,13 @@ std::vector deltaDebugging(std::vector items, F&& tryPartition) { } } - // Otherwise, make the partitions finer grained. - if (numPartitions < items.size()) { - numPartitions = std::min(items.size(), 2 * numPartitions); - } else { + if (numPartitions == items.size()) { + // Cannot further refine the partitions. We're done. break; } + + // Otherwise, make the partitions finer grained. + numPartitions = std::min(items.size(), 2 * numPartitions); } return items; } diff --git a/src/tools/wasm-reduce/wasm-reduce.cpp b/src/tools/wasm-reduce/wasm-reduce.cpp index a920e9840a4..2eeaed38e03 100644 --- a/src/tools/wasm-reduce/wasm-reduce.cpp +++ b/src/tools/wasm-reduce/wasm-reduce.cpp @@ -894,43 +894,103 @@ struct Reducer } } + bool isEmptyBody(Expression* body) { + if (body->is() || body->is()) { + return true; + } + if (auto* block = body->dynCast()) { + return block->list.empty(); + } + return false; + } + void reduceFunctionBodies() { std::cerr << "| try to remove function bodies\n"; // Use function indices to speed up finding the complement of the kept // partition. 
- std::vector funcs; - funcs.reserve(module->functions.size()); + std::vector nontrivialFuncIndices; + nontrivialFuncIndices.reserve(module->functions.size()); for (Index i = 0; i < module->functions.size(); ++i) { - funcs.push_back(i); - } - deltaDebugging( - std::move(funcs), - [&](Index partitionIndex, - Index numPartitions, - const std::vector& partition) { - std::cerr << "| try partition " << partitionIndex + 1 << " / " - << numPartitions << " (size " << partition.size() << ")\n"; - std::vector removed; - removed.reserve(module->functions.size() - partition.size()); - Index i = 0; - for (Index j : partition) { - while (i < j) { - removed.push_back(module->functions[i++]->name); + auto& func = module->functions[i]; + // Skip functions that already have trivial bodies. + if (func->imported() || isEmptyBody(func->body)) { + continue; + } + nontrivialFuncIndices.push_back(i); + } + // TODO: Use something other than an exception to implement early return. + struct EarlyReturn {}; + try { + deltaDebugging( + nontrivialFuncIndices, + [&](Index partitionIndex, + Index numPartitions, + const std::vector& partition) { + // Stop early if the partition size is less than the square root of + // the remaining set. We don't want to waste time on very fine-grained + // partitions when we could switch to another reduction strategy + // instead. + if (partition.size() > 0 && + partition.size() < std::sqrt(nontrivialFuncIndices.size())) { + throw EarlyReturn{}; } - ++i; - } - while (i < module->functions.size()) { - removed.push_back(module->functions[i++]->name); - } - if (tryToEmptyFunctions(removed)) { - // TODO: Consider doing this just once after the delta debugging since - // we never need to restore from the working copy while removing - // function bodies. 
- noteReduction(removed.size()); + + std::cerr << "| try partition " << partitionIndex + 1 << " / " + << numPartitions << " (size " << partition.size() << ")\n"; + Index removedSize = nontrivialFuncIndices.size() - partition.size(); + std::vector oldBodies(removedSize); + + // We first need to remove each non-kept function body, and later we + // might need to restore the same function bodies. Abstract the logic + // for iterating over these function bodies. `f` takes a Function* and + // Expression*& for the stashed body. + auto forEachRemovedFuncBody = [&](auto f) { + Index bodyIndex = 0; + Index nontrivialIndex = 0; + Index partitionIndex = 0; + while (nontrivialIndex < nontrivialFuncIndices.size()) { + if (partitionIndex < partition.size() && + nontrivialFuncIndices[nontrivialIndex] == + partition[partitionIndex]) { + // Kept, skip it. + nontrivialIndex++; + partitionIndex++; + } else { + // Removed, process it + Index funcIndex = nontrivialFuncIndices[nontrivialIndex++]; + f(module->functions[funcIndex].get(), oldBodies[bodyIndex++]); + } + } + assert(bodyIndex == removedSize); + assert(partitionIndex == partition.size()); + }; + + // Stash the bodies. + forEachRemovedFuncBody([&](Function* func, Expression*& oldBody) { + oldBody = func->body; + Builder builder(*module); + if (func->getResults() == Type::none) { + func->body = builder.makeNop(); + } else { + func->body = builder.makeUnreachable(); + } + }); + + if (!writeAndTestReduction()) { + // Failure. Restore the bodies. + forEachRemovedFuncBody([](Function* func, Expression*& oldBody) { + func->body = oldBody; + }); + return false; + } + + // Success! + noteReduction(removedSize); + nontrivialFuncIndices = partition; return true; - } - return false; - }); + }); + } catch (EarlyReturn) { + } } bool reduceFunctions() { @@ -1085,41 +1145,6 @@ struct Reducer } } - // Try to empty out the bodies of some functions. 
- bool tryToEmptyFunctions(std::vector names) { - std::vector oldBodies; - size_t actuallyEmptied = 0; - for (auto name : names) { - auto* func = module->getFunction(name); - auto* oldBody = func->body; - oldBodies.push_back(oldBody); - // Nothing to do for imported functions (body is nullptr) or for bodies - // that have already been as reduced as we can make them. - if (func->imported() || oldBody->is() || - oldBody->is()) { - continue; - } - actuallyEmptied++; - bool useUnreachable = func->getResults() != Type::none; - if (useUnreachable) { - func->body = builder->makeUnreachable(); - } else { - func->body = builder->makeNop(); - } - } - if (actuallyEmptied > 0 && writeAndTestReduction()) { - std::cerr << "| emptied " << actuallyEmptied << " / " - << names.size() << " functions\n"; - return true; - } else { - // Restore the bodies. - for (size_t i = 0; i < names.size(); i++) { - module->getFunction(names[i])->body = oldBodies[i]; - } - return false; - } - } - // Try to actually remove functions. If they are somehow referred to, we will // get a validation error and undo it. bool tryToRemoveFunctions(std::vector names) { @@ -1542,10 +1567,20 @@ More documentation can be found at bool stopping = false; + bool first = true; while (1) { Reducer reducer( command, test, working, binary, deNan, verbose, debugInfo, options); + // For extremely large modules with slow reproduction commands, reducing + // function bodies first can be more effective than running passes. TODO: + // clean this up and reconsider the order of reducers. + if (first) { + reducer.loadWorking(); + reducer.reduceFunctionBodies(); + first = false; + } + // run binaryen optimization passes to reduce. 
passes are fast to run // and can often reduce large amounts of code efficiently, as opposed // to detructive reduction (i.e., that doesn't preserve correctness as diff --git a/test/gtest/delta_debugging.cpp b/test/gtest/delta_debugging.cpp index 2c9dc2726ee..26636e79c21 100644 --- a/test/gtest/delta_debugging.cpp +++ b/test/gtest/delta_debugging.cpp @@ -80,3 +80,10 @@ TEST(DeltaDebuggingTest, DifferentTypes) { std::vector expected = {"banana", "date"}; EXPECT_EQ(result, expected); } + +TEST(DeltaDebuggingTest, UnconditionallyTrue) { + std::vector items = {0, 1, 2, 3}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector&) { return true; }); + EXPECT_TRUE(result.empty()); +} From 751955d046fd1009f2a8b935cba57714a08ae722 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 24 Apr 2026 00:25:55 -0700 Subject: [PATCH 3/4] round down sqrt --- src/tools/wasm-reduce/wasm-reduce.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/wasm-reduce/wasm-reduce.cpp b/src/tools/wasm-reduce/wasm-reduce.cpp index 2eeaed38e03..6722cb45a03 100644 --- a/src/tools/wasm-reduce/wasm-reduce.cpp +++ b/src/tools/wasm-reduce/wasm-reduce.cpp @@ -930,8 +930,8 @@ struct Reducer // the remaining set. We don't want to waste time on very fine-grained // partitions when we could switch to another reduction strategy // instead. 
- if (partition.size() > 0 && - partition.size() < std::sqrt(nontrivialFuncIndices.size())) { + if (size_t sqrtRemaining = std::sqrt(nontrivialFuncIndices.size()); + partition.size() > 0 && partition.size() < sqrtRemaining) { throw EarlyReturn{}; } From 78035760e0a2b3fa3c4607f93b8bf0f5db1fb266 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 24 Apr 2026 00:30:46 -0700 Subject: [PATCH 4/4] add test --- test/gtest/delta_debugging.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/gtest/delta_debugging.cpp b/test/gtest/delta_debugging.cpp index 26636e79c21..7e4c8ad4db5 100644 --- a/test/gtest/delta_debugging.cpp +++ b/test/gtest/delta_debugging.cpp @@ -87,3 +87,11 @@ TEST(DeltaDebuggingTest, UnconditionallyTrue) { items, [](size_t, size_t, const std::vector&) { return true; }); EXPECT_TRUE(result.empty()); } + +TEST(DeltaDebuggingTest, UnconditionallyFalse) { + std::vector items = {0, 1, 2, 3}; + auto result = deltaDebugging( + items, [](size_t, size_t, const std::vector&) { return false; }); + std::vector expected = {0, 1, 2, 3}; + EXPECT_EQ(result, expected); +}