Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions src/support/delta_debugging.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright 2026 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef wasm_support_delta_debugging_h
#define wasm_support_delta_debugging_h

#include <algorithm>
#include <cassert>
#include <vector>

namespace wasm {

// Use the delta debugging algorithm (Zeller 1999,
// https://dl.acm.org/doi/10.1109/32.988498) to find the minimal set of
// items necessary to preserve some property. Returns that minimal set of
// items, preserving their input order. `tryPartition` should have this
// signature:
//
//   bool tryPartition(size_t partitionIndex,
//                     size_t numPartitions,
//                     const std::vector<T>& partition)
//
// It should return true iff the property is preserved while keeping only
// `partition` items. The callback may also be a generic callable (for example
// a lambda with `auto` parameters); every call passes two `size_t` values and
// a `std::vector<T>`.
//
// The search works as follows:
//  * An empty input is returned as-is without calling `tryPartition`.
//  * Otherwise the empty set is tried first, reported as partition 0 of 1.
//  * Then the items are repeatedly split into `numPartitions` contiguous
//    chunks. Each chunk is tried on its own and, when there are more than two
//    chunks, so is the complement of each chunk (reported with the index of
//    the chunk that was left out).
//  * When nothing succeeds, the number of chunks is doubled (capped at the
//    number of items). The search ends once single-item chunks have been
//    tried without success.
template<typename T, typename F>
std::vector<T> deltaDebugging(std::vector<T> items, F&& tryPartition) {
  if (items.empty()) {
    return items;
  }
  // First try removing everything. Pass explicitly typed arguments rather than
  // a bare `{}` so that callbacks with deduced parameter types also compile.
  if (tryPartition(size_t(0), size_t(1), std::vector<T>{})) {
    return {};
  }
  size_t numPartitions = 2;
  while (numPartitions <= items.size()) {
    // Partition the items into contiguous chunks whose sizes differ by at most
    // one; the first `rem` chunks receive the extra item.
    const size_t size = items.size();
    const size_t basePartitionSize = size / numPartitions;
    const size_t rem = size % numPartitions;
    std::vector<std::vector<T>> partitions;
    partitions.reserve(numPartitions);
    auto chunkBegin = items.begin();
    for (size_t i = 0; i < numPartitions; ++i) {
      const size_t partitionSize = basePartitionSize + (i < rem ? 1 : 0);
      // The loop condition guarantees numPartitions <= items.size(), so no
      // chunk can be empty.
      assert(partitionSize > 0);
      auto chunkEnd = chunkBegin + partitionSize;
      partitions.emplace_back(chunkBegin, chunkEnd);
      chunkBegin = chunkEnd;
    }
    assert(chunkBegin == items.end());
    assert(numPartitions == partitions.size());

    bool reduced = false;

    // Try keeping only one partition. Try each partition in turn.
    for (size_t i = 0; i < numPartitions; ++i) {
      if (tryPartition(i, numPartitions, partitions[i])) {
        // Restart from the coarsest granularity on the smaller set.
        items = std::move(partitions[i]);
        numPartitions = 2;
        reduced = true;
        break;
      }
    }
    if (reduced) {
      continue;
    }

    // Otherwise, try keeping the complement of a partition. Do not do this with
    // only two partitions because that would be no different from what we
    // already tried.
    if (numPartitions > 2) {
      for (size_t i = 0; i < numPartitions; ++i) {
        std::vector<T> complement;
        complement.reserve(items.size() - partitions[i].size());
        for (size_t j = 0; j < numPartitions; ++j) {
          if (j != i) {
            complement.insert(
              complement.end(), partitions[j].begin(), partitions[j].end());
          }
        }
        if (tryPartition(i, numPartitions, complement)) {
          // One chunk was dropped, so continue with one fewer partition (but
          // never fewer than two).
          items = std::move(complement);
          numPartitions = std::max(numPartitions - 1, size_t(2));
          reduced = true;
          break;
        }
      }
      if (reduced) {
        continue;
      }
    }

    if (numPartitions == items.size()) {
      // Cannot further refine the partitions. We're done.
      break;
    }

    // Otherwise, make the partitions finer grained.
    numPartitions = std::min(items.size(), 2 * numPartitions);
  }
  return items;
}

} // namespace wasm

#endif // wasm_support_delta_debugging_h
159 changes: 116 additions & 43 deletions src/tools/wasm-reduce/wasm-reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@

#include "ir/branch-utils.h"
#include "ir/iteration.h"
#include "ir/literal-utils.h"
#include "ir/properties.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/colors.h"
#include "support/command-line.h"
#include "support/delta_debugging.h"
#include "support/file.h"
#include "support/hash.h"
#include "support/path.h"
Expand Down Expand Up @@ -894,8 +894,105 @@ struct Reducer
}
}

// reduceFunctions() (defined below) reduces entire functions at a time. It
// returns whether we did a significant amount of reduction that justifies
// doing even more.
// Returns true when `body` is a Nop, an Unreachable, or a Block whose list is
// empty; returns false for every other expression. `body` is dereferenced
// unconditionally, so callers must not pass a null pointer.
bool isEmptyBody(Expression* body) {
  // A block counts as empty only when it has no children.
  if (auto* asBlock = body->dynCast<Block>()) {
    return asBlock->list.empty();
  }
  // Anything else is empty only if it is one of the two trivial leaves.
  return body->is<Nop>() || body->is<Unreachable>();
}

void reduceFunctionBodies() {
std::cerr << "| try to remove function bodies\n";
// Use function indices to speed up finding the complement of the kept
// partition.
std::vector<Index> nontrivialFuncIndices;
nontrivialFuncIndices.reserve(module->functions.size());
for (Index i = 0; i < module->functions.size(); ++i) {
auto& func = module->functions[i];
// Skip functions that already have trivial bodies.
if (func->imported() || isEmptyBody(func->body)) {
continue;
}
nontrivialFuncIndices.push_back(i);
}
// TODO: Use something other than an exception to implement early return.
struct EarlyReturn {};
try {
deltaDebugging(
nontrivialFuncIndices,
[&](Index partitionIndex,
Index numPartitions,
const std::vector<Index>& partition) {
// Stop early if the partition size is less than the square root of
// the remaining set. We don't want to waste time on very fine-grained
// partitions when we could switch to another reduction strategy
// instead.
if (size_t sqrtRemaining = std::sqrt(nontrivialFuncIndices.size());
partition.size() > 0 && partition.size() < sqrtRemaining) {
throw EarlyReturn{};
}

std::cerr << "| try partition " << partitionIndex + 1 << " / "
<< numPartitions << " (size " << partition.size() << ")\n";
Index removedSize = nontrivialFuncIndices.size() - partition.size();
std::vector<Expression*> oldBodies(removedSize);

// We first need to remove each non-kept function body, and later we
// might need to restore the same function bodies. Abstract the logic
// for iterating over these function bodies. `f` takes a Function* and
// Expression*& for the stashed body.
auto forEachRemovedFuncBody = [&](auto f) {
Index bodyIndex = 0;
Index nontrivialIndex = 0;
Index partitionIndex = 0;
while (nontrivialIndex < nontrivialFuncIndices.size()) {
if (partitionIndex < partition.size() &&
nontrivialFuncIndices[nontrivialIndex] ==
partition[partitionIndex]) {
// Kept, skip it.
nontrivialIndex++;
partitionIndex++;
} else {
// Removed, process it
Index funcIndex = nontrivialFuncIndices[nontrivialIndex++];
f(module->functions[funcIndex].get(), oldBodies[bodyIndex++]);
}
}
assert(bodyIndex == removedSize);
assert(partitionIndex == partition.size());
};

// Stash the bodies.
forEachRemovedFuncBody([&](Function* func, Expression*& oldBody) {
oldBody = func->body;
Builder builder(*module);
if (func->getResults() == Type::none) {
func->body = builder.makeNop();
} else {
func->body = builder.makeUnreachable();
}
});

if (!writeAndTestReduction()) {
// Failure. Restore the bodies.
forEachRemovedFuncBody([](Function* func, Expression*& oldBody) {
func->body = oldBody;
});
return false;
}

// Success!
noteReduction(removedSize);
nontrivialFuncIndices = partition;
return true;
});
} catch (EarlyReturn) {
}
}

bool reduceFunctions() {
// try to remove functions
std::vector<Name> functionNames;
Expand Down Expand Up @@ -936,11 +1033,9 @@ struct Reducer
}
std::cerr << "| trying at i=" << i << " of size " << names.size()
<< "\n";
// Try to remove functions and/or empty them. Note that
// tryToRemoveFunctions() will reload the module if it fails, which means
// function names may change - for that reason, run it second.
justReduced = tryToEmptyFunctions(names) || tryToRemoveFunctions(names);
if (justReduced) {
// Note that tryToRemoveFunctions() will reload the module if it fails,
// which means function names may change.
if (tryToRemoveFunctions(names)) {
noteReduction(names.size());
// Subtract 1 since the loop increments us anyhow by one: we want to
// skip over the skipped functions, and not any more.
Expand All @@ -967,8 +1062,11 @@ struct Reducer
assert(curr == module.get());
curr = nullptr;

reduceFunctionBodies();

// Reduction of entire functions at a time is very effective, and we do it
// with exponential growth and backoff, so keep doing it while it works.
// TODO: Figure out how to use delta debugging for this as well.
while (reduceFunctions()) {
}

Expand Down Expand Up @@ -1047,41 +1145,6 @@ struct Reducer
}
}

// Try to empty out the bodies of some functions.
bool tryToEmptyFunctions(std::vector<Name> names) {
std::vector<Expression*> oldBodies;
size_t actuallyEmptied = 0;
for (auto name : names) {
auto* func = module->getFunction(name);
auto* oldBody = func->body;
oldBodies.push_back(oldBody);
// Nothing to do for imported functions (body is nullptr) or for bodies
// that have already been as reduced as we can make them.
if (func->imported() || oldBody->is<Unreachable>() ||
oldBody->is<Nop>()) {
continue;
}
actuallyEmptied++;
bool useUnreachable = func->getResults() != Type::none;
if (useUnreachable) {
func->body = builder->makeUnreachable();
} else {
func->body = builder->makeNop();
}
}
if (actuallyEmptied > 0 && writeAndTestReduction()) {
std::cerr << "| emptied " << actuallyEmptied << " / "
<< names.size() << " functions\n";
return true;
} else {
// Restore the bodies.
for (size_t i = 0; i < names.size(); i++) {
module->getFunction(names[i])->body = oldBodies[i];
}
return false;
}
}

// Try to actually remove functions. If they are somehow referred to, we will
// get a validation error and undo it.
bool tryToRemoveFunctions(std::vector<Name> names) {
Expand Down Expand Up @@ -1504,10 +1567,20 @@ More documentation can be found at

bool stopping = false;

bool first = true;
while (1) {
Reducer reducer(
command, test, working, binary, deNan, verbose, debugInfo, options);

// For extremely large modules with slow reproduction commands, reducing
// function bodies first can be more effective than running passes. TODO:
// clean this up and reconsider the order of reducers.
if (first) {
reducer.loadWorking();
reducer.reduceFunctionBodies();
first = false;
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about landing this part first? (also, measurement should be independent of it)


// run binaryen optimization passes to reduce. passes are fast to run
// and can often reduce large amounts of code efficiently, as opposed
// to destructive reduction (i.e., that doesn't preserve correctness as
Expand Down
1 change: 1 addition & 0 deletions test/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set(unittest_SOURCES
cast-check.cpp
cfg.cpp
dataflow.cpp
delta_debugging.cpp
dfa_minimization.cpp
disjoint_sets.cpp
leaves.cpp
Expand Down
Loading
Loading