From f2244fd6ea6068ba0493280942538bad9c0f2367 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 1 Apr 2026 17:37:59 +0000 Subject: [PATCH] Decompose byte_extract from union through widest member When byte_extract is applied to a union-typed expression with a non-constant computed offset, the byte_extract lowering creates a massive expression because it must consider all possible byte layouts. For unions where the widest member covers the entire union, we can instead decompose the access through that member, avoiding the expensive lowering. Add union handling to the non-constant-offset overload of get_subexpression_at_offset: for symbol and member expressions of union type, recurse into the widest member. Guard this on the widest member's size equalling the union's size (no trailing padding). The constant-offset overload is left unchanged to preserve existing simplification behavior (e.g., byte_extract(byte_update(...)) patterns used during constant propagation). On the reproducer from #8813, the union version now takes 1.0s (previously 94s), matching the struct version at 1.0s. Fixes: #8813 Co-authored-by: Kiro --- regression/cbmc/union_performance1/main.c | 22 +++++++++++++++++ regression/cbmc/union_performance1/test.desc | 11 +++++++++ src/util/pointer_offset_size.cpp | 26 ++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 regression/cbmc/union_performance1/main.c create mode 100644 regression/cbmc/union_performance1/test.desc diff --git a/regression/cbmc/union_performance1/main.c b/regression/cbmc/union_performance1/main.c new file mode 100644 index 00000000000..800d9418f4a --- /dev/null +++ b/regression/cbmc/union_performance1/main.c @@ -0,0 +1,22 @@ +// Test that byte_extract from a union does not cause performance explosion. +// See GitHub issue diffblue/cbmc#8813. +#include +#include + +typedef struct +{ + int32_t data[8]; +} arr; + +int main() +{ + union + { + arr a; + arr b; + } u; + unsigned i; + __CPROVER_assume(i < 8); + u.a.data[i] = 42; + assert(u.b.data[i] == 42); +} diff --git a/regression/cbmc/union_performance1/test.desc b/regression/cbmc/union_performance1/test.desc new file mode 100644 index 00000000000..9d220de32cd --- /dev/null +++ b/regression/cbmc/union_performance1/test.desc @@ -0,0 +1,11 @@ +CORE +main.c + +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +Union of same-typed members should not cause performance explosion. +See GitHub issue diffblue/cbmc#8813. diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index 56e170fab18..2e25851c2ed 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -859,6 +859,32 @@ std::optional get_subexpression_at_offset( return {}; } + else if(expr.type().id() == ID_union || expr.type().id() == ID_union_tag) + { + // For union-typed expressions with non-constant offset, access + // through the widest member to avoid expensive byte_extract lowering. + // This is safe when the widest member covers the full union (no + // trailing padding beyond it), since all members start at offset 0. + if(expr.id() == ID_symbol || expr.id() == ID_member) + { + const union_typet &union_type = + expr.type().id() == ID_union_tag + ? ns.follow_tag(to_union_tag_type(expr.type())) + : to_union_type(expr.type()); + + const auto union_size = pointer_offset_bits(union_type, ns); + const auto widest_member = union_type.find_widest_union_component(ns); + if( + widest_member.has_value() && union_size.has_value() && + widest_member->second == *union_size) + { + const member_exprt member( + expr, widest_member->first.get_name(), widest_member->first.type()); + return get_subexpression_at_offset(member, offset, target_type, ns); + } + } + return {}; + } else return {}; }