From cbd8cfb9a1b043a7a92b876c8632bffab5681562 Mon Sep 17 00:00:00 2001 From: topologoanatom Date: Thu, 15 Jan 2026 15:19:00 +0200 Subject: [PATCH 1/7] add cleaning of offset bits before Arc::clone in product func --- src/value.rs | 92 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/src/value.rs b/src/value.rs index 4b9bf285..b2b5e5c1 100644 --- a/src/value.rs +++ b/src/value.rs @@ -309,6 +309,40 @@ fn copy_bits(src: &[u8], src_offset: usize, dst: &mut [u8], dst_offset: usize, n } } +fn check_offset_bits(data: &[u8], bit_offset: usize) -> bool { + debug_assert!(data.len() * 8 >= bit_offset); + for bit in data.iter().take(bit_offset / 8) { + if *bit != 0 { + return false; + } + } + let rem = bit_offset % 8; + if rem != 0 { + let byte = data[bit_offset / 8]; + let mask = 0xFF << (8 - rem); + if byte & mask != 0 { + return false; + } + } + true +} + +fn product_copying_aux(side: Option<(&Arc<[u8]>, usize)>, bit_length: usize) -> (Arc<[u8]>, usize) { + if let Some((side, bit_offset)) = side { + if check_offset_bits(side, bit_offset) { + (Arc::clone(side), bit_offset) + } else { + let mut res = vec![0u8; bit_length.div_ceil(8)]; + + copy_bits(side, bit_offset, &mut res, 0, bit_length); + + (Arc::from(res), 0) + } + } else { + (Arc::from(vec![0; bit_length.div_ceil(8)]), 0) + } +} + /// Helper function to take the product of two values (i.e. their concatenation). /// /// If either `left_inner` or `right_inner` is not provided, it is assumed to be @@ -321,39 +355,33 @@ fn product( right: Option<(&Arc<[u8]>, usize)>, right_bit_length: usize, ) -> (Arc<[u8]>, usize) { - if left_bit_length == 0 { - if let Some((right, right_bit_offset)) = right { - (Arc::clone(right), right_bit_offset) - } else if right_bit_length == 0 { - (Arc::new([]), 0) - } else { - (Arc::from(vec![0; right_bit_length.div_ceil(8)]), 0) - } - } else if right_bit_length == 0 { - if let Some((lt, left_bit_offset)) = left { - (Arc::clone(lt), left_bit_offset) - } else { - (Arc::from(vec![0; left_bit_length.div_ceil(8)]), 0) - } - } else { - // Both left and right have nonzero lengths. This is the only "real" case - // in which we have to do something beyond cloning Arcs or allocating - // zeroed vectors. In this case we left-shift both as much as possible. - let mut bx = Box::<[u8]>::from(vec![0; (left_bit_length + right_bit_length).div_ceil(8)]); - if let Some((left, left_bit_offset)) = left { - copy_bits(left, left_bit_offset, &mut bx, 0, left_bit_length); - } - if let Some((right, right_bit_offset)) = right { - copy_bits( - right, - right_bit_offset, - &mut bx, - left_bit_length, - right_bit_length, - ); - } + match (left_bit_length, right_bit_length) { + (0, 0) => (Arc::<[u8]>::from([]), 0), + + (0, right_bit_length) => product_copying_aux(right, right_bit_length), + + (left_bit_length, 0) => product_copying_aux(left, left_bit_length), - (bx.into(), 0) + (left_bit_length, right_bit_length) => { + // Both left and right have nonzero lengths. This is the only "real" case + // in which we have to do something beyond cloning Arcs or allocating + // zeroed vectors. In this case we left-shift both as much as possible. + let mut bx = + Box::<[u8]>::from(vec![0; (left_bit_length + right_bit_length).div_ceil(8)]); + if let Some((left, left_bit_offset)) = left { + copy_bits(left, left_bit_offset, &mut bx, 0, left_bit_length); + } + if let Some((right, right_bit_offset)) = right { + copy_bits( + right, + right_bit_offset, + &mut bx, + left_bit_length, + right_bit_length, + ); + } + (bx.into(), 0) + } } } From cc47fe10f3ebd678ea7faa95dd97410b3b27c4c1 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 23 Jan 2026 16:20:26 +0000 Subject: [PATCH 2/7] fuzz: don't make exponentially-sized types in extract_value_direct In the pruning test I will be writing I would like to iterate over entire types so that I can reduce their size. This is impossible for exponentially sized types. So make them disableable in extract_final_type, and disable them for extract_value_direct (but nowhere else, since they seem to be working elsewhere). It would be nice to test that pruning works properly with massive types; it appears that it does; but that would require more work than I want to put in for this PR. --- fuzz/fuzz_lib/lib.rs | 22 +++++++++++----------- fuzz/fuzz_targets/construct_type.rs | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fuzz/fuzz_lib/lib.rs b/fuzz/fuzz_lib/lib.rs index d3772c85..4403145d 100644 --- a/fuzz/fuzz_lib/lib.rs +++ b/fuzz/fuzz_lib/lib.rs @@ -63,8 +63,8 @@ impl<'f> Extractor<'f> { } /// Attempt to yield a type from the fuzzer. - pub fn extract_final_type(&mut self) -> Option> { - // We can costruct extremely large types by duplicating Arcs; there + pub fn extract_final_type(&mut self, allow_blowup: bool) -> Option> { + // We can construct extremely large types by duplicating Arcs; there // is no need to have an exponential blowup in the number of tasks. const MAX_N_TASKS: usize = 300; @@ -83,7 +83,7 @@ impl<'f> Extractor<'f> { result_stack.push(FinalTy::unit()); } else { let is_sum = self.extract_bit()?; - let dupe = task_stack.len() >= MAX_N_TASKS || self.extract_bit()?; + let dupe = allow_blowup && (task_stack.len() >= MAX_N_TASKS || self.extract_bit()?); task_stack.push(StackElem::Binary { is_sum, dupe }); if !dupe { task_stack.push(StackElem::NeedType) @@ -113,7 +113,7 @@ impl<'f> Extractor<'f> { /// Attempt to yield a value from the fuzzer by constructing a type and then /// reading a bitstring of that type, in the padded value encoding. pub fn extract_value_padded(&mut self) -> Option { - let ty = self.extract_final_type()?; + let ty = self.extract_final_type(true)?; if ty.bit_width() > 64 * 1024 * 1024 { // little fuzzing value in producing massive values return None; @@ -128,7 +128,7 @@ impl<'f> Extractor<'f> { /// Attempt to yield a value from the fuzzer by constructing a type and then /// reading a bitstring of that type, in the compact value encoding. pub fn extract_value_compact(&mut self) -> Option { - let ty = self.extract_final_type()?; + let ty = self.extract_final_type(true)?; if ty.bit_width() > 64 * 1024 * 1024 { // little fuzzing value in producing massive values return None; @@ -184,7 +184,7 @@ impl<'f> Extractor<'f> { } StackElem::Left => { let child = result_stack.pop().unwrap(); - let ty = self.extract_final_type()?; + let ty = self.extract_final_type(true)?; if ty.bit_width() > MAX_TY_WIDTH { return None; } @@ -192,7 +192,7 @@ impl<'f> Extractor<'f> { } StackElem::Right => { let child = result_stack.pop().unwrap(); - let ty = self.extract_final_type()?; + let ty = self.extract_final_type(true)?; if ty.bit_width() > MAX_TY_WIDTH { return None; } @@ -205,7 +205,7 @@ impl<'f> Extractor<'f> { } /// Attempt to yield a type from the fuzzer. - pub fn extract_old_final_type(&mut self) -> Option> { + pub fn extract_old_final_type(&mut self, allow_blowup: bool) -> Option> { // We can costruct extremely large types by duplicating Arcs; there // is no need to have an exponential blowup in the number of tasks. const MAX_N_TASKS: usize = 300; @@ -225,7 +225,7 @@ impl<'f> Extractor<'f> { result_stack.push(OldFinalTy::unit()); } else { let is_sum = self.extract_bit()?; - let dupe = task_stack.len() >= MAX_N_TASKS || self.extract_bit()?; + let dupe = allow_blowup && (task_stack.len() >= MAX_N_TASKS || self.extract_bit()?); task_stack.push(StackElem::Binary { is_sum, dupe }); if !dupe { task_stack.push(StackElem::NeedType) @@ -297,7 +297,7 @@ impl<'f> Extractor<'f> { } StackElem::Left => { let child = result_stack.pop().unwrap(); - let ty = self.extract_old_final_type()?; + let ty = self.extract_old_final_type(true)?; if ty.bit_width() > MAX_TY_WIDTH { return None; } @@ -305,7 +305,7 @@ impl<'f> Extractor<'f> { } StackElem::Right => { let child = result_stack.pop().unwrap(); - let ty = self.extract_old_final_type()?; + let ty = self.extract_old_final_type(true)?; if ty.bit_width() > MAX_TY_WIDTH { return None; } diff --git a/fuzz/fuzz_targets/construct_type.rs b/fuzz/fuzz_targets/construct_type.rs index 65e7e520..391dc6c3 100644 --- a/fuzz/fuzz_targets/construct_type.rs +++ b/fuzz/fuzz_targets/construct_type.rs @@ -5,7 +5,7 @@ #[cfg(any(fuzzing, test))] fn do_test(data: &[u8]) { let mut extractor = simplicity_fuzz::Extractor::new(data); - let _ = extractor.extract_final_type(); + let _ = extractor.extract_final_type(true); } #[cfg(fuzzing)] From c9f742d046f30cb14c94cb6a73dae3cc49bd51fa Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 23 Jan 2026 15:40:46 +0000 Subject: [PATCH 3/7] fuzz: add 'prune_value' fuzz test which can find regressions for #338 --- fuzz/Cargo.toml | 7 +++ fuzz/fuzz_lib/lib.rs | 2 +- fuzz/fuzz_targets/prune_value.rs | 105 +++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 fuzz/fuzz_targets/prune_value.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 4135e812..fecec3c1 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -67,6 +67,13 @@ test = false doc = false bench = false +[[bin]] +name = "prune_value" +path = "fuzz_targets/prune_value.rs" +test = false +doc = false +bench = false + [[bin]] name = "regression_286" path = "fuzz_targets/regression_286.rs" diff --git a/fuzz/fuzz_lib/lib.rs b/fuzz/fuzz_lib/lib.rs index 4403145d..56f4a19e 100644 --- a/fuzz/fuzz_lib/lib.rs +++ b/fuzz/fuzz_lib/lib.rs @@ -113,7 +113,7 @@ impl<'f> Extractor<'f> { /// Attempt to yield a value from the fuzzer by constructing a type and then /// reading a bitstring of that type, in the padded value encoding. pub fn extract_value_padded(&mut self) -> Option { - let ty = self.extract_final_type(true)?; + let ty = self.extract_final_type(false)?; if ty.bit_width() > 64 * 1024 * 1024 { // little fuzzing value in producing massive values return None; diff --git a/fuzz/fuzz_targets/prune_value.rs b/fuzz/fuzz_targets/prune_value.rs new file mode 100644 index 00000000..94488f90 --- /dev/null +++ b/fuzz/fuzz_targets/prune_value.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: CC0-1.0 + +#![cfg_attr(fuzzing, no_main)] + +#[cfg(any(fuzzing, test))] +fn do_test(data: &[u8]) -> Option<()> { + use simplicity::dag::{DagLike, NoSharing}; + use simplicity::types::{CompleteBound, Final}; + + let mut extractor = simplicity_fuzz::Extractor::new(data); + + let val = extractor.extract_value_padded()?; + let ty = val.ty(); + + // Construct a smaller type + let mut stack = vec![]; + for node in ty.post_order_iter::() { + match node.node.bound() { + CompleteBound::Unit => stack.push(Final::unit()), + CompleteBound::Sum(..) => { + let right = stack.pop().unwrap(); + let left = stack.pop().unwrap(); + stack.push(Final::sum(left, right)); + } + CompleteBound::Product(..) => { + let mut right = stack.pop().unwrap(); + let mut left = stack.pop().unwrap(); + if extractor.extract_bit()? { + left = Final::unit(); + } + if extractor.extract_bit()? { + right = Final::unit(); + } + stack.push(Final::product(left, right)); + } + } + } + let pruned_ty = stack.pop().unwrap(); + assert!(stack.is_empty()); + + + // Prune the value + let pruned_val = match val.prune(&pruned_ty) { + Some(val) => val, + None => panic!("Failed to prune value {val} from {ty} to {pruned_ty}"), + }; + + /* + // If you have a regression you likely want to uncomment these printlns. + println!("Original Value Bits: {:?}", val.iter_padded().collect::>()); + println!("Original Value: {val}"); + println!(" Original Type: {ty}"); + println!(" Pruned Value: {pruned_val}"); + println!(" Pruned Type: {pruned_ty}"); + */ + + // Check that pruning made sense by going through the compact bit iterator + // and checking that the pruned value is obtained from the original by + // just deleting bits. + let mut orig_iter = val.iter_compact(); + let mut prune_iter = pruned_val.iter_compact(); + + loop { + match (orig_iter.next(), prune_iter.next()) { + (Some(true), Some(true)) => {}, + (Some(false), Some(false)) => {}, + (Some(_), Some(prune_bit)) => { + // We get here if the pruned and the original iterator disagree. + // This should happen iff we deleted some bits from the pruned + // value, meaning that we just need to ratchet forward the + // original iterator until we're back on track. + loop { + match orig_iter.next() { + Some(orig_bit) => { + if orig_bit == prune_bit { break } + }, + None => panic!("original iterator ran out before pruned iterator did"), + } + } + } + (None, Some(_)) => panic!("original iterator ran out before pruned iterator did"), + (_, None) => break, // done once the pruned iterator runs out + } + } + Some(()) +} + +#[cfg(fuzzing)] +libfuzzer_sys::fuzz_target!(|data| { let _ = do_test(data); }); + +#[cfg(not(fuzzing))] +fn main() {} + +#[cfg(test)] +mod tests { + use base64::Engine; + + #[test] + fn duplicate_crash() { + let data = base64::prelude::BASE64_STANDARD + .decode("Cg==") + .expect("base64 should be valid"); + let _ = super::do_test(&data); + } +} From 91dbb9adede5e01c4c1cb46084e0446b6cbd1e46 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Sat, 24 Jan 2026 18:43:47 +0000 Subject: [PATCH 4/7] value: add regression test for #337 --- src/value.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/value.rs b/src/value.rs index 4b9bf285..7f032ede 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1262,6 +1262,42 @@ mod tests { let new_v = Value::from_padded_bits(&mut iter, &v.ty).unwrap(); assert_eq!(v, new_v); } + + #[test] + fn prune_regression_337_1() { + // Two values that differ only in padding bits are nonetheless equal + let ty_2x1_opt = Final::sum(Final::unit(), Final::product(Final::two_two_n(0), Final::unit())); + + // L(ε) as all zeros + let mut iter = BitIter::new(Some(0b0000_0000u8).into_iter()); + let value_1 = Value::from_padded_bits(&mut iter, &ty_2x1_opt).unwrap(); + // L(ε) with a one in its padding bit + let mut iter = BitIter::new(Some(0b0100_0000u8).into_iter()); + let value_2 = Value::from_padded_bits(&mut iter, &ty_2x1_opt).unwrap(); + + assert_eq!(value_1, value_2); + } + + #[test] + fn prune_regression_337_2() { + let ty_2x1_opt = Final::sum(Final::unit(), Final::product(Final::two_two_n(0), Final::unit())); + let ty_1x1_opt = Final::sum(Final::unit(), Final::product(Final::unit(), Final::unit())); + + // Bits [false, true] - first bit is false (left sum), second bit is true (unused padding) + let mut iter = BitIter::new(Some(0b0100_0000u8).into_iter()); + + // Parse as (2 × 1)? then prune to (1 × 1)? + let value = Value::from_padded_bits(&mut iter, &ty_2x1_opt).unwrap(); + let pruned = value.prune(&ty_1x1_opt).unwrap(); + + // Expected: L(ε) - still in the left (unit) branch + let expected = Value::left(Value::unit(), Final::product(Final::unit(), Final::unit())); + + // BUG: This fails because pruning incorrectly returns R((ε,ε)). We first compare string + // serializations since a direct comparison might only test `prune_regression_337_1`. + assert_eq!(pruned.to_string(), expected.to_string()); + assert_eq!(pruned, expected); + } } #[cfg(bench)] From 106f8de1c2896b296ddc0a21cfb5d1c63a43208c Mon Sep 17 00:00:00 2001 From: topologoanatom Date: Wed, 28 Jan 2026 14:26:19 +0200 Subject: [PATCH 5/7] revert to original version --- src/value.rs | 92 ++++++++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 60 deletions(-) diff --git a/src/value.rs b/src/value.rs index bf4219af..7f032ede 100644 --- a/src/value.rs +++ b/src/value.rs @@ -309,40 +309,6 @@ fn copy_bits(src: &[u8], src_offset: usize, dst: &mut [u8], dst_offset: usize, n } } -fn check_offset_bits(data: &[u8], bit_offset: usize) -> bool { - debug_assert!(data.len() * 8 >= bit_offset); - for bit in data.iter().take(bit_offset / 8) { - if *bit != 0 { - return false; - } - } - let rem = bit_offset % 8; - if rem != 0 { - let byte = data[bit_offset / 8]; - let mask = 0xFF << (8 - rem); - if byte & mask != 0 { - return false; - } - } - true -} - -fn product_copying_aux(side: Option<(&Arc<[u8]>, usize)>, bit_length: usize) -> (Arc<[u8]>, usize) { - if let Some((side, bit_offset)) = side { - if check_offset_bits(side, bit_offset) { - (Arc::clone(side), bit_offset) - } else { - let mut res = vec![0u8; bit_length.div_ceil(8)]; - - copy_bits(side, bit_offset, &mut res, 0, bit_length); - - (Arc::from(res), 0) - } - } else { - (Arc::from(vec![0; bit_length.div_ceil(8)]), 0) - } -} - /// Helper function to take the product of two values (i.e. their concatenation). /// /// If either `left_inner` or `right_inner` is not provided, it is assumed to be @@ -355,33 +321,39 @@ fn product( right: Option<(&Arc<[u8]>, usize)>, right_bit_length: usize, ) -> (Arc<[u8]>, usize) { - match (left_bit_length, right_bit_length) { - (0, 0) => (Arc::<[u8]>::from([]), 0), - - (0, right_bit_length) => product_copying_aux(right, right_bit_length), - - (left_bit_length, 0) => product_copying_aux(left, left_bit_length), - - (left_bit_length, right_bit_length) => { - // Both left and right have nonzero lengths. This is the only "real" case - // in which we have to do something beyond cloning Arcs or allocating - // zeroed vectors. In this case we left-shift both as much as possible. - let mut bx = - Box::<[u8]>::from(vec![0; (left_bit_length + right_bit_length).div_ceil(8)]); - if let Some((left, left_bit_offset)) = left { - copy_bits(left, left_bit_offset, &mut bx, 0, left_bit_length); - } - if let Some((right, right_bit_offset)) = right { - copy_bits( - right, - right_bit_offset, - &mut bx, - left_bit_length, - right_bit_length, - ); - } - (bx.into(), 0) + if left_bit_length == 0 { + if let Some((right, right_bit_offset)) = right { + (Arc::clone(right), right_bit_offset) + } else if right_bit_length == 0 { + (Arc::new([]), 0) + } else { + (Arc::from(vec![0; right_bit_length.div_ceil(8)]), 0) } + } else if right_bit_length == 0 { + if let Some((lt, left_bit_offset)) = left { + (Arc::clone(lt), left_bit_offset) + } else { + (Arc::from(vec![0; left_bit_length.div_ceil(8)]), 0) + } + } else { + // Both left and right have nonzero lengths. This is the only "real" case + // in which we have to do something beyond cloning Arcs or allocating + // zeroed vectors. In this case we left-shift both as much as possible. + let mut bx = Box::<[u8]>::from(vec![0; (left_bit_length + right_bit_length).div_ceil(8)]); + if let Some((left, left_bit_offset)) = left { + copy_bits(left, left_bit_offset, &mut bx, 0, left_bit_length); + } + if let Some((right, right_bit_offset)) = right { + copy_bits( + right, + right_bit_offset, + &mut bx, + left_bit_length, + right_bit_length, + ); + } + + (bx.into(), 0) } } From d080859a6649ee6b0c410714dbb6c1a3a41c38d2 Mon Sep 17 00:00:00 2001 From: topologoanatom Date: Wed, 28 Jan 2026 17:51:51 +0200 Subject: [PATCH 6/7] wip: fix padding bits Passes regression tests, but `cargo-fuzz` still reports failures on certain inputs. Root cause investigation is ongoing. --- src/value.rs | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/src/value.rs b/src/value.rs index 7f032ede..f8857838 100644 --- a/src/value.rs +++ b/src/value.rs @@ -941,6 +941,106 @@ impl Value { } } blob.push(last); + + enum ZeroState<'a> { + ProcessType(&'a Final, usize), // (type, bit_offset) + ZeroSum(usize, usize), // (padding_start, padding_end) + ZeroProduct(usize, usize), // (padding_start, padding_end) + } + + fn zero_bit_range(data: &mut [u8], start: usize, end: usize) { + debug_assert!(start <= end); + debug_assert!(end <= data.len() * 8); + + let start_byte = start / 8; + let end_byte = end / 8; + let start_bit = start % 8; + let end_bit = end % 8; + + if start_byte == end_byte { + for i in start_bit..end_bit { + data[start_byte] &= !(1 << (7 - i)); + } + } else { + for i in start_bit..8 { + data[start_byte] &= !(1 << (7 - i)); + } + + for i in (start_byte + 1)..end_byte { + data[i] = 0; + } + + for i in 0..end_bit { + data[end_byte] &= !(1 << (7 - i)); + } + } + } + + fn read_bit(data: &[u8], pos: usize) -> bool { + debug_assert!(pos < data.len() * 8); + + let byte_idx = pos / 8; + let bit_idx = pos % 8; + + (data[byte_idx] >> (7 - bit_idx)) & 1 == 1 + } + + let mut stack = vec![ZeroState::ProcessType(ty, 0)]; + + // Iterate over type, schedule zeroing and push it's childrens to stack + while let Some(state) = stack.pop() { + match state { + ZeroState::ProcessType(ty, offset) => { + if ty.bit_width() == 0 { + continue; + } + + match &ty.bound() { + CompleteBound::Unit => { + // All bits are padding + zero_bit_range(&mut blob, offset, offset + ty.bit_width()); + } + + CompleteBound::Product(left, right) => { + let left_end = offset + left.bit_width(); + let right_end = left_end + right.bit_width(); + let product_end = offset + ty.bit_width(); + + if right_end < product_end { + stack.push(ZeroState::ZeroProduct(right_end, product_end)); + } + + if right.bit_width() > 0 { + stack.push(ZeroState::ProcessType(right, left_end)); + } + if left.bit_width() > 0 { + stack.push(ZeroState::ProcessType(left, offset)); + } + } + + CompleteBound::Sum(left, right) => { + let tag = read_bit(&blob, offset); + let variants_start = offset + 1; + let sum_end = offset + ty.bit_width(); + + let variant = if !tag { left } else { right }; + let side_end = variants_start + variant.bit_width(); + + if side_end < sum_end { + stack.push(ZeroState::ZeroSum(side_end, sum_end)); + } + if variant.bit_width() > 0 { + stack.push(ZeroState::ProcessType(variant, variants_start)); + } + } + } + } + + ZeroState::ZeroProduct(start, end) | ZeroState::ZeroSum(start, end) => { + zero_bit_range(&mut blob, start, end); + } + } + } Ok(Value { inner: blob.into(), @@ -1266,7 +1366,10 @@ mod tests { #[test] fn prune_regression_337_1() { // Two values that differ only in padding bits are nonetheless equal - let ty_2x1_opt = Final::sum(Final::unit(), Final::product(Final::two_two_n(0), Final::unit())); + let ty_2x1_opt = Final::sum( + Final::unit(), + Final::product(Final::two_two_n(0), Final::unit()), + ); // L(ε) as all zeros let mut iter = BitIter::new(Some(0b0000_0000u8).into_iter()); @@ -1280,7 +1383,10 @@ mod tests { #[test] fn prune_regression_337_2() { - let ty_2x1_opt = Final::sum(Final::unit(), Final::product(Final::two_two_n(0), Final::unit())); + let ty_2x1_opt = Final::sum( + Final::unit(), + Final::product(Final::two_two_n(0), Final::unit()), + ); let ty_1x1_opt = Final::sum(Final::unit(), Final::product(Final::unit(), Final::unit())); // Bits [false, true] - first bit is false (left sum), second bit is true (unused padding) From 23a137da6ee682577aba2c4084cefa91c7ed6bc8 Mon Sep 17 00:00:00 2001 From: topologoanatom Date: Thu, 29 Jan 2026 11:13:32 +0200 Subject: [PATCH 7/7] change sum variant start to be right-aligned and move zeroing logic out of `from_padding_bits` --- src/value.rs | 164 ++++++++++++++++++++++++++------------------------- 1 file changed, 83 insertions(+), 81 deletions(-) diff --git a/src/value.rs b/src/value.rs index f8857838..bbae4e12 100644 --- a/src/value.rs +++ b/src/value.rs @@ -941,112 +941,114 @@ impl Value { } } blob.push(last); - - enum ZeroState<'a> { - ProcessType(&'a Final, usize), // (type, bit_offset) - ZeroSum(usize, usize), // (padding_start, padding_end) - ZeroProduct(usize, usize), // (padding_start, padding_end) - } - fn zero_bit_range(data: &mut [u8], start: usize, end: usize) { - debug_assert!(start <= end); - debug_assert!(end <= data.len() * 8); + zero_padding_bits(&mut blob, ty); - let start_byte = start / 8; - let end_byte = end / 8; - let start_bit = start % 8; - let end_bit = end % 8; + Ok(Value { + inner: blob.into(), + bit_offset: 0, + ty: Arc::new(ty.clone()), + }) + } +} +enum ZeroState<'a> { + ProcessType(&'a Final, usize), // (type, bit_offset) + ZeroSum(usize, usize), // (padding_start, padding_end) + ZeroProduct(usize, usize), // (padding_start, padding_end) +} - if start_byte == end_byte { - for i in start_bit..end_bit { - data[start_byte] &= !(1 << (7 - i)); - } - } else { - for i in start_bit..8 { - data[start_byte] &= !(1 << (7 - i)); - } +fn zero_bit_range(data: &mut [u8], start: usize, end: usize) { + debug_assert!(start <= end); + debug_assert!(end <= data.len() * 8); - for i in (start_byte + 1)..end_byte { - data[i] = 0; - } + let start_byte = start / 8; + let end_byte = end / 8; + let start_bit = start % 8; + let end_bit = end % 8; - for i in 0..end_bit { - data[end_byte] &= !(1 << (7 - i)); - } - } + if start_byte == end_byte { + for i in start_bit..end_bit { + data[start_byte] &= !(1 << (7 - i)); + } + } else { + for i in start_bit..8 { + data[start_byte] &= !(1 << (7 - i)); } - fn read_bit(data: &[u8], pos: usize) -> bool { - debug_assert!(pos < data.len() * 8); - - let byte_idx = pos / 8; - let bit_idx = pos % 8; + for i in (start_byte + 1)..end_byte { + data[i] = 0; + } - (data[byte_idx] >> (7 - bit_idx)) & 1 == 1 + for i in 0..end_bit { + data[end_byte] &= !(1 << (7 - i)); } + } +} - let mut stack = vec![ZeroState::ProcessType(ty, 0)]; +fn read_bit(data: &[u8], pos: usize) -> bool { + debug_assert!(pos < data.len() * 8); - // Iterate over type, schedule zeroing and push it's childrens to stack - while let Some(state) = stack.pop() { - match state { - ZeroState::ProcessType(ty, offset) => { - if ty.bit_width() == 0 { - continue; - } + let byte_idx = pos / 8; + let bit_idx = pos % 8; - match &ty.bound() { - CompleteBound::Unit => { - // All bits are padding - zero_bit_range(&mut blob, offset, offset + ty.bit_width()); - } + (data[byte_idx] >> (7 - bit_idx)) & 1 == 1 +} - CompleteBound::Product(left, right) => { - let left_end = offset + left.bit_width(); - let right_end = left_end + right.bit_width(); - let product_end = offset + ty.bit_width(); +fn zero_padding_bits(bits: &mut [u8], ty: &Final) { + let mut stack = vec![ZeroState::ProcessType(ty, 0)]; - if right_end < product_end { - stack.push(ZeroState::ZeroProduct(right_end, product_end)); - } + // Iterate over type, schedule zeroing and push it's childrens to stack + while let Some(state) = stack.pop() { + match state { + ZeroState::ProcessType(ty, offset) => { + if ty.bit_width() == 0 { + continue; + } - if right.bit_width() > 0 { - stack.push(ZeroState::ProcessType(right, left_end)); - } - if left.bit_width() > 0 { - stack.push(ZeroState::ProcessType(left, offset)); - } + match &ty.bound() { + CompleteBound::Unit => { + // All bits are padding + zero_bit_range(bits, offset, offset + ty.bit_width()); + } + + CompleteBound::Product(left, right) => { + let left_end = offset + left.bit_width(); + let right_end = left_end + right.bit_width(); + let product_end = offset + ty.bit_width(); + + if right_end < product_end { + stack.push(ZeroState::ZeroProduct(right_end, product_end)); + } + + if right.bit_width() > 0 { + stack.push(ZeroState::ProcessType(right, left_end)); + } + if left.bit_width() > 0 { + stack.push(ZeroState::ProcessType(left, offset)); } + } - CompleteBound::Sum(left, right) => { - let tag = read_bit(&blob, offset); - let variants_start = offset + 1; - let sum_end = offset + ty.bit_width(); + CompleteBound::Sum(left, right) => { + let tag = read_bit(&bits, offset); + let sum_end = offset + ty.bit_width(); - let variant = if !tag { left } else { right }; - let side_end = variants_start + variant.bit_width(); + let variant = if !tag { left } else { right }; + let variants_start = sum_end - variant.bit_width(); - if side_end < sum_end { - stack.push(ZeroState::ZeroSum(side_end, sum_end)); - } - if variant.bit_width() > 0 { - stack.push(ZeroState::ProcessType(variant, variants_start)); - } + if offset + 1 < variants_start { + stack.push(ZeroState::ZeroSum(offset + 1, variants_start)); + } + if variant.bit_width() > 0 { + stack.push(ZeroState::ProcessType(variant, variants_start)); } } } + } - ZeroState::ZeroProduct(start, end) | ZeroState::ZeroSum(start, end) => { - zero_bit_range(&mut blob, start, end); - } + ZeroState::ZeroProduct(start, end) | ZeroState::ZeroSum(start, end) => { + zero_bit_range(bits, start, end); } } - - Ok(Value { - inner: blob.into(), - bit_offset: 0, - ty: Arc::new(ty.clone()), - }) } }