diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 4135e812..fecec3c1 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -67,6 +67,13 @@ test = false
 doc = false
 bench = false
 
+[[bin]]
+name = "prune_value"
+path = "fuzz_targets/prune_value.rs"
+test = false
+doc = false
+bench = false
+
 [[bin]]
 name = "regression_286"
 path = "fuzz_targets/regression_286.rs"
diff --git a/fuzz/fuzz_lib/lib.rs b/fuzz/fuzz_lib/lib.rs
index d3772c85..56f4a19e 100644
--- a/fuzz/fuzz_lib/lib.rs
+++ b/fuzz/fuzz_lib/lib.rs
@@ -63,8 +63,8 @@ impl<'f> Extractor<'f> {
     }
 
     /// Attempt to yield a type from the fuzzer.
-    pub fn extract_final_type(&mut self) -> Option<Arc<FinalTy>> {
-        // We can costruct extremely large types by duplicating Arcs; there
+    pub fn extract_final_type(&mut self, allow_blowup: bool) -> Option<Arc<FinalTy>> {
+        // We can construct extremely large types by duplicating Arcs; there
         // is no need to have an exponential blowup in the number of tasks.
         const MAX_N_TASKS: usize = 300;
 
@@ -83,7 +83,7 @@
                 result_stack.push(FinalTy::unit());
             } else {
                 let is_sum = self.extract_bit()?;
-                let dupe = task_stack.len() >= MAX_N_TASKS || self.extract_bit()?;
+                let dupe = allow_blowup && (task_stack.len() >= MAX_N_TASKS || self.extract_bit()?);
                 task_stack.push(StackElem::Binary { is_sum, dupe });
                 if !dupe {
                     task_stack.push(StackElem::NeedType)
@@ -113,7 +113,7 @@
     /// Attempt to yield a value from the fuzzer by constructing a type and then
     /// reading a bitstring of that type, in the padded value encoding.
     pub fn extract_value_padded(&mut self) -> Option<Value> {
-        let ty = self.extract_final_type()?;
+        let ty = self.extract_final_type(false)?;
         if ty.bit_width() > 64 * 1024 * 1024 {
             // little fuzzing value in producing massive values
             return None;
@@ -128,7 +128,7 @@
     /// Attempt to yield a value from the fuzzer by constructing a type and then
     /// reading a bitstring of that type, in the compact value encoding.
     pub fn extract_value_compact(&mut self) -> Option<Value> {
-        let ty = self.extract_final_type()?;
+        let ty = self.extract_final_type(true)?;
         if ty.bit_width() > 64 * 1024 * 1024 {
             // little fuzzing value in producing massive values
             return None;
@@ -184,7 +184,7 @@
                 }
                 StackElem::Left => {
                     let child = result_stack.pop().unwrap();
-                    let ty = self.extract_final_type()?;
+                    let ty = self.extract_final_type(true)?;
                     if ty.bit_width() > MAX_TY_WIDTH {
                         return None;
                     }
@@ -192,7 +192,7 @@
                 }
                 StackElem::Right => {
                     let child = result_stack.pop().unwrap();
-                    let ty = self.extract_final_type()?;
+                    let ty = self.extract_final_type(true)?;
                     if ty.bit_width() > MAX_TY_WIDTH {
                         return None;
                     }
@@ -205,7 +205,7 @@
     }
 
     /// Attempt to yield a type from the fuzzer.
-    pub fn extract_old_final_type(&mut self) -> Option<Arc<OldFinalTy>> {
+    pub fn extract_old_final_type(&mut self, allow_blowup: bool) -> Option<Arc<OldFinalTy>> {
        // We can costruct extremely large types by duplicating Arcs; there
        // is no need to have an exponential blowup in the number of tasks.
        const MAX_N_TASKS: usize = 300;
@@ -225,7 +225,7 @@
                 result_stack.push(OldFinalTy::unit());
             } else {
                 let is_sum = self.extract_bit()?;
-                let dupe = task_stack.len() >= MAX_N_TASKS || self.extract_bit()?;
+                let dupe = allow_blowup && (task_stack.len() >= MAX_N_TASKS || self.extract_bit()?);
                 task_stack.push(StackElem::Binary { is_sum, dupe });
                 if !dupe {
                     task_stack.push(StackElem::NeedType)
@@ -297,7 +297,7 @@
                 }
                 StackElem::Left => {
                     let child = result_stack.pop().unwrap();
-                    let ty = self.extract_old_final_type()?;
+                    let ty = self.extract_old_final_type(true)?;
                     if ty.bit_width() > MAX_TY_WIDTH {
                         return None;
                     }
@@ -305,7 +305,7 @@
                 }
                 StackElem::Right => {
                     let child = result_stack.pop().unwrap();
-                    let ty = self.extract_old_final_type()?;
+                    let ty = self.extract_old_final_type(true)?;
                     if ty.bit_width() > MAX_TY_WIDTH {
                         return None;
                     }
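
A note on the new `allow_blowup` flag (editorial sketch, not part of the patch): with `allow_blowup == false` the `dupe` flag above is always false, so every subtree is freshly extracted from input bits and the resulting type's size stays roughly linear in the input length; with `true`, reusing an `Arc` on both sides of a product doubles the bit-width per step. A minimal demonstration of the doubling, using only constructors that already appear in this diff (`doubling_demo` is a hypothetical name):

use std::sync::Arc;
use simplicity::types::Final;

fn doubling_demo() {
    // Reusing the same Arc on both sides of a product doubles the
    // bit-width each round, which is the blowup that `dupe` enables.
    let mut ty: Arc<Final> = Final::two_two_n(0); // the bit type TWO, width 1
    for _ in 0..20 {
        ty = Final::product(Arc::clone(&ty), ty);
    }
    assert_eq!(ty.bit_width(), 1 << 20); // 300 rounds would give 2^300
}
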
diff --git a/fuzz/fuzz_targets/construct_type.rs b/fuzz/fuzz_targets/construct_type.rs
index 65e7e520..391dc6c3 100644
--- a/fuzz/fuzz_targets/construct_type.rs
+++ b/fuzz/fuzz_targets/construct_type.rs
@@ -5,7 +5,7 @@
 #[cfg(any(fuzzing, test))]
 fn do_test(data: &[u8]) {
     let mut extractor = simplicity_fuzz::Extractor::new(data);
-    let _ = extractor.extract_final_type();
+    let _ = extractor.extract_final_type(true);
 }
 
 #[cfg(fuzzing)]
diff --git a/fuzz/fuzz_targets/prune_value.rs b/fuzz/fuzz_targets/prune_value.rs
new file mode 100644
index 00000000..94488f90
--- /dev/null
+++ b/fuzz/fuzz_targets/prune_value.rs
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: CC0-1.0
+
+#![cfg_attr(fuzzing, no_main)]
+
+#[cfg(any(fuzzing, test))]
+fn do_test(data: &[u8]) -> Option<()> {
+    use simplicity::dag::{DagLike, NoSharing};
+    use simplicity::types::{CompleteBound, Final};
+
+    let mut extractor = simplicity_fuzz::Extractor::new(data);
+
+    let val = extractor.extract_value_padded()?;
+    let ty = val.ty();
+
+    // Construct a smaller type
+    let mut stack = vec![];
+    for node in ty.post_order_iter::<NoSharing>() {
+        match node.node.bound() {
+            CompleteBound::Unit => stack.push(Final::unit()),
+            CompleteBound::Sum(..) => {
+                let right = stack.pop().unwrap();
+                let left = stack.pop().unwrap();
+                stack.push(Final::sum(left, right));
+            }
+            CompleteBound::Product(..) => {
+                let mut right = stack.pop().unwrap();
+                let mut left = stack.pop().unwrap();
+                if extractor.extract_bit()? {
+                    left = Final::unit();
+                }
+                if extractor.extract_bit()? {
+                    right = Final::unit();
+                }
+                stack.push(Final::product(left, right));
+            }
+        }
+    }
+    let pruned_ty = stack.pop().unwrap();
+    assert!(stack.is_empty());
+
+
+    // Prune the value
+    let pruned_val = match val.prune(&pruned_ty) {
+        Some(val) => val,
+        None => panic!("Failed to prune value {val} from {ty} to {pruned_ty}"),
+    };
+
+    /*
+    // If you have a regression you likely want to uncomment these printlns.
+    println!("Original Value Bits: {:?}", val.iter_padded().collect::<Vec<_>>());
+    println!("Original Value: {val}");
+    println!(" Original Type: {ty}");
+    println!("  Pruned Value: {pruned_val}");
+    println!("   Pruned Type: {pruned_ty}");
+    */
+
+    // Check that pruning made sense by going through the compact bit iterator
+    // and checking that the pruned value is obtained from the original by
+    // just deleting bits.
+    let mut orig_iter = val.iter_compact();
+    let mut prune_iter = pruned_val.iter_compact();
+
+    loop {
+        match (orig_iter.next(), prune_iter.next()) {
+            (Some(true), Some(true)) => {},
+            (Some(false), Some(false)) => {},
+            (Some(_), Some(prune_bit)) => {
+                // We get here if the pruned and the original iterator disagree.
+                // This should happen iff we deleted some bits from the pruned
+                // value, meaning that we just need to ratchet forward the
+                // original iterator until we're back on track.
+                loop {
+                    match orig_iter.next() {
+                        Some(orig_bit) => {
+                            if orig_bit == prune_bit { break }
+                        },
+                        None => panic!("original iterator ran out before pruned iterator did"),
+                    }
+                }
+            }
+            (None, Some(_)) => panic!("original iterator ran out before pruned iterator did"),
+            (_, None) => break, // done once the pruned iterator runs out
+        }
+    }
+    Some(())
+}
+
+#[cfg(fuzzing)]
+libfuzzer_sys::fuzz_target!(|data| { let _ = do_test(data); });
+
+#[cfg(not(fuzzing))]
+fn main() {}
+
+#[cfg(test)]
+mod tests {
+    use base64::Engine;
+
+    #[test]
+    fn duplicate_crash() {
+        let data = base64::prelude::BASE64_STANDARD
+            .decode("Cg==")
+            .expect("base64 should be valid");
+        let _ = super::do_test(&data);
+    }
+}
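
The ratchet loop above implements a greedy two-pointer subsequence check: compact encodings contain no padding, and pruning replaces subtrees with unit (whose compact encoding is empty), so every compact bit of the pruned value must appear, in order, within the original's bits. Greedy matching decides exactly that property. The same check as a standalone sketch (hypothetical helper, not part of the patch):

/// Returns true iff `needle` occurs as a (not necessarily contiguous)
/// subsequence of `haystack`, the property the fuzz target asserts
/// about the pruned and original compact bit streams.
fn is_subsequence(needle: &[bool], haystack: &[bool]) -> bool {
    let mut hay = haystack.iter();
    // `Iterator::any` consumes items as it searches, so each needle bit
    // resumes scanning where the previous one stopped.
    needle.iter().all(|n| hay.any(|h| h == n))
}
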
diff --git a/src/value.rs b/src/value.rs
index 4b9bf285..bbae4e12 100644
--- a/src/value.rs
+++ b/src/value.rs
@@ -942,6 +942,8 @@ impl Value {
         }
         blob.push(last);
 
+        zero_padding_bits(&mut blob, ty);
+
         Ok(Value {
             inner: blob.into(),
             bit_offset: 0,
@@ -949,6 +951,106 @@
         })
     }
 }
+
+enum ZeroState<'a> {
+    ProcessType(&'a Final, usize), // (type, bit_offset)
+    ZeroSum(usize, usize),         // (padding_start, padding_end)
+    ZeroProduct(usize, usize),     // (padding_start, padding_end)
+}
+
+fn zero_bit_range(data: &mut [u8], start: usize, end: usize) {
+    debug_assert!(start <= end);
+    debug_assert!(end <= data.len() * 8);
+
+    let start_byte = start / 8;
+    let end_byte = end / 8;
+    let start_bit = start % 8;
+    let end_bit = end % 8;
+
+    if start_byte == end_byte {
+        for i in start_bit..end_bit {
+            data[start_byte] &= !(1 << (7 - i));
+        }
+    } else {
+        for i in start_bit..8 {
+            data[start_byte] &= !(1 << (7 - i));
+        }
+
+        for i in (start_byte + 1)..end_byte {
+            data[i] = 0;
+        }
+
+        for i in 0..end_bit {
+            data[end_byte] &= !(1 << (7 - i));
+        }
+    }
+}
+
+fn read_bit(data: &[u8], pos: usize) -> bool {
+    debug_assert!(pos < data.len() * 8);
+
+    let byte_idx = pos / 8;
+    let bit_idx = pos % 8;
+
+    (data[byte_idx] >> (7 - bit_idx)) & 1 == 1
+}
+
+fn zero_padding_bits(bits: &mut [u8], ty: &Final) {
+    let mut stack = vec![ZeroState::ProcessType(ty, 0)];
+
+    // Iterate over the type, scheduling zeroing tasks and pushing each
+    // type's children onto the stack.
+    while let Some(state) = stack.pop() {
+        match state {
+            ZeroState::ProcessType(ty, offset) => {
+                if ty.bit_width() == 0 {
+                    continue;
+                }
+
+                match &ty.bound() {
+                    CompleteBound::Unit => {
+                        // All bits are padding
+                        zero_bit_range(bits, offset, offset + ty.bit_width());
+                    }
+
+                    CompleteBound::Product(left, right) => {
+                        let left_end = offset + left.bit_width();
+                        let right_end = left_end + right.bit_width();
+                        let product_end = offset + ty.bit_width();
+
+                        if right_end < product_end {
+                            stack.push(ZeroState::ZeroProduct(right_end, product_end));
+                        }
+
+                        if right.bit_width() > 0 {
+                            stack.push(ZeroState::ProcessType(right, left_end));
+                        }
+                        if left.bit_width() > 0 {
+                            stack.push(ZeroState::ProcessType(left, offset));
+                        }
+                    }
+
+                    CompleteBound::Sum(left, right) => {
+                        let tag = read_bit(&bits, offset);
+                        let sum_end = offset + ty.bit_width();
+
+                        let variant = if !tag { left } else { right };
+                        let variants_start = sum_end - variant.bit_width();
+
+                        if offset + 1 < variants_start {
+                            stack.push(ZeroState::ZeroSum(offset + 1, variants_start));
+                        }
+                        if variant.bit_width() > 0 {
+                            stack.push(ZeroState::ProcessType(variant, variants_start));
+                        }
+                    }
+                }
+            }
+
+            ZeroState::ZeroProduct(start, end) | ZeroState::ZeroSum(start, end) => {
+                zero_bit_range(bits, start, end);
+            }
+        }
+    }
+}
 
 /// A Simplicity word. A value of type `TWO^(2^n)` for some `0 ≤ n < 32`.
 #[derive(Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
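
For orientation in the `Sum` arm above (editorial sketch, not part of the patch): in the padded encoding a product stores its children back to back, while a sum stores one tag bit, then padding, then the chosen variant aligned to the end of the sum's bit range, which is why `variants_start` is computed backwards from `sum_end`. The option type used by the regression tests below makes this concrete (`layout_demo` is a hypothetical name):

use simplicity::types::Final;

fn layout_demo() {
    // 1 + (2 × 1): padded width 2, one tag bit plus one variant bit.
    //   L(ε):    [tag = 0][padding bit]   (the left branch is 0 bits wide)
    //   R(b, ε): [tag = 1][bit b]         (the right branch needs no padding)
    let ty = Final::sum(
        Final::unit(),
        Final::product(Final::two_two_n(0), Final::unit()),
    );
    assert_eq!(ty.bit_width(), 2); // 1 + max(0, 1)
}
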
@@ -1262,6 +1364,48 @@ mod tests {
         let new_v = Value::from_padded_bits(&mut iter, &v.ty).unwrap();
         assert_eq!(v, new_v);
     }
+
+    #[test]
+    fn prune_regression_337_1() {
+        // Two values that differ only in padding bits are nonetheless equal
+        let ty_2x1_opt = Final::sum(
+            Final::unit(),
+            Final::product(Final::two_two_n(0), Final::unit()),
+        );
+
+        // L(ε) as all zeros
+        let mut iter = BitIter::new(Some(0b0000_0000u8).into_iter());
+        let value_1 = Value::from_padded_bits(&mut iter, &ty_2x1_opt).unwrap();
+        // L(ε) with a one in its padding bit
+        let mut iter = BitIter::new(Some(0b0100_0000u8).into_iter());
+        let value_2 = Value::from_padded_bits(&mut iter, &ty_2x1_opt).unwrap();
+
+        assert_eq!(value_1, value_2);
+    }
+
+    #[test]
+    fn prune_regression_337_2() {
+        let ty_2x1_opt = Final::sum(
+            Final::unit(),
+            Final::product(Final::two_two_n(0), Final::unit()),
+        );
+        let ty_1x1_opt = Final::sum(Final::unit(), Final::product(Final::unit(), Final::unit()));
+
+        // Bits [false, true] - first bit is false (left sum), second bit is true (unused padding)
+        let mut iter = BitIter::new(Some(0b0100_0000u8).into_iter());
+
+        // Parse as (2 × 1)? then prune to (1 × 1)?
+        let value = Value::from_padded_bits(&mut iter, &ty_2x1_opt).unwrap();
+        let pruned = value.prune(&ty_1x1_opt).unwrap();
+
+        // Expected: L(ε) - still in the left (unit) branch
+        let expected = Value::left(Value::unit(), Final::product(Final::unit(), Final::unit()));
+
+        // BUG: This fails because pruning incorrectly returns R((ε,ε)). We first compare string
+        // serializations since a direct comparison might only test `prune_regression_337_1`.
+        assert_eq!(pruned.to_string(), expected.to_string());
+        assert_eq!(pruned, expected);
+    }
 }
 
 #[cfg(bench)]
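
Finally, a quick check of the masking arithmetic in `zero_bit_range` (an editorial sketch against the function added above; the test name is hypothetical): bit positions count from the most significant bit of byte 0, and the range is half-open.

#[test]
fn zero_bit_range_msb_first() {
    // Zeroing [2, 11) of 0xFF 0xFF keeps the top two bits of byte 0
    // (positions 0 and 1) and the low five bits of byte 1 (positions 11..16).
    let mut data = [0xFFu8, 0xFF];
    zero_bit_range(&mut data, 2, 11);
    assert_eq!(data, [0b1100_0000, 0b0001_1111]);
}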