diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 0f5c16af..26f3343b 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -17,10 +17,13 @@ jobs: fail-fast: false matrix: fuzz_target: [ +construct_type, +construct_value, c_rust_merkle, decode_natural, decode_program, parse_human, +regression_value, ] steps: - name: Checkout Crate diff --git a/Cargo-recent.lock b/Cargo-recent.lock index cb499915..f7bcd3d1 100644 --- a/Cargo-recent.lock +++ b/Cargo-recent.lock @@ -419,7 +419,7 @@ name = "simpcli" version = "0.3.0" dependencies = [ "base64 0.21.7", - "simplicity-lang", + "simplicity-lang 0.4.0", ] [[package]] @@ -428,7 +428,23 @@ version = "0.0.1" dependencies = [ "base64 0.22.1", "libfuzzer-sys", - "simplicity-lang", + "simplicity-lang 0.3.1", + "simplicity-lang 0.4.0", +] + +[[package]] +name = "simplicity-lang" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75c8fb4a18e63fbce4cf16026c36a6c38066e4f4a09ce5e81be817d0e36d8f8" +dependencies = [ + "bitcoin_hashes", + "byteorder", + "getrandom", + "hex-conservative 0.1.2", + "miniscript", + "santiago", + "simplicity-sys 0.3.0", ] [[package]] @@ -444,7 +460,17 @@ dependencies = [ "miniscript", "santiago", "serde", - "simplicity-sys", + "simplicity-sys 0.4.0", +] + +[[package]] +name = "simplicity-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd2cc5d458a8032d328ea85e824f54f61664ab84c3d42b3b7f8804fb9b81572" +dependencies = [ + "bitcoin_hashes", + "cc", ] [[package]] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index d2e47157..bd16d8bd 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -9,9 +9,15 @@ publish = false [package.metadata] cargo-fuzz = true +[lib] +path = "fuzz_lib/lib.rs" + [dependencies] libfuzzer-sys = "0.4" -simplicity-lang = { path = "..", features = ["test-utils"] } +# We shouldn't need an explicit version on the next line, but Andrew's tools +# choke 
on it otherwise. See https://github.com/nix-community/crate2nix/issues/373 +simplicity-lang = { path = "..", features = ["test-utils"], version = "0.4.0" } +old_simplicity = { package = "simplicity-lang", version = "0.3.1", default-features = false } [dev-dependencies] base64 = "0.22.1" @@ -19,6 +25,20 @@ base64 = "0.22.1" [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] } +[[bin]] +name = "construct_type" +path = "fuzz_targets/construct_type.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "construct_value" +path = "fuzz_targets/construct_value.rs" +test = false +doc = false +bench = false + [[bin]] name = "c_rust_merkle" path = "fuzz_targets/c_rust_merkle.rs" @@ -46,3 +66,10 @@ path = "fuzz_targets/parse_human.rs" test = false doc = false bench = false + +[[bin]] +name = "regression_value" +path = "fuzz_targets/regression_value.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_lib/lib.rs b/fuzz/fuzz_lib/lib.rs new file mode 100644 index 00000000..b36b1e5b --- /dev/null +++ b/fuzz/fuzz_lib/lib.rs @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: CC0-1.0 + +use old_simplicity::types::Final as OldFinalTy; +use old_simplicity::Value as OldValue; + +use simplicity::types::Final as FinalTy; +use simplicity::{BitIter, Value}; +use std::sync::Arc; + +/// A wrapper around a buffer which has utilities for extracting various +/// Simplicity types. +#[derive(Clone)] +pub struct Extractor<'f> { + data: &'f [u8], + bit_cache: u8, + bit_len: usize, +} + +impl<'f> Extractor<'f> { + /// Wrap the buffer in an extractor. + pub fn new(data: &'f [u8]) -> Self { + Self { + data, + bit_cache: 0, + bit_len: 0, + } + } + + /// Attempt to yield a u8 from the fuzzer. + pub fn extract_u8(&mut self) -> Option<u8> { + if self.data.is_empty() { + None + } else { + let ret = self.data[0]; + self.data = &self.data[1..]; + Some(ret) + } + } + + /// Attempt to yield a single bit from the fuzzer. 
+ pub fn extract_bit(&mut self) -> Option<bool> { + if self.bit_len == 0 { + self.bit_cache = self.extract_u8()?; + self.bit_len = 8; + } + + let ret = self.bit_cache & 1 == 1; + self.bit_len -= 1; + self.bit_cache >>= 1; + Some(ret) + } + + /// Attempt to yield a type from the fuzzer. + pub fn extract_final_type(&mut self) -> Option<Arc<FinalTy>> { + // We can construct extremely large types by duplicating Arcs; there + // is no need to have an exponential blowup in the number of tasks. + const MAX_N_TASKS: usize = 300; + + enum StackElem { + NeedType, + Binary { is_sum: bool, dupe: bool }, + } + + let mut task_stack = vec![StackElem::NeedType]; + let mut result_stack = vec![]; + + while let Some(task) = task_stack.pop() { + match task { + StackElem::NeedType => { + if self.extract_bit()? { + result_stack.push(FinalTy::unit()); + } else { + let is_sum = self.extract_bit()?; + let dupe = task_stack.len() >= MAX_N_TASKS || self.extract_bit()?; + task_stack.push(StackElem::Binary { is_sum, dupe }); + if !dupe { + task_stack.push(StackElem::NeedType); + } + task_stack.push(StackElem::NeedType); + } + } + StackElem::Binary { is_sum, dupe } => { + let right = result_stack.pop().unwrap(); + let left = if dupe { + Arc::clone(&right) + } else { + result_stack.pop().unwrap() + }; + if is_sum { + result_stack.push(FinalTy::sum(left, right)); + } else { + result_stack.push(FinalTy::product(left, right)); + } + } + } + } + assert_eq!(result_stack.len(), 1); + result_stack.pop() + } + + /// Attempt to yield a value from the fuzzer by constructing a type and then + /// reading a bitstring of that type, in the padded value encoding. 
+ pub fn extract_value_padded(&mut self) -> Option<Value> { + let ty = self.extract_final_type()?; + if ty.bit_width() > 64 * 1024 * 1024 { + // little fuzzing value in producing massive values + return None; + } + + let mut iter = BitIter::new(self.data.iter().copied()); + let ret = Value::from_padded_bits(&mut iter, &ty).ok()?; + self.data = &self.data[iter.n_total_read().div_ceil(8)..]; + Some(ret) + } + + /// Attempt to yield a value from the fuzzer by constructing a type and then + /// reading a bitstring of that type, in the compact value encoding. + pub fn extract_value_compact(&mut self) -> Option<Value> { + let ty = self.extract_final_type()?; + if ty.bit_width() > 64 * 1024 * 1024 { + // little fuzzing value in producing massive values + return None; + } + + let mut iter = BitIter::new(self.data.iter().copied()); + let ret = Value::from_compact_bits(&mut iter, &ty).ok()?; + self.data = &self.data[iter.n_total_read().div_ceil(8)..]; + Some(ret) + } + + /// Attempt to yield a value from the fuzzer by constructing it directly. + pub fn extract_value_direct(&mut self) -> Option<Value> { + const MAX_N_TASKS: usize = 300; + const MAX_TY_WIDTH: usize = 10240; + + enum StackElem { + NeedValue, + Left, + Right, + Product, + } + + let mut task_stack = vec![StackElem::NeedValue]; + let mut result_stack = vec![]; + + while let Some(task) = task_stack.pop() { + match task { + StackElem::NeedValue => match (self.extract_bit()?, self.extract_bit()?) 
{ + (false, false) => result_stack.push(Value::unit()), + (false, true) => { + if task_stack.len() <= MAX_N_TASKS { + task_stack.push(StackElem::Product); + task_stack.push(StackElem::NeedValue); + task_stack.push(StackElem::NeedValue); + } else { + task_stack.push(StackElem::NeedValue); + } + } + (true, false) => { + task_stack.push(StackElem::Left); + task_stack.push(StackElem::NeedValue); + } + (true, true) => { + task_stack.push(StackElem::Right); + task_stack.push(StackElem::NeedValue); + } + }, + StackElem::Product => { + let right = result_stack.pop().unwrap(); + let left = result_stack.pop().unwrap(); + result_stack.push(Value::product(left, right)); + } + StackElem::Left => { + let child = result_stack.pop().unwrap(); + let ty = self.extract_final_type()?; + if ty.bit_width() > MAX_TY_WIDTH { + return None; + } + result_stack.push(Value::left(child, ty)); + } + StackElem::Right => { + let child = result_stack.pop().unwrap(); + let ty = self.extract_final_type()?; + if ty.bit_width() > MAX_TY_WIDTH { + return None; + } + result_stack.push(Value::right(ty, child)); + } + } + } + assert_eq!(result_stack.len(), 1); + result_stack.pop() + } + + /// Attempt to yield a type of the old crate (`old_simplicity`) from the fuzzer. + pub fn extract_old_final_type(&mut self) -> Option<Arc<OldFinalTy>> { + // We can construct extremely large types by duplicating Arcs; there + // is no need to have an exponential blowup in the number of tasks. + const MAX_N_TASKS: usize = 300; + + enum StackElem { + NeedType, + Binary { is_sum: bool, dupe: bool }, + } + + let mut task_stack = vec![StackElem::NeedType]; + let mut result_stack = vec![]; + + while let Some(task) = task_stack.pop() { + match task { + StackElem::NeedType => { + if self.extract_bit()? 
{ + result_stack.push(OldFinalTy::unit()); + } else { + let is_sum = self.extract_bit()?; + let dupe = task_stack.len() >= MAX_N_TASKS || self.extract_bit()?; + task_stack.push(StackElem::Binary { is_sum, dupe }); + if !dupe { + task_stack.push(StackElem::NeedType); + } + task_stack.push(StackElem::NeedType); + } + } + StackElem::Binary { is_sum, dupe } => { + let right = result_stack.pop().unwrap(); + let left = if dupe { + Arc::clone(&right) + } else { + result_stack.pop().unwrap() + }; + if is_sum { + result_stack.push(OldFinalTy::sum(left, right)); + } else { + result_stack.push(OldFinalTy::product(left, right)); + } + } + } + } + assert_eq!(result_stack.len(), 1); + result_stack.pop() + } + + /// Attempt to yield a value of the old crate (`old_simplicity`) from the fuzzer + /// by constructing it directly; the decoding steps mirror + /// `extract_value_direct`. + pub fn extract_old_value_direct(&mut self) -> Option<OldValue> { + const MAX_N_TASKS: usize = 300; + const MAX_TY_WIDTH: usize = 10240; + + enum StackElem { + NeedValue, + Left, + Right, + Product, + } + + let mut task_stack = vec![StackElem::NeedValue]; + let mut result_stack = vec![]; + + while let Some(task) = task_stack.pop() { + match task { + StackElem::NeedValue => match (self.extract_bit()?, self.extract_bit()?) 
{ + (false, false) => result_stack.push(OldValue::unit()), + (false, true) => { + if task_stack.len() <= MAX_N_TASKS { + task_stack.push(StackElem::Product); + task_stack.push(StackElem::NeedValue); + task_stack.push(StackElem::NeedValue); + } else { + task_stack.push(StackElem::NeedValue); + } + } + (true, false) => { + task_stack.push(StackElem::Left); + task_stack.push(StackElem::NeedValue); + } + (true, true) => { + task_stack.push(StackElem::Right); + task_stack.push(StackElem::NeedValue); + } + }, + StackElem::Product => { + let right = result_stack.pop().unwrap(); + let left = result_stack.pop().unwrap(); + result_stack.push(OldValue::product(left, right)); + } + StackElem::Left => { + let child = result_stack.pop().unwrap(); + let ty = self.extract_old_final_type()?; + if ty.bit_width() > MAX_TY_WIDTH { + return None; + } + result_stack.push(OldValue::left(child, ty)); + } + StackElem::Right => { + let child = result_stack.pop().unwrap(); + let ty = self.extract_old_final_type()?; + if ty.bit_width() > MAX_TY_WIDTH { + return None; + } + result_stack.push(OldValue::right(ty, child)); + } + } + } + assert_eq!(result_stack.len(), 1); + result_stack.pop() + } +} diff --git a/fuzz/fuzz_targets/construct_type.rs b/fuzz/fuzz_targets/construct_type.rs new file mode 100644 index 00000000..65e7e520 --- /dev/null +++ b/fuzz/fuzz_targets/construct_type.rs @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: CC0-1.0 + +#![cfg_attr(fuzzing, no_main)] + +#[cfg(any(fuzzing, test))] +fn do_test(data: &[u8]) { + let mut extractor = simplicity_fuzz::Extractor::new(data); + let _ = extractor.extract_final_type(); +} + +#[cfg(fuzzing)] +libfuzzer_sys::fuzz_target!(|data| do_test(data)); + +#[cfg(not(fuzzing))] +fn main() {} + +#[cfg(test)] +mod tests { + use base64::Engine; + + #[test] + fn duplicate_crash() { + let data = base64::prelude::BASE64_STANDARD + .decode("Cg==") + .expect("base64 should be valid"); + super::do_test(&data); + } +} diff --git 
a/fuzz/fuzz_targets/construct_value.rs b/fuzz/fuzz_targets/construct_value.rs new file mode 100644 index 00000000..94980e10 --- /dev/null +++ b/fuzz/fuzz_targets/construct_value.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: CC0-1.0 + +#![cfg_attr(fuzzing, no_main)] + +#[cfg(any(fuzzing, test))] +fn do_test(data: &[u8]) -> Option<()> { + let mut extractor = simplicity_fuzz::Extractor::new(data); + if extractor.extract_bit()? { + let _ = extractor.extract_value_direct(); + } else { + if extractor.extract_bit()? { + let _ = extractor.extract_value_compact(); + } else { + let _ = extractor.extract_value_padded(); + } + } + + Some(()) +} + +#[cfg(fuzzing)] +libfuzzer_sys::fuzz_target!(|data| { + let _ = do_test(data); +}); + +#[cfg(not(fuzzing))] +fn main() {} + +#[cfg(test)] +mod tests { + use base64::Engine; + + #[test] + fn duplicate_crash() { + let data = base64::prelude::BASE64_STANDARD + .decode("Cg==") + .expect("base64 should be valid"); + super::do_test(&data); + } +} diff --git a/fuzz/fuzz_targets/regression_value.rs b/fuzz/fuzz_targets/regression_value.rs new file mode 100644 index 00000000..8944a237 --- /dev/null +++ b/fuzz/fuzz_targets/regression_value.rs @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: CC0-1.0 + +#![cfg_attr(fuzzing, no_main)] + +#[cfg(any(fuzzing, test))] +use std::sync::Arc; + +#[cfg(any(fuzzing, test))] +use old_simplicity::{types::Final as OldFinal, Value as OldValue}; +#[cfg(any(fuzzing, test))] +use simplicity::types::Final; + +#[cfg(any(fuzzing, test))] +fn convert_ty(new: &Final) -> Option<Arc<OldFinal>> { + /// Our stack of tasks describing “what we need to do next.” + enum Task<'a> { + /// Convert this `Final` into an `OldFinal`. + NeedType(&'a Final), + Binary { + is_sum: bool, + dupe: bool, + }, + } + + // We'll push tasks onto this stack until everything is converted. + let mut task_stack = vec![Task::NeedType(new)]; + // As we finish conversion of subtrees, we store them here along with + // a count of units. 
Because the released version of 0.3.0 does not + // have any typeskip optimization we need to bail out if there are + // too many units, since otherwise we will OOM in from_compact_bits. + let mut result_stack: Vec<(usize, Arc<OldFinal>)> = vec![]; + const MAX_UNITS: usize = 1024 * 1024; + + // Process tasks in LIFO order + while let Some(task) = task_stack.pop() { + match task { + Task::NeedType(final_ty) => { + if final_ty.is_unit() { + result_stack.push((1, OldFinal::unit())); + } else if let Some((left, right)) = final_ty.as_sum() { + let dupe = Arc::ptr_eq(left, right); + task_stack.push(Task::Binary { is_sum: true, dupe }); + if !dupe { + task_stack.push(Task::NeedType(right)); + } + task_stack.push(Task::NeedType(left)); + } else if let Some((left, right)) = final_ty.as_product() { + let dupe = Arc::ptr_eq(left, right); + task_stack.push(Task::Binary { + is_sum: false, + dupe, + }); + if !dupe { + task_stack.push(Task::NeedType(right)); + } + task_stack.push(Task::NeedType(left)); + } else { + unreachable!(); + } + } + Task::Binary { is_sum, dupe } => { + let right = result_stack.pop().expect("right type missing"); + let left = if dupe { + (right.0, Arc::clone(&right.1)) + } else { + result_stack.pop().expect("left type missing") + }; + let new_total = left.0 + right.0; + if new_total > MAX_UNITS { + return None; + } + if is_sum { + result_stack.push((new_total, OldFinal::sum(left.1, right.1))); + } else { + result_stack.push((new_total, OldFinal::product(left.1, right.1))); + } + } + } + } + + // At the end, we should have exactly one final type. 
+ assert_eq!(result_stack.len(), 1, "Internal conversion error"); + let (_, res) = result_stack.pop().unwrap(); + Some(res) +} + +#[cfg(any(fuzzing, test))] +fn do_test(data: &[u8]) { + let mut extractor_1 = simplicity_fuzz::Extractor::new(data); + let mut extractor_2 = simplicity_fuzz::Extractor::new(data); + + let (val, old_val) = match ( + extractor_1.extract_value_direct(), + extractor_2.extract_old_value_direct(), + ) { + (Some(val), Some(old_val)) => (val, old_val), + (None, None) => return, + (Some(_), None) => panic!("Could extract new value but not old."), + (None, Some(_)) => panic!("Could extract old value but not new."), + }; + + assert!(val.iter_compact().eq(old_val.iter_compact())); + assert!(val.iter_padded().eq(old_val.iter_padded())); +} + +#[cfg(fuzzing)] +libfuzzer_sys::fuzz_target!(|data| do_test(data)); + +#[cfg(not(fuzzing))] +fn main() {} + +#[cfg(test)] +mod tests { + use base64::Engine; + + #[test] + fn duplicate_crash() { + let data = base64::prelude::BASE64_STANDARD + .decode("Cg==") + .expect("base64 should be valid"); + super::do_test(&data); + } +} diff --git a/fuzz/generate-files.sh b/fuzz/generate-files.sh index 578f0ba5..963fb4b6 100755 --- a/fuzz/generate-files.sh +++ b/fuzz/generate-files.sh @@ -20,9 +20,15 @@ publish = false [package.metadata] cargo-fuzz = true +[lib] +path = "fuzz_lib/lib.rs" + [dependencies] libfuzzer-sys = "0.4" -simplicity-lang = { path = "..", features = ["test-utils"] } +# We shouldn't need an explicit version on the next line, but Andrew's tools +# choke on it otherwise. 
See https://github.com/nix-community/crate2nix/issues/373 +simplicity-lang = { path = "..", features = ["test-utils"], version = "0.4.0" } +old_simplicity = { package = "simplicity-lang", version = "0.3.1", default-features = false } [dev-dependencies] base64 = "0.22.1" diff --git a/src/value.rs b/src/value.rs index 826d7281..36010c63 100644 --- a/src/value.rs +++ b/src/value.rs @@ -374,7 +374,6 @@ impl Value { /// Create a right value that wraps the given `inner` value. pub fn right(left: Arc, inner: Self) -> Self { let total_width = cmp::max(left.bit_width(), inner.ty.bit_width()); - let (concat, concat_offset) = product( None, total_width - inner.ty.bit_width(), @@ -798,7 +797,7 @@ impl Iterator for CompactBitsIter<'_> { fn next(&mut self) -> Option { while let Some(value) = self.stack.pop() { - if value.is_unit() { + if value.ty.bit_width() == 0 { // NOP } else if let Some(l_value) = value.as_left() { self.stack.push(l_value); @@ -878,7 +877,10 @@ impl Value { bits: &mut BitIter, ty: &Final, ) -> Result { - let mut blob = Vec::with_capacity(ty.bit_width().div_ceil(8)); + const MAX_INITIAL_ALLOC: usize = 32 * 1024 * 1024; // 32 MiB; `cap` below is a byte count + + let cap = cmp::min(MAX_INITIAL_ALLOC, ty.bit_width().div_ceil(8)); + let mut blob = Vec::with_capacity(cap); for _ in 0..ty.bit_width() / 8 { blob.push(bits.read_u8()?); }