Skip to content

Commit f821e22

Browse files
committed
fuzz: add ability to generate Simplicity construct nodes, test for encode/decode roundtrip
This construction algorithm is a bit fragile and will often fail, but the fuzzer should be able to make some progress with it. In particular, it will fail if it encounters any type inference error during construction. I used this to generate the unit tests in this PR.
1 parent bd3fe4e commit f821e22

File tree

5 files changed

+259
-0
lines changed

5 files changed

+259
-0
lines changed

.github/workflows/fuzz.yml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ c_rust_merkle,
2323
decode_natural,
2424
decode_program,
2525
parse_human,
26+
regression_286,
2627
regression_value,
2728
]
2829
steps:

fuzz/Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ test = false
6767
doc = false
6868
bench = false
6969

70+
[[bin]]
71+
name = "regression_286"
72+
path = "fuzz_targets/regression_286.rs"
73+
test = false
74+
doc = false
75+
bench = false
76+
7077
[[bin]]
7178
name = "regression_value"
7279
path = "fuzz_targets/regression_value.rs"

fuzz/fuzz_lib/lib.rs

+12
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ use simplicity::types::Final as FinalTy;
77
use simplicity::{BitIter, Value};
88
use std::sync::Arc;
99

10+
mod program;
11+
12+
pub use program::ProgramControl;
13+
1014
/// A wrapper around a buffer which has utilities for extracting various
1115
/// Simplicity types.
1216
#[derive(Clone)]
@@ -16,6 +20,7 @@ pub struct Extractor<'f> {
1620
bit_len: usize,
1721
}
1822

23+
// More impls in the other modules.
1924
impl<'f> Extractor<'f> {
2025
/// Wrap the buffer in an extractor.
2126
pub fn new(data: &'f [u8]) -> Self {
@@ -37,6 +42,13 @@ impl<'f> Extractor<'f> {
3742
}
3843
}
3944

45+
/// Attempt to yield a u16 from the fuzzer.
46+
///
47+
/// Internally, extracts in big-endian.
48+
pub fn extract_u16(&mut self) -> Option<u16> {
49+
Some((u16::from(self.extract_u8()?) << 8) + u16::from(self.extract_u8()?))
50+
}
51+
4052
/// Attempt to yield a single bit from the fuzzer.
4153
pub fn extract_bit(&mut self) -> Option<bool> {
4254
if self.bit_len == 0 {

fuzz/fuzz_lib/program.rs

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
// SPDX-License-Identifier: CC0-1.0
2+
3+
use std::sync::Arc;
4+
5+
use super::Extractor;
6+
use simplicity::node::{
7+
CoreConstructible as _, DisconnectConstructible as _, JetConstructible as _,
8+
WitnessConstructible as _,
9+
};
10+
use simplicity::types;
11+
use simplicity::{jet::Core, Cmr, ConstructNode, FailEntropy};
12+
13+
/// Knobs that shape the programs produced by the fuzzer's node generator.
pub struct ProgramControl {
    /// If set, the generator may emit "type bombs".
    ///
    /// A type bomb may consist of 2^n nodes for fairly large n and is not
    /// bounded by `max_nodes`, so leave this off when small programs are
    /// wanted.
    pub enable_type_bomb: bool,
    /// If set, the generator may emit disconnect nodes.
    pub enable_disconnect: bool,
    /// If set, the generator may emit witness nodes.
    pub enable_witness: bool,
    /// If set, the generator may emit fail nodes.
    pub enable_fail: bool,
    /// If set, the generator may emit assertl and assertr nodes.
    pub enable_asserts: bool,
    /// Upper bound on the number of nodes in a generated program; `None`
    /// means unbounded. The bound is not necessarily enforced exactly.
    pub max_nodes: Option<usize>,
}

impl ProgramControl {
    /// Decode a control structure from 16 bits.
    ///
    /// From the most significant bit downwards, the top five bits switch on
    /// type bombs, disconnect, witness, fail and asserts respectively. The
    /// remaining low 11 bits, multiplied by 5, become `max_nodes`.
    fn from_u16(u: u16) -> Self {
        let is_set = |mask: u16| u & mask != 0;
        let node_budget = 5 * usize::from(u & 0x07ff);

        ProgramControl {
            enable_type_bomb: is_set(0x8000),
            enable_disconnect: is_set(0x4000),
            enable_witness: is_set(0x2000),
            enable_fail: is_set(0x1000),
            enable_asserts: is_set(0x0800),
            max_nodes: Some(node_budget),
        }
    }
}
46+
47+
impl Extractor<'_> {
48+
pub fn extract_core_construct_node(
49+
&mut self,
50+
force_control: Option<ProgramControl>,
51+
) -> Option<Arc<ConstructNode<Core>>> {
52+
type ArcNode = Arc<ConstructNode<Core>>;
53+
54+
let ctx = types::Context::new();
55+
let mut stack: Vec<ArcNode> = vec![];
56+
57+
let program_control =
58+
force_control.unwrap_or(ProgramControl::from_u16(self.extract_u16()?));
59+
60+
let mut count = 0usize;
61+
for _ in 0..program_control.max_nodes.unwrap_or(usize::MAX) {
62+
let control = self.extract_u8()?;
63+
if program_control.enable_type_bomb && control & 0x80 == 0x80 {
64+
let mut ret = stack.pop()?;
65+
// Special-case: type bomb. Iterate x -> pair(x, x) on the top stack
66+
// item up to 128 times. Its CPU cost and target type will blow up
67+
// by a factor 2^128. If its target type has nonzero size this should
68+
// fail to construct; if it's 0 we should be able to construct it but
69+
// the bit machine should reject it.
70+
for _ in 0..control & 0x7f {
71+
// FIXME should we refuse to make the type-bomb if `ret` contains any
72+
// witness or disconnect nodes? In this case the encoding of our
73+
// CommitNode won't round-trip, since we're force-sharing both children
74+
// of this `pair` but when decoding `CommitNode` we reject anything that
75+
// shares witnesses or disconnects, which at commit-time we treat as
76+
// being unique and never shared.
77+
ret = ArcNode::pair(&ret, &ret).unwrap();
78+
}
79+
stack.push(ret);
80+
} else {
81+
match control {
82+
// Return whatever we've got (note that this will "waste" everything else
83+
// on the stack)
84+
0 => {
85+
if stack.len() == 1 {
86+
return stack.pop();
87+
} else {
88+
return None;
89+
}
90+
}
91+
// 1 through 63
92+
1 => stack.push(ArcNode::unit(&ctx)),
93+
2 => stack.push(ArcNode::iden(&ctx)),
94+
3 => {
95+
use simplicity::dag::DagLike as _;
96+
97+
let val = self.extract_value_direct()?;
98+
if program_control.max_nodes.is_some() {
99+
for _ in val.as_ref().pre_order_iter::<simplicity::dag::NoSharing>() {
100+
count = count.checked_add(1)?;
101+
}
102+
}
103+
if let Some(max) = program_control.max_nodes {
104+
if val.compact_len() > max {
105+
return None;
106+
}
107+
}
108+
stack.push(ArcNode::scribe(&ctx, &val));
109+
}
110+
4 if program_control.enable_witness => stack.push(ArcNode::witness(&ctx, None)),
111+
5 => {
112+
let child = stack.pop()?;
113+
stack.push(ArcNode::injl(&child));
114+
}
115+
6 => {
116+
let child = stack.pop()?;
117+
stack.push(ArcNode::injr(&child));
118+
}
119+
7 => {
120+
let child = stack.pop()?;
121+
stack.push(ArcNode::drop_(&child));
122+
}
123+
8 => {
124+
let child = stack.pop()?;
125+
stack.push(ArcNode::take(&child));
126+
}
127+
9 => {
128+
let child = stack.pop()?;
129+
let cmr_u8 = self.extract_u8()?;
130+
let cmr = Cmr::from_byte_array([cmr_u8; 32]);
131+
stack.push(ArcNode::assertl(&child, cmr).ok()?);
132+
}
133+
10 => {
134+
let child = stack.pop()?;
135+
let cmr_u8 = self.extract_u8()?;
136+
let cmr = Cmr::from_byte_array([cmr_u8; 32]);
137+
stack.push(ArcNode::assertr(cmr, &child).ok()?);
138+
}
139+
11 if program_control.enable_fail => {
140+
let fail_u8 = self.extract_u8()?;
141+
let fail = FailEntropy::from_byte_array([fail_u8; 64]);
142+
stack.push(ArcNode::fail(&ctx, fail));
143+
}
144+
12 => {
145+
let rchild = stack.pop()?;
146+
let lchild = stack.pop()?;
147+
stack.push(ArcNode::pair(&lchild, &rchild).ok()?);
148+
}
149+
13 => {
150+
let rchild = stack.pop()?;
151+
let lchild = stack.pop()?;
152+
stack.push(ArcNode::case(&lchild, &rchild).ok()?);
153+
}
154+
14 => {
155+
let rchild = stack.pop()?;
156+
let lchild = stack.pop()?;
157+
stack.push(ArcNode::comp(&lchild, &rchild).ok()?);
158+
}
159+
15 if program_control.enable_disconnect => {
160+
let child = stack.pop()?;
161+
stack.push(ArcNode::disconnect(&child, &None).ok()?);
162+
}
163+
// We assume that the above cases did not cover 64-255, so that if we
164+
// right-shift by 6 we can get all 4 values.
165+
_ => {
166+
let extra_bits = usize::from(control >> 6);
167+
let idx = (extra_bits << 8) + usize::from(self.extract_u8()?);
168+
stack.push(ArcNode::jet(&ctx, Core::ALL[idx % Core::ALL.len()]));
169+
}
170+
}
171+
}
172+
173+
if let Some(max) = program_control.max_nodes {
174+
count = count.checked_add(1)?;
175+
if count > max {
176+
return None;
177+
}
178+
}
179+
}
180+
181+
None
182+
}
183+
}

fuzz/fuzz_targets/regression_286.rs

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// SPDX-License-Identifier: CC0-1.0
2+
3+
#![cfg_attr(fuzzing, no_main)]
4+
5+
/// Round-trip check: build a construct node from `data`, finalize its types,
/// encode it, decode the encoding and require that the decoded node equals
/// the finalized one.
///
/// Inputs that do not yield a node, or whose types fail to finalize, are
/// silently ignored.
#[cfg(any(fuzzing, test))]
fn do_test(data: &[u8]) {
    use simplicity::{jet::Core, BitIter, CommitNode};

    // Fixed generator settings: small programs with only asserts enabled.
    let control = simplicity_fuzz::ProgramControl {
        enable_type_bomb: false,
        enable_disconnect: false,
        enable_witness: false,
        enable_fail: false,
        enable_asserts: true,
        max_nodes: Some(25),
    };

    let mut extractor = simplicity_fuzz::Extractor::new(data);
    let finalized = match extractor
        .extract_core_construct_node(Some(control))
        .and_then(|node| node.finalize_types().ok())
    {
        Some(node) => node,
        None => return,
    };

    // A finalized program must always survive an encode/decode round trip.
    let encoded = finalized.encode_to_vec();
    let decoded = CommitNode::<Core>::decode(BitIter::from(encoded)).unwrap();
    assert_eq!(
        finalized, decoded,
        "Constructed committed LHS; encoded and decoded to get RHS",
    );
}
38+
39+
// Fuzzing builds: hand every fuzzer-provided input to `do_test`. The crate is
// `no_main` in this configuration, so the macro supplies the entry point.
#[cfg(fuzzing)]
40+
libfuzzer_sys::fuzz_target!(|data| do_test(data));
41+
42+
// Non-fuzzing builds: an empty `main` so the binary target still compiles.
#[cfg(not(fuzzing))]
43+
fn main() {}
44+
45+
#[cfg(test)]
mod tests {
    use base64::{prelude::BASE64_STANDARD, Engine as _};

    /// Replays a base64-encoded fuzzer input through `do_test`.
    #[test]
    fn duplicate_crash() {
        let input = BASE64_STANDARD
            .decode("Cg==")
            .expect("base64 should be valid");
        super::do_test(&input);
    }
}

0 commit comments

Comments
 (0)