Skip to content

Commit b2d9d69

Browse files
authored
Merge pull request #553 from Chia-Network/back-reference-length
back-reference path length
2 parents bf30717 + c3638db commit b2d9d69

File tree

5 files changed

+163
-5
lines changed

5 files changed

+163
-5
lines changed

fuzz/Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,9 @@ name = "incremental-serializer"
9898
path = "fuzz_targets/incremental_serializer.rs"
9999
test = false
100100
doc = false
101+
102+
[[bin]]
103+
name = "serializer-cmp"
104+
path = "fuzz_targets/serializer_cmp.rs"
105+
test = false
106+
doc = false

fuzz/fuzz_targets/serializer_cmp.rs

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#![no_main]
2+
3+
mod make_tree;
4+
mod node_eq;
5+
6+
use clvmr::allocator::{Allocator, NodePtr, SExp};
7+
use clvmr::serde::node_from_bytes_backrefs;
8+
use clvmr::serde::write_atom::write_atom;
9+
use clvmr::serde::ReadCacheLookup;
10+
use clvmr::serde::{serialized_length, treehash, ObjectCache};
11+
use std::io;
12+
use std::io::Cursor;
13+
use std::io::Write;
14+
15+
use node_eq::node_eq;
16+
17+
use libfuzzer_sys::fuzz_target;
18+
19+
const BACK_REFERENCE: u8 = 0xfe;
20+
const CONS_BOX_MARKER: u8 = 0xff;
21+
22+
#[derive(PartialEq, Eq)]
23+
enum ReadOp {
24+
Parse,
25+
Cons,
26+
}
27+
28+
// make sure back-references returned by ReadCacheLookup are smaller than the
29+
// node they reference
30+
pub fn compare_back_references(allocator: &Allocator, node: NodePtr) -> io::Result<Vec<u8>> {
31+
let mut f = Cursor::new(Vec::new());
32+
33+
let mut read_op_stack: Vec<ReadOp> = vec![ReadOp::Parse];
34+
let mut write_stack: Vec<NodePtr> = vec![node];
35+
36+
let mut read_cache_lookup = ReadCacheLookup::new();
37+
38+
let mut thc = ObjectCache::new(treehash);
39+
let mut slc = ObjectCache::new(serialized_length);
40+
41+
while let Some(node_to_write) = write_stack.pop() {
42+
let op = read_op_stack.pop();
43+
assert!(op == Some(ReadOp::Parse));
44+
45+
let node_serialized_length = *slc
46+
.get_or_calculate(allocator, &node_to_write, None)
47+
.expect("couldn't calculate serialized length");
48+
let node_tree_hash = thc
49+
.get_or_calculate(allocator, &node_to_write, None)
50+
.expect("can't get treehash");
51+
52+
let result1 = read_cache_lookup.find_path(node_tree_hash, node_serialized_length);
53+
match result1 {
54+
Some(path) => {
55+
f.write_all(&[BACK_REFERENCE])?;
56+
write_atom(&mut f, &path)?;
57+
read_cache_lookup.push(*node_tree_hash);
58+
{
59+
// make sure the path is never encoded as more bytes than
60+
// the node we're referencing
61+
use std::io::Write;
62+
let mut temp = Cursor::new(Vec::<u8>::new());
63+
temp.write_all(&[BACK_REFERENCE])?;
64+
write_atom(&mut temp, &path)?;
65+
let temp = temp.into_inner();
66+
assert!(temp.len() < node_serialized_length as usize);
67+
}
68+
}
69+
None => match allocator.sexp(node_to_write) {
70+
SExp::Pair(left, right) => {
71+
f.write_all(&[CONS_BOX_MARKER])?;
72+
write_stack.push(right);
73+
write_stack.push(left);
74+
read_op_stack.push(ReadOp::Cons);
75+
read_op_stack.push(ReadOp::Parse);
76+
read_op_stack.push(ReadOp::Parse);
77+
}
78+
SExp::Atom => {
79+
let atom = allocator.atom(node_to_write);
80+
write_atom(&mut f, atom.as_ref())?;
81+
read_cache_lookup.push(*node_tree_hash);
82+
}
83+
},
84+
}
85+
while let Some(ReadOp::Cons) = read_op_stack.last() {
86+
read_op_stack.pop();
87+
read_cache_lookup.pop2_and_cons();
88+
}
89+
}
90+
Ok(f.into_inner())
91+
}
92+
93+
// serializing with the regular compressed serializer should yield the same
94+
// result as using the incremental one (as long as it's in a single add() call).
95+
fuzz_target!(|data: &[u8]| {
96+
let mut unstructured = arbitrary::Unstructured::new(data);
97+
let mut allocator = Allocator::new();
98+
let (program, _) = make_tree::make_tree(&mut allocator, &mut unstructured);
99+
100+
let b1 = compare_back_references(&allocator, program).unwrap();
101+
let b2 = node_from_bytes_backrefs(&mut allocator, &b1).unwrap();
102+
assert!(node_eq(&allocator, b2, program));
103+
});

src/serde/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub use de_tree::{parse_triples, ParsedTriple};
2626
pub use identity_hash::RandomState;
2727
pub use incremental::{Serializer, UndoState};
2828
pub use object_cache::{serialized_length, treehash, ObjectCache};
29+
pub use read_cache_lookup::ReadCacheLookup;
2930
pub use ser::{node_to_bytes, node_to_bytes_limit};
3031
pub use ser_br::{node_to_bytes_backrefs, node_to_bytes_backrefs_limit};
3132
pub use serialized_length::{serialized_length_atom, serialized_length_small_number};

src/serde/read_cache_lookup.rs

+15-5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use bitvec::vec::BitVec;
2121
use std::collections::{HashMap, HashSet};
2222

2323
use super::bytes32::{hash_blob, hash_blobs, Bytes32};
24+
use super::serialized_length::atom_length_bits;
2425

2526
#[derive(Debug, Clone)]
2627
pub struct ReadCacheLookup {
@@ -153,7 +154,14 @@ impl ReadCacheLookup {
153154
let mut new_partial_paths = vec![];
154155
for (node, path) in partial_paths.iter_mut() {
155156
if *node == self.root_hash {
156-
possible_responses.push(reversed_path_to_vec_u8(path));
157+
// make sure we never return a path that needs more (or the
158+
// same) bytes to serialize than the node we're referencing.
159+
if let Some(path_len) = atom_length_bits(path.len() as u64) {
160+
if path_len < max_bytes_for_path_encoding {
161+
let p = reversed_path_to_vec_u8(path);
162+
possible_responses.push(p);
163+
}
164+
}
157165
continue;
158166
}
159167

@@ -162,12 +170,14 @@ impl ReadCacheLookup {
162170
for (parent, direction) in items.iter() {
163171
if *(self.count.get(parent).unwrap_or(&0)) > 0 && !seen_ids.contains(parent)
164172
{
165-
if path.len() + 1 > max_path_length {
173+
if path.len() > max_path_length {
166174
return possible_responses;
167175
}
168-
let mut new_path = path.clone();
169-
new_path.push(*direction);
170-
new_partial_paths.push((*parent, new_path));
176+
if path.len() < max_path_length {
177+
let mut new_path = path.clone();
178+
new_path.push(*direction);
179+
new_partial_paths.push((*parent, new_path));
180+
}
171181
}
172182
seen_ids.insert(parent);
173183
}

src/serde/serialized_length.rs

+38
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,26 @@ pub fn serialized_length_small_number(val: u32) -> u32 {
2121
len_for_value(val) as u32 + 1
2222
}
2323

24+
// given an atom with num_bits (counting from the most significant set bit)
25+
// return the number of bytes we need to serialize this atom
26+
pub fn atom_length_bits(num_bits: u64) -> Option<u64> {
27+
if num_bits < 8 {
28+
return Some(1);
29+
}
30+
let num_bytes = (num_bits + 7) / 8;
31+
match num_bytes {
32+
1..0x40 => Some(1 + num_bytes),
33+
0x40..0x2000 => Some(2 + num_bytes),
34+
0x2000..0x10_0000 => Some(3 + num_bytes),
35+
0x10_0000..0x800_0000 => Some(4 + num_bytes),
36+
0x800_0000..0x4_0000_0000 => Some(5 + num_bytes),
37+
_ => {
38+
assert!(num_bits >= 0x4_0000_0000 * 8 - 7);
39+
None
40+
}
41+
}
42+
}
43+
2444
#[cfg(test)]
2545
mod tests {
2646
use super::*;
@@ -53,4 +73,22 @@ mod tests {
5373
fn test_serialized_length_small_number(#[case] value: u32, #[case] expect: u32) {
5474
assert_eq!(serialized_length_small_number(value), expect);
5575
}
76+
77+
#[rstest]
78+
#[case(0, Some(1))]
79+
#[case(1, Some(1))]
80+
#[case(7, Some(1))]
81+
#[case(8, Some(2))]
82+
#[case(9, Some(3))]
83+
#[case(504, Some(1+63))]
84+
#[case(505, Some(2+64))]
85+
#[case(0xfff8, Some(2+0x1fff))]
86+
#[case(0xfff9, Some(3+0x2000))]
87+
#[case(0x3ffffff8, Some(4 + (0x3ffffff8 + 7) / 8))]
88+
#[case(0x3ffffff9, Some(5 + (0x3ffffff9 + 7) / 8))]
89+
#[case(0x1ffffffff8, Some(5 + (0x1ffffffff8 + 7) / 8))]
90+
#[case(0x1ffffffff9, None)]
91+
fn test_atom_length_bits(#[case] num_bits: u64, #[case] expect: Option<u64>) {
92+
assert_eq!(atom_length_bits(num_bits), expect);
93+
}
5694
}

0 commit comments

Comments
 (0)