Skip to content

Commit f4f73c4

Browse files
committed
use SHA-1 to deduplicate sub trees
1 parent 96573cb commit f4f73c4

File tree

3 files changed

+31
-6
lines changed

3 files changed

+31
-6
lines changed

Cargo.lock

+2-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ p256 = { version = "0.13.2", features = ["ecdsa"] }
8080
sha3 = "0.10.8"
8181
rand = { workspace = true }
8282
hex = { workspace = true }
83+
sha1 = { workspace = true }
8384

8485
[dev-dependencies]
8586
rstest = { workspace = true }

src/serde/tree_cache.rs

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,31 @@
1-
use super::bytes32::{hash_blobs, Bytes32};
21
use super::{ChildPos, PathBuilder};
32
use crate::allocator::{Allocator, NodePtr, SExp};
43
use crate::serde::serialized_length_atom;
54
use crate::serde::RandomState;
65
use crate::serde::VisitedNodes;
6+
use rand::prelude::*;
7+
use sha1::{Digest, Sha1};
78
use std::collections::hash_map::Entry;
89
use std::collections::HashMap;
910

1011
const MIN_SERIALIZED_LENGTH: u64 = 4;
1112

13+
type Bytes20 = [u8; 20];
14+
15+
/// Computes a salted SHA-1 digest over a sequence of byte slices.
///
/// The hasher is fed `salt` first, then every slice in `blobs` in order.
/// The slices are streamed back-to-back with no separator or length prefix
/// between them. The resulting 20-byte digest is returned as a [`Bytes20`].
fn hash_blobs(salt: &[u8], blobs: &[&[u8]]) -> Bytes20 {
    let mut hasher = Sha1::new();
    hasher.update(salt);
    blobs.iter().for_each(|chunk| hasher.update(chunk));
    hasher.finalize().into()
}
23+
1224
#[derive(Clone, Debug)]
1325
struct NodeEntry {
1426
/// the tree hash of this node. It may be None if it or any of its children
1527
/// is the sentinel node, which means we can't compute the tree hash.
16-
tree_hash: Option<Bytes32>,
28+
tree_hash: Option<Bytes20>,
1729
/// a node can have an arbitrary number of parents, since they can be reused
1830
/// this is a list of parent nodes, followed by whether we're the left or
1931
/// right child. The u32 is an index into the node_entry vector.
@@ -75,7 +87,7 @@ pub struct TreeCache {
7587
/// node_entry vector. For any given tree hash, we're only supposed to
7688
/// have a single NodeEntry. There may be multiple NodePtr referring to
7789
/// the same NodeEntry (if they are identical sub trees).
78-
hash_to_node: HashMap<Bytes32, u32, RandomState>,
90+
hash_to_node: HashMap<Bytes20, u32, RandomState>,
7991

8092
/// When deserializing, we keep a stack of tokens we've parsed so far, this
8193
/// stack is maintaining that same state, since that's what back-references
@@ -95,13 +107,20 @@ pub struct TreeCache {
95107
/// update(), the tree is assumed to be placed at the sentinel node in the
96108
/// previous call to update()
97109
pub sentinel_node: Option<NodePtr>,
110+
111+
/// We compute hash-trees using SHA-1 in order to determine whether the
112+
/// trees are identical or not. To mitigate malicious SHA-1 hash collisions,
113+
/// we salt the hashes
114+
salt: [u8; 8],
98115
}
99116

100117
impl TreeCache {
101118
pub fn new(sentinel: Option<NodePtr>) -> Self {
119+
let mut rng = rand::thread_rng();
102120
Self {
103121
sentinel_node: sentinel,
104122
hash_to_node: HashMap::with_hasher(RandomState::default()),
123+
salt: rng.gen(),
105124
..Default::default()
106125
}
107126
}
@@ -200,7 +219,7 @@ impl TreeCache {
200219
continue;
201220
}
202221
let buf = a.atom(node);
203-
let hash = hash_blobs(&[&[1], buf.as_ref()]);
222+
let hash = hash_blobs(&self.salt, &[&[1], buf.as_ref()]);
204223

205224
// record the mapping of this node to the
206225
// corresponding NodeEntry index
@@ -251,7 +270,11 @@ impl TreeCache {
251270
let (hash, idx) = if let (Some(left_hash), Some(right_hash)) =
252271
(left.tree_hash, right.tree_hash)
253272
{
254-
let hash = hash_blobs(&[&[2], left_hash.as_ref(), right_hash.as_ref()]);
273+
let hash = hash_blobs(
274+
&self.salt,
275+
&[&[2], left_hash.as_ref(), right_hash.as_ref()],
276+
);
277+
255278
(Some(hash), self.hash_to_node.get(&hash))
256279
} else {
257280
(None, None)

0 commit comments

Comments
 (0)