Skip to content

Commit 6d3d794

Browse files
committed
feat: add hashed key-value trie implementation
This expands on the previously added key-value trie by adding a hashed variant and a mechanism to convert an unhashed trie into a hashed one.
1 parent 945becd commit 6d3d794

File tree

4 files changed

+269
-6
lines changed

4 files changed

+269
-6
lines changed

storage/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ pub use path::{
6565
#[cfg(not(feature = "branch_factor_256"))]
6666
pub use path::{PackedBytes, PackedPathComponents, PackedPathRef};
6767
pub use tries::{
68-
DuplicateKeyError, HashedTrieNode, IterAscending, IterDescending, KeyValueTrieRoot,
69-
TrieEdgeIter, TrieEdgeState, TrieNode, TrieValueIter,
68+
DuplicateKeyError, HashedKeyValueTrieRoot, HashedTrieNode, IterAscending, IterDescending,
69+
KeyValueTrieRoot, TrieEdgeIter, TrieEdgeState, TrieNode, TrieValueIter,
7070
};
7171
pub use u4::{TryFromIntError, U4};
7272

storage/src/node/branch.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,23 @@ mod ethhash {
193193

194194
use super::Serializable;
195195

196-
#[derive(Clone, Debug)]
196+
#[derive(Clone)]
197197
/// Holds either a [`TrieHash`] or a small inline byte buffer.
///
/// NOTE(review): the `Rlp` variant presumably carries an RLP encoding that is
/// short enough to be used in place of a hash — confirm against the hashing code.
pub enum HashOrRlp {
198198
/// A full hash value.
Hash(TrieHash),
199199
// TODO: this slice is never larger than 32 bytes so smallvec is probably not our best container
200200
// the length is stored in a `usize` but it could be in a `u8` and it will never overflow
201201
/// Bytes stored in a `SmallVec` with 32 bytes of inline capacity.
Rlp(SmallVec<[u8; 32]>),
202202
}
203203

204+
impl std::fmt::Debug for HashOrRlp {
205+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
206+
match self {
207+
HashOrRlp::Hash(h) => write!(f, "Hash({h})"),
208+
HashOrRlp::Rlp(r) => write!(f, "Rlp({})", hex::encode(r)),
209+
}
210+
}
211+
}
212+
204213
impl HashOrRlp {
205214
/// Creates a new `TrieHash` from the default value, which is the all zeros.
206215
///

storage/src/tries/kvp.rs

Lines changed: 256 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#[cfg(not(feature = "branch_factor_256"))]
55
use crate::PackedPathRef;
66
use crate::{
7-
Children, HashType, PathBuf, PathComponent, PathGuard, SplitPath, TrieNode, TriePath,
8-
TriePathFromPackedBytes,
7+
Children, HashType, Hashable, HashableShunt, HashedTrieNode, PathBuf, PathComponent, PathGuard,
8+
SplitPath, TrieNode, TriePath, TriePathFromPackedBytes, ValueDigest,
99
};
1010

1111
#[cfg(feature = "branch_factor_256")]
@@ -44,6 +44,19 @@ pub struct KeyValueTrieRoot<'a, T: ?Sized> {
4444
pub children: Children<Option<Box<Self>>>,
4545
}
4646

47+
/// The root of a hashed key-value trie.
48+
///
49+
/// This is similar to [`KeyValueTrieRoot`], but includes the computed hash of
50+
/// the node as well as its leading path components. Consequently, the hashed
51+
/// trie is formed by hashing the un-hashed trie.
52+
pub struct HashedKeyValueTrieRoot<'a, T: ?Sized> {
53+
/// The hash computed for this node when it was constructed.
computed: HashType,
54+
/// The path components that lead to this node (owned copy).
leading_path: PathBuf,
55+
/// This node's own partial path, carried over from the un-hashed node.
partial_path: PackedPathRef<'a>,
56+
/// The borrowed value stored at this node, if any.
value: Option<&'a T>,
57+
/// The hashed child nodes, one optional slot per path component.
children: Children<Option<Box<Self>>>,
58+
}
59+
4760
impl<T: AsRef<[u8]> + ?Sized> std::fmt::Debug for KeyValueTrieRoot<'_, T> {
4861
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4962
f.debug_struct("KeyValueTrieRoot")
@@ -54,6 +67,18 @@ impl<T: AsRef<[u8]> + ?Sized> std::fmt::Debug for KeyValueTrieRoot<'_, T> {
5467
}
5568
}
5669

70+
impl<T: AsRef<[u8]> + ?Sized> std::fmt::Debug for HashedKeyValueTrieRoot<'_, T> {
71+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72+
f.debug_struct("HashedKeyValueTrieRoot")
73+
.field("computed", &self.computed)
74+
.field("leading_path", &self.leading_path.display())
75+
.field("partial_path", &self.partial_path.display())
76+
.field("value", &DebugValue::new(self.value))
77+
.field("children", &DebugChildren::new(&self.children))
78+
.finish()
79+
}
80+
}
81+
5782
impl<'a, T: AsRef<[u8]> + ?Sized> KeyValueTrieRoot<'a, T> {
5883
/// Constructs a new leaf node with the given path and value.
5984
#[must_use]
@@ -216,6 +241,42 @@ impl<'a, T: AsRef<[u8]> + ?Sized> KeyValueTrieRoot<'a, T> {
216241

217242
Ok(self)
218243
}
244+
245+
/// Hashes this trie, returning a hashed trie.
246+
#[must_use]
247+
pub fn into_hashed_trie(self: Box<Self>) -> Box<HashedKeyValueTrieRoot<'a, T>> {
248+
HashedKeyValueTrieRoot::new(PathGuard::new(&mut PathBuf::new_const()), self)
249+
}
250+
}
251+
252+
impl<'a, T: AsRef<[u8]> + ?Sized> HashedKeyValueTrieRoot<'a, T> {
253+
/// Constructs a new hashed key-value trie node from the given un-hashed
254+
/// node.
255+
#[must_use]
256+
pub fn new(
257+
mut leading_path: PathGuard<'_>,
258+
#[expect(clippy::boxed_local)] node: Box<KeyValueTrieRoot<'a, T>>,
259+
) -> Box<Self> {
260+
let children = node
261+
.children
262+
.map(|pc, child| child.map(|child| Self::new(leading_path.fork_append(pc), child)));
263+
264+
Box::new(Self {
265+
computed: HashableShunt::new(
266+
leading_path.as_slice(),
267+
node.partial_path,
268+
node.value.map(|v| ValueDigest::Value(v.as_ref())),
269+
children
270+
.each_ref()
271+
.map(|_, c| c.as_deref().map(|c| c.computed.clone())),
272+
)
273+
.to_hash(),
274+
leading_path: leading_path.as_slice().into(),
275+
partial_path: node.partial_path,
276+
value: node.value,
277+
children,
278+
})
279+
}
219280
}
220281

221282
impl<T: AsRef<[u8]> + ?Sized> TrieNode<T> for KeyValueTrieRoot<'_, T> {
@@ -235,6 +296,62 @@ impl<T: AsRef<[u8]> + ?Sized> TrieNode<T> for KeyValueTrieRoot<'_, T> {
235296
// Borrows the child stored in slot `pc`, if that slot is occupied.
fn child_node(&self, pc: PathComponent) -> Option<&Self> {
    let slot = &self.children[pc];
    slot.as_deref()
}
299+
300+
// Children of an un-hashed trie are always reported as `UnhashedChild`.
fn child_state(&self, pc: PathComponent) -> Option<super::TrieEdgeState<'_, Self>> {
    let node = self.children[pc].as_deref()?;
    Some(super::TrieEdgeState::UnhashedChild { node })
}
305+
}
306+
307+
impl<T: AsRef<[u8]> + ?Sized> TrieNode<T> for HashedKeyValueTrieRoot<'_, T> {
308+
fn partial_path(&self) -> impl SplitPath + '_ {
309+
self.partial_path
310+
}
311+
312+
fn value(&self) -> Option<&T> {
313+
self.value
314+
}
315+
316+
fn child_hash(&self, pc: PathComponent) -> Option<&HashType> {
317+
self.children[pc].as_deref().map(|c| &c.computed)
318+
}
319+
320+
fn child_node(&self, pc: PathComponent) -> Option<&Self> {
321+
self.children[pc].as_deref()
322+
}
323+
324+
fn child_state(&self, pc: PathComponent) -> Option<super::TrieEdgeState<'_, Self>> {
325+
self.children[pc]
326+
.as_deref()
327+
.map(|node| super::TrieEdgeState::from_node(node, Some(&node.computed)))
328+
}
329+
}
330+
331+
impl<T: AsRef<[u8]> + ?Sized> HashedTrieNode<T> for HashedKeyValueTrieRoot<'_, T> {
332+
fn computed(&self) -> &HashType {
333+
&self.computed
334+
}
335+
}
336+
337+
impl<T: AsRef<[u8]> + ?Sized> Hashable for HashedKeyValueTrieRoot<'_, T> {
338+
fn parent_prefix_path(&self) -> impl crate::IntoSplitPath + '_ {
339+
self.leading_path.as_slice()
340+
}
341+
342+
fn partial_path(&self) -> impl crate::IntoSplitPath + '_ {
343+
self.partial_path
344+
}
345+
346+
fn value_digest(&self) -> Option<ValueDigest<&[u8]>> {
347+
self.value.map(|v| ValueDigest::Value(v.as_ref()))
348+
}
349+
350+
fn children(&self) -> Children<Option<HashType>> {
351+
self.children
352+
.each_ref()
353+
.map(|_, c| c.as_deref().map(|c| c.computed.clone()))
354+
}
238355
}
239356

240357
struct DebugValue<'a, T: ?Sized> {
@@ -310,6 +427,78 @@ mod tests {
310427

311428
use super::*;
312429

430+
/// Decodes an ASCII hex string into a byte array; usable in constant context.
///
/// `FROM` is the length of the hex input and `TO` the length of the decoded
/// output. A compile-time assertion requires `FROM` to be twice `TO`.
///
/// # Panics
///
/// Panics if the input is not valid hex.
const fn from_ascii<const FROM: usize, const TO: usize>(hex: &[u8; FROM]) -> [u8; TO] {
    #![expect(clippy::arithmetic_side_effects, clippy::indexing_slicing)]

    /// Maps a single ASCII hex digit (either case) to its 4-bit value.
    const fn nibble(digit: u8) -> u8 {
        match digit {
            b'0'..=b'9' => digit - b'0',
            b'a'..=b'f' => digit - b'a' + 10,
            b'A'..=b'F' => digit - b'A' + 10,
            _ => panic!("invalid hex character"),
        }
    }

    const {
        assert!(FROM == TO.wrapping_mul(2));
    }

    let mut decoded = [0u8; TO];
    let mut idx = 0_usize;
    while idx < TO {
        // Each output byte consumes two consecutive digits, high nibble first.
        let pos = idx.wrapping_mul(2);
        let high = nibble(hex[pos]);
        let low = nibble(hex[pos.wrapping_add(1)]);
        decoded[idx] = (high << 4) | low;
        idx += 1;
    }

    decoded
}
464+
465+
// Picks the expected root hash for the active feature configuration.
//
// Exactly one `match ()` arm survives `cfg` evaluation:
// - neither `branch_factor_256` nor `ethhash`: the `merkledb16` value
// - `branch_factor_256` without `ethhash`: the `merkledb256` value
// - `ethhash` without `branch_factor_256`: the `ethereum` value
// - both features enabled: a `compile_error!`
macro_rules! expected_hash {
466+
// Form 1: the `ethereum` value is wrapped in `rlp(...)`; under `ethhash` it is
// decoded and stored as `HashType::Rlp` instead of going through `HashType::from`.
(
467+
merkledb16: $hex16:expr,
468+
merkledb256: $hex256:expr,
469+
ethereum: rlp($hexeth:expr),
470+
) => {
471+
match () {
472+
#[cfg(all(not(feature = "branch_factor_256"), not(feature = "ethhash")))]
473+
() => $crate::HashType::from(from_ascii($hex16)),
474+
#[cfg(all(feature = "branch_factor_256", not(feature = "ethhash")))]
475+
() => $crate::HashType::from(from_ascii($hex256)),
476+
#[cfg(all(not(feature = "branch_factor_256"), feature = "ethhash"))]
477+
() => $crate::HashType::Rlp(smallvec::SmallVec::from(
478+
// The const generics are spelled out from the literal's length (hex length, byte length).
&from_ascii::<{ $hexeth.len() }, { $hexeth.len() / 2 }>($hexeth)[..],
479+
)),
480+
#[cfg(all(feature = "branch_factor_256", feature = "ethhash"))]
481+
() => compile_error!("branch_factor_256 and ethhash cannot both be enabled"),
482+
}
483+
};
484+
// Form 2: all three values are plain hex literals; the selected one is decoded
// with `from_ascii` and converted with `HashType::from`.
(
485+
merkledb16: $hex16:expr,
486+
merkledb256: $hex256:expr,
487+
ethereum: $hexeth:expr,
488+
) => {
489+
$crate::HashType::from(from_ascii(match () {
490+
#[cfg(all(not(feature = "branch_factor_256"), not(feature = "ethhash")))]
491+
() => $hex16,
492+
#[cfg(all(feature = "branch_factor_256", not(feature = "ethhash")))]
493+
() => $hex256,
494+
#[cfg(all(not(feature = "branch_factor_256"), feature = "ethhash"))]
495+
() => $hexeth,
496+
#[cfg(all(feature = "branch_factor_256", feature = "ethhash"))]
497+
() => compile_error!("branch_factor_256 and ethhash cannot both be enabled"),
498+
}))
499+
};
500+
}
501+
313502
#[test_case(&[])]
314503
#[test_case(&[("a", "1")])]
315504
#[test_case(&[("a", "1"), ("b", "2")])]
@@ -384,4 +573,69 @@ mod tests {
384573
assert_eq!(kvp_value, slice_value);
385574
}
386575
}
576+
577+
#[test_case(&[("a", "1")], expected_hash!{
578+
merkledb16: b"1ffe11ce995a9c07021d6f8a8c5b1817e6375dd0ea27296b91a8d48db2858bc9",
579+
merkledb256: b"831a115e52af616bd2df8cd7a0993e21e544d7d201e151a7f61dcdd1a6bd557c",
580+
ethereum: rlp(b"c482206131"),
581+
}; "single key")]
582+
#[test_case(&[("a", "1"), ("b", "2")], expected_hash!{
583+
merkledb16: b"ff783ce73f7a5fa641991d76d626eefd7840a839590db4269e1e92359ae60593",
584+
merkledb256: b"301e9035ef0fe1b50788f9b5bca3a2c19bce9c798bdb1dda09fc71dd22564ce4",
585+
ethereum: rlp(b"d81696d580c22031c220328080808080808080808080808080"),
586+
}; "two disjoint keys")]
587+
#[test_case(&[("a", "1"), ("ab", "2")], expected_hash!{
588+
merkledb16: b"c5def8c64a2f3b8647283251732b68a2fb185f8bf92c0103f31d5ec69bb9a90c",
589+
merkledb256: b"2453f6f0b38fd36bcb66b145aff0f7ae3a6b96121fa1187d13afcffa7641b156",
590+
ethereum: rlp(b"d882006194d3808080808080c2323280808080808080808031"),
591+
}; "two nested keys")]
592+
#[test_case(&[("a", "1"), ("b", "2"), ("c", "3")], expected_hash!{
593+
merkledb16: b"95618fd79a0ca2d7612bf9fd60663b81f632c9a65e76bb5bc3ed5f3045cf1404",
594+
merkledb256: b"f5c185a96ed86da8da052a52f6c2e7368c90d342c272dd0e6c9e72c0071cdb0c",
595+
ethereum: rlp(b"da1698d780c22031c22032c2203380808080808080808080808080"),
596+
}; "three disjoint keys")]
597+
#[test_case(&[("a", "1"), ("ab", "2"), ("ac", "3")], expected_hash!{
598+
merkledb16: b"ee8a7a1409935f58ab6ce40a1e05ee2a587bdc06c201dbec7006ee1192e71f70",
599+
merkledb256: b"40c9cee60ac59e7926109137fbaa5d68642d4770863b150f98bd8ac00aedbff3",
600+
ethereum: b"6ffab67bf7096a9608b312b9b2459c17ec9429286b283a3b3cdaa64860182699",
601+
}; "two children of same parent")]
602+
#[test_case(&[("a", "1"), ("b", "2"), ("ba", "3")], expected_hash!{
603+
merkledb16: b"d3efab83a1a4dd193c8ae51dfe638bba3494d8b1917e7a9185d20301ff1c528b",
604+
merkledb256: b"e6f711e762064ffcc7276e9c6149fc8f1050e009a21e436e7b78a4a60079e3ba",
605+
ethereum: b"21a118e1765c556e505a8752a0fd5bbb4ea78fb21077f8488d42862ebabf0130",
606+
}; "nested sibling")]
607+
#[test_case(&[("a", "1"), ("ab", "2"), ("abc", "3")], expected_hash!{
608+
merkledb16: b"af11454e2f920fb49041c9890c318455952d651b7d835f5731218dbc4bde4805",
609+
merkledb256: b"5dc43e88b3019050e741be52ed4afff621e1ac93cd2c68d37f82947d1d16cff5",
610+
ethereum: b"eabecb5e4efb9b5824cd926fac6350bdcb4a599508b16538afde303d72571169",
611+
}; "linear nested keys")]
612+
#[test_case(&[("a", "1"), ("ab", "2"), ("ac", "3"), ("b", "4")], expected_hash!{
613+
merkledb16: b"749390713e51d3e4e50ba492a669c1644a6d9cb7e48b2a14d556e7f953da92fc",
614+
merkledb256: b"30dbf15b59c97d2997f4fbed1ae86d1eab8e7aa2dd84337029fe898f47aeb8e6",
615+
ethereum: b"2e636399fae96dc07abaf21167a34b8a5514d6594e777635987e319c76f28a75",
616+
}; "four keys")]
617+
#[test_case(&[("a", "1"), ("ab", "2"), ("ac", "3"), ("b", "4"), ("ba", "5")], expected_hash!{
618+
merkledb16: b"1c043978de0cd65fe2e75a74eaa98878b753f4ec20f6fbbb7232a39f02e88c6f",
619+
merkledb256: b"02bb75b5d5b81ba4c64464a5e39547de4e0d858c04da4a4aae9e63fc8385279d",
620+
ethereum: b"df930bafb34edb6d758eb5f4dd9461fc259c8c13abf38da8a0f63f289e107ecd",
621+
}; "five keys")]
622+
#[test_case(&[("a", "1"), ("ab", "2"), ("ac", "3"), ("b", "4"), ("ba", "5"), ("bb", "6")], expected_hash!{
623+
merkledb16: b"c2c13c095f7f07ce9ef92401f73951b4846a19e2b092b8a527fe96fa82f55cfd",
624+
merkledb256: b"56d69386ad494d6be42bbdd78b3ad00c07c12e631338767efa1539d6720ce7a6",
625+
ethereum: b"8ca7c3b09aa0a8877122d67fd795051bd1e6ff169932e3b7a1158ed3d66fbedf",
626+
}; "six keys")]
627+
#[test_case(&[("a", "1"), ("ab", "2"), ("ac", "3"), ("b", "4"), ("ba", "5"), ("bb", "6"), ("c", "7")], expected_hash!{
628+
merkledb16: b"697e767d6f4af8236090bc95131220c1c94cadba3e66e0a8011c9beef7b255a5",
629+
merkledb256: b"2f083246b86da1e6e135f771ae712f271c1162c23ebfaa16178ea57f0317bf06",
630+
ethereum: b"3fa832b90f7f1a053a48a4528d1e446cc679fbcf376d0ef8703748d64030e19d",
631+
}; "seven keys")]
632+
fn test_hashed_trie(slice: &[(&str, &str)], root_hash: crate::HashType) {
633+
let root = KeyValueTrieRoot::<str>::from_slice(slice)
634+
.unwrap()
635+
.unwrap()
636+
.into_hashed_trie();
637+
638+
assert_eq!(*root.computed(), root_hash);
639+
assert_eq!(*root.computed(), crate::Preimage::to_hash(&*root));
640+
}
387641
}

storage/src/tries/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ mod kvp;
77
use crate::{HashType, PathComponent, SplitPath};
88

99
pub use self::iter::{IterAscending, IterDescending, TrieEdgeIter, TrieValueIter};
10-
pub use self::kvp::{DuplicateKeyError, KeyValueTrieRoot};
10+
pub use self::kvp::{DuplicateKeyError, HashedKeyValueTrieRoot, KeyValueTrieRoot};
1111

1212
/// The state of an edge from a parent node to a child node in a trie.
1313
#[derive(Debug, PartialEq, Eq, Hash)]

0 commit comments

Comments
 (0)