diff --git a/firewood/src/proofs/tests.rs b/firewood/src/proofs/tests.rs index 408935f74..2217cee2a 100644 --- a/firewood/src/proofs/tests.rs +++ b/firewood/src/proofs/tests.rs @@ -3,6 +3,10 @@ #![expect(clippy::unwrap_used, clippy::indexing_slicing)] +use firewood_storage::{ + KeyProofTrieRoot, PackedPathRef, PathComponent, TrieNode, TriePath, TriePathFromPackedBytes, + ValueDigest, +}; use integer_encoding::VarInt; use test_case::test_case; @@ -224,3 +228,100 @@ fn test_empty_proof() { Err(err) => panic!("Expected valid empty proof, got error: {err}"), } } + +#[test] +fn test_proof_trie_construction() { + let merkle = crate::merkle::tests::init_merkle((0u8..=10).map(|k| ([k], [k]))); + let proof = merkle + .range_proof(Some(&[2u8]), Some(&[8u8]), std::num::NonZeroUsize::new(5)) + .unwrap(); + + let lower_trie = KeyProofTrieRoot::new(&**proof.start_proof()) + .unwrap() + .unwrap(); + let upper_trie = KeyProofTrieRoot::new(&**proof.end_proof()) + .unwrap() + .unwrap(); + + let mut iter = lower_trie.iter_path(PackedPathRef::path_from_packed_bytes(&[0x2_u8])); + let (path, edge) = iter.next().unwrap(); + assert!(path.is_empty()); + assert!(edge.is_unhashed()); + let root = edge.node().unwrap(); + + #[cfg(feature = "branch_factor_256")] + assert!(root.partial_path().is_empty()); + #[cfg(not(feature = "branch_factor_256"))] + assert!(root.partial_path().path_eq(&[PathComponent::ALL[0]])); + + assert_eq!(root.value(), None); + assert!(root.child_hash(PathComponent::ALL[2]).is_some()); + assert!(root.child_hash(PathComponent::ALL[6]).is_some()); + assert!(root.child_hash(PathComponent::ALL[10]).is_some()); + assert!(root.child_hash(PathComponent::ALL[11]).is_none()); + assert!(root.child_node(PathComponent::ALL[6]).is_none()); + assert!(root.child_node(PathComponent::ALL[10]).is_none()); + assert!(root.child_node(PathComponent::ALL[11]).is_none()); + let child = root.child_node(PathComponent::ALL[2]).unwrap(); + assert!(child.partial_path().is_empty()); + 
assert_eq!(child.value(), Some(&ValueDigest::Value(&[2_u8][..]))); + + let (path, edge) = iter.next().unwrap(); + #[cfg(feature = "branch_factor_256")] + assert!(path.path_eq(&[PathComponent::ALL[2]])); + #[cfg(not(feature = "branch_factor_256"))] + assert!(path.path_eq(&[PathComponent::ALL[0], PathComponent::ALL[2]])); + + assert!( + edge.is_local(), + "edge from root to child has both hash and node" + ); + let root = edge.node().unwrap(); + assert!( + std::ptr::eq(root, child), + "expected not just equal, but identical references to the same node" + ); + assert!(root.partial_path().is_empty()); + assert_eq!(root.value(), Some(&ValueDigest::Value(&[2_u8][..]))); + + let mut iter = upper_trie.iter_path(PackedPathRef::path_from_packed_bytes(&[0x6_u8])); + let (path, edge) = iter.next().unwrap(); + assert!(path.is_empty()); + assert!(edge.is_unhashed()); + let root = edge.node().unwrap(); + + #[cfg(feature = "branch_factor_256")] + assert!(root.partial_path().is_empty()); + #[cfg(not(feature = "branch_factor_256"))] + assert!(root.partial_path().path_eq(&[PathComponent::ALL[0]])); + + assert_eq!(root.value(), None); + assert!(root.child_hash(PathComponent::ALL[2]).is_some()); + assert!(root.child_hash(PathComponent::ALL[6]).is_some()); + assert!(root.child_hash(PathComponent::ALL[10]).is_some()); + assert!(root.child_hash(PathComponent::ALL[11]).is_none()); + assert!(root.child_node(PathComponent::ALL[2]).is_none()); + assert!(root.child_node(PathComponent::ALL[10]).is_none()); + assert!(root.child_node(PathComponent::ALL[11]).is_none()); + let child = root.child_node(PathComponent::ALL[6]).unwrap(); + assert!(child.partial_path().is_empty()); + assert_eq!(child.value(), Some(&ValueDigest::Value(&[6_u8][..]))); + + let (path, edge) = iter.next().unwrap(); + #[cfg(feature = "branch_factor_256")] + assert!(path.path_eq(&[PathComponent::ALL[6]])); + #[cfg(not(feature = "branch_factor_256"))] + assert!(path.path_eq(&[PathComponent::ALL[0], PathComponent::ALL[6]])); + + 
assert!( + edge.is_local(), + "edge from root to child has both hash and node" + ); + let root = edge.node().unwrap(); + assert!( + std::ptr::eq(root, child), + "expected not just equal, but identical references to the same node" + ); + assert!(root.partial_path().is_empty()); + assert_eq!(root.value(), Some(&ValueDigest::Value(&[6_u8][..]))); +} diff --git a/storage/src/lib.rs b/storage/src/lib.rs index b48a2af25..96e49d33a 100644 --- a/storage/src/lib.rs +++ b/storage/src/lib.rs @@ -58,15 +58,16 @@ pub use nodestore::{ NodeReader, NodeStore, Parentable, RootReader, TrieReader, }; pub use path::{ - ComponentIter, IntoSplitPath, JoinedPath, PartialPath, PathBuf, PathCommonPrefix, - PathComponent, PathComponentSliceExt, PathGuard, SplitPath, TriePath, TriePathAsPackedBytes, - TriePathFromPackedBytes, TriePathFromUnpackedBytes, + ComponentIter, IntoSplitPath, JoinedPath, PackedPathRef, PartialPath, PathBuf, + PathCommonPrefix, PathComponent, PathComponentSliceExt, PathGuard, SplitPath, TriePath, + TriePathAsPackedBytes, TriePathFromPackedBytes, TriePathFromUnpackedBytes, }; #[cfg(not(feature = "branch_factor_256"))] -pub use path::{PackedBytes, PackedPathComponents, PackedPathRef}; +pub use path::{PackedBytes, PackedPathComponents}; pub use tries::{ - DuplicateKeyError, HashedKeyValueTrieRoot, HashedTrieNode, IterAscending, IterDescending, - KeyValueTrieRoot, TrieEdgeIter, TrieEdgeState, TrieNode, TriePathIter, TrieValueIter, + DuplicateKeyError, FromKeyProofError, HashedKeyValueTrieRoot, HashedTrieNode, IterAscending, + IterDescending, KeyProofTrieRoot, KeyValueTrieRoot, TrieEdgeIter, TrieEdgeState, TrieNode, + TriePathIter, TrieValueIter, }; pub use u4::{TryFromIntError, U4}; diff --git a/storage/src/path/mod.rs b/storage/src/path/mod.rs index 54aad8145..668f694d7 100644 --- a/storage/src/path/mod.rs +++ b/storage/src/path/mod.rs @@ -15,6 +15,10 @@ pub use self::joined::JoinedPath; pub use self::packed::{PackedBytes, PackedPathComponents, PackedPathRef}; pub use 
self::split::{IntoSplitPath, PathCommonPrefix, SplitPath}; +/// If the branch factor is 256, a packed path is just a slice of path components. +#[cfg(feature = "branch_factor_256")] +pub type PackedPathRef<'a> = &'a [PathComponent]; + /// A trie path of components with different underlying representations. /// /// The underlying representation does not need to be a contiguous array of diff --git a/storage/src/tries/kvp.rs b/storage/src/tries/kvp.rs index 956ff389a..aae02ec02 100644 --- a/storage/src/tries/kvp.rs +++ b/storage/src/tries/kvp.rs @@ -1,16 +1,12 @@ // Copyright (C) 2025, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -#[cfg(not(feature = "branch_factor_256"))] -use crate::PackedPathRef; use crate::{ - Children, HashType, Hashable, HashableShunt, HashedTrieNode, JoinedPath, PathBuf, - PathComponent, PathGuard, SplitPath, TrieNode, TriePath, TriePathFromPackedBytes, ValueDigest, + Children, HashType, Hashable, HashableShunt, HashedTrieNode, JoinedPath, PackedPathRef, + PathBuf, PathComponent, PathGuard, SplitPath, TrieNode, TriePath, TriePathFromPackedBytes, + ValueDigest, }; -#[cfg(feature = "branch_factor_256")] -type PackedPathRef<'a> = &'a [PathComponent]; - /// A duplicate key error when merging two key-value tries. #[non_exhaustive] #[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)] diff --git a/storage/src/tries/mod.rs b/storage/src/tries/mod.rs index 51934025b..069814f78 100644 --- a/storage/src/tries/mod.rs +++ b/storage/src/tries/mod.rs @@ -3,11 +3,13 @@ mod iter; mod kvp; +mod proof; use crate::{HashType, IntoSplitPath, PathComponent, SplitPath}; pub use self::iter::{IterAscending, IterDescending, TrieEdgeIter, TriePathIter, TrieValueIter}; pub use self::kvp::{DuplicateKeyError, HashedKeyValueTrieRoot, KeyValueTrieRoot}; +pub use self::proof::{FromKeyProofError, KeyProofTrieRoot}; /// The state of an edge from a parent node to a child node in a trie. 
#[derive(Debug, PartialEq, Eq, Hash)] diff --git a/storage/src/tries/proof.rs b/storage/src/tries/proof.rs new file mode 100644 index 000000000..0b41b8dbd --- /dev/null +++ b/storage/src/tries/proof.rs @@ -0,0 +1,203 @@ +// Copyright (C) 2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + +use crate::{ + Children, HashType, Hashable, IntoSplitPath, PathBuf, PathComponent, SplitPath, TrieEdgeState, + TrieNode, TriePath, ValueDigest, +}; + +/// An error indicating that a slice of proof nodes is invalid. +#[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)] +pub enum FromKeyProofError { + /// The parent node's path is not a strict prefix of the path of the node that follows it. + #[error( + "parent node {parent_path} precedes child node {child_path} but is not a strict prefix of it", + parent_path = parent_path.display(), + child_path = child_path.display(), + )] + InvalidChildPath { + /// The path of the parent node. + parent_path: PathBuf, + /// The path of the following child node. + child_path: PathBuf, + }, + /// The parent node does not reference the child node at the path component + /// leading to the child node. + #[error( + "child node {child_path} is not reachable from parent node {parent_path}", + parent_path = parent_path.display(), + child_path = child_path.display(), + )] + MissingChild { + /// The path of the parent node. + parent_path: PathBuf, + /// The path of the following child node. + child_path: PathBuf, + }, +} + +/// A root node in a trie formed from a key proof. +/// +/// A proof trie follows a linear path from the root to a terminal node, and +/// includes the necessary information to calculate the hash of each node along +/// that path. +/// +/// In the proof, each node will include the value or value digest at that node, +/// depending on what is required by the hasher. Additionally, the hashes of each +/// child node that branches off the node along the path are included. 
+#[derive(Debug)] +pub struct KeyProofTrieRoot<'a, P> { + partial_path: P, + value_digest: Option>, + children: Children>>, +} + +#[derive(Debug)] +enum KeyProofTrieNode<'a, P> { + /// Described nodes are proof nodes where we have the data necessary to + /// reconstruct the hash. The value digest may be a value or a digest. We can + /// verify the hash of these nodes using the value or digest, but may not + /// have the full value. + Described { + node: Box>, + hash: HashType, + }, + /// Remote nodes are the nodes where we only know the hash, as discovered + /// from a proof node. If we only have the child, we can't infer anything + /// else about the node. + Remote { hash: HashType }, +} + +impl<'a, P: SplitPath> KeyProofTrieRoot<'a, P> { + /// Constructs a trie root from a slice of proof nodes. + /// + /// Each node's path must be a strict prefix of the following node's path. And, + /// each child node must be referenced by its parent (i.e., the parent must + /// indicate a child at the path component leading to the child). The hash + /// is not verified here. + /// + /// # Errors + /// + /// - [`FromKeyProofError::InvalidChildPath`] if any node's path is not a strict + /// prefix of the following node's path. + /// - [`FromKeyProofError::MissingChild`] if any parent node does not reference + /// the following child node at the path component leading to the child. + pub fn new(proof: &'a T) -> Result>, FromKeyProofError> + where + T: AsRef<[N]> + ?Sized, + N: Hashable: IntoSplitPath> + 'a, + { + proof + .as_ref() + .iter() + .rev() + .try_fold(None::>, |parent, node| match parent { + None => Ok(Some(Self::new_tail_node(node))), + Some(p) => p.new_parent_node(node).map(Some), + }) + } + + /// Creates a new trie root from the tail node of a proof. 
+ fn new_tail_node(node: &'a N) -> Box + where + N: Hashable: IntoSplitPath>, + { + Box::new(Self { + partial_path: node.full_path().into_split_path(), + value_digest: node.value_digest(), + children: node + .children() + .map(|_, child| child.map(|hash| KeyProofTrieNode::Remote { hash })), + }) + } + + /// Creates a new trie root by making this node a child of the given parent. + /// + /// The parent key must be a strict prefix of this node's key, and the parent + /// must reference this node in its children by hash (the hash is not verified + /// here). + fn new_parent_node( + mut self: Box, + parent: &'a N, + ) -> Result, FromKeyProofError> + where + N: Hashable: IntoSplitPath>, + { + match parent + .full_path() + .into_split_path() + .longest_common_prefix(self.partial_path) + .split_first_parts() + { + (None, Some((pc, child_path)), parent_path) => { + let mut parent = Self::new_tail_node(parent); + if let Some(KeyProofTrieNode::Remote { hash }) = parent.children.take(pc) { + self.partial_path = child_path; + parent.partial_path = parent_path; + parent.children[pc] = Some(KeyProofTrieNode::Described { node: self, hash }); + Ok(parent) + } else { + Err(FromKeyProofError::MissingChild { + parent_path: parent.partial_path.as_component_slice().into_owned(), + child_path: self.partial_path.as_component_slice().into_owned(), + }) + } + } + _ => Err(FromKeyProofError::InvalidChildPath { + parent_path: parent.full_path().as_component_slice().into_owned(), + child_path: self.partial_path.as_component_slice().into_owned(), + }), + } + } +} + +impl<'a, P: IntoSplitPath + 'a> KeyProofTrieNode<'a, P> { + const fn hash(&self) -> &HashType { + match self { + KeyProofTrieNode::Described { hash, .. } | KeyProofTrieNode::Remote { hash } => hash, + } + } + + const fn node(&self) -> Option<&KeyProofTrieRoot<'a, P>> { + match self { + KeyProofTrieNode::Described { node, .. } => Some(node), + KeyProofTrieNode::Remote { .. 
} => None, + } + } + + const fn as_edge_state(&self) -> TrieEdgeState<'_, KeyProofTrieRoot<'a, P>> { + match self { + KeyProofTrieNode::Described { node, hash } => TrieEdgeState::LocalChild { node, hash }, + KeyProofTrieNode::Remote { hash } => TrieEdgeState::RemoteChild { hash }, + } + } +} + +impl<'a, P: SplitPath + 'a> TrieNode> for KeyProofTrieRoot<'a, P> { + type PartialPath<'b> + = P + where + Self: 'b; + + fn partial_path(&self) -> Self::PartialPath<'_> { + self.partial_path + } + + fn value(&self) -> Option<&ValueDigest<&'a [u8]>> { + self.value_digest.as_ref() + } + + fn child_hash(&self, pc: PathComponent) -> Option<&HashType> { + self.children[pc].as_ref().map(KeyProofTrieNode::hash) + } + + fn child_node(&self, pc: PathComponent) -> Option<&Self> { + self.children[pc].as_ref().and_then(KeyProofTrieNode::node) + } + + fn child_state(&self, pc: PathComponent) -> Option> { + self.children[pc] + .as_ref() + .map(KeyProofTrieNode::as_edge_state) + } +}