From 497272d4200c98f0e3520f2d7530b20557391a1b Mon Sep 17 00:00:00 2001 From: Brandon LeBlanc Date: Wed, 15 Oct 2025 17:47:37 +0000 Subject: [PATCH] feat: build trie from Proofs Key proofs are a sequence of trie nodes that follow a linear path through the trie. This means we can reconstruct a narrow view of the trie over that linear path. In this narrow view, each node can refer to another full node or just its hash. This resulting trie can (and will in an upcoming change) be hashed to generate the same root hash. While merging with the key-value tries from #1363 and #1365, we can iteratively verify the hash of each layer and detect early if any node is incomplete. The `Remote` edges can also point outside of the key range. We can use these remote edges to identify holes in our overall trie and continue synchronizing down those paths. --- firewood/src/proofs/tests.rs | 101 +++++++++++++++++ storage/src/lib.rs | 13 +-- storage/src/path/mod.rs | 4 + storage/src/tries/kvp.rs | 10 +- storage/src/tries/mod.rs | 2 + storage/src/tries/proof.rs | 203 +++++++++++++++++++++++++++++++++++ 6 files changed, 320 insertions(+), 13 deletions(-) create mode 100644 storage/src/tries/proof.rs diff --git a/firewood/src/proofs/tests.rs b/firewood/src/proofs/tests.rs index 408935f748..2217cee2a2 100644 --- a/firewood/src/proofs/tests.rs +++ b/firewood/src/proofs/tests.rs @@ -3,6 +3,10 @@ #![expect(clippy::unwrap_used, clippy::indexing_slicing)] +use firewood_storage::{ + KeyProofTrieRoot, PackedPathRef, PathComponent, TrieNode, TriePath, TriePathFromPackedBytes, + ValueDigest, +}; use integer_encoding::VarInt; use test_case::test_case; @@ -224,3 +228,100 @@ fn test_empty_proof() { Err(err) => panic!("Expected valid empty proof, got error: {err}"), } } + +#[test] +fn test_proof_trie_construction() { + let merkle = crate::merkle::tests::init_merkle((0u8..=10).map(|k| ([k], [k]))); + let proof = merkle + .range_proof(Some(&[2u8]), Some(&[8u8]), std::num::NonZeroUsize::new(5)) + .unwrap(); + 
+ let lower_trie = KeyProofTrieRoot::new(&**proof.start_proof()) + .unwrap() + .unwrap(); + let upper_trie = KeyProofTrieRoot::new(&**proof.end_proof()) + .unwrap() + .unwrap(); + + let mut iter = lower_trie.iter_path(PackedPathRef::path_from_packed_bytes(&[0x2_u8])); + let (path, edge) = iter.next().unwrap(); + assert!(path.is_empty()); + assert!(edge.is_unhashed()); + let root = edge.node().unwrap(); + + #[cfg(feature = "branch_factor_256")] + assert!(root.partial_path().is_empty()); + #[cfg(not(feature = "branch_factor_256"))] + assert!(root.partial_path().path_eq(&[PathComponent::ALL[0]])); + + assert_eq!(root.value(), None); + assert!(root.child_hash(PathComponent::ALL[2]).is_some()); + assert!(root.child_hash(PathComponent::ALL[6]).is_some()); + assert!(root.child_hash(PathComponent::ALL[10]).is_some()); + assert!(root.child_hash(PathComponent::ALL[11]).is_none()); + assert!(root.child_node(PathComponent::ALL[6]).is_none()); + assert!(root.child_node(PathComponent::ALL[10]).is_none()); + assert!(root.child_node(PathComponent::ALL[11]).is_none()); + let child = root.child_node(PathComponent::ALL[2]).unwrap(); + assert!(child.partial_path().is_empty()); + assert_eq!(child.value(), Some(&ValueDigest::Value(&[2_u8][..]))); + + let (path, edge) = iter.next().unwrap(); + #[cfg(feature = "branch_factor_256")] + assert!(path.path_eq(&[PathComponent::ALL[2]])); + #[cfg(not(feature = "branch_factor_256"))] + assert!(path.path_eq(&[PathComponent::ALL[0], PathComponent::ALL[2]])); + + assert!( + edge.is_local(), + "edge from root to child has both hash and node" + ); + let root = edge.node().unwrap(); + assert!( + std::ptr::eq(root, child), + "expected not just equal, but identical references to the same node" + ); + assert!(root.partial_path().is_empty()); + assert_eq!(root.value(), Some(&ValueDigest::Value(&[2_u8][..]))); + + let mut iter = upper_trie.iter_path(PackedPathRef::path_from_packed_bytes(&[0x6_u8])); + let (path, edge) = iter.next().unwrap(); + 
assert!(path.is_empty()); + assert!(edge.is_unhashed()); + let root = edge.node().unwrap(); + + #[cfg(feature = "branch_factor_256")] + assert!(root.partial_path().is_empty()); + #[cfg(not(feature = "branch_factor_256"))] + assert!(root.partial_path().path_eq(&[PathComponent::ALL[0]])); + + assert_eq!(root.value(), None); + assert!(root.child_hash(PathComponent::ALL[2]).is_some()); + assert!(root.child_hash(PathComponent::ALL[6]).is_some()); + assert!(root.child_hash(PathComponent::ALL[10]).is_some()); + assert!(root.child_hash(PathComponent::ALL[11]).is_none()); + assert!(root.child_node(PathComponent::ALL[2]).is_none()); + assert!(root.child_node(PathComponent::ALL[10]).is_none()); + assert!(root.child_node(PathComponent::ALL[11]).is_none()); + let child = root.child_node(PathComponent::ALL[6]).unwrap(); + assert!(child.partial_path().is_empty()); + assert_eq!(child.value(), Some(&ValueDigest::Value(&[6_u8][..]))); + + let (path, edge) = iter.next().unwrap(); + #[cfg(feature = "branch_factor_256")] + assert!(path.path_eq(&[PathComponent::ALL[6]])); + #[cfg(not(feature = "branch_factor_256"))] + assert!(path.path_eq(&[PathComponent::ALL[0], PathComponent::ALL[6]])); + + assert!( + edge.is_local(), + "edge from root to child has both hash and node" + ); + let root = edge.node().unwrap(); + assert!( + std::ptr::eq(root, child), + "expected not just equal, but identical references to the same node" + ); + assert!(root.partial_path().is_empty()); + assert_eq!(root.value(), Some(&ValueDigest::Value(&[6_u8][..]))); +} diff --git a/storage/src/lib.rs b/storage/src/lib.rs index b48a2af25f..96e49d33ac 100644 --- a/storage/src/lib.rs +++ b/storage/src/lib.rs @@ -58,15 +58,16 @@ pub use nodestore::{ NodeReader, NodeStore, Parentable, RootReader, TrieReader, }; pub use path::{ - ComponentIter, IntoSplitPath, JoinedPath, PartialPath, PathBuf, PathCommonPrefix, - PathComponent, PathComponentSliceExt, PathGuard, SplitPath, TriePath, TriePathAsPackedBytes, - 
TriePathFromPackedBytes, TriePathFromUnpackedBytes, + ComponentIter, IntoSplitPath, JoinedPath, PackedPathRef, PartialPath, PathBuf, + PathCommonPrefix, PathComponent, PathComponentSliceExt, PathGuard, SplitPath, TriePath, + TriePathAsPackedBytes, TriePathFromPackedBytes, TriePathFromUnpackedBytes, }; #[cfg(not(feature = "branch_factor_256"))] -pub use path::{PackedBytes, PackedPathComponents, PackedPathRef}; +pub use path::{PackedBytes, PackedPathComponents}; pub use tries::{ - DuplicateKeyError, HashedKeyValueTrieRoot, HashedTrieNode, IterAscending, IterDescending, - KeyValueTrieRoot, TrieEdgeIter, TrieEdgeState, TrieNode, TriePathIter, TrieValueIter, + DuplicateKeyError, FromKeyProofError, HashedKeyValueTrieRoot, HashedTrieNode, IterAscending, + IterDescending, KeyProofTrieRoot, KeyValueTrieRoot, TrieEdgeIter, TrieEdgeState, TrieNode, + TriePathIter, TrieValueIter, }; pub use u4::{TryFromIntError, U4}; diff --git a/storage/src/path/mod.rs b/storage/src/path/mod.rs index 54aad8145a..668f694d74 100644 --- a/storage/src/path/mod.rs +++ b/storage/src/path/mod.rs @@ -15,6 +15,10 @@ pub use self::joined::JoinedPath; pub use self::packed::{PackedBytes, PackedPathComponents, PackedPathRef}; pub use self::split::{IntoSplitPath, PathCommonPrefix, SplitPath}; +/// If the branch factor is 256, a packed path is just a slice of path components. +#[cfg(feature = "branch_factor_256")] +pub type PackedPathRef<'a> = &'a [PathComponent]; + /// A trie path of components with different underlying representations. /// /// The underlying representation does not need to be a contiguous array of diff --git a/storage/src/tries/kvp.rs b/storage/src/tries/kvp.rs index 956ff389aa..aae02ec029 100644 --- a/storage/src/tries/kvp.rs +++ b/storage/src/tries/kvp.rs @@ -1,16 +1,12 @@ // Copyright (C) 2025, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. 
-#[cfg(not(feature = "branch_factor_256"))] -use crate::PackedPathRef; use crate::{ - Children, HashType, Hashable, HashableShunt, HashedTrieNode, JoinedPath, PathBuf, - PathComponent, PathGuard, SplitPath, TrieNode, TriePath, TriePathFromPackedBytes, ValueDigest, + Children, HashType, Hashable, HashableShunt, HashedTrieNode, JoinedPath, PackedPathRef, + PathBuf, PathComponent, PathGuard, SplitPath, TrieNode, TriePath, TriePathFromPackedBytes, + ValueDigest, }; -#[cfg(feature = "branch_factor_256")] -type PackedPathRef<'a> = &'a [PathComponent]; - /// A duplicate key error when merging two key-value tries. #[non_exhaustive] #[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)] diff --git a/storage/src/tries/mod.rs b/storage/src/tries/mod.rs index 51934025b0..069814f78e 100644 --- a/storage/src/tries/mod.rs +++ b/storage/src/tries/mod.rs @@ -3,11 +3,13 @@ mod iter; mod kvp; +mod proof; use crate::{HashType, IntoSplitPath, PathComponent, SplitPath}; pub use self::iter::{IterAscending, IterDescending, TrieEdgeIter, TriePathIter, TrieValueIter}; pub use self::kvp::{DuplicateKeyError, HashedKeyValueTrieRoot, KeyValueTrieRoot}; +pub use self::proof::{FromKeyProofError, KeyProofTrieRoot}; /// The state of an edge from a parent node to a child node in a trie. #[derive(Debug, PartialEq, Eq, Hash)] diff --git a/storage/src/tries/proof.rs b/storage/src/tries/proof.rs new file mode 100644 index 0000000000..0b41b8dbd5 --- /dev/null +++ b/storage/src/tries/proof.rs @@ -0,0 +1,203 @@ +// Copyright (C) 2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + +use crate::{ + Children, HashType, Hashable, IntoSplitPath, PathBuf, PathComponent, SplitPath, TrieEdgeState, + TrieNode, TriePath, ValueDigest, +}; + +/// An error indicating that a slice of proof nodes is invalid. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, thiserror::Error)] +pub enum FromKeyProofError { + /// The parent node's path is not a strict prefix of the node that follows it. + #[error( + "parent node {parent_path} precedes child node {child_path} but is not a strict prefix of it", + parent_path = parent_path.display(), + child_path = child_path.display(), + )] + InvalidChildPath { + /// The path of the parent node. + parent_path: PathBuf, + /// The path of the following child node. + child_path: PathBuf, + }, + /// The parent node does not reference the child node at the path component + /// leading to the child node. + #[error( + "child node {child_path} is not reachable from parent node {parent_path}", + parent_path = parent_path.display(), + child_path = child_path.display(), + )] + MissingChild { + /// The path of the parent node. + parent_path: PathBuf, + /// The path of the following child node. + child_path: PathBuf, + }, +} + +/// A root node in a trie formed from a key proof. +/// +/// A proof trie follows a linear path from the root to a terminal node, and +/// includes the necessary information to calculate the hash of each node along +/// that path. +/// +/// In the proof, each node will include the value or value digest at that node, +/// depending on what is required by the hasher. Additionally, the hashes of each +/// child node that branches off the node along the path are included. +#[derive(Debug)] +pub struct KeyProofTrieRoot<'a, P> { + partial_path: P, + value_digest: Option>, + children: Children>>, +} + +#[derive(Debug)] +enum KeyProofTrieNode<'a, P> { + /// Described nodes are proof nodes where we have the data necessary to + /// reconstruct the hash. The value digest may be a value or a digest. We can + /// verify the hash of these nodes using the value or digest, but may not + /// have the full value. 
+ Described { + node: Box>, + hash: HashType, + }, + /// Remote nodes are the nodes where we only know the ID, as discovered + /// from a proof node. If we only have the child, we can't infer anything + /// else about the node. + Remote { hash: HashType }, +} + +impl<'a, P: SplitPath> KeyProofTrieRoot<'a, P> { + /// Constructs a trie root from a slice of proof nodes. + /// + /// Each node in the slice must be a strict prefix of the following node. And, + /// each child node must be referenced by its parent (i.e., the parent must + /// indicate a child at the path component leading to the child). The hash + /// is not verified here. + /// + /// # Errors + /// + /// - [`FromKeyProofError::InvalidChildPath`] if any node's path is not a strict + /// prefix of the following node's path. + /// - [`FromKeyProofError::MissingChild`] if any parent node does not reference + /// the following child node at the path component leading to the child. + pub fn new(proof: &'a T) -> Result>, FromKeyProofError> + where + T: AsRef<[N]> + ?Sized, + N: Hashable: IntoSplitPath> + 'a, + { + proof + .as_ref() + .iter() + .rev() + .try_fold(None::>, |parent, node| match parent { + None => Ok(Some(Self::new_tail_node(node))), + Some(p) => p.new_parent_node(node).map(Some), + }) + } + + /// Creates a new trie root from the tail node of a proof. + fn new_tail_node(node: &'a N) -> Box + where + N: Hashable: IntoSplitPath>, + { + Box::new(Self { + partial_path: node.full_path().into_split_path(), + value_digest: node.value_digest(), + children: node + .children() + .map(|_, child| child.map(|hash| KeyProofTrieNode::Remote { hash })), + }) + } + + /// Creates a new trie root by making this node a child of the given parent. + /// + /// The parent key must be a strict prefix of this node's key, and the parent + /// must reference this node in its children by hash (the hash is not verified + /// here). 
+ fn new_parent_node( + mut self: Box, + parent: &'a N, + ) -> Result, FromKeyProofError> + where + N: Hashable: IntoSplitPath>, + { + match parent + .full_path() + .into_split_path() + .longest_common_prefix(self.partial_path) + .split_first_parts() + { + (None, Some((pc, child_path)), parent_path) => { + let mut parent = Self::new_tail_node(parent); + if let Some(KeyProofTrieNode::Remote { hash }) = parent.children.take(pc) { + self.partial_path = child_path; + parent.partial_path = parent_path; + parent.children[pc] = Some(KeyProofTrieNode::Described { node: self, hash }); + Ok(parent) + } else { + Err(FromKeyProofError::MissingChild { + parent_path: parent.partial_path.as_component_slice().into_owned(), + child_path: self.partial_path.as_component_slice().into_owned(), + }) + } + } + _ => Err(FromKeyProofError::InvalidChildPath { + parent_path: parent.full_path().as_component_slice().into_owned(), + child_path: self.partial_path.as_component_slice().into_owned(), + }), + } + } +} + +impl<'a, P: IntoSplitPath + 'a> KeyProofTrieNode<'a, P> { + const fn hash(&self) -> &HashType { + match self { + KeyProofTrieNode::Described { hash, .. } | KeyProofTrieNode::Remote { hash } => hash, + } + } + + const fn node(&self) -> Option<&KeyProofTrieRoot<'a, P>> { + match self { + KeyProofTrieNode::Described { node, .. } => Some(node), + KeyProofTrieNode::Remote { .. 
} => None, + } + } + + const fn as_edge_state(&self) -> TrieEdgeState<'_, KeyProofTrieRoot<'a, P>> { + match self { + KeyProofTrieNode::Described { node, hash } => TrieEdgeState::LocalChild { node, hash }, + KeyProofTrieNode::Remote { hash } => TrieEdgeState::RemoteChild { hash }, + } + } +} + +impl<'a, P: SplitPath + 'a> TrieNode> for KeyProofTrieRoot<'a, P> { + type PartialPath<'b> + = P + where + Self: 'b; + + fn partial_path(&self) -> Self::PartialPath<'_> { + self.partial_path + } + + fn value(&self) -> Option<&ValueDigest<&'a [u8]>> { + self.value_digest.as_ref() + } + + fn child_hash(&self, pc: PathComponent) -> Option<&HashType> { + self.children[pc].as_ref().map(KeyProofTrieNode::hash) + } + + fn child_node(&self, pc: PathComponent) -> Option<&Self> { + self.children[pc].as_ref().and_then(KeyProofTrieNode::node) + } + + fn child_state(&self, pc: PathComponent) -> Option> { + self.children[pc] + .as_ref() + .map(KeyProofTrieNode::as_edge_state) + } +}