diff --git a/firewood/Cargo.toml b/firewood/Cargo.toml index cd59376710..104dbdc74e 100644 --- a/firewood/Cargo.toml +++ b/firewood/Cargo.toml @@ -36,6 +36,8 @@ bincode = "1.3.3" integer-encoding = "4.0.0" smallvec = "1.6.1" fastrace = { version = "0.7.4" } +derive_more = { version = "2.0.0", features = [ "debug" ] } +firewood-storage = { path = "../storage", version = "0.0.6" } [features] default = [] @@ -60,6 +62,7 @@ plain_hasher = "0.2.3" hex-literal = "1.0.0" env_logger = "0.11.7" hash-db = "0.16.0" +metrics-util = "0.19.0" [[bench]] name = "hashops" diff --git a/firewood/src/diff.rs b/firewood/src/diff.rs new file mode 100644 index 0000000000..d618495474 --- /dev/null +++ b/firewood/src/diff.rs @@ -0,0 +1,2193 @@ +// Copyright (C) 2023, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + +#![allow(dead_code)] +// Allow clippy warnings that are generated by the derivative crate's code generation. +// The derivative crate generates Debug implementations that trigger these warnings, +// but we can't control the generated code and these patterns are necessary for the crate to work. +#![allow(clippy::borrowed_box)] // Derivative generates &Box patterns +#![allow(clippy::needless_lifetimes)] // Derivative generates explicit lifetimes that clippy thinks are unnecessary + +use firewood_storage::{Child, FileIoError, Node, Path, SharedNode, TrieReader, logger::trace}; +use std::cmp::Ordering; +use std::fmt; + +use crate::db::BatchOp; +use crate::merkle::{Key, PrefixOverlap, Value}; +use crate::stream::{ + IterationNode, NodeStreamState, as_enumerated_children_iter, key_from_nibble_iter, +}; +use derive_more::derive::Debug; + +// State structs for different types of diff iteration nodes + +/// Two nodes that need to be compared against each other. +/// This state occurs when we have matching children in both trees that need to be processed, +/// or when we're starting the comparison at the root level with nodes from both trees. +/// +/// The "unvisited" part means that we haven't actually consumed the value +/// in a branch, or we haven't consumed the value of the leaf. +#[derive(Debug)] +struct UnvisitedNodePairState { + node_left: SharedNode, + node_right: SharedNode, +} + +/// A node that exists only in the left tree (tree1) and needs to be processed for deletion. +/// This state occurs when comparing children and the left tree has a child at a position +/// where the right tree doesn't, or when the entire left subtree needs to be deleted. +#[derive(Debug)] +struct UnvisitedNodeLeftState { + node: SharedNode, + excluded_node: Option<(SharedNode, Path)>, +} + +/// A node that exists only in the right tree (tree2) and needs to be processed for addition. +/// This state occurs when comparing children and the right tree has a child at a position +/// where the left tree doesn't, or when the entire right subtree needs to be added. +#[derive(Debug)] +struct UnvisitedNodeRightState { + node: SharedNode, + excluded_node: Option<(SharedNode, Path)>, +} + +/// Two branch nodes whose children are being compared. +/// This state occurs when we have branch nodes from both trees and we need to iterate +/// through their children to find differences between the subtrees. +#[derive(Debug)] +struct VisitedNodePairState { + #[debug("")] + children_iter_left: Box + Send>, + #[debug("")] + children_iter_right: Box + Send>, +} + +/// A branch node from the left tree only, whose children all need to be processed as deletions. +/// This state occurs when there are no remaining children on the right side to compare against, +/// or when we have a branch node that exists only in the left tree. +/// +/// If `included_node` is Some, one child that matches the included node should be compared +/// instead of deleted (used in Branch vs Leaf scenarios). +/// +/// This is always a leaf node (never a branch) because it comes from Branch vs Leaf scenarios +/// where we're traversing a branch's children but need to include any child that represents +/// the same logical key as a leaf from the other tree. +#[derive(Debug)] +struct VisitedNodeLeftState { + #[debug("")] + children_iter: Box + Send>, + /// Optional included node - if present, children matching this should be compared instead of deleted. + excluded: Option<(SharedNode, Path)>, +} + +/// A branch node from the right tree only, whose children all need to be processed as additions. +/// This state occurs when there are no remaining children on the left side to compare against, +/// or when we have a branch node that exists only in the right tree. +#[derive(Debug)] +struct VisitedNodeRightState { + #[debug("")] + children_iter: Box + Send>, + /// Optional excluded node - if present, children matching this should be compared instead of added. + excluded: Option<(SharedNode, Path)>, +} + +// Helper function for derivative to format iterators +fn fmt_as_iterator(_: &T, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("\"\"") +} + +trait StateVisitor { + fn visit( + self, + key: &Path, + iter_stack: &mut IterStack, + readers: (&L, &R), + ) -> Result<(), FileIoError>; +} + +trait UnvisitedStateVisitor { + fn visit( + self, + key: &Path, + iter_stack: &mut IterStack, + ) -> Result>, FileIoError>; +} + +/// Trait for processing out-of-sync states that need separate stream processing +trait OutOfSyncStateVisitor { + fn visit( + self, + key: &Path, + iter_stack: &mut IterStack, + readers: (&L, &R), + ) -> Result>, FileIoError>; +} + +#[derive(Debug, Default)] +struct IterStack { + stack: Vec, +} + +impl IterStack { + fn push(&mut self, node: DiffIterationNode) { + trace!( + "pushing new state {{ key: {:x?}, state: {:?} }}", + node.key, node.state + ); + if node.key.starts_with(&[0, 7, 7]) { + trace!("found key {:x?}", node.key); + } + self.stack.push(node); + } +} + +impl std::ops::Deref for IterStack { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.stack + } +} + +impl std::ops::DerefMut for IterStack { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.stack + } +} + +// This is the initial state for comparing two nodes. They are unvisited. +// For branches, this means we haven't looked at the k/v pair in the branch +// nor have we looked at any of the children. +// For leaves, this means we haven't looked at the k/v pair in the leaf. + +// There are 4 possibilities: +// 1. Both nodes are branches: +// . Process the branch k/v, then transition to VisitedPairState +// 2. Both nodes are leaves: +// . Process the leaf k/v, then pop the stack +// 3. Left node is a branch, right node is a leaf: +// . Compare the branch k/v with the leaf k/v. If they have the same +// . partial path, then everything except for this leaf is deleted, +// . and we transition to VisitedLeftState with the leaf excluded +// 4. Left node is a leaf, right node is a branch: +// . Compare the leaf k/v with the branch k/v. If they have the same +// . partial path, then everything except for this leaf is added, +// . and we transition to VisitedRightState with the leaf excluded +#[allow(clippy::too_many_lines)] // for now +impl UnvisitedStateVisitor for UnvisitedNodePairState { + fn visit( + self, + key: &Path, + iter_stack: &mut IterStack, + ) -> Result>, FileIoError> { + + trace!( + "visiting Unvisited node pair at {key:x?} (depth: {})", + iter_stack.len() + ); + + match (&*self.node_left, &*self.node_right) { + (Node::Branch(branch_left), Node::Branch(branch_right)) => { + // Possibilities are: + // (A) The right partial path extends the left partial path + // (B) The left partial path extends the right partial path + // (C) Neither extend each other, but left < right + // (D) Neither extend each other, but left > right + // (E) The partial paths are the same + // + // Case (A): + // x and y (and any value in branch1) have been deleted + // + // left right + // ... ... + // | | + // branch1 branch2 + // / | \ ... + // x y branch2 + // ... + // If branch2 is not present on the left, it must be marked added + // at the appropriate point while traversing left + // + // Case (B) + // x and y (and any value in branch1) have been added + // + // left right + // ... ... + // | | + // branch2 branch1 + // ... / | \ + // x y branch2 + // ... + // + // If branch2 is not present on the right, it must be marked deleted + // at the appropriate point while traversing right. + // + // In both (C) and (D), all the keys on either side do not overlap, but since + // we must iterate in order: + // Case (C) deletes all the keys on the left then adds all the keys on the right + // If present, a value in the left branch was deleted, and added for the right branch + // Case (D) adds all the keys on the right then deletes all the keys on the left + // If present, a value in the right branch was added, and deleted for the right branch + // + if branch_left.partial_path != branch_right.partial_path { + // Cases (A)-(D) + trace!( + "Partial paths differ at key {:x?}: left={:x?}, right={:x?}", + key, branch_left.partial_path, branch_right.partial_path + ); + let overlap = PrefixOverlap::new( + &branch_left.partial_path.0, + &branch_right.partial_path.0, + ); + if overlap.unique_a.is_empty() { + // Case (A) + todo!() + //return Ok(None); + } + if overlap.unique_b.is_empty() { + // case (B) + todo!() + // return Ok(None); + } + if branch_left.partial_path < branch_right.partial_path { + // case (C) + // Add everything on the right + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: Box::new(as_enumerated_children_iter(branch_right)), + excluded: None, + }), + }); + + // Delete everything on the left + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: Box::new(as_enumerated_children_iter(branch_left)), + excluded: None, + }), + }); + // If left had a value, it's been deleted, and it's first + if let Some(_value) = &branch_left.value { + return Ok(Some(BatchOp::Delete { + key: key_from_nibble_iter(key.iter().copied()), + })); + } + // no value, so we'll get the next one from the deleted items on the left + return Ok(None); + } + // case (D) + return Ok(None); + } + + // Case (E) -- Partial paths match, proceed with normal comparison + // Compare values first + let value_diff = match (&branch_left.value, &branch_right.value) { + (Some(v_left), Some(v_right)) if v_left == v_right => None, + (Some(_), None) => Some(BatchOp::Delete { + key: key_from_nibble_iter(key.iter().copied()), + }), + (_, Some(v_right)) => Some(BatchOp::Put { + key: key_from_nibble_iter(key.iter().copied()), + value: v_right.to_vec(), + }), + (None, None) => None, + }; + + // Set up to compare children + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedPair(VisitedNodePairState { + children_iter_left: Box::new(as_enumerated_children_iter(branch_left)), + children_iter_right: Box::new(as_enumerated_children_iter(branch_right)), + }), + }); + + Ok(value_diff) + } + (Node::Branch(branch), Node::Leaf(leaf)) => { + // Branch is from left tree, leaf is from right tree + // This means all (except possibly one) of the k/v pairs + // on this branch have been deleted. + + // This leaf, however, is special. If it matches some key in the branch, + // and contains the same value as the leaf, then no diff is needed. + // If it is found, but doesn't have the same value, we generate a put + // operation with the leaf's value. We save this leaf into the + // VisitedNodeLeftState structure and follow the logic there. + + // rare case: the branch itself represents the same logical key as the leaf + if branch.partial_path == leaf.partial_path { + trace!("branch vs leaf, same path {key:x?}"); + // continue with all remaining children as deletions, with no + // special handling for this leaf, since it's value is in the branch + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: Box::new(as_enumerated_children_iter(branch)), + excluded: None, + }), + }); + return if let Some(branch_val) = &branch.value { + // If the branch value also matches, we can just forget about this leaf + if branch_val == &leaf.value { + Ok(None) + } else { + // the branch has a different value, so we need to generate a put + // operation for the leaf + Ok(Some(BatchOp::Put { + key: key_from_nibble_iter(key.iter().copied()), + value: leaf.value.to_vec(), + })) + } + } else { + // the branch has no value, so the leaf is new, and we need to process the + // remaining items on the branch as deletes + Ok(Some(BatchOp::Put { + key: key_from_nibble_iter(key.iter().copied()), + value: leaf.value.to_vec(), + })) + }; + } + // TODO: maybe the leaf is down in the branch somewhere. If we never find it, + // then it's brand new data + + // Set up to traverse branch children, but exclude any that are equivalent to the leaf + // key already has the branch's partial path added, we need to remove that before chaining the child + let leaf_key = Path::from_nibbles_iterator( + key.iter() + .take(key.len().saturating_sub(branch.partial_path.len())) + .copied() + .chain(leaf.partial_path.iter().copied()), + ); + // let leaf_key = key.clone(); + trace!("key is {key:x?}"); + trace!("branch is {branch:?}"); + trace!("leaf is {leaf:?}"); + trace!("excluding leaf {leaf_key:x?} (for inserts)"); + trace!("switching to VisitedNodeLeftState"); + + let din = DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: Box::new(as_enumerated_children_iter(branch)), + excluded: Some((self.node_right.clone(), leaf_key)), + }), + }; + trace!("pushing {din:?}"); + iter_stack.push(din); + + // if the branch had a value, it's been deleted + if let Some(_branch_val) = &branch.value { + Ok(Some(BatchOp::Delete { + key: key_from_nibble_iter(key.iter().copied()), + })) + } else { + Ok(None) + } + } + (Node::Leaf(leaf), Node::Branch(branch)) => { + // Leaf vs Branch - save the leaf and traverse branch children with exclusion + // Leaf is from left tree, branch is from right tree + + // Compare the leaf's value with the branch's own value + // Only generate operations for actual value differences, not structural changes + let value_diff = match &branch.value { + Some(branch_val) if branch_val == &leaf.value => None, // Same value, no operation needed + Some(different_val) => Some(BatchOp::Put { + // Branch has different value, update it + key: key_from_nibble_iter(key.iter().copied()), + value: different_val.to_vec(), + }), + None => { + // Branch has no value - this might be a structural optimization + // The leaf might be represented by one of the branch's children + // Don't generate a delete operation unless we're sure the leaf doesn't exist + None + } + }; + + // Set up to traverse branch children, but exclude any that are equivalent to the leaf + // "key" already includes the full path to the leaf + let leaf_key = key.clone(); + trace!("key is {key:x?}"); + trace!("branch is {branch:?}"); + trace!("leaf is {leaf:?}"); + trace!("excluding leaf {leaf_key:x?} (for deletes)"); + + // remove the leaf's partial path, and add the branch's partial path + let branch_key = Path::from_nibbles_iterator( + key.iter() + .take(key.len().saturating_sub(leaf.partial_path.len())) + .copied() + .chain(branch.partial_path.iter().copied()), + ); + iter_stack.push(DiffIterationNode { + key: branch_key, + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: Box::new(as_enumerated_children_iter(branch)), + excluded: Some((self.node_left.clone(), leaf_key)), + }), + }); + + Ok(value_diff) + } + (Node::Leaf(leaf1), Node::Leaf(leaf2)) => { + match leaf1.partial_path.cmp(&leaf2.partial_path) { + Ordering::Less => { + // left right + // A B + // A was deleted, B was added, in that order + trace!( + "Left leaf is prefix of right leaf at key {:x?}: left={:x?}, right={:x?}", + key, leaf1.partial_path, leaf2.partial_path + ); + // push right leaf to process as add later + + // adjust the key to include left1.partial_path + Ok(Some(BatchOp::Delete { + key: key_from_nibble_iter( + (key.iter().copied()).chain(leaf1.partial_path.iter().copied()), + ), + })) + } + Ordering::Greater => { + // Right leaf has shorter partial path - it's a prefix of left leaf + // This means the right leaf represents a key that is a prefix of the left leaf's key + // We need to handle this by treating it as an out-of-sync state + trace!( + "Right leaf is prefix of left leaf at key {:x?}: left={:x?}, right={:x?}", + key, leaf1.partial_path, leaf2.partial_path + ); + todo!() + } + Ordering::Equal => { + // Partial paths are equal - this is the normal case + if leaf1.value == leaf2.value { + Ok(None) + } else { + // The key already includes the complete path including partial paths + Ok(Some(BatchOp::Put { + key: key_from_nibble_iter(key.iter().copied()), + value: leaf2.value.to_vec(), + })) + } + } + } + } + } + } +} + +impl UnvisitedStateVisitor for UnvisitedNodeLeftState { + fn visit( + self, + key: &Path, + iter_stack: &mut IterStack, + ) -> Result>, FileIoError> { + let final_key = key_from_nibble_iter(key.iter().copied()); + + // Node exists only in tree1 - mark for deletion + match &*self.node { + Node::Branch(branch) => { + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: Box::new(as_enumerated_children_iter(branch)), + excluded: self.excluded_node, + }), + }); + + if branch.value.is_some() { + // The key already includes the complete path including partial paths + Ok(Some(BatchOp::Delete { key: final_key })) + } else { + Ok(None) + } + } + Node::Leaf(_leaf) => { + // The key already includes the complete path including partial paths + Ok(Some(BatchOp::Delete { key: final_key })) + } + } + } +} + +impl UnvisitedStateVisitor for UnvisitedNodeRightState { + fn visit( + self, + key: &Path, + iter_stack: &mut IterStack, + ) -> Result>, FileIoError> { + let current_key = Path::from_nibbles_iterator(key.iter().copied()); + + // Node exists only in tree2 - mark for addition + match &*self.node { + Node::Branch(branch) => { + iter_stack.push(DiffIterationNode { + key: current_key.clone(), + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: Box::new(as_enumerated_children_iter(branch)), + excluded: self.excluded_node, + }), + }); + + if let Some(value) = &branch.value { + // The key already includes the complete path including partial paths + Ok(Some(BatchOp::Put { + key: key_from_nibble_iter(current_key.iter().copied()), + value: value.to_vec(), + })) + } else { + Ok(None) + } + } + Node::Leaf(leaf) => { + // The key already includes the complete path including partial paths + Ok(Some(BatchOp::Put { + key: key_from_nibble_iter(current_key.iter().copied()), + value: leaf.value.to_vec(), + })) + } + } + } +} + +impl StateVisitor for VisitedNodeLeftState { + fn visit( + mut self, + key: &Path, + iter_stack: &mut IterStack, + readers: (&L, &R), + ) -> Result<(), FileIoError> { + if let Some((pos, child)) = self.children_iter.next() { + let node = match child { + Child::AddressWithHash(addr, _) => readers.0.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_key = Path::from_nibbles_iterator( + key.iter() + .copied() + .chain(std::iter::once(pos)) + .chain(node.partial_path().iter().copied()), + ); + + if let Some((excluded_node, excluded_path)) = self.excluded.take() { + trace!("child_key is {child_key:x?}"); + trace!("LS checking....{child_key:x?} is excluded..."); + if excluded_path == child_key { + trace!("yes"); + // Found the excluded child + + // resume after the excluded child + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: self.children_iter, + excluded: None, // Already taken + }), + }); + + // Compare this child instead of deleting (using moved excluded_node) + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedPair(UnvisitedNodePairState { + node_left: node, + node_right: excluded_node, // Moved, no clone needed + }), + }); + } else { + // Not the excluded child - put it back and continue looking + trace!("no"); + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: self.children_iter, + excluded: Some((excluded_node.clone(), excluded_path.clone())), // Put it back + }), + }); + + // Delete this child + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedLeft(UnvisitedNodeLeftState { + node, + excluded_node: Some((excluded_node, excluded_path)), + }), + }); + } + } else { + // No exclusion - delete all children + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: self.children_iter, + excluded: None, + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedLeft(UnvisitedNodeLeftState { + node, + excluded_node: None, + }), + }); + } + } + Ok(()) + } +} + +#[allow(clippy::pedantic, clippy::arithmetic_side_effects)] +impl StateVisitor for VisitedNodePairState { + fn visit( + mut self, + key: &Path, + iter_stack: &mut IterStack, + readers: (&L, &R), + ) -> Result<(), FileIoError> { + // Compare children from both trees + let child_left_opt = self.children_iter_left.next(); + let child_right_opt = self.children_iter_right.next(); + + match (child_left_opt, child_right_opt) { + (Some((pos_left, child_left)), Some((pos_right, child_right))) => { + match pos_left.cmp(&pos_right) { + Ordering::Equal => { + // Same position - check if subtrees are identical + if DiffMerkleNodeStream::::child_identical(&child_left, &child_right) + { + // Identical subtrees, skip them and continue with remaining children + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedPair(VisitedNodePairState { + children_iter_left: self.children_iter_left, + children_iter_right: self.children_iter_right, + }), + }); + return Ok(()); + } + + // Different subtrees, need to compare them + let node_left = match child_left { + Child::AddressWithHash(addr, _) => readers.0.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let node_right = match child_right { + Child::AddressWithHash(addr, _) => readers.1.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_key: Path = { + let nibbles: Vec = key + .iter() + .copied() + .chain(node_left.partial_path().iter().copied()) + .collect(); + Path::from(nibbles.as_slice()) + }; + + // Continue with remaining children + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedPair(VisitedNodePairState { + children_iter_left: self.children_iter_left, + children_iter_right: self.children_iter_right, + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedPair(UnvisitedNodePairState { + node_left, + node_right, + }), + }); + } + Ordering::Less => { + // pos_left < pos_right: child exists in tree_left but not tree_right + let node_left = match child_left { + Child::AddressWithHash(addr, _) => readers.0.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_key: Path = { + let nibbles: Vec = key + .iter() + .copied() + .chain(std::iter::once(pos_left)) + .chain(node_left.partial_path().iter().copied()) + .collect(); + Path::from(nibbles.as_slice()) + }; + + // Put back child_right for next iteration + let new_iter_right = std::iter::once((pos_right, child_right)) + .chain(self.children_iter_right); + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedPair(VisitedNodePairState { + children_iter_left: self.children_iter_left, + children_iter_right: Box::new(new_iter_right), + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedLeft(UnvisitedNodeLeftState { + node: node_left, + excluded_node: None, + }), + }); + } + Ordering::Greater => { + // pos_left > pos_right: child exists in tree_right but not tree_left + let node_right = match child_right { + Child::AddressWithHash(addr, _) => readers.1.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_key: Path = { + let nibbles: Vec = key + .iter() + .copied() + .chain(std::iter::once(pos_right)) + .chain(node_right.partial_path().iter().copied()) + .collect(); + Path::from(nibbles.as_slice()) + }; + + // Put back child_left for next iteration + let new_iter_left = + std::iter::once((pos_left, child_left)).chain(self.children_iter_left); + iter_stack.push(DiffIterationNode { + key: key.get(..key.len() - 1).unwrap_or_default().into(), + state: DiffIterationNodeState::VisitedPair(VisitedNodePairState { + children_iter_left: Box::new(new_iter_left), + children_iter_right: self.children_iter_right, + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedRight( + UnvisitedNodeRightState { + node: node_right, + excluded_node: None, + }, + ), + }); + } + } + } + (Some((pos_left, child_left)), None) => { + // Only tree_left has remaining children + let node_left = match child_left { + Child::AddressWithHash(addr, _) => readers.0.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_key: Path = { + let nibbles: Vec = key + .iter() + .copied() + .chain(std::iter::once(pos_left)) + .chain(node_left.partial_path().iter().copied()) + .collect(); + Path::from(nibbles.as_slice()) + }; + + // Continue with remaining children from tree_left + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter: self.children_iter_left, + excluded: None, + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedLeft(UnvisitedNodeLeftState { + node: node_left, + excluded_node: None, + }), + }); + } + (None, Some((pos_right, child_right))) => { + // Only tree_right has remaining children + let node_right = match child_right { + Child::AddressWithHash(addr, _) => readers.1.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_partial_path = node_right.partial_path().iter().copied(); + let child_key: Path = { + let nibbles: Vec = key + .iter() + .copied() + .chain(std::iter::once(pos_right)) + .chain(child_partial_path) + .collect(); + Path::from(nibbles.as_slice()) + }; + + // Continue with remaining children from tree_right + iter_stack.push(DiffIterationNode { + key: key.clone(), + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: self.children_iter_right, + excluded: None, + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedRight(UnvisitedNodeRightState { + node: node_right, + excluded_node: None, + }), + }); + } + (None, None) => { + // No more children in either tree, continue + } + } + Ok(()) + } +} + +impl StateVisitor for VisitedNodeRightState { + fn visit( + mut self, + key: &Path, + iter_stack: &mut IterStack, + readers: (&L, &R), + ) -> Result<(), FileIoError> { + if let Some((pos, child)) = self.children_iter.next() { + let node = match child { + Child::AddressWithHash(addr, _) => readers.1.read_node(addr)?, + Child::Node(node) => node.clone().into(), + }; + + let child_partial_path = node.partial_path().iter().copied(); + let child_key = Path::from_nibbles_iterator( + key.iter() + .copied() + .chain(std::iter::once(pos)) + .chain(child_partial_path), + ); + let branch_key = + Path::from_nibbles_iterator(key.iter().copied().chain(std::iter::once(pos))); + + // Check if this child should be compared instead of added + if let Some((excluded_node, excluded_path)) = self.excluded.take() { + trace!("RS checking....{child_key:x?} is excluded..."); + if excluded_path == child_key { + // Found the excluded child - compare instead of adding + trace!("yes"); + iter_stack.push(DiffIterationNode { + key: branch_key, + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: self.children_iter, + excluded: None, + }), + }); + if excluded_node.value() != node.value() { + // all this work, and the value was different anyway, so we still need to add it + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedRight( + UnvisitedNodeRightState { + node, + excluded_node: None, + }, + ), + }); + } + } else { + // Not the excluded child - add it + trace!("no"); + iter_stack.push(DiffIterationNode { + key: branch_key, + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: self.children_iter, + excluded: Some((excluded_node, excluded_path)), + }), + }); + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedRight(UnvisitedNodeRightState { + node, + excluded_node: None, + }), + }); + } + } else { + // No inclusion - add all children + iter_stack.push(DiffIterationNode { + key: branch_key, + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter: self.children_iter, + excluded: None, + }), + }); + + iter_stack.push(DiffIterationNode { + key: child_key, + state: DiffIterationNodeState::UnvisitedRight(UnvisitedNodeRightState { + node, + excluded_node: None, + }), + }); + } + } + Ok(()) + } +} + +/// Enum containing all possible states for a diff iteration node +#[derive(Debug)] +enum DiffIterationNodeState { + /// Two unvisited nodes that should be compared + UnvisitedPair(UnvisitedNodePairState), + /// A node that exists only in tree1 (needs to be deleted) + UnvisitedLeft(UnvisitedNodeLeftState), + /// A node that exists only in tree2 (needs to be added) + UnvisitedRight(UnvisitedNodeRightState), + /// A pair of visited branch nodes - track which children to compare next + VisitedPair(VisitedNodePairState), + /// A visited branch node from tree1 only (may have exclusions) + VisitedLeft(VisitedNodeLeftState), + /// A visited branch node from tree2 only (may have exclusions) + VisitedRight(VisitedNodeRightState), +} + +/// Iteration node that tracks state for both trees simultaneously +#[derive(Debug)] +struct DiffIterationNode { + key: Path, + state: DiffIterationNodeState, +} + +impl DiffIterationNode { + /// Convert an `IterationNode` to a `DiffIterationNode` for left tree operations (deletions). + fn from_left(node: IterationNode) -> Self { + match node { + IterationNode::Unvisited { key, node } => { + let path = Path::from(key.as_ref()); + DiffIterationNode { + key: path, + state: DiffIterationNodeState::UnvisitedLeft(UnvisitedNodeLeftState { + node, + excluded_node: None, + }), + } + } + IterationNode::Visited { key, children_iter } => { + let path = Path::from(key.as_ref()); + DiffIterationNode { + key: path, + state: DiffIterationNodeState::VisitedLeft(VisitedNodeLeftState { + children_iter, + excluded: None, + }), + } + } + } + } + + /// Convert an `IterationNode` to a `DiffIterationNode` for right tree operations (additions). + fn from_right(node: IterationNode) -> Self { + match node { + IterationNode::Unvisited { key, node } => { + let path = Path::from(key.as_ref()); + DiffIterationNode { + key: path, + state: DiffIterationNodeState::UnvisitedRight(UnvisitedNodeRightState { + node, + excluded_node: None, + }), + } + } + IterationNode::Visited { key, children_iter } => { + let path = Path::from(key.as_ref()); + DiffIterationNode { + key: path, + state: DiffIterationNodeState::VisitedRight(VisitedNodeRightState { + children_iter, + excluded: None, + }), + } + } + } + } +} + +/// State for the diff iterator that tracks lazy initialization +#[derive(Debug)] +enum DiffNodeStreamState { + /// The iterator state is lazily initialized when `next_internal` is called + /// for the first time. The iteration start key is stored here. + StartFromKey(Key), + /// The iterator is actively iterating over nodes + Iterating { iter_stack: IterStack }, +} + +impl From for DiffNodeStreamState { + fn from(key: Key) -> Self { + Self::StartFromKey(key) + } +} + +/// Optimized node iterator that compares two merkle trees and skips matching subtrees +#[derive(Debug)] +struct DiffMerkleNodeStream<'a, T: TrieReader, U: TrieReader> { + tree_left: &'a T, + tree_right: &'a U, + state: DiffNodeStreamState, +} + +impl<'a, T: TrieReader, U: TrieReader> DiffMerkleNodeStream<'a, T, U> { + fn new(tree_left: &'a T, tree_right: &'a U, start_key: Key) -> Self { + Self { + tree_left, + tree_right, + state: DiffNodeStreamState::from(start_key), + } + } + + /// Check if two children have the same hash or refer to the same node. + /// + /// This is used to determine if two subtrees are identical. + /// + /// For mutable trees, we can compare node addresses to detect identical subtrees + /// This works because identical subtrees will share the same underlying Node reference + /// For immutable trees, we can't know if they are the same, assume not + fn child_identical(child_left: &Child, child_right: &Child) -> bool { + match (child_left, child_right) { + (Child::AddressWithHash(_, hash_left), Child::AddressWithHash(_, hash_right)) => { + hash_left == hash_right + } + // At least one of the children is unhashed, so we can't know if they are the same, assume not + _ => false, + } + } + + /// Returns the initial state for a diff iterator over the given trees which starts at `key`. + fn get_diff_iterator_initial_state( + tree_left: &T, + tree_right: &U, + key: &[u8], + ) -> Result { + let root_left_opt = tree_left.root_node(); + let root_right_opt = tree_right.root_node(); + + match (root_left_opt, root_right_opt) { + (None, None) => { + // Both trees are empty + Ok(DiffNodeStreamState::Iterating { + iter_stack: IterStack::default(), + }) + } + (Some(_root_left), None) => { + // Only tree_left has content - use single tree traversal for deletions + let initial_state = NodeStreamState::get_iterator_initial_state(tree_left, key)?; + let iter_stack = match initial_state { + NodeStreamState::StartFromKey(_) => vec![], // Should not happen + NodeStreamState::Iterating { iter_stack } => { + // Convert IterationNode to DiffIterationNode for left tree operations + iter_stack + .into_iter() + .map(DiffIterationNode::from_left) + .collect() + } + }; + Ok(DiffNodeStreamState::Iterating { + iter_stack: IterStack { stack: iter_stack }, + }) + } + (None, Some(_root_right)) => { + // Only tree_right has content - use single tree traversal for additions + let initial_state = NodeStreamState::get_iterator_initial_state(tree_right, key)?; + let iter_stack = match initial_state { + NodeStreamState::StartFromKey(_) => vec![], // Should not happen + NodeStreamState::Iterating { iter_stack } => { + // Convert IterationNode to DiffIterationNode for right tree operations + iter_stack + .into_iter() + .map(DiffIterationNode::from_right) + .collect() + } + }; + Ok(DiffNodeStreamState::Iterating { + iter_stack: IterStack { stack: iter_stack }, + }) + } + (Some(root_left), Some(root_right)) => { + // Both trees have content - need to compare them + // Start with an empty path and let the node comparison logic handle partial paths + // This ensures we don't double-add partial paths from different roots + let root_key: Path = Path::from_nibbles_iterator(std::iter::empty()); + Ok(DiffNodeStreamState::Iterating { + iter_stack: IterStack { + stack: vec![DiffIterationNode { + key: root_key, + state: DiffIterationNodeState::UnvisitedPair(UnvisitedNodePairState { + node_left: root_left, + node_right: root_right, + }), + }], + }, + }) + } + } + } + + fn next_internal( + &mut self, + ) -> Option, firewood_storage::FileIoError>> { + // Handle lazy initialization + let iter_stack = match &mut self.state { + DiffNodeStreamState::StartFromKey(key) => { + match Self::get_diff_iterator_initial_state(self.tree_left, self.tree_right, key) { + Ok(new_state) => { + self.state = new_state; + return self.next_internal(); + } + Err(e) => return Some(Err(e)), + } + } + DiffNodeStreamState::Iterating { iter_stack } => iter_stack, + }; + + // We remove the most recent DiffIterationNode, but in some cases we will push it back onto the stack. + while let Some(iter_node) = iter_stack.pop() { + match iter_node.state { + DiffIterationNodeState::UnvisitedPair(state) => { + match state.visit(&iter_node.key, iter_stack) { + Ok(Some(result)) => return Some(Ok(result)), + Ok(None) => {} + Err(e) => return Some(Err(e)), + } + } + DiffIterationNodeState::UnvisitedLeft(state) => { + match state.visit(&iter_node.key, iter_stack) { + Ok(Some(result)) => return Some(Ok(result)), + Ok(None) => {} + Err(e) => return Some(Err(e)), + } + } + DiffIterationNodeState::UnvisitedRight(state) => { + match state.visit(&iter_node.key, iter_stack) { + Ok(Some(result)) => return Some(Ok(result)), + Ok(None) => {} + Err(e) => return Some(Err(e)), + } + } + DiffIterationNodeState::VisitedPair(state) => { + if let Err(e) = state.visit::( + &iter_node.key, + iter_stack, + (self.tree_left, self.tree_right), + ) { + return Some(Err(e)); + } + } + DiffIterationNodeState::VisitedLeft(state) => { + if let Err(e) = state.visit::( + &iter_node.key, + iter_stack, + (self.tree_left, self.tree_right), + ) { + return Some(Err(e)); + } + } + DiffIterationNodeState::VisitedRight(state) => { + if let Err(e) = state.visit::( + &iter_node.key, + iter_stack, + (self.tree_left, self.tree_right), + ) { + return Some(Err(e)); + } + } + } + } + None + } +} + +impl Iterator for DiffMerkleNodeStream<'_, T, U> { + type Item = Result, firewood_storage::FileIoError>; + + fn next(&mut self) -> Option { + self.next_internal() + } +} + +/// Optimized diff stream that uses `DiffMerkleNodeStream` for hash-based optimizations +#[derive(derive_more::Debug)] +struct DiffMerkleKeyValueStreams<'a, T: TrieReader, U: TrieReader> { + #[debug("")] + node_stream: DiffMerkleNodeStream<'a, T, U>, +} + +impl<'a, T: TrieReader, U: TrieReader> DiffMerkleKeyValueStreams<'a, T, U> { + fn new(tree_left: &'a T, tree_right: &'a U, start_key: Key) -> Self { + Self { + node_stream: DiffMerkleNodeStream::new(tree_left, tree_right, start_key), + } + } +} + +impl Iterator for DiffMerkleKeyValueStreams<'_, T, U> { + type Item = Result, firewood_storage::FileIoError>; + + fn next(&mut self) -> Option { + self.node_stream.next_internal() + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use crate::merkle::Merkle; + + use super::*; + use env_logger::WriteStyle; + use firewood_storage::{ImmutableProposal, MemStore, MutableProposal, NodeStore}; + use std::sync::Arc; + use test_case::test_case; + + fn diff_merkle_iterator<'a, T, U>( + tree_left: &'a Merkle, + tree_right: &'a Merkle, + start_key: Key, + ) -> DiffMerkleNodeStream<'a, T, U> + where + T: firewood_storage::TrieReader, + U: firewood_storage::TrieReader, + { + DiffMerkleNodeStream::new(tree_left.nodestore(), tree_right.nodestore(), start_key) + } + + fn create_test_merkle() -> Merkle> { + let memstore = MemStore::new(vec![]); + let nodestore = NodeStore::new_empty_proposal(Arc::new(memstore)); + Merkle::from(nodestore) + } + + fn populate_merkle( + mut merkle: Merkle>, + items: &[(&[u8], &[u8])], + ) -> Merkle, MemStore>> { + for (key, value) in items { + merkle + .insert(key, value.to_vec().into_boxed_slice()) + .unwrap(); + } + merkle.try_into().unwrap() + } + + fn make_immutable( + merkle: Merkle>, + ) -> Merkle, MemStore>> { + merkle.try_into().unwrap() + } + + #[test] + fn test_diff_empty_mutable_trees() { + // This is unlikely to happen in practice, but it helps cover the case where + // hashes do not exist yet. + let m1 = create_test_merkle(); + let m2 = create_test_merkle(); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_empty_trees() { + let m1 = make_immutable(create_test_merkle()); + let m2 = make_immutable(create_test_merkle()); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_identical_trees() { + let items = [ + (b"key1".as_slice(), b"value1".as_slice()), + (b"key2".as_slice(), b"value2".as_slice()), + (b"key3".as_slice(), b"value3".as_slice()), + ]; + + let m1 = populate_merkle(create_test_merkle(), &items); + let m2 = populate_merkle(create_test_merkle(), &items); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_additions_only() { + let items = [ + (b"key1".as_slice(), b"value1".as_slice()), + (b"key2".as_slice(), b"value2".as_slice()), + ]; + + let m1 = make_immutable(create_test_merkle()); + let m2 = populate_merkle(create_test_merkle(), &items); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + + let op1 = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op1, BatchOp::Put { key, value } if key == Box::from(b"key1".as_slice()) && value == b"value1") + ); + + let op2 = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op2, BatchOp::Put { key, value } if key == Box::from(b"key2".as_slice()) && value == b"value2") + ); + + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_deletions_only() { + let items = [ + (b"key1".as_slice(), b"value1".as_slice()), + (b"key2".as_slice(), b"value2".as_slice()), + ]; + + let m1 = populate_merkle(create_test_merkle(), &items); + let m2 = make_immutable(create_test_merkle()); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + + let op1 = diff_iter.next().unwrap().unwrap(); + assert!(matches!(op1, BatchOp::Delete { key } if key == Box::from(b"key1".as_slice()))); + + let op2 = diff_iter.next().unwrap().unwrap(); + assert!(matches!(op2, BatchOp::Delete { key } if key == Box::from(b"key2".as_slice()))); + + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_modifications() { + let m1 = populate_merkle(create_test_merkle(), &[(b"key1", b"old_value")]); + let m2 = populate_merkle(create_test_merkle(), &[(b"key1", b"new_value")]); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + + let op = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op, BatchOp::Put { key, value } if key == Box::from(b"key1".as_slice()) && value == b"new_value") + ); + + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_mixed_operations() { + // m1 has: key1=value1, key2=old_value, key3=value3 + // m2 has: key2=new_value, key4=value4 + // Expected: Delete key1, Put key2=new_value, Delete key3, Put key4=value4 + + let m1 = populate_merkle( + create_test_merkle(), + &[ + (b"key1", b"value1"), + (b"key2", b"old_value"), + (b"key3", b"value3"), + ], + ); + + let m2 = populate_merkle( + create_test_merkle(), + &[(b"key2", b"new_value"), (b"key4", b"value4")], + ); + + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + + let op1 = diff_iter.next().unwrap().unwrap(); + assert!(matches!(op1, BatchOp::Delete { key } if key == Box::from(b"key1".as_slice()))); + + let op2 = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op2, BatchOp::Put { key, value } if key == Box::from(b"key2".as_slice()) && value == b"new_value") + ); + + let op3 = diff_iter.next().unwrap().unwrap(); + assert!(matches!(op3, BatchOp::Delete { key } if key == Box::from(b"key3".as_slice()))); + + let op4 = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op4, BatchOp::Put { key, value } if key == Box::from(b"key4".as_slice()) && value == b"value4") + ); + + assert!(diff_iter.next().is_none()); + } + + #[test] + fn test_diff_with_start_key() { + let m1 = populate_merkle( + create_test_merkle(), + &[ + (b"aaa", b"value1"), + (b"bbb", b"value2"), + (b"ccc", b"value3"), + ], + ); + + let m2 = populate_merkle( + create_test_merkle(), + &[ + (b"aaa", b"value1"), // Same + (b"bbb", b"modified"), // Modified + (b"ddd", b"value4"), // Added + ], + ); + + // Start from key "bbb" - should skip "aaa" + let mut diff_iter = diff_merkle_iterator(&m1, &m2, Box::from(b"bbb".as_slice())); + + let op1 = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op1, BatchOp::Put { ref key, ref value } if **key == *b"bbb" && **value == *b"modified"), + "Expected first operation to be Put bbb=modified, got: {op1:?}", + ); + + let op2 = diff_iter.next().unwrap().unwrap(); + assert!(matches!(op2, BatchOp::Delete { key } if key == Box::from(b"ccc".as_slice()))); + + let op3 = diff_iter.next().unwrap().unwrap(); + assert!( + matches!(op3, BatchOp::Put { key, value } if key == Box::from(b"ddd".as_slice()) && value == b"value4") + ); + + assert!(diff_iter.next().is_none()); + } + + #[test] + #[allow(clippy::indexing_slicing)] + fn test_diff_interleaved_keys() { + // m1: a, c, e + // m2: b, c, d, f + // Expected: Delete a, Put b, Put d, Delete e, Put f + + let m1 = populate_merkle( + create_test_merkle(), + &[(b"a", b"value_a"), (b"c", b"value_c"), (b"e", b"value_e")], + ); + + let m2 = populate_merkle( + create_test_merkle(), + &[ + (b"b", b"value_b"), + (b"c", b"value_c"), + (b"d", b"value_d"), + (b"f", b"value_f"), + ], + ); + + let diff_iter = diff_merkle_iterator(&m1, &m2, Box::new([])); + + let ops: Vec<_> = diff_iter.collect::, _>>().unwrap(); + + assert_eq!(ops.len(), 5); + assert!(matches!(ops[0], BatchOp::Delete { ref key } if **key == *b"a")); + assert!( + matches!(ops[1], BatchOp::Put { ref key, ref value } if **key == *b"b" && **value == *b"value_b") + ); + assert!( + matches!(ops[2], BatchOp::Put { ref key, ref value } if **key == *b"d" && **value == *b"value_d") + ); + assert!(matches!(ops[3], BatchOp::Delete { ref key } if **key == *b"e")); + assert!( + matches!(ops[4], BatchOp::Put { ref key, ref value } if **key == *b"f" && **value == *b"value_f") + ); + // Note: "c" should be skipped as it's identical in both trees + } + + // example of running this test with a specific seed and maximum tracing in diff code: + // FIREWOOD_TEST_SEED=14805530293320947613 RUST_LOG=firewood::diff=trace \ + // cargo test --features logger diff::tests::diff_random_with_deletions + #[test_case(false, false, 500)] + #[test_case(false, true, 500)] + #[test_case(true, false, 500)] + #[test_case(true, true, 500)] + #[allow(clippy::indexing_slicing, clippy::cast_precision_loss)] + fn diff_random_with_deletions(trie1_mutable: bool, trie2_mutable: bool, num_items: usize) { + use rand::rngs::StdRng; + use rand::{Rng, SeedableRng}; + + let _ = env_logger::builder() + .write_style(WriteStyle::Never) + .format_timestamp(None) + .is_test(true) + .try_init(); + + // Read FIREWOOD_TEST_SEED from environment or use random seed + let seed = std::env::var("FIREWOOD_TEST_SEED") + .ok() + .map_or_else( + || None, + |s| Some(str::parse(&s).expect("couldn't parse FIREWOOD_TEST_SEED; must be a u64")), + ) + .unwrap_or(14805530293320947613); + //.unwrap_or_else(|| rng().random()); + + eprintln!("Seed {seed}: to rerun with this data, export FIREWOOD_TEST_SEED={seed}"); + let mut rng = StdRng::seed_from_u64(seed); + + // Generate random key-value pairs, ensuring uniqueness + let mut items: Vec<(Vec, Vec)> = Vec::new(); + let mut seen_keys = std::collections::HashSet::new(); + + while items.len() < num_items { + let key_len = rng.random_range(1..=32); + let value_len = rng.random_range(1..=64); + + let key: Vec = (0..key_len).map(|_| rng.random()).collect(); + + // Only add if key is unique + if seen_keys.insert(key.clone()) { + let value: Vec = (0..value_len).map(|_| rng.random()).collect(); + items.push((key, value)); + } + } + + // Create two identical merkles + let mut m1 = create_test_merkle(); + let mut m2 = create_test_merkle(); + + for (key, value) in &items { + m1.insert(key, value.clone().into_boxed_slice()).unwrap(); + m2.insert(key, value.clone().into_boxed_slice()).unwrap(); + } + + // Pick two different random indices to delete + let delete_idx1 = rng.random_range(0..items.len()); + let mut delete_idx2 = rng.random_range(0..items.len()); + while delete_idx2 == delete_idx1 { + delete_idx2 = rng.random_range(0..items.len()); + } + + let deleted_key1 = &items[delete_idx1].0; + let deleted_key2 = &items[delete_idx2].0; + + // Get the actual values from the trees before deletion (handles duplicate keys correctly) + let _actual_value1 = m1.get_value(deleted_key1).unwrap().unwrap(); + let _actual_value2 = m2.get_value(deleted_key2).unwrap().unwrap(); + + // Delete different keys from each merkle + m1.remove(deleted_key1).unwrap(); + println!("expected put for key {deleted_key1:x?}"); + //m2.remove(deleted_key2).unwrap(); + //println!("expected delete for key {:x?}", deleted_key2); + + // Convert to the appropriate type based on test parameters + let ops: Vec, Vec>> = if trie1_mutable && trie2_mutable { + // Both mutable + diff_merkle_iterator(&m1, &m2, Box::new([])) + .collect::, _>>() + .unwrap() + } else if trie1_mutable && !trie2_mutable { + // m1 mutable, m2 immutable + let m2_immut: Merkle, MemStore>> = + m2.try_into().unwrap(); + diff_merkle_iterator(&m1, &m2_immut, Box::new([])) + .collect::, _>>() + .unwrap() + } else if !trie1_mutable && trie2_mutable { + // m1 immutable, m2 mutable + let m1_immut: Merkle, MemStore>> = + m1.try_into().unwrap(); + diff_merkle_iterator(&m1_immut, &m2, Box::new([])) + .collect::, _>>() + .unwrap() + } else { + // Both immutable + let m1_immut: Merkle, MemStore>> = + m1.try_into().unwrap(); + let m2_immut: Merkle, MemStore>> = + m2.try_into().unwrap(); + println!("---m1\n{}", m1_immut.dump().unwrap()); + println!("---m2\n{}", m2_immut.dump().unwrap()); + diff_merkle_iterator(&m1_immut, &m2_immut, Box::new([])) + .collect::, _>>() + .unwrap() + }; + + // Should have exactly 2 operations: 1 delete + 1 put + if ops.len() != 2 { + println!( + "DEBUG: Expected 2 operations but got {} for seed {}", + ops.len(), + seed + ); + for (i, op) in ops.iter().enumerate() { + match op { + BatchOp::Delete { key } => { + println!(" {}: Delete key: {:x?}", i, key.as_ref()); + } + BatchOp::Put { key, value: _ } => { + println!(" {}: Put key: {:x?}", i, key.as_ref()); + } + BatchOp::DeleteRange { .. } => { + println!(" {i}: DeleteRange"); + } + } + } + } + assert_eq!(ops.len(), 2); + } + + #[test] + #[allow(clippy::pedantic)] + fn test_hash_optimization_reduces_node_reads() { + // This test focuses specifically on validating that hash optimization reduces node reads + // by comparing baseline full-traversal reads vs optimized diff operation reads. + // Diff correctness is validated by other tests. + use metrics::{Key, Label, Recorder}; + use metrics_util::registry::{AtomicStorage, Registry}; + use std::sync::Arc; + use std::sync::atomic::Ordering; + + /// Test metrics recorder that captures counter values for testing + #[derive(derive_more::Debug, Clone)] + struct TestRecorder { + registry: Arc>, + } + + impl TestRecorder { + fn new() -> Self { + Self { + registry: Arc::new(Registry::atomic()), + } + } + + fn get_counter_value( + &self, + key_name: &'static str, + labels: &[(&'static str, &'static str)], + ) -> u64 { + let key = if labels.is_empty() { + Key::from_name(key_name) + } else { + let label_vec: Vec