diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 750cde14cac..719c24b9561 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -931,18 +931,26 @@ where // Load the persisted custody context from the db and initialize // the context for this run - let custody_context = if let Some(custody) = + let (custody_context, cgc_changed_opt) = if let Some(custody) = load_custody_context::(store.clone()) { - Arc::new(CustodyContext::new_from_persisted_custody_context( + let head_epoch = canonical_head + .cached_head() + .head_slot() + .epoch(E::slots_per_epoch()); + CustodyContext::new_from_persisted_custody_context( custody, self.node_custody_type, + head_epoch, &self.spec, - )) + ) } else { - Arc::new(CustodyContext::new(self.node_custody_type, &self.spec)) + ( + CustodyContext::new(self.node_custody_type, &self.spec), + None, + ) }; - debug!(?custody_context, "Loading persisted custody context"); + debug!(?custody_context, "Loaded persisted custody context"); let beacon_chain = BeaconChain { spec: self.spec.clone(), @@ -1019,7 +1027,7 @@ where slot_clock, self.kzg.clone(), store, - custody_context, + Arc::new(custody_context), self.spec, ) .map_err(|e| format!("Error initializing DataAvailabilityChecker: {:?}", e))?, @@ -1062,6 +1070,14 @@ where return Err(format!("Weak subjectivity verification failed: {:?}", e)); } + if let Some(cgc_changed) = cgc_changed_opt { + // Update data column custody info if there's a CGC change from CLI flags. + // This will trigger column backfill. 
+ let cgc_change_effective_slot = + cgc_changed.effective_epoch.start_slot(E::slots_per_epoch()); + beacon_chain.update_data_column_custody_info(Some(cgc_change_effective_slot)); + } + info!( head_state = %head.beacon_state_root(), head_block = %head.beacon_block_root, diff --git a/beacon_node/beacon_chain/src/custody_context.rs b/beacon_node/beacon_chain/src/custody_context.rs index 7ec13a8b519..0da0e7573ec 100644 --- a/beacon_node/beacon_chain/src/custody_context.rs +++ b/beacon_node/beacon_chain/src/custody_context.rs @@ -7,7 +7,7 @@ use std::{ collections::{BTreeMap, HashMap}, sync::atomic::{AtomicU64, Ordering}, }; -use tracing::warn; +use tracing::{debug, warn}; use types::data_column_custody_group::{CustodyIndex, compute_columns_for_custody_group}; use types::{ChainSpec, ColumnIndex, Epoch, EthSpec, Slot}; @@ -49,6 +49,10 @@ impl ValidatorRegistrations { /// /// If a `cgc_override` value is specified, the cgc value is inserted into the registration map /// and is equivalent to registering validator(s) with the same custody requirement. + /// + /// The node will backfill all the way back to either data_availability_boundary or fulu epoch, + /// and because this is a fresh node, setting the epoch to 0 is fine, as backfill will be done via + /// backfill sync instead of column backfill. 
fn new(cgc_override: Option) -> Self { let mut registrations = ValidatorRegistrations { validators: Default::default(), @@ -100,10 +104,9 @@ impl ValidatorRegistrations { let validator_custody_requirement = get_validators_custody_requirement(validator_custody_units, spec); - tracing::debug!( + debug!( validator_custody_units, - validator_custody_requirement, - "Registered validators" + validator_custody_requirement, "Registered validators" ); // If registering the new validator increased the total validator "units", then @@ -126,8 +129,11 @@ impl ValidatorRegistrations { } } - /// Updates the `epoch_validator_custody_requirements` map by pruning all values on/after `effective_epoch` - /// and updating the map to store the latest validator custody requirements for the `effective_epoch`. + /// Updates the `epoch -> cgc` map after custody backfill has been completed for + /// the specified epoch. + /// + /// This is done by pruning all values on/after `effective_epoch` and updating the map to store + /// the latest validator custody requirements for the `effective_epoch`. pub fn backfill_validator_custody_requirements(&mut self, effective_epoch: Epoch) { if let Some(latest_validator_custody) = self.latest_validator_custody_requirement() { // Delete records if @@ -247,39 +253,92 @@ impl CustodyContext { /// Restore the custody context from disk. /// - /// * If NodeCustodyType::custody_count < validator_custody_at_head, it means the attached - /// validate stake has increased the node's CGC. We ignore the CLI input. - /// * If NodeCustodyType::custody_count > validator_custody_at_head, it means the user has - /// changed the node's custody type via either the --supernode or --semi-supernode flags, - /// and will require a resync until we implement column backfill for this scenario. 
+ /// # Behavior + /// * If [`NodeCustodyType::get_custody_count_override`] < validator_custody_at_head, it means + /// validators have increased the CGC beyond the derived CGC from cli flags. We ignore the CLI input. + /// * If [`NodeCustodyType::get_custody_count_override`] > validator_custody_at_head, it means the user has + /// changed the node's custody type via either the --supernode or --semi-supernode flags which + /// has resulted in a CGC increase. **The new CGC will be made effective from the next epoch**. + /// + /// # Returns + /// A tuple containing: + /// * `Self` - The restored custody context with updated CGC at head + /// * `Option<CustodyCountChanged>` - `Some` if the CLI flag caused a CGC increase (triggering backfill), + /// `None` if no CGC change occurred or reduction was prevented pub fn new_from_persisted_custody_context( ssz_context: CustodyContextSsz, node_custody_type: NodeCustodyType, + head_epoch: Epoch, spec: &ChainSpec, - ) -> Self { - let cgc_override = node_custody_type.get_custody_count_override(spec); - if let Some(cgc_from_cli) = cgc_override - && cgc_from_cli > ssz_context.validator_custody_at_head - { - warn!( - info = "node will continue to run with the current custody count", - current_custody_count = ssz_context.validator_custody_at_head, - node_custody_type = ?node_custody_type, - "Changing node type is currently not supported without a resync and will have no effect", + ) -> (Self, Option<CustodyCountChanged>) { + let CustodyContextSsz { + mut validator_custody_at_head, + mut epoch_validator_custody_requirements, + persisted_is_supernode: _, + } = ssz_context; + + let mut custody_count_changed = None; + + if let Some(cgc_from_cli) = node_custody_type.get_custody_count_override(spec) { + debug!( + ?node_custody_type, + persisted_custody_count = validator_custody_at_head, + "Initialising from persisted custody context" ); + + if cgc_from_cli > validator_custody_at_head { + // Make the CGC from CLI effective from the next epoch + let effective_epoch = head_epoch +
1; + let old_custody_group_count = validator_custody_at_head; + validator_custody_at_head = cgc_from_cli; + + let sampling_count = spec + .sampling_size_custody_groups(cgc_from_cli) + .expect("should compute node sampling size from valid chain spec"); + + epoch_validator_custody_requirements.push((effective_epoch, cgc_from_cli)); + + custody_count_changed = Some(CustodyCountChanged { + new_custody_group_count: validator_custody_at_head, + old_custody_group_count, + sampling_count, + effective_epoch, + }); + + debug!( + info = "new CGC will be effective from the next epoch", + ?node_custody_type, + old_cgc = old_custody_group_count, + new_cgc = validator_custody_at_head, + effective_epoch = %effective_epoch, + "Node custody type change caused a custody count increase", + ); + } else if cgc_from_cli < validator_custody_at_head { + // We don't currently support reducing CGC for simplicity. + // A common scenario is that user may restart with a CLI flag, but the validators + // are only attached later, and we end up having CGC inconsistency. 
+ warn!( + info = "node will continue to run with the current custody count", + current_custody_count = validator_custody_at_head, + node_custody_type = ?node_custody_type, + "Reducing CGC is currently not supported without a resync and will have no effect", + ); + } } - CustodyContext { - validator_custody_count: AtomicU64::new(ssz_context.validator_custody_at_head), + + let custody_context = CustodyContext { + validator_custody_count: AtomicU64::new(validator_custody_at_head), validator_registrations: RwLock::new(ValidatorRegistrations { validators: Default::default(), - epoch_validator_custody_requirements: ssz_context - .epoch_validator_custody_requirements + epoch_validator_custody_requirements: epoch_validator_custody_requirements .into_iter() .collect(), }), all_custody_columns_ordered: OnceLock::new(), _phantom_data: PhantomData, - } + }; + + (custody_context, custody_count_changed) } /// Initializes an ordered list of data columns based on provided custody groups. @@ -331,7 +390,7 @@ impl CustodyContext { let current_cgc = self.validator_custody_count.load(Ordering::Relaxed); if new_validator_custody != current_cgc { - tracing::debug!( + debug!( old_count = current_cgc, new_count = new_validator_custody, "Validator count at head updated" @@ -342,10 +401,9 @@ impl CustodyContext { let updated_cgc = self.custody_group_count_at_head(spec); // Send the message to network only if there are more columns subnets to subscribe to if updated_cgc > current_cgc { - tracing::debug!( + debug!( old_cgc = current_cgc, - updated_cgc, - "Custody group count updated" + updated_cgc, "Custody group count updated" ); return Some(CustodyCountChanged { new_custody_group_count: updated_cgc, @@ -457,6 +515,8 @@ impl CustodyContext { &all_columns_ordered[..custody_group_count] } + /// The node has completed backfill for this epoch. Update the internal records so the function + /// [`Self::custody_columns_for_epoch()`] returns up-to-date results. 
pub fn update_and_backfill_custody_count_at_epoch(&self, effective_epoch: Epoch) { self.validator_registrations .write() @@ -464,8 +524,13 @@ impl CustodyContext { } } -/// The custody count changed because of a change in the -/// number of validators being managed. +/// Indicates that the custody group count (CGC) has increased. +/// +/// CGC increases can occur due to: +/// 1. Validator registrations increasing effective balance beyond current CGC +/// 2. CLI flag changes (e.g., switching to --supernode or --semi-supernode) +/// +/// This struct is used to trigger column backfill and network subnet subscription updates. pub struct CustodyCountChanged { pub new_custody_group_count: u64, pub old_custody_group_count: u64, @@ -509,6 +574,153 @@ mod tests { type E = MainnetEthSpec; + fn setup_custody_context( + spec: &ChainSpec, + head_epoch: Epoch, + epoch_and_cgc_tuples: Vec<(Epoch, u64)>, + ) -> CustodyContext { + let cgc_at_head = epoch_and_cgc_tuples.last().unwrap().1; + let ssz_context = CustodyContextSsz { + validator_custody_at_head: cgc_at_head, + persisted_is_supernode: false, + epoch_validator_custody_requirements: epoch_and_cgc_tuples, + }; + + let (custody_context, _) = CustodyContext::::new_from_persisted_custody_context( + ssz_context, + NodeCustodyType::Fullnode, + head_epoch, + spec, + ); + + let all_custody_groups_ordered = (0..spec.number_of_custody_groups).collect::>(); + custody_context + .init_ordered_data_columns_from_custody_groups(all_custody_groups_ordered, spec) + .expect("should initialise ordered data columns"); + custody_context + } + + fn complete_backfill_for_epochs( + custody_context: &CustodyContext, + start_epoch: Epoch, + end_epoch: Epoch, + ) { + assert!(start_epoch >= end_epoch); + // Call from end_epoch down to start_epoch (inclusive), simulating backfill + for epoch in (end_epoch.as_u64()..=start_epoch.as_u64()).rev() { + custody_context.update_and_backfill_custody_count_at_epoch(Epoch::new(epoch)); + } + } + + /// Helper 
function to test CGC increases when switching node custody types. + /// Verifies that CustodyCountChanged is returned with correct values and + /// that custody_group_count_at_epoch returns appropriate values for current and next epoch. + fn assert_custody_type_switch_increases_cgc( + persisted_cgc: u64, + target_node_custody_type: NodeCustodyType, + expected_new_cgc: u64, + head_epoch: Epoch, + spec: &ChainSpec, + ) { + let ssz_context = CustodyContextSsz { + validator_custody_at_head: persisted_cgc, + persisted_is_supernode: false, + epoch_validator_custody_requirements: vec![(Epoch::new(0), persisted_cgc)], + }; + + let (custody_context, custody_count_changed) = + CustodyContext::::new_from_persisted_custody_context( + ssz_context, + target_node_custody_type, + head_epoch, + spec, + ); + + // Verify CGC increased + assert_eq!( + custody_context.custody_group_count_at_head(spec), + expected_new_cgc, + "cgc should increase from {} to {}", + persisted_cgc, + expected_new_cgc + ); + + // Verify CustodyCountChanged is returned with correct values + let cgc_changed = custody_count_changed.expect("CustodyCountChanged should be returned"); + assert_eq!( + cgc_changed.new_custody_group_count, expected_new_cgc, + "new_custody_group_count should be {}", + expected_new_cgc + ); + assert_eq!( + cgc_changed.old_custody_group_count, persisted_cgc, + "old_custody_group_count should be {}", + persisted_cgc + ); + assert_eq!( + cgc_changed.effective_epoch, + head_epoch + 1, + "effective epoch should be head_epoch + 1" + ); + assert_eq!( + cgc_changed.sampling_count, + spec.sampling_size_custody_groups(expected_new_cgc) + .expect("should compute sampling size"), + "sampling_count should match expected value" + ); + + // Verify custody_group_count_at_epoch returns correct values + assert_eq!( + custody_context.custody_group_count_at_epoch(head_epoch, spec), + persisted_cgc, + "current epoch should still use old cgc ({})", + persisted_cgc + ); + assert_eq!( + 
custody_context.custody_group_count_at_epoch(head_epoch + 1, spec), + expected_new_cgc, + "next epoch should use new cgc ({})", + expected_new_cgc + ); + } + + /// Helper function to test CGC reduction prevention when switching node custody types. + /// Verifies that CGC stays at the persisted value and CustodyCountChanged is not returned. + fn assert_custody_type_switch_unchanged_cgc( + persisted_cgc: u64, + target_node_custody_type: NodeCustodyType, + head_epoch: Epoch, + spec: &ChainSpec, + ) { + let ssz_context = CustodyContextSsz { + validator_custody_at_head: persisted_cgc, + persisted_is_supernode: false, + epoch_validator_custody_requirements: vec![(Epoch::new(0), persisted_cgc)], + }; + + let (custody_context, custody_count_changed) = + CustodyContext::::new_from_persisted_custody_context( + ssz_context, + target_node_custody_type, + head_epoch, + spec, + ); + + // Verify CGC stays at persisted value (no reduction) + assert_eq!( + custody_context.custody_group_count_at_head(spec), + persisted_cgc, + "cgc should remain at {} (reduction not supported)", + persisted_cgc + ); + + // Verify no CustodyCountChanged is returned (no change occurred) + assert!( + custody_count_changed.is_none(), + "CustodyCountChanged should not be returned when CGC doesn't change" + ); + } + #[test] fn no_validators_supernode_default() { let spec = E::default_spec(); @@ -914,9 +1126,10 @@ mod tests { epoch_validator_custody_requirements: vec![], }; - let custody_context = CustodyContext::::new_from_persisted_custody_context( + let (custody_context, _) = CustodyContext::::new_from_persisted_custody_context( ssz_context, NodeCustodyType::Fullnode, + Epoch::new(0), &spec, ); @@ -927,51 +1140,155 @@ mod tests { ); } + /// Tests CLI flag change: Fullnode (CGC=0) → Supernode (CGC=128) + /// CGC should increase and trigger backfill via CustodyCountChanged. 
#[test] - fn restore_fullnode_then_switch_to_supernode_has_no_effect() { + fn restore_fullnode_then_switch_to_supernode_increases_cgc() { let spec = E::default_spec(); - let ssz_context = CustodyContextSsz { - validator_custody_at_head: 0, // no validators - persisted_is_supernode: false, - epoch_validator_custody_requirements: vec![], - }; + let head_epoch = Epoch::new(10); + let supernode_cgc = spec.number_of_custody_groups; - // Attempt to restore as supernode (wants 128), but should use original persisted value - let custody_context = CustodyContext::::new_from_persisted_custody_context( - ssz_context, + assert_custody_type_switch_increases_cgc( + 0, NodeCustodyType::Supernode, + supernode_cgc, + head_epoch, &spec, ); + } + /// Tests validator-driven CGC increase: Semi-supernode (CGC=64) → CGC=70 + /// Semi-supernode can exceed 64 when validator effective balance increases CGC. + #[test] + fn restore_semi_supernode_with_validators_can_exceed_64() { + let spec = E::default_spec(); + let semi_supernode_cgc = spec.number_of_custody_groups / 2; // 64 + let custody_context = CustodyContext::::new(NodeCustodyType::SemiSupernode, &spec); + + // Verify initial CGC is 64 (semi-supernode) assert_eq!( custody_context.custody_group_count_at_head(&spec), - spec.custody_requirement, - "should use original fullnode cgc, not supernode cgc" + semi_supernode_cgc, + "initial cgc should be 64" + ); + + // Register validators with 70 custody units (exceeding semi-supernode default) + let validator_custody_units = 70; + let current_slot = Slot::new(10); + let cgc_changed = custody_context.register_validators( + vec![( + 0, + validator_custody_units * spec.balance_per_additional_custody_group, + )], + current_slot, + &spec, + ); + + // Verify CGC increased from 64 to 70 + assert!( + cgc_changed.is_some(), + "CustodyCountChanged should be returned" + ); + let cgc_changed = cgc_changed.unwrap(); + assert_eq!( + cgc_changed.new_custody_group_count, validator_custody_units, + "cgc should 
increase to 70" + ); + assert_eq!( + cgc_changed.old_custody_group_count, semi_supernode_cgc, + "old cgc should be 64" + ); + + // Verify the custody context reflects the new CGC + assert_eq!( + custody_context.custody_group_count_at_head(&spec), + validator_custody_units, + "custody_group_count_at_head should be 70" ); } + /// Tests CLI flag change prevention: Supernode (CGC=128) → Fullnode (CGC stays 128) + /// CGC reduction is not supported - persisted value is retained. #[test] fn restore_supernode_then_switch_to_fullnode_uses_persisted() { let spec = E::default_spec(); - let supernode_cgc = spec.number_of_custody_groups; // supernode cgc - - let ssz_context = CustodyContextSsz { - validator_custody_at_head: supernode_cgc, - persisted_is_supernode: false, - epoch_validator_custody_requirements: vec![(Epoch::new(0), supernode_cgc)], - }; + let supernode_cgc = spec.number_of_custody_groups; - // Attempt to restore as fullnode (wants 8), but should keep persisted value (128) - let custody_context = CustodyContext::::new_from_persisted_custody_context( - ssz_context, + assert_custody_type_switch_unchanged_cgc( + supernode_cgc, NodeCustodyType::Fullnode, + Epoch::new(0), &spec, ); + } - assert_eq!( - custody_context.custody_group_count_at_head(&spec), + /// Tests CLI flag change prevention: Supernode (CGC=128) → Semi-supernode (CGC stays 128) + /// CGC reduction is not supported - persisted value is retained. + #[test] + fn restore_supernode_then_switch_to_semi_supernode_keeps_supernode_cgc() { + let spec = E::default_spec(); + let supernode_cgc = spec.number_of_custody_groups; + let head_epoch = Epoch::new(10); + + assert_custody_type_switch_unchanged_cgc( supernode_cgc, - "should use persisted supernode cgc, not fullnode cgc" + NodeCustodyType::SemiSupernode, + head_epoch, + &spec, + ); + } + + /// Tests CLI flag change: Fullnode with validators (CGC=32) → Semi-supernode (CGC=64) + /// CGC should increase and trigger backfill via CustodyCountChanged. 
+ #[test] + fn restore_fullnode_with_validators_then_switch_to_semi_supernode() { + let spec = E::default_spec(); + let persisted_cgc = 32u64; + let semi_supernode_cgc = spec.number_of_custody_groups / 2; + let head_epoch = Epoch::new(10); + + assert_custody_type_switch_increases_cgc( + persisted_cgc, + NodeCustodyType::SemiSupernode, + semi_supernode_cgc, + head_epoch, + &spec, + ); + } + + /// Tests CLI flag change: Semi-supernode (CGC=64) → Supernode (CGC=128) + /// CGC should increase and trigger backfill via CustodyCountChanged. + #[test] + fn restore_semi_supernode_then_switch_to_supernode() { + let spec = E::default_spec(); + let semi_supernode_cgc = spec.number_of_custody_groups / 2; + let supernode_cgc = spec.number_of_custody_groups; + let head_epoch = Epoch::new(10); + + assert_custody_type_switch_increases_cgc( + semi_supernode_cgc, + NodeCustodyType::Supernode, + supernode_cgc, + head_epoch, + &spec, + ); + } + + /// Tests CLI flag change: Fullnode with validators (CGC=32) → Supernode (CGC=128) + /// CGC should increase and trigger backfill via CustodyCountChanged. 
+ #[test] + fn restore_with_cli_flag_increases_cgc_from_nonzero() { + let spec = E::default_spec(); + let persisted_cgc = 32u64; + let supernode_cgc = spec.number_of_custody_groups; + let head_epoch = Epoch::new(10); + + assert_custody_type_switch_increases_cgc( + persisted_cgc, + NodeCustodyType::Supernode, + supernode_cgc, + head_epoch, + &spec, ); } @@ -992,9 +1309,10 @@ mod tests { ], }; - let custody_context = CustodyContext::::new_from_persisted_custody_context( + let (custody_context, _) = CustodyContext::::new_from_persisted_custody_context( ssz_context, NodeCustodyType::Fullnode, + Epoch::new(20), &spec, ); @@ -1033,4 +1351,77 @@ mod tests { "sampling at epoch 25 should match final cgc" ); } + + #[test] + fn backfill_single_cgc_increase_updates_past_epochs() { + let spec = E::default_spec(); + let final_cgc = 32u64; + let default_cgc = spec.custody_requirement; + + // Setup: Node restart after validators were registered, causing CGC increase to 32 at epoch 20 + let head_epoch = Epoch::new(20); + let epoch_and_cgc_tuples = vec![(head_epoch, final_cgc)]; + let custody_context = setup_custody_context(&spec, head_epoch, epoch_and_cgc_tuples); + assert_eq!( + custody_context.custody_group_count_at_epoch(Epoch::new(15), &spec), + default_cgc, + ); + + // Backfill from epoch 20 down to 15 (simulating backfill) + complete_backfill_for_epochs(&custody_context, head_epoch, Epoch::new(15)); + + // After backfilling to epoch 15, it should use latest CGC (32) + assert_eq!( + custody_context.custody_group_count_at_epoch(Epoch::new(15), &spec), + final_cgc, + ); + assert_eq!( + custody_context + .custody_columns_for_epoch(Some(Epoch::new(15)), &spec) + .len(), + final_cgc as usize, + ); + + // Prior epoch should still return the original CGC + assert_eq!( + custody_context.custody_group_count_at_epoch(Epoch::new(14), &spec), + default_cgc, + ); + } + + #[test] + fn backfill_with_multiple_cgc_increases_prunes_map_correctly() { + let spec = E::default_spec(); + let 
initial_cgc = 8u64; + let mid_cgc = 16u64; + let final_cgc = 32u64; + + // Setup: Node restart after multiple validator registrations causing CGC increases + let head_epoch = Epoch::new(20); + let epoch_and_cgc_tuples = vec![ + (Epoch::new(0), initial_cgc), + (Epoch::new(10), mid_cgc), + (head_epoch, final_cgc), + ]; + let custody_context = setup_custody_context(&spec, head_epoch, epoch_and_cgc_tuples); + + // Backfill to epoch 15 (between the two CGC increases) + complete_backfill_for_epochs(&custody_context, Epoch::new(20), Epoch::new(15)); + + // Verify epochs 15 - 20 return latest CGC (32) + for epoch in 15..=20 { + assert_eq!( + custody_context.custody_group_count_at_epoch(Epoch::new(epoch), &spec), + final_cgc, + ); + } + + // Verify epochs 10-14 still return mid_cgc (16) + for epoch in 10..=14 { + assert_eq!( + custody_context.custody_group_count_at_epoch(Epoch::new(epoch), &spec), + mid_cgc, + ); + } + } }