Simplex reconfiguration framework - Part IV (MSM implementation - verification)#381
Simplex reconfiguration framework - Part IV (MSM implementation - verification)#381yacovm wants to merge 1 commit into
Conversation
bcb6d88 to
d5dd2de
Compare
| if prev.NextPChainReferenceHeight == 0 { | ||
| return nil | ||
| } |
There was a problem hiding this comment.
I still think we need to do some verification, otherwise a malicious node can make us finalize a block without us ever checking NextEpochApprovals.
See this test I asked claude to write.
// TestNextEpochApprovalsVerifierHaltViaGarbageApprovals demonstrates the liveness halt
// that the missing nil-check in verifyNormal enables.
//
// Scenario:
//
// A: Normal block, prev.NextPChainReferenceHeight == 0.
// B: Normal block proposed by a malicious leader. B legitimately advances
// NextPChainReferenceHeight (validator set changed), and also injects an
// all-1s bitmask into NextEpochApprovals. With prev (A).NextPChainReferenceHeight
// == 0 the verifier early-returns and B is accepted.
// B+1: Honest leader. prev (B).NextPChainReferenceHeight > 0 so verifyNormal runs
// the full path. areNextEpochApprovalsSignersSupersetOfApprovalsOfPrevBlock
// requires B+1 to be a superset of B's all-1s bitmask. No honest proposer
// can produce that, so no valid B+1 exists -> chain halts at B.
//
// This test will start failing at step (B) once verifyNormal is fixed to reject
// garbage approvals when prev.NextPChainReferenceHeight == 0, which is the desired
// behavior.
func TestNextEpochApprovalsVerifierHaltViaGarbageApprovals(t *testing.T) {
validators := NodeBLSMappings{
{BLSKey: []byte{1}, Weight: 1},
{BLSKey: []byte{2}, Weight: 1},
{BLSKey: []byte{3}, Weight: 1},
}
getValidators := func(uint64) (NodeBLSMappings, error) { return validators, nil }
v := &nextEpochApprovalsVerifier{
sigVerifier: &testSigVerifier{},
getValidatorSet: getValidators,
keyAggregator: &testKeyAggregator{},
sigAggregatorCreator: newSignatureAggregatorCreator(),
}
// Malicious B: triggers transition AND injects all-1s garbage approvals.
garbageApprovals := &NextEpochApprovals{NodeIDs: []byte{0xFF}, Signature: []byte("garbage")}
mdA := SimplexEpochInfo{NextPChainReferenceHeight: 0, PChainReferenceHeight: 50}
mdB := SimplexEpochInfo{
NextPChainReferenceHeight: 100,
PChainReferenceHeight: 50,
NextEpochApprovals: garbageApprovals,
}
// Step 1: verify B against A. Today the verifier accepts B. After the fix this
// should fail with errUnexpectedApprovals and the halt below becomes unreachable.
errB := v.Verify(verificationInput{
nextBlockType: BlockTypeNormal,
prevMD: StateMachineMetadata{SimplexEpochInfo: mdA},
proposedBlockMD: StateMachineMetadata{SimplexEpochInfo: mdB},
})
require.NoError(t, errB, "malicious B accepted today; once fixed, expect errUnexpectedApprovals")
// Step 2: honest B+1 tries to continue. It supplies its own approval (bit 0),
// which is the realistic best-case for an honest proposer that just observed
// the transition. The superset check against B's all-1s bitmask rejects it.
honestApprovals := &NextEpochApprovals{NodeIDs: []byte{0x01}, Signature: []byte("sig")}
mdBPlus1 := SimplexEpochInfo{
NextPChainReferenceHeight: 100,
PChainReferenceHeight: 50,
NextEpochApprovals: honestApprovals,
}
errBPlus1 := v.Verify(verificationInput{
nextBlockType: BlockTypeNormal,
prevMD: StateMachineMetadata{SimplexEpochInfo: mdB},
proposedBlockMD: StateMachineMetadata{SimplexEpochInfo: mdBPlus1},
})
require.ErrorIs(t, errBPlus1, errSignerSetShrunk, "chain is wedged: no honest B+1 can satisfy the superset check against B's garbage")
}There was a problem hiding this comment.
so i think we need this check
if prev.NextPChainReferenceHeight == 0 {
if next.NextEpochApprovals != nil {
return fmt.Errorf("%w but got %v", errUnexpectedApprovals, next.NextEpochApprovals)
}
return nil
}| prev, next := in.prevMD.SimplexEpochInfo, in.proposedBlockMD.SimplexEpochInfo | ||
|
|
||
| // An epoch number of 0 means this is not a Simplex block, so the next block should be the first Simplex block with epoch number 1. | ||
| if in.prevMD.SimplexEpochInfo.EpochNumber == 0 && in.proposedBlockMD.SimplexEpochInfo.EpochNumber != 1 { |
There was a problem hiding this comment.
how can we ever reach this code path via the msm? if we are in this verifier that means the EpochNumber must be > 0. Is this just a defensive check?
| prevBlock, _, err := v.getBlock(in.prevBlockSeq, md.Prev) | ||
| if err != nil { | ||
| return fmt.Errorf("failed retrieving block: %w", err) | ||
| } |
There was a problem hiding this comment.
this additional lookup is just to check is prevBlock.InnerBlock != nil. We can remove this lookup if we just pass in the prevBlock we already queried in VerifyBlock
Line 261 in d5dd2de
| &epochNumberVerifier{}, | ||
| &validationDescriptorVerifier{ | ||
| getValidatorSet: func(pChainHeight uint64) (NodeBLSMappings, error) { | ||
| return sm.Config.GetValidatorSet(pChainHeight) |
There was a problem hiding this comment.
would it make sense to just pass in the validator set for the pChainHeight in the verification input rather than passing in the method to both validationDescriptorVerifier and nextPChainReferenceHeightVerifier? I guess it would also save a lookup too
| if next.SealingBlockSeq != 0 { | ||
| return fmt.Errorf("%w: expected 0 but got %d", errSealingBlockSeqMismatch, next.SealingBlockSeq) | ||
| } | ||
| case BlockTypeTelock: |
There was a problem hiding this comment.
should we add a defensive check to make sure the telock case falls under one of these ifs?
switch {
case prev.SealingBlockSeq > 0:
if next.SealingBlockSeq != prev.SealingBlockSeq {
return fmt.Errorf("%w: expected %d but got %d", errSealingBlockSeqMismatch, prev.SealingBlockSeq, next.SealingBlockSeq)
}
case prev.BlockValidationDescriptor != nil:
md, err := simplex.ProtocolMetadataFromBytes(in.prevMD.SimplexProtocolMetadata)
if err != nil {
return fmt.Errorf("failed parsing protocol metadata: %w", err)
}
if next.SealingBlockSeq != md.Seq {
return fmt.Errorf("%w: expected %d but got %d", errSealingBlockSeqMismatch, md.Seq, next.SealingBlockSeq)
}
default:
return errInvalidTelockPredecessorThere was a problem hiding this comment.
is it better to construct the verificationInput once before the loop?
Signed-off-by: Yacov Manevich <yacov.manevich@avalabs.org>
| return false | ||
| } | ||
|
|
||
| type testBlockStore map[uint64]StateMachineBlock |
There was a problem hiding this comment.
i dont think we should be testing anything other structs in misc.go in this file. When we bridge over these avalanchego utilites defined in misc.go this file should just be deleted.
Unfortunately I think this means moving everything starting from line 181 and onwards.
| errNilInnerBlock = errors.New("InnerBlock is nil") | ||
| errBuiltGenesisInnerBlock = errors.New("received a genesis block") | ||
| errZeroBlockParentNoInnerBlock = errors.New("failed constructing zero block: parent block has no inner block") | ||
| errNilBlock = errors.New("block is nil") |
There was a problem hiding this comment.
thoughts on errNilBlock and errNilInnerBlock being one error?
| errLastNonSimplexInnerBlockNil = errors.New("failed constructing zero block: last non-Simplex inner block is nil") | ||
| errInvalidProtocolMetadataSeq = errors.New("invalid ProtocolMetadata sequence number: should be > 0") | ||
| errUnknownState = errors.New("unknown state") | ||
| errNilInnerBlock = errors.New("InnerBlock is nil") |
There was a problem hiding this comment.
Lines 247 to 250 in 7d9705d
i think this is being used wrongly here. maybe we should just remove it and use errNilBlock instead
| errPChainReferenceHeightDecreased = errors.New("P-chain reference height is decreasing") | ||
| errValidatorSetUnchanged = errors.New("validator set unchanged; next P-chain reference height should not have advanced") | ||
| errPChainHeightNotReached = errors.New("haven't reached referenced P-chain height yet") | ||
| errUnknownBlockType = errors.New("unknown block type") |
There was a problem hiding this comment.
we should remove this error and just use errUnknownState
| errUnknownState = errors.New("unknown state") | ||
| errNilInnerBlock = errors.New("InnerBlock is nil") | ||
| errBuiltGenesisInnerBlock = errors.New("received a genesis block") | ||
| errZeroBlockParentNoInnerBlock = errors.New("failed constructing zero block: parent block has no inner block") |
There was a problem hiding this comment.
this plus errParentInnerBlockHasNoInnerBlock can be merged into one
| } | ||
|
|
||
| // wrapBlock creates a new StateMachineBlock by wrapping the VM block (if applicable) and adding the appropriate metadata. | ||
| func (sm *StateMachine) wrapBlock(parentBlock StateMachineBlock, childBlock VMBlock, newSimplexEpochInfo SimplexEpochInfo, pChainHeight uint64, simplexMetadata, simplexBlacklist []byte) *StateMachineBlock { |
There was a problem hiding this comment.
i dont think this function should be part of StateMachine
| return sm.wrapBlock(parentBlock, innerBlock, newSimplexEpochInfo, decisionToBuildBlock.pChainHeight, simplexMetadata, simplexBlacklist), nil | ||
| } | ||
|
|
||
| func (sm *StateMachine) verifyNormalBlock(ctx context.Context, parentBlock StateMachineBlock, nextBlock *StateMachineBlock, prevBlockSeq uint64) error { |
There was a problem hiding this comment.
thoughts on separating out some of the common verification code?
https://github.com/ava-labs/Simplex/compare/reconfig-3c...reconfig-seperations?expand=1
There was a problem hiding this comment.
unrelated but doesn't this comment in SimplexEpochInfo need to be updated
// PrevSealingBlockHash is the hash of the sealing block of the previous epoch.
// It is empty for the first epoch, and the second epoch has the PrevSealingBlockHash set to be
// the hash of the first ever block built by the StateMachine.
PrevSealingBlockHash [32]byte `canoto:"fixed bytes,3"`
| if err != nil { | ||
| sm.Logger.Error("Error retrieving previous sealing block", zap.Uint64("seq", simplexEpochInfo.EpochNumber), zap.Error(err)) | ||
| return nil, fmt.Errorf("failed to retrieve previous sealing InnerBlock at epoch %d: %w", simplexEpochInfo.EpochNumber-1, err) | ||
| return SimplexEpochInfo{}, fmt.Errorf("failed to retrieve previous sealing InnerBlock at epoch %d: %w", simplexEpochInfo.EpochNumber-1, err) |
There was a problem hiding this comment.
why do we do EpochNumber - 1 here?
| pChainHeightBuff := make([]byte, 8) | ||
| binary.BigEndian.PutUint64(pChainHeightBuff, pChainHeight) | ||
|
|
||
| if err := sm.SignatureVerifier.VerifySignature(next.NextEpochApprovals.Signature, pChainHeightBuff, aggPK); err != nil { |
There was a problem hiding this comment.
should we add a domain separator to this signature? similarly to what we do for quorum certificates?
No description provided.