This commit is contained in:
Cassandra Heart 2025-11-13 23:37:09 -06:00
parent f2fa7bf57f
commit bc10bd497c
No known key found for this signature in database
GPG Key ID: 371083BFA6C240AA
28 changed files with 487 additions and 125 deletions

View File

@ -43,7 +43,7 @@ func FormatVersion(version []byte) string {
}
func GetPatchNumber() byte {
return 0x06
return 0x07
}
func GetRCNumber() byte {

View File

@ -38,10 +38,7 @@ type Replicas interface {
// Returns the following expected errors for invalid inputs:
// - models.ErrRankUnknown if the given rank is not known
LeaderForRank(
rank uint64,
selector models.Identity,
) (models.Identity, error)
LeaderForRank(rank uint64) (models.Identity, error)
// QuorumThresholdForRank returns the minimum total weight for a supermajority
// at the given rank. This weight threshold is computed using the total weight

View File

@ -496,10 +496,7 @@ func (e *EventHandler[
consensus.Uint64Param("current_rank", curRank),
consensus.IdentityParam("self", e.committee.Self()),
)
currentLeader, err := e.committee.LeaderForRank(
curRank,
e.paceMaker.LatestQuorumCertificate().Identity(),
)
currentLeader, err := e.committee.LeaderForRank(curRank)
if err != nil {
return fmt.Errorf(
"failed to determine primary for new rank %d: %w",
@ -694,10 +691,7 @@ func (e *EventHandler[
return nil
}
// leader (node ID) for next rank
nextLeader, err := e.committee.LeaderForRank(
curRank+1,
proposal.State.Identifier,
)
nextLeader, err := e.committee.LeaderForRank(curRank + 1)
if errors.Is(err, models.ErrRankUnknown) {
// We are attempting to process a state in an unknown rank
// This should never happen, because:

View File

@ -133,8 +133,8 @@ func (_m *DynamicCommittee) IdentityByState(stateID models.Identity, participant
}
// LeaderForRank provides a mock function with given fields: rank
func (_m *DynamicCommittee) LeaderForRank(rank uint64, selector models.Identity) (models.Identity, error) {
ret := _m.Called(rank, selector)
func (_m *DynamicCommittee) LeaderForRank(rank uint64) (models.Identity, error) {
ret := _m.Called(rank)
if len(ret) == 0 {
panic("no return value specified for LeaderForRank")
@ -142,17 +142,17 @@ func (_m *DynamicCommittee) LeaderForRank(rank uint64, selector models.Identity)
var r0 models.Identity
var r1 error
if rf, ok := ret.Get(0).(func(uint64, models.Identity) (models.Identity, error)); ok {
return rf(rank, selector)
if rf, ok := ret.Get(0).(func(uint64) (models.Identity, error)); ok {
return rf(rank)
}
if rf, ok := ret.Get(0).(func(uint64, models.Identity) models.Identity); ok {
r0 = rf(rank, selector)
if rf, ok := ret.Get(0).(func(uint64) models.Identity); ok {
r0 = rf(rank)
} else {
r0 = ret.Get(0).(models.Identity)
}
if rf, ok := ret.Get(1).(func(uint64, models.Identity) error); ok {
r1 = rf(rank, selector)
if rf, ok := ret.Get(1).(func(uint64) error); ok {
r1 = rf(rank)
} else {
r1 = ret.Error(1)
}

View File

@ -73,8 +73,8 @@ func (_m *Replicas) IdentityByRank(rank uint64, participantID models.Identity) (
}
// LeaderForRank provides a mock function with given fields: rank
func (_m *Replicas) LeaderForRank(rank uint64, selector models.Identity) (models.Identity, error) {
ret := _m.Called(rank, selector)
func (_m *Replicas) LeaderForRank(rank uint64) (models.Identity, error) {
ret := _m.Called(rank)
if len(ret) == 0 {
panic("no return value specified for LeaderForRank")
@ -82,17 +82,17 @@ func (_m *Replicas) LeaderForRank(rank uint64, selector models.Identity) (models
var r0 models.Identity
var r1 error
if rf, ok := ret.Get(0).(func(uint64, models.Identity) (models.Identity, error)); ok {
return rf(rank, selector)
if rf, ok := ret.Get(0).(func(uint64) (models.Identity, error)); ok {
return rf(rank)
}
if rf, ok := ret.Get(0).(func(uint64, models.Identity) models.Identity); ok {
r0 = rf(rank, selector)
if rf, ok := ret.Get(0).(func(uint64) models.Identity); ok {
r0 = rf(rank)
} else {
r0 = ret.Get(0).(models.Identity)
}
if rf, ok := ret.Get(1).(func(uint64, models.Identity) error); ok {
r1 = rf(rank, selector)
if rf, ok := ret.Get(1).(func(uint64) error); ok {
r1 = rf(rank)
} else {
r1 = ret.Error(1)
}

View File

@ -128,10 +128,7 @@ func (r *SafetyRules[StateT, VoteT]) produceVote(
)
}
currentLeader, err := r.committee.LeaderForRank(
state.Rank,
state.ParentQuorumCertificate.Identity(),
)
currentLeader, err := r.committee.LeaderForRank(state.Rank)
if err != nil {
return nil, fmt.Errorf(
"expect to have a valid leader for rank %d: %w",

View File

@ -333,10 +333,7 @@ func (v *Validator[StateT, VoteT]) ValidateProposal(
}
// check the proposer is the leader for the proposed state's rank
leader, err := v.committee.LeaderForRank(
state.Rank,
state.ParentQuorumCertificate.Identity(),
)
leader, err := v.committee.LeaderForRank(state.Rank)
if err != nil {
return fmt.Errorf(
"error determining leader for state %x: %w",

View File

@ -3,6 +3,7 @@ package aggregator
import (
"github.com/pkg/errors"
"source.quilibrium.com/quilibrium/monorepo/bls48581"
"source.quilibrium.com/quilibrium/monorepo/consensus"
"source.quilibrium.com/quilibrium/monorepo/consensus/models"
typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus"
@ -62,18 +63,32 @@ func (c *ConsensusSignatureAggregatorWrapper) Aggregate(
return nil, errors.Wrap(err, "aggregate")
}
pubs := map[string]struct{}{}
for _, p := range publicKeys {
pubs[string(p)] = struct{}{}
pubs := map[string]int{}
for i, p := range publicKeys {
pubs[string(p)] = i
}
bitmask := make([]byte, (len(provers)+7)/8)
extra := []byte{}
if len(c.filter) != 0 {
extra = make([]byte, 516*len(provers))
}
for i, p := range provers {
if _, ok := pubs[string(p.PublicKey)]; ok {
if j, ok := pubs[string(p.PublicKey)]; ok {
bitmask[i/8] |= (1 << (i % 8))
if len(c.filter) != 0 {
copy(extra[516*i:516*(i+1)], signatures[j][74:])
}
}
}
// TODO: remove direct reference
if len(c.filter) != 0 {
output.(*bls48581.BlsAggregateOutput).AggregateSignature =
append(output.(*bls48581.BlsAggregateOutput).AggregateSignature, extra...)
}
return &ConsensusAggregatedSignature{output, bitmask}, nil
}
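
The change above swaps the plain set of participating public keys for an index map, so that when a shard filter is present the 516-byte multiproof tail of each signer's signature can be copied into that prover's slot alongside the participation bitmask. Below is a minimal standalone sketch of that mapping, with simplified types (plain byte slices, a boolean in place of the filter check) and a hypothetical buildParticipationBitmask helper:

```go
package main

import "fmt"

// buildParticipationBitmask sketches the aggregation change: each prover whose
// public key appears among the signers gets its bit set, and (when a shard
// filter is present) the 516-byte multiproof tail of that signer's signature
// is copied into the prover's slot. Types and sizes are simplified.
func buildParticipationBitmask(
	proverKeys [][]byte, // active provers, in canonical order
	signerKeys [][]byte, // public keys that contributed signatures
	signatures [][]byte, // signatures aligned with signerKeys; [74:] is the multiproof
	withFilter bool,
) (bitmask []byte, extra []byte) {
	// map signer key -> its index, so the matching signature can be found
	signerIndex := map[string]int{}
	for i, k := range signerKeys {
		signerIndex[string(k)] = i
	}

	bitmask = make([]byte, (len(proverKeys)+7)/8)
	if withFilter {
		extra = make([]byte, 516*len(proverKeys))
	}

	for i, pk := range proverKeys {
		j, ok := signerIndex[string(pk)]
		if !ok {
			continue
		}
		bitmask[i/8] |= 1 << (i % 8) // prover i participated
		if withFilter {
			copy(extra[516*i:516*(i+1)], signatures[j][74:])
		}
	}
	return bitmask, extra
}

func main() {
	keys := [][]byte{[]byte("a"), []byte("b"), []byte("c")}
	sigs := [][]byte{append(make([]byte, 74), make([]byte, 516)...)}
	bm, _ := buildParticipationBitmask(keys, [][]byte{[]byte("b")}, sigs, true)
	fmt.Printf("%08b\n", bm[0]) // bit 1 set for prover "b"
}
```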

View File

@ -127,6 +127,7 @@ type AppConsensusEngine struct {
quit chan struct{}
canRunStandalone bool
blacklistMap map[string]bool
currentRank uint64
alertPublicKey []byte
// Message queues
@ -497,6 +498,10 @@ func NewAppConsensusEngine(
}
pending = engine.getPendingProposals(frame.Header.FrameNumber)
}
liveness, err := engine.consensusStore.GetLivenessState(appAddress)
if err == nil {
engine.currentRank = liveness.CurrentRank
}
engine.voteAggregator, err = voting.NewAppShardVoteAggregator[PeerID](
tracing.NewZapTracer(logger),
@ -1948,7 +1953,7 @@ func (e *AppConsensusEngine) OnQuorumCertificateTriggeredRankChange(
return
}
nextLeader, err := e.LeaderForRank(newRank, frame.Identity())
nextLeader, err := e.LeaderForRank(newRank)
if err != nil {
e.logger.Error("could not determine next prover", zap.Error(err))
return
@ -1974,8 +1979,9 @@ func (e *AppConsensusEngine) OnQuorumCertificateTriggeredRankChange(
return
}
challenge := sha3.Sum256([]byte(frame.Identity()))
proof := e.frameProver.CalculateMultiProof(
[32]byte([]byte(frame.Identity())),
challenge,
frame.Header.Difficulty,
ids,
uint32(myIndex),
@ -1989,6 +1995,7 @@ func (e *AppConsensusEngine) OnQuorumCertificateTriggeredRankChange(
// OnRankChange implements consensus.Consumer.
func (e *AppConsensusEngine) OnRankChange(oldRank uint64, newRank uint64) {
e.currentRank = newRank
err := e.ensureGlobalClient()
if err != nil {
e.logger.Error("cannot confirm cross-shard locks", zap.Error(err))
@ -2363,7 +2370,7 @@ func (e *AppConsensusEngine) VerifyQuorumCertificate(
qc.AggregateSignature.GetPubKey(),
qc.AggregateSignature.GetSignature(),
verification.MakeVoteMessage(nil, qc.Rank, qc.Identity()),
[]byte("appshard"),
slices.Concat([]byte("appshard"), e.appAddress),
); !valid {
return models.ErrInvalidSignature
}
@ -2426,7 +2433,7 @@ func (e *AppConsensusEngine) VerifyTimeoutCertificate(
tc.Rank,
tc.LatestQuorumCertificate.Rank,
),
[]byte("appshardtimeout"),
slices.Concat([]byte("appshardtimeout"), e.appAddress),
); !valid {
return models.ErrInvalidSignature
}
@ -2469,7 +2476,7 @@ func (e *AppConsensusEngine) VerifyVote(
pubkey,
(*vote).PublicKeySignatureBls48581.Signature[:74],
verification.MakeVoteMessage(nil, (*vote).Rank, (*vote).Source()),
[]byte("appshard"),
slices.Concat([]byte("appshard"), e.appAddress),
); !valid {
return models.ErrInvalidSignature
}

View File

@ -3,6 +3,7 @@ package app
import (
"bytes"
"encoding/binary"
"math/big"
"slices"
"github.com/iden3/go-iden3-crypto/poseidon"
@ -105,28 +106,26 @@ func (e *AppConsensusEngine) IdentityByState(
}
// LeaderForRank implements consensus.DynamicCommittee.
func (e *AppConsensusEngine) LeaderForRank(
rank uint64,
selector models.Identity,
) (models.Identity, error) {
func (e *AppConsensusEngine) LeaderForRank(rank uint64) (
models.Identity,
error,
) {
// TODO(2.2): revisit this
inputBI, err := poseidon.HashBytes(slices.Concat(
[]byte(selector),
binary.BigEndian.AppendUint64(nil, rank),
binary.BigEndian.AppendUint64(slices.Clone(e.appAddress), rank),
))
if err != nil {
return "", errors.Wrap(err, "leader for rank")
}
input := inputBI.FillBytes(make([]byte, 32))
prover, err := e.proverRegistry.GetNextProver(
[32]byte(input),
e.appAddress,
)
proverSet, err := e.proverRegistry.GetActiveProvers(e.appAddress)
if err != nil {
return "", errors.Wrap(err, "leader for rank")
}
return models.Identity(prover), nil
inputBI.Mod(inputBI, big.NewInt(int64(len(proverSet))))
index := inputBI.Int64()
return models.Identity(proverSet[int(index)].Address), nil
}
// QuorumThresholdForRank implements consensus.DynamicCommittee.

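With the selector argument removed, leader selection above depends only on the shard address and the rank: hash the address concatenated with the big-endian rank, then reduce modulo the size of the active prover set. A minimal sketch of that rule, using sha256 in place of the poseidon hash and a plain slice of addresses for the prover set (both simplifications for illustration):

```go
package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
	"math/big"
	"slices"
)

// leaderForRank sketches the new selection rule: hash the shard address
// together with the big-endian rank, then reduce the result modulo the size
// of the active prover set. sha256 stands in for the poseidon hash used in
// the actual code, and proverSet is a plain slice of prover addresses.
func leaderForRank(appAddress []byte, rank uint64, proverSet [][]byte) []byte {
	input := binary.BigEndian.AppendUint64(slices.Clone(appAddress), rank)
	digest := sha256.Sum256(input)

	idx := new(big.Int).SetBytes(digest[:])
	idx.Mod(idx, big.NewInt(int64(len(proverSet))))
	return proverSet[idx.Int64()]
}

func main() {
	provers := [][]byte{[]byte("prover-a"), []byte("prover-b"), []byte("prover-c")}
	for rank := uint64(0); rank < 3; rank++ {
		fmt.Printf("rank %d -> %s\n", rank, leaderForRank([]byte("shard"), rank, provers))
	}
}
```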
View File

@ -84,8 +84,57 @@ func (p *AppLeaderProvider) ProveNextState(
return nil, models.NewNoVoteErrorf("missing prior frame")
}
latestQC, qcErr := p.engine.clockStore.GetLatestQuorumCertificate(
p.engine.appAddress,
)
if qcErr != nil {
p.engine.logger.Debug(
"could not fetch latest quorum certificate",
zap.Error(qcErr),
)
}
if prior.Identity() != priorState {
frameProvingTotal.WithLabelValues("error").Inc()
if latestQC != nil && latestQC.Identity() == priorState {
switch {
case prior.Header.Rank < latestQC.GetRank():
return nil, models.NewNoVoteErrorf(
"needs sync: prior rank %d behind latest qc rank %d",
prior.Header.Rank,
latestQC.GetRank(),
)
case prior.Header.Rank == latestQC.GetRank() &&
latestQC.Identity() != prior.Identity():
peerID, peerErr := p.engine.getRandomProverPeerId()
if peerErr != nil {
p.engine.logger.Warn(
"could not determine peer for fork sync",
zap.Error(peerErr),
)
} else {
p.engine.logger.Warn(
"detected fork, scheduling sync",
zap.Uint64("frame_number", latestQC.GetFrameNumber()),
zap.String("peer_id", peerID.String()),
)
p.engine.syncProvider.AddState(
[]byte(peerID),
latestQC.GetFrameNumber(),
[]byte(latestQC.Identity()),
)
}
return nil, models.NewNoVoteErrorf(
"fork detected at rank %d (local: %x, qc: %x)",
latestQC.GetRank(),
prior.Identity(),
latestQC.Identity(),
)
}
}
return nil, models.NewNoVoteErrorf(
"building on fork or needs sync: frame %d, rank %d, parent_id: %x, asked: rank %d, id: %x",
prior.Header.FrameNumber,

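The new guard above distinguishes three mismatch cases when the stored prior frame does not match the state consensus asked the leader to extend: the node is behind the latest quorum certificate, the node holds a conflicting frame at the QC's rank (a fork, which schedules a sync toward the QC's frame), or a mismatch the QC cannot explain. A distilled sketch of that classification, with string identities and a hypothetical classifyPrior helper standing in for the inline switch:

```go
package main

import "fmt"

type priorStatus int

const (
	priorMatches        priorStatus = iota // safe to build on the stored frame
	behindLatestQC                         // stored frame is behind the latest QC: plain sync needed
	forkAtQCRank                           // same rank as the QC, different identity: fork, resync to the QC frame
	unexplainedMismatch                    // mismatch the QC cannot explain: generic "fork or needs sync" no-vote
)

// classifyPrior distills the new guard in ProveNextState: the locally stored
// prior frame is compared against the identity consensus asked us to extend,
// using the latest quorum certificate as the reference point.
func classifyPrior(
	priorID string, priorRank uint64,
	askedID string,
	qcID string, qcRank uint64, haveQC bool,
) priorStatus {
	if priorID == askedID {
		return priorMatches
	}
	if !haveQC || qcID != askedID {
		return unexplainedMismatch
	}
	switch {
	case priorRank < qcRank:
		return behindLatestQC
	case priorRank == qcRank && priorID != qcID:
		return forkAtQCRank
	}
	return unexplainedMismatch
}

func main() {
	fmt.Println(classifyPrior("aa", 5, "bb", "bb", 7, true)) // 1: behind, schedule a normal sync
	fmt.Println(classifyPrior("aa", 7, "bb", "bb", 7, true)) // 2: fork at the QC's rank
}
```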
View File

@ -13,6 +13,7 @@ import (
"os"
"path"
"path/filepath"
"slices"
"strings"
"time"
@ -38,6 +39,7 @@ const defaultStateQueueCapacity = 10
type syncRequest struct {
frameNumber uint64
peerId []byte
identity []byte
}
// AppSyncProvider implements SyncProvider
@ -81,7 +83,36 @@ func (p *AppSyncProvider) Start(
ctx,
request.frameNumber,
request.peerId,
request.identity,
)
case <-time.After(10 * time.Minute):
// If we got here, consensus has stalled for an extended period. As a last
// resort, trigger a sync to get everyone re-aligned:
head, err := p.engine.appTimeReel.GetHead()
if err != nil {
p.engine.logger.Error(
"head frame not found for time reel",
zap.Error(err),
)
continue
}
if time.UnixMilli(head.Header.Timestamp).Before(
time.Now().Add(-10 * time.Minute),
) {
peer, err := p.engine.getRandomProverPeerId()
if err != nil {
p.engine.logger.Error("could not get random prover", zap.Error(err))
continue
}
p.processState(
ctx,
head.Header.FrameNumber,
[]byte(peer),
[]byte(head.Identity()),
)
}
}
}
}
@ -90,11 +121,13 @@ func (p *AppSyncProvider) processState(
ctx context.Context,
frameNumber uint64,
peerID []byte,
identity []byte,
) {
err := p.syncWithPeer(
ctx,
frameNumber,
peerID,
identity,
)
if err != nil {
p.engine.logger.Error("could not sync with peer", zap.Error(err))
@ -320,7 +353,12 @@ func (p *AppSyncProvider) syncWithMesh(ctx context.Context) error {
latest = head
}
err = p.syncWithPeer(ctx, latest.Header.FrameNumber, []byte(peerID))
err = p.syncWithPeer(
ctx,
latest.Header.FrameNumber,
[]byte(peerID),
[]byte(latest.Identity()),
)
if err != nil {
p.engine.logger.Debug("error syncing frame", zap.Error(err))
}
@ -339,6 +377,7 @@ func (p *AppSyncProvider) syncWithPeer(
ctx context.Context,
frameNumber uint64,
peerId []byte,
expectedIdentity []byte,
) error {
p.engine.logger.Info(
"polling peer for new frames",
@ -465,6 +504,29 @@ func (p *AppSyncProvider) syncWithPeer(
p.engine.logger.Debug("received invalid response from peer")
return nil
}
if len(expectedIdentity) != 0 {
if !bytes.Equal(
[]byte(response.Proposal.State.Identity()),
expectedIdentity,
) {
p.engine.logger.Warn(
"aborting sync due to unexpected frame identity",
zap.Uint64("frame_number", frameNumber),
zap.String(
"expected",
hex.EncodeToString(expectedIdentity),
),
zap.String(
"received",
hex.EncodeToString(
[]byte(response.Proposal.State.Identity()),
),
),
)
return errors.New("sync frame identity mismatch")
}
expectedIdentity = nil
}
p.engine.logger.Info(
"received new leading frame",
zap.Uint64(
@ -476,9 +538,25 @@ func (p *AppSyncProvider) syncWithPeer(
frametime.AppFrameSince(response.Proposal.State),
),
)
provers, err := p.engine.proverRegistry.GetActiveProvers(
p.engine.appAddress,
)
if err != nil {
p.engine.logger.Error(
"could not obtain active provers",
zap.Error(err),
)
return err
}
ids := [][]byte{}
for _, p := range provers {
ids = append(ids, p.Address)
}
if _, err := p.engine.frameProver.VerifyFrameHeader(
response.Proposal.State.Header,
p.engine.blsConstructor,
ids,
); err != nil {
return errors.Wrap(err, "sync")
}
@ -499,19 +577,35 @@ func (p *AppSyncProvider) syncWithPeer(
func (p *AppSyncProvider) AddState(
sourcePeerID []byte,
frameNumber uint64,
expectedIdentity []byte,
) {
// Drop if we're within the threshold
if frameNumber <=
(*p.engine.forks.FinalizedState().State).Header.FrameNumber {
p.engine.logger.Debug("dropping stale state for sync")
(*p.engine.forks.FinalizedState().State).Header.FrameNumber &&
frameNumber != 0 {
p.engine.logger.Debug(
"dropping stale state for sync",
zap.Uint64("frame_requested", frameNumber),
zap.Uint64(
"finalized_frame",
(*p.engine.forks.FinalizedState().State).Header.FrameNumber,
),
)
return
}
// Handle special case: we're at genesis frame on time reel
if frameNumber == 0 {
frameNumber = 1
expectedIdentity = []byte{}
}
// Enqueue if we can, otherwise drop it because we'll catch up
select {
case p.queuedStates <- syncRequest{
frameNumber: frameNumber,
peerId: sourcePeerID,
peerId: slices.Clone(sourcePeerID),
identity: slices.Clone(expectedIdentity),
}:
p.engine.logger.Debug(
"enqueued sync request",

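Sync requests now carry the identity the requester expects at the target frame, syncWithPeer aborts when the peer returns a different identity, and a frame number of 0 acts as a sync-from-genesis sentinel that is remapped to frame 1 with the expectation cleared. A small sketch of the enqueue-side checks, assuming a plain channel-backed queue and a hypothetical enqueueSync helper:

```go
package main

import (
	"fmt"
	"slices"
)

// syncRequest mirrors the extended queue entry: the expected frame identity
// travels with the request so the responder's first frame can be checked.
type syncRequest struct {
	frameNumber uint64
	peerID      []byte
	identity    []byte
}

// enqueueSync sketches the new AddState checks: requests at or below the
// finalized frame are dropped (unless frame 0 is used as a "sync from
// genesis" sentinel, which is remapped to frame 1 with no expected identity),
// and the byte slices are cloned before being handed to the queue.
func enqueueSync(queue chan syncRequest, finalized uint64, peerID []byte, frameNumber uint64, expectedIdentity []byte) bool {
	if frameNumber <= finalized && frameNumber != 0 {
		return false // already finalized locally; nothing to fetch
	}
	if frameNumber == 0 {
		frameNumber = 1
		expectedIdentity = nil
	}
	select {
	case queue <- syncRequest{
		frameNumber: frameNumber,
		peerID:      slices.Clone(peerID),
		identity:    slices.Clone(expectedIdentity),
	}:
		return true
	default:
		return false // queue full; we'll catch up later
	}
}

func main() {
	q := make(chan syncRequest, 1)
	fmt.Println(enqueueSync(q, 10, []byte("peer"), 5, nil))          // false: stale
	fmt.Println(enqueueSync(q, 10, []byte("peer"), 0, []byte("id"))) // true: genesis sentinel
	fmt.Println((<-q).frameNumber)                                   // 1
}
```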
View File

@ -109,7 +109,10 @@ func (p *AppVotingProvider) SignTimeoutVote(
newestQuorumCertificateRank,
)
sig, err := signer.SignWithDomain(signatureData, []byte("appshardtimeout"))
sig, err := signer.SignWithDomain(
signatureData,
slices.Concat([]byte("appshardtimeout"), p.engine.appAddress),
)
if err != nil {
p.engine.logger.Error("could not sign vote", zap.Error(err))
return nil, errors.Wrap(err, "sign vote")
@ -148,10 +151,7 @@ func (p *AppVotingProvider) SignVote(
)
}
nextLeader, err := p.engine.LeaderForRank(
state.Rank,
state.ParentQuorumCertificate.Identity(),
)
nextLeader, err := p.engine.LeaderForRank(state.Rank)
if err != nil {
p.engine.logger.Error("could not determine next prover", zap.Error(err))
return nil, errors.Wrap(
@ -167,7 +167,10 @@ func (p *AppVotingProvider) SignVote(
p.engine.proofCacheMu.RUnlock()
if !ok {
return nil, errors.Wrap(errors.New("no proof ready for vote"), "sign vote")
return nil, errors.Wrap(
errors.New("no proof ready for vote"),
"sign vote",
)
}
extProof = proof[:]
}
@ -178,7 +181,10 @@ func (p *AppVotingProvider) SignVote(
state.Rank,
state.Identifier,
)
sig, err := signer.SignWithDomain(signatureData, []byte("appshard"))
sig, err := signer.SignWithDomain(
signatureData,
slices.Concat([]byte("appshard"), p.engine.appAddress),
)
if err != nil {
p.engine.logger.Error("could not sign vote", zap.Error(err))
return nil, errors.Wrap(err, "sign vote")

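The signing domains above now embed the shard address ("appshard"/"appshardtimeout" plus appAddress), matching the verification changes in the engine, which should keep a vote signed for one shard from verifying against another. A sketch of the idea with HMAC-SHA256 standing in for the BLS48-581 signer (purely for illustration; the real call is SignWithDomain on the BLS key):

```go
package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"fmt"
	"slices"
)

// signWithDomain is a stand-in for the BLS signer: it mixes a domain tag into
// the signature so the same message signed under different domains verifies
// differently. HMAC-SHA256 replaces BLS48-581 purely for illustration.
func signWithDomain(key, message, domain []byte) []byte {
	mac := hmac.New(sha256.New, key)
	mac.Write(domain)
	mac.Write(message)
	return mac.Sum(nil)
}

func main() {
	key := []byte("prover-key")
	msg := []byte("vote: rank 42")
	shardA := []byte{0x01}
	shardB := []byte{0x02}

	// The change in this commit: the domain is "appshard" plus the shard
	// address, binding signatures to a specific app shard.
	sigA := signWithDomain(key, msg, slices.Concat([]byte("appshard"), shardA))
	sigB := signWithDomain(key, msg, slices.Concat([]byte("appshard"), shardB))

	fmt.Println(hmac.Equal(sigA, sigB)) // false: a vote for shard A cannot be replayed on shard B
}
```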
View File

@ -191,21 +191,30 @@ func (e *AppConsensusEngine) handleAppShardProposal(
finalized := e.forks.FinalizedState()
finalizedRank := finalized.Rank
finalizedFrameNumber := (*finalized.State).Header.FrameNumber
frameNumber := proposal.State.Header.FrameNumber
// drop proposals if we already processed them
if proposal.State.Header.FrameNumber <= finalizedFrameNumber ||
if frameNumber <= finalizedFrameNumber ||
proposal.State.Header.Rank <= finalizedRank {
e.logger.Debug("dropping stale proposal")
return
}
_, _, err := e.clockStore.GetShardClockFrame(
existingFrame, _, err := e.clockStore.GetShardClockFrame(
proposal.State.Header.Address,
proposal.State.Header.FrameNumber,
frameNumber,
false,
)
if err == nil {
e.logger.Debug("dropping stale proposal")
return
if err == nil && existingFrame != nil {
qc, qcErr := e.clockStore.GetQuorumCertificate(
proposal.State.Header.Address,
proposal.State.GetRank(),
)
if qcErr == nil && qc != nil &&
qc.GetFrameNumber() == frameNumber &&
qc.Identity() == proposal.State.Identity() {
e.logger.Debug("dropping stale proposal")
return
}
}
if proposal.State.Header.FrameNumber != 0 {
@ -239,12 +248,14 @@ func (e *AppConsensusEngine) handleAppShardProposal(
return
}
e.syncProvider.AddState([]byte(peerID), head.Header.FrameNumber)
e.syncProvider.AddState(
[]byte(peerID),
head.Header.FrameNumber,
[]byte(head.Identity()),
)
return
}
}
frameNumber := proposal.State.Header.FrameNumber
expectedFrame, err := e.appTimeReel.GetHead()
if err != nil {
e.logger.Error("could not obtain app time reel head", zap.Error(err))
@ -351,7 +362,13 @@ func (e *AppConsensusEngine) processProposal(
signedProposal.PreviousRankTimeoutCertificate = prtc
}
e.voteAggregator.AddState(signedProposal)
// IMPORTANT: we do not want to send old proposals to the vote aggregator or
// we risk engine shutdown if the leader selection method changed. Frame
// validation ensures that the proposer is valid for the proposal per time
// reel rules.
if signedProposal.State.Rank >= e.currentRank {
e.voteAggregator.AddState(signedProposal)
}
e.consensusParticipant.SubmitProposal(signedProposal)
proposalProcessedTotal.WithLabelValues(e.appAddressHex, "success").Inc()

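The tightened staleness check above only drops a proposal whose frame is already stored locally when a quorum certificate at the proposal's rank actually covers that frame number and identity; otherwise the proposal is reprocessed, and old-rank proposals are additionally kept out of the vote aggregator. A minimal sketch of the drop decision, with a simplified quorumCert struct and a hypothetical isConfirmedDuplicate helper:

```go
package main

import "fmt"

// quorumCert is a minimal stand-in for the stored QC: just the fields the
// staleness check needs.
type quorumCert struct {
	frameNumber uint64
	identity    string
}

// isConfirmedDuplicate sketches the tightened drop rule from
// handleAppShardProposal / handleGlobalProposal: an already-stored frame only
// makes the incoming proposal stale when a QC at the proposal's rank refers to
// the same frame number and identity; otherwise the proposal is reprocessed.
func isConfirmedDuplicate(haveLocalFrame bool, qc *quorumCert, frameNumber uint64, identity string) bool {
	if !haveLocalFrame || qc == nil {
		return false
	}
	return qc.frameNumber == frameNumber && qc.identity == identity
}

func main() {
	qc := &quorumCert{frameNumber: 7, identity: "abc"}
	fmt.Println(isConfirmedDuplicate(true, qc, 7, "abc"))  // true: safe to drop
	fmt.Println(isConfirmedDuplicate(true, qc, 7, "def"))  // false: competing frame, keep processing
	fmt.Println(isConfirmedDuplicate(true, nil, 7, "abc")) // false: no QC yet
}
```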
View File

@ -96,8 +96,9 @@ func (e *AppConsensusEngine) validateConsensusMessage(
return p2p.ValidationResultReject
}
now := uint64(time.Now().UnixMilli())
if vote.Timestamp > now+5000 || vote.Timestamp < now-5000 {
// We should still accept votes for the past rank either because a peer
// needs it, or because we need it to trump a TC
if e.currentRank > vote.Rank+1 {
voteValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
return p2p.ValidationResultIgnore
}
@ -126,8 +127,8 @@ func (e *AppConsensusEngine) validateConsensusMessage(
return p2p.ValidationResultReject
}
now := uint64(time.Now().UnixMilli())
if timeoutState.Timestamp > now+5000 || timeoutState.Timestamp < now-5000 {
// We should still accept timeouts for the past rank in case a peer needs them
if e.currentRank > timeoutState.Vote.Rank+1 {
timeoutStateValidationTotal.WithLabelValues(
e.appAddressHex,
"ignore",

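The timestamp window (±5s) is replaced by a rank-based rule: a vote or timeout is kept for the current rank, any future rank, or the rank immediately before the current one, since a peer may still need it or it may trump a timeout certificate. A tiny sketch of the acceptance predicate (the helper name is illustrative):

```go
package main

import "fmt"

// acceptVoteRank sketches the relaxed staleness rule: instead of a timestamp
// window, a vote is kept as long as it is for the current rank, a future rank,
// or the rank immediately before the current one.
func acceptVoteRank(currentRank, voteRank uint64) bool {
	return currentRank <= voteRank+1
}

func main() {
	fmt.Println(acceptVoteRank(10, 11)) // true: future rank
	fmt.Println(acceptVoteRank(10, 9))  // true: previous rank still useful
	fmt.Println(acceptVoteRank(10, 8))  // false: stale, ignored
}
```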
View File

@ -3,6 +3,7 @@ package global
import (
"bytes"
"encoding/binary"
"math/big"
"slices"
"github.com/iden3/go-iden3-crypto/poseidon"
@ -107,23 +108,23 @@ func (e *GlobalConsensusEngine) IdentityByState(
// LeaderForRank implements consensus.DynamicCommittee.
func (e *GlobalConsensusEngine) LeaderForRank(
rank uint64,
selector models.Identity,
) (models.Identity, error) {
// TODO(2.2): revisit this
inputBI, err := poseidon.HashBytes(slices.Concat(
[]byte(selector),
binary.BigEndian.AppendUint64(nil, rank),
))
if err != nil {
return "", errors.Wrap(err, "leader for rank")
}
input := inputBI.FillBytes(make([]byte, 32))
prover, err := e.proverRegistry.GetNextProver([32]byte(input), nil)
proverSet, err := e.proverRegistry.GetActiveProvers(nil)
if err != nil {
return "", errors.Wrap(err, "leader for rank")
}
return models.Identity(prover), nil
inputBI.Mod(inputBI, big.NewInt(int64(len(proverSet))))
index := inputBI.Int64()
return models.Identity(proverSet[int(index)].Address), nil
}
// QuorumThresholdForRank implements consensus.DynamicCommittee.

View File

@ -90,8 +90,55 @@ func (p *GlobalLeaderProvider) ProveNextState(
return nil, models.NewNoVoteErrorf("missing prior frame")
}
latestQC, qcErr := p.engine.clockStore.GetLatestQuorumCertificate(nil)
if qcErr != nil {
p.engine.logger.Debug(
"could not fetch latest quorum certificate",
zap.Error(qcErr),
)
}
if prior.Identity() != priorState {
frameProvingTotal.WithLabelValues("error").Inc()
if latestQC != nil && latestQC.Identity() == priorState {
switch {
case prior.Header.Rank < latestQC.GetRank():
return nil, models.NewNoVoteErrorf(
"needs sync: prior rank %d behind latest qc rank %d",
prior.Header.Rank,
latestQC.GetRank(),
)
case prior.Header.Rank == latestQC.GetRank() &&
latestQC.Identity() != prior.Identity():
peerID, peerErr := p.engine.getRandomProverPeerId()
if peerErr != nil {
p.engine.logger.Warn(
"could not determine peer for fork sync",
zap.Error(peerErr),
)
} else {
p.engine.logger.Warn(
"detected fork, scheduling sync",
zap.Uint64("frame_number", latestQC.GetFrameNumber()),
zap.String("peer_id", peerID.String()),
)
p.engine.syncProvider.AddState(
[]byte(peerID),
latestQC.GetFrameNumber(),
[]byte(latestQC.Identity()),
)
}
return nil, models.NewNoVoteErrorf(
"fork detected at rank %d (local: %x, qc: %x)",
latestQC.GetRank(),
prior.Identity(),
latestQC.Identity(),
)
}
}
return nil, models.NewNoVoteErrorf(
"building on fork or needs sync: frame %d, rank %d, parent_id: %x, asked: rank %d, id: %x",
prior.Header.FrameNumber,

View File

@ -3,7 +3,9 @@ package global
import (
"bytes"
"context"
"encoding/hex"
"fmt"
"slices"
"time"
"github.com/libp2p/go-libp2p/core/crypto"
@ -26,6 +28,7 @@ const defaultStateQueueCapacity = 10
type syncRequest struct {
frameNumber uint64
peerId []byte
identity []byte
}
// GlobalSyncProvider implements SyncProvider
@ -69,6 +72,7 @@ func (p *GlobalSyncProvider) Start(
ctx,
request.frameNumber,
request.peerId,
request.identity,
)
}
}
@ -78,11 +82,13 @@ func (p *GlobalSyncProvider) processState(
ctx context.Context,
frameNumber uint64,
peerID []byte,
identity []byte,
) {
err := p.syncWithPeer(
ctx,
frameNumber,
peerID,
identity,
)
if err != nil {
p.engine.logger.Error("could not sync with peer", zap.Error(err))
@ -230,7 +236,12 @@ func (p *GlobalSyncProvider) syncWithMesh(ctx context.Context) error {
latest = head
}
err = p.syncWithPeer(ctx, latest.Header.FrameNumber, []byte(peerID))
err = p.syncWithPeer(
ctx,
latest.Header.FrameNumber,
[]byte(peerID),
nil,
)
if err != nil {
p.engine.logger.Debug("error syncing frame", zap.Error(err))
}
@ -249,6 +260,7 @@ func (p *GlobalSyncProvider) syncWithPeer(
ctx context.Context,
frameNumber uint64,
peerId []byte,
expectedIdentity []byte,
) error {
p.engine.logger.Info(
"polling peer for new frames",
@ -372,6 +384,30 @@ func (p *GlobalSyncProvider) syncWithPeer(
return nil
}
if len(expectedIdentity) != 0 {
if !bytes.Equal(
[]byte(response.Proposal.State.Identity()),
expectedIdentity,
) {
p.engine.logger.Warn(
"aborting sync due to unexpected frame identity",
zap.Uint64("frame_number", frameNumber),
zap.String(
"expected",
hex.EncodeToString(expectedIdentity),
),
zap.String(
"received",
hex.EncodeToString(
[]byte(response.Proposal.State.Identity()),
),
),
)
return errors.New("sync frame identity mismatch")
}
expectedIdentity = nil
}
p.engine.logger.Info(
"received new leading frame",
zap.Uint64("frame_number", response.Proposal.State.Header.FrameNumber),
@ -499,19 +535,35 @@ func (p *GlobalSyncProvider) hyperSyncHyperedgeRemoves(
func (p *GlobalSyncProvider) AddState(
sourcePeerID []byte,
frameNumber uint64,
expectedIdentity []byte,
) {
// Drop if we're within the threshold
if frameNumber <=
(*p.engine.forks.FinalizedState().State).Header.FrameNumber {
p.engine.logger.Debug("dropping stale state for sync")
(*p.engine.forks.FinalizedState().State).Header.FrameNumber &&
frameNumber != 0 {
p.engine.logger.Debug(
"dropping stale state for sync",
zap.Uint64("frame_requested", frameNumber),
zap.Uint64(
"finalized_frame",
(*p.engine.forks.FinalizedState().State).Header.FrameNumber,
),
)
return
}
// Handle special case: we're at genesis frame on time reel
if frameNumber == 0 {
frameNumber = 1
expectedIdentity = []byte{}
}
// Enqueue if we can, otherwise drop it because we'll catch up
select {
case p.queuedStates <- syncRequest{
frameNumber: frameNumber,
peerId: sourcePeerID,
peerId: slices.Clone(sourcePeerID),
identity: slices.Clone(expectedIdentity),
}:
p.engine.logger.Debug(
"enqueued sync request",

View File

@ -572,18 +572,18 @@ func (e *GlobalConsensusEngine) evaluateForProposals(
}
}
e.logger.Debug(
"appending descriptor for allocation planning",
zap.String("shard_key", hex.EncodeToString(bp)),
zap.Uint64("size", size.Uint64()),
zap.Int("ring", len(above)/8),
zap.Int("shard_count", int(shard.DataShards)),
)
if allocated && pending {
pendingFilters = append(pendingFilters, bp)
}
if !allocated {
e.logger.Debug(
"appending descriptor for allocation planning",
zap.String("shard_key", hex.EncodeToString(bp)),
zap.Uint64("size", size.Uint64()),
zap.Int("ring", len(above)/8),
zap.Int("shard_count", int(shard.DataShards)),
)
proposalDescriptors = append(
proposalDescriptors,
provers.ShardDescriptor{

View File

@ -2806,6 +2806,7 @@ func (e *GlobalConsensusEngine) OnQuorumCertificateTriggeredRankChange(
e.syncProvider.AddState(
[]byte(peer),
current.Header.FrameNumber,
[]byte(current.Identity()),
)
return
}
@ -2846,6 +2847,7 @@ func (e *GlobalConsensusEngine) OnQuorumCertificateTriggeredRankChange(
e.syncProvider.AddState(
[]byte(peerID),
current.Header.FrameNumber,
[]byte(current.Identity()),
)
return
}

View File

@ -869,18 +869,27 @@ func (e *GlobalConsensusEngine) handleGlobalProposal(
finalized := e.forks.FinalizedState()
finalizedRank := finalized.Rank
finalizedFrameNumber := (*finalized.State).Header.FrameNumber
frameNumber := proposal.State.Header.FrameNumber
// drop proposals if we already processed them
if proposal.State.Header.FrameNumber <= finalizedFrameNumber ||
if frameNumber <= finalizedFrameNumber ||
proposal.State.Header.Rank <= finalizedRank {
e.logger.Debug("dropping stale proposal")
return
}
_, err := e.clockStore.GetGlobalClockFrame(proposal.State.Header.FrameNumber)
if err == nil {
e.logger.Debug("dropping stale proposal")
return
existingFrame, err := e.clockStore.GetGlobalClockFrame(frameNumber)
if err == nil && existingFrame != nil {
qc, qcErr := e.clockStore.GetQuorumCertificate(
nil,
proposal.State.GetRank(),
)
if qcErr == nil && qc != nil &&
qc.GetFrameNumber() == frameNumber &&
qc.Identity() == proposal.State.Identity() {
e.logger.Debug("dropping stale proposal")
return
}
}
// if we have a parent, cache and move on
@ -915,12 +924,12 @@ func (e *GlobalConsensusEngine) handleGlobalProposal(
e.syncProvider.AddState(
[]byte(peerID),
head.Header.FrameNumber,
[]byte(head.Identity()),
)
return
}
}
frameNumber := proposal.State.Header.FrameNumber
expectedFrame, err := e.globalTimeReel.GetHead()
if err != nil {
e.logger.Error("could not obtain time reel head", zap.Error(err))
@ -1021,7 +1030,14 @@ func (e *GlobalConsensusEngine) processProposal(
if prtc != nil {
signedProposal.PreviousRankTimeoutCertificate = prtc
}
e.voteAggregator.AddState(signedProposal)
// IMPORTANT: we do not want to send old proposals to the vote aggregator or
// we risk engine shutdown if the leader selection method changed. Frame
// validation ensures that the proposer is valid for the proposal per time
// reel rules.
if signedProposal.State.Rank >= e.currentRank {
e.voteAggregator.AddState(signedProposal)
}
e.consensusParticipant.SubmitProposal(signedProposal)
e.trySealParentWithChild(proposal)
@ -1732,7 +1748,7 @@ func (e *GlobalConsensusEngine) handleShardVote(message *pb.Message) {
voterPublicKey,
signatureData,
vote.PublicKeySignatureBls48581.Signature,
[]byte("appshard"),
slices.Concat([]byte("appshard"), vote.Filter),
)
if err != nil || !valid {

View File

@ -83,7 +83,9 @@ func (e *GlobalConsensusEngine) validateGlobalConsensusMessage(
return tp2p.ValidationResultReject
}
if e.currentRank > vote.Rank {
// We should still accept votes for the past rank either because a peer
// needs it, or because we need it to trump a TC
if e.currentRank > vote.Rank+1 {
e.logger.Debug("vote is stale")
return tp2p.ValidationResultIgnore
}
@ -110,7 +112,8 @@ func (e *GlobalConsensusEngine) validateGlobalConsensusMessage(
return tp2p.ValidationResultReject
}
if e.currentRank > timeoutState.Vote.Rank {
// We should still accept timeouts for the past rank in case a peer needs them
if e.currentRank > timeoutState.Vote.Rank+1 {
e.logger.Debug("timeout is stale")
return tp2p.ValidationResultIgnore
}

View File

@ -66,7 +66,11 @@ func (b *BLSAppFrameValidator) Validate(
}
}
bits, err := b.frameProver.VerifyFrameHeader(frame.Header, b.blsConstructor)
bits, err := b.frameProver.VerifyFrameHeader(
frame.Header,
b.blsConstructor,
nil,
)
isValid := err == nil
if !isValid {

View File

@ -1,6 +1,8 @@
package voting
import (
"slices"
"github.com/gammazero/workerpool"
"github.com/pkg/errors"
"source.quilibrium.com/quilibrium/monorepo/consensus"
@ -61,7 +63,7 @@ func NewAppShardVoteAggregator[PeerIDT models.Unique](
](
voteProcessorFactory.Create,
),
[]byte("appshard"),
slices.Concat([]byte("appshard"), filter),
signatureAggregator,
votingProvider,
)
@ -119,7 +121,7 @@ func NewAppShardTimeoutAggregator[PeerIDT models.Unique](
committee,
validator,
votingProvider,
[]byte("appshardtimeout"),
slices.Concat([]byte("appshardtimeout"), filter),
)
timeoutCollectorFactory := timeoutcollector.NewTimeoutCollectorFactory(

View File

@ -496,6 +496,7 @@ func (p *ProverKick) verifyEquivocation(kickedPublicKey []byte) bool {
valid, err := p.frameProver.VerifyFrameHeaderSignature(
frame1,
p.blsConstructor,
nil,
)
if !valid || err != nil {
return false
@ -504,6 +505,7 @@ func (p *ProverKick) verifyEquivocation(kickedPublicKey []byte) bool {
valid, err = p.frameProver.VerifyFrameHeaderSignature(
frame2,
p.blsConstructor,
nil,
)
if !valid || err != nil {
return false

View File

@ -103,11 +103,6 @@ func (p *ProverShardUpdate) Verify(frameNumber uint64) (bool, error) {
return false, errors.Wrap(errors.New("invalid update"), "verify")
}
_, err = p.frameProver.VerifyFrameHeader(p.FrameHeader, p.blsConstructor)
if err != nil {
return false, errors.Wrap(err, "verify")
}
return true, nil
}
@ -267,9 +262,20 @@ func (p *ProverShardUpdate) buildContext() (*shardUpdateContext, error) {
return nil, errors.New("frame header missing address")
}
info, err := p.proverRegistry.GetActiveProvers(p.FrameHeader.Address)
if err != nil {
return nil, errors.Wrap(err, "get active provers")
}
ids := [][]byte{}
for _, p := range info {
ids = append(ids, p.Address)
}
setIndices, err := p.frameProver.VerifyFrameHeader(
p.FrameHeader,
p.blsConstructor,
ids,
)
if err != nil {
return nil, errors.Wrap(err, "verify frame header")

View File

@ -26,10 +26,12 @@ type FrameProver interface {
VerifyFrameHeader(
frame *protobufs.FrameHeader,
bls BlsConstructor,
ids [][]byte,
) ([]uint8, error)
VerifyFrameHeaderSignature(
frame *protobufs.FrameHeader,
bls BlsConstructor,
ids [][]byte,
) (bool, error)
GetFrameSignaturePayload(
frame *protobufs.FrameHeader,

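The new ids parameter lets VerifyFrameHeaderSignature check per-prover multiproofs: per the WesolowskiFrameProver changes later in this commit, the signature field holds a 74-byte aggregated BLS signature optionally followed by a big-endian uint32 count and that many 516-byte multiproofs, verified against a sha3-256 challenge of the parent selector. A sketch of just the layout parsing, with sizes taken from the diff and simplified error handling (parseFrameSignature is a hypothetical helper):

```go
package main

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
)

// parseFrameSignature sketches the frame signature layout: the first 74 bytes
// are the aggregated BLS signature, optionally followed by a big-endian uint32
// count and that many 516-byte per-prover multiproofs.
func parseFrameSignature(sig []byte) (blsSig []byte, multiproofs [][516]byte, err error) {
	if len(sig) < 74 {
		return nil, nil, errors.New("signature too short")
	}
	blsSig = sig[:74]
	if len(sig) == 74 {
		return blsSig, nil, nil // no multiproof section attached
	}
	buf := bytes.NewBuffer(sig[74:])
	var count uint32
	if err := binary.Read(buf, binary.BigEndian, &count); err != nil {
		return nil, nil, err
	}
	for i := uint32(0); i < count; i++ {
		var proof [516]byte
		if _, err := io.ReadFull(buf, proof[:]); err != nil {
			return nil, nil, err
		}
		multiproofs = append(multiproofs, proof)
	}
	return blsSig, multiproofs, nil
}

func main() {
	sig := make([]byte, 74)
	sig = append(sig, 0, 0, 0, 2)             // two multiproofs follow
	sig = append(sig, make([]byte, 2*516)...) // zero-filled proofs for the example
	_, proofs, err := parseFrameSignature(sig)
	fmt.Println(len(proofs), err) // 2 <nil>
}
```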
View File

@ -10,6 +10,8 @@ import (
"go.uber.org/zap"
"golang.org/x/crypto/sha3"
"source.quilibrium.com/quilibrium/monorepo/consensus/models"
"source.quilibrium.com/quilibrium/monorepo/consensus/verification"
"source.quilibrium.com/quilibrium/monorepo/protobufs"
qcrypto "source.quilibrium.com/quilibrium/monorepo/types/crypto"
)
@ -264,6 +266,7 @@ func (w *WesolowskiFrameProver) GetFrameSignaturePayload(
func (w *WesolowskiFrameProver) VerifyFrameHeader(
frame *protobufs.FrameHeader,
bls qcrypto.BlsConstructor,
ids [][]byte,
) ([]uint8, error) {
if len(frame.Address) == 0 {
return nil, errors.Wrap(
@ -329,7 +332,7 @@ func (w *WesolowskiFrameProver) VerifyFrameHeader(
return nil, nil
}
valid, err := w.VerifyFrameHeaderSignature(frame, bls)
valid, err := w.VerifyFrameHeaderSignature(frame, bls, ids)
if err != nil {
return nil, errors.Wrap(err, "verify frame header")
}
@ -347,17 +350,23 @@ func (w *WesolowskiFrameProver) VerifyFrameHeader(
func (w *WesolowskiFrameProver) VerifyFrameHeaderSignature(
frame *protobufs.FrameHeader,
bls qcrypto.BlsConstructor,
ids [][]byte,
) (bool, error) {
// Get the signature payload
signaturePayload, err := w.GetFrameSignaturePayload(frame)
selectorBI, err := poseidon.HashBytes(frame.Output)
if err != nil {
return false, errors.Wrap(err, "verify frame header signature")
}
signaturePayload := verification.MakeVoteMessage(
frame.Address,
frame.Rank,
models.Identity(selectorBI.FillBytes(make([]byte, 32))),
)
domain := append([]byte("shard"), frame.Address...)
domain := append([]byte("appshard"), frame.Address...)
if !bls.VerifySignatureRaw(
frame.PublicKeySignatureBls48581.PublicKey.KeyValue,
frame.PublicKeySignatureBls48581.Signature,
frame.PublicKeySignatureBls48581.Signature[:74],
signaturePayload,
domain,
) {
@ -367,7 +376,45 @@ func (w *WesolowskiFrameProver) VerifyFrameHeaderSignature(
)
}
return true, nil
indices := GetSetBitIndices(frame.PublicKeySignatureBls48581.Bitmask)
if len(frame.PublicKeySignatureBls48581.Signature) == 74 &&
len(indices) != 1 {
return false, errors.Wrap(
errors.New("signature missing multiproof"),
"verify frame header signature",
)
}
if len(frame.PublicKeySignatureBls48581.Signature) == 74 && ids == nil {
return true, nil
}
buf := bytes.NewBuffer(frame.PublicKeySignatureBls48581.Signature[74:])
var multiproofCount uint32
if err := binary.Read(buf, binary.BigEndian, &multiproofCount); err != nil {
return false, errors.Wrap(err, "verify frame header signature")
}
multiproofs := [][516]byte{}
for i := uint32(0); i < multiproofCount; i++ {
multiproof := [516]byte{}
if _, err := buf.Read(multiproof[:]); err != nil {
return false, errors.Wrap(err, "verify frame header signature")
}
multiproofs = append(multiproofs, multiproof)
}
challenge := sha3.Sum256(frame.ParentSelector)
valid, err := w.VerifyMultiProof(
challenge,
frame.Difficulty,
ids,
multiproofs,
)
return valid, errors.Wrap(err, "verify frame header signature")
}
func (w *WesolowskiFrameProver) ProveGlobalFrameHeader(
@ -576,10 +623,15 @@ func (w *WesolowskiFrameProver) VerifyGlobalHeaderSignature(
bls qcrypto.BlsConstructor,
) (bool, error) {
// Get the signature payload
signaturePayload, err := w.GetGlobalFrameSignaturePayload(frame)
selectorBI, err := poseidon.HashBytes(frame.Output)
if err != nil {
return false, errors.Wrap(err, "verify global frame header")
return false, errors.Wrap(err, "verify frame header signature")
}
signaturePayload := verification.MakeVoteMessage(
nil,
frame.Rank,
models.Identity(selectorBI.FillBytes(make([]byte, 32))),
)
if !bls.VerifySignatureRaw(
frame.PublicKeySignatureBls48581.PublicKey.KeyValue,