ceremonyclient/node/consensus/global/message_subscription.go
Cassandra Heart ce4f77b140
v2.1.0.19 (#515)
* v2.1.0.19

* enhanced error logging, fix seniority marker join blocker, fix sync message size limit defaults

* resolve signature failure

* additional error logging for merge-related signatures

* fix: one-shot sync message size, app shard TC signature size, collector/hotstuff race condition, expired joins blocking new joins due to pruning disable

* remove compat with old 2.0.0 blossomsub

* fix: resolve abandoned prover joins

* reload prover registry

* fix stale worker proposal edge

* add full sanity check on join before submitting to identify bug

* resolve non-fallthrough condition that should be fallthrough

* fix: resolve rare SIGFPE, fix orphan expired joins blocking workers from reallocating

* add reconnect fallback if no peers are found with variable reconnect time (#511)

Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>

* update base peer count to 1 (#513)

* fix: expired prover join frames, starting port ranges, proposer getting stuck, and seniority on joins

* fix: panic on shutdown, libp2p discovery picking inaccessible peers, coverage event check not in shutdown logic, amend app shard worker behavior to mirror global for prover root reconciliation

* fix: shutdown scenario quirks, reload hanging

* fix: do not bailout early on shutdown of coverage check

* fix: force registry refresh on worker waiting for registration

* add more logging to wait for prover

* fix: worker manager refreshes the filter on allocation, snapshots blocking close on shutdown

* tweak: force shutdown after five seconds for app worker

* fix: don't loop when shutting down

* fix: slight reordering, also added named workers to trace hanging shutdowns

* use deterministic key for peer id of workers to stop flagging workers as sybil attacks

* fix: remove pubsub stop from app consensus engine as it shouldn't manage pubsub lifecycle, integrate shutdown context to PerformSync to prevent stuck syncs from halting respawn

* fix: blossomsub pubsub interface does not properly track subscription status

* fix: subscribe order to avoid nil panic

* switch from dnsaddr to dns4

* add missing quic-v1

* additional logging to isolate respawn quirks

* fix: dnsaddr -> dns4 for blossomsub

* sort-of fix: apply sledgehammer to restart logic

* fix: restore proper respawn logic, fix frozen hypergraph post respawn, unsubscribe from bitmask previously missing

---------

Co-authored-by: winged-pegasus <55340199+winged-pegasus@users.noreply.github.com>
Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>
2026-02-26 04:20:13 -06:00


package global

import (
	"bytes"
	"slices"

	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/pkg/errors"
	"go.uber.org/zap"
	"source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb"
	"source.quilibrium.com/quilibrium/monorepo/rpm"
	tp2p "source.quilibrium.com/quilibrium/monorepo/types/p2p"
)

func (e *GlobalConsensusEngine) subscribeToGlobalConsensus() error {
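	// Only nodes on network 99 or running in archive mode subscribe to the
	// global consensus bitmask; all other configurations return early.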
if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
return nil
}
provingKey, _, _, _ := e.GetProvingKey(e.config.Engine)
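	// Construct the RPM mixnet from the node's proving key and prover registry.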
e.mixnet = rpm.NewRPMMixnet(e.logger, provingKey, e.proverRegistry, nil)
if err := e.pubsub.Subscribe(
GLOBAL_CONSENSUS_BITMASK,
func(message *pb.Message) error {
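			// Enqueue without blocking pubsub delivery: bail out on shutdown
			// and drop the message if the queue is full.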
select {
case <-e.haltCtx.Done():
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
case e.globalConsensusMessageQueue <- message:
return nil
default:
e.logger.Warn("global message queue full, dropping message")
return nil
}
},
); err != nil {
return errors.Wrap(err, "subscribe to global consensus")
}
	// Register global consensus message validator
if err := e.pubsub.RegisterValidator(
GLOBAL_CONSENSUS_BITMASK,
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validateGlobalConsensusMessage(peerID, message)
},
true,
); err != nil {
return errors.Wrap(err, "subscribe to global consensus")
}
// Initiate a bulk subscribe to entire bitmask
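	// (32 bytes of 0xff) so app frame messages from any shard are received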
if err := e.pubsub.Subscribe(
bytes.Repeat([]byte{0xff}, 32),
func(message *pb.Message) error {
select {
case <-e.haltCtx.Done():
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
case e.appFramesMessageQueue <- message:
return nil
default:
e.logger.Warn("app frames message queue full, dropping message")
return nil
}
},
); err != nil {
e.logger.Error(
"error while subscribing to app shard consensus channels",
zap.Error(err),
)
return nil
}
	// Register app frame validator
if err := e.pubsub.RegisterValidator(
bytes.Repeat([]byte{0xff}, 32),
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validateAppFrameMessage(peerID, message)
},
true,
); err != nil {
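		// A failure to register the bulk bitmask validator is not treated as fatal.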
return nil
}
return nil
}

func (e *GlobalConsensusEngine) subscribeToShardConsensusMessages() error {
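	// Shard consensus messages are addressed by a 33-byte bitmask: a zero byte
	// followed by 32 bytes of 0xff.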
if err := e.pubsub.Subscribe(
slices.Concat(
[]byte{0},
bytes.Repeat([]byte{0xff}, 32),
),
func(message *pb.Message) error {
select {
case <-e.haltCtx.Done():
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
case e.shardConsensusMessageQueue <- message:
return nil
default:
e.logger.Warn("shard consensus queue full, dropping message")
return nil
}
},
); err != nil {
return errors.Wrap(err, "subscribe to shard consensus messages")
}
	// Register shard consensus message validator
if err := e.pubsub.RegisterValidator(
slices.Concat(
[]byte{0},
bytes.Repeat([]byte{0xff}, 32),
),
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validateShardConsensusMessage(peerID, message)
},
true,
); err != nil {
return errors.Wrap(err, "subscribe to shard consensus messages")
}
return nil
}

func (e *GlobalConsensusEngine) subscribeToFrameMessages() error {
if err := e.pubsub.Subscribe(
GLOBAL_FRAME_BITMASK,
func(message *pb.Message) error {
			// Don't enqueue the frame if running in consensus; the time reel
			// shouldn't have the frame ahead of time
if e.config.P2P.Network == 99 || e.config.Engine.ArchiveMode {
return nil
}
select {
case <-e.haltCtx.Done():
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
case e.globalFrameMessageQueue <- message:
return nil
default:
e.logger.Warn("global frame queue full, dropping message")
return nil
}
},
); err != nil {
return errors.Wrap(err, "subscribe to frame messages")
}
// Register frame validator
if err := e.pubsub.RegisterValidator(
GLOBAL_FRAME_BITMASK,
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validateFrameMessage(peerID, message)
},
true,
); err != nil {
return errors.Wrap(err, "subscribe to frame messages")
}
return nil
}

func (e *GlobalConsensusEngine) subscribeToProverMessages() error {
if err := e.pubsub.Subscribe(
GLOBAL_PROVER_BITMASK,
func(message *pb.Message) error {
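			// Only consensus participants (network 99 or archive mode) enqueue
			// prover messages; other nodes drop them.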
if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
return nil
}
select {
case <-e.haltCtx.Done():
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
case e.globalProverMessageQueue <- message:
e.logger.Debug("received prover message")
return nil
default:
e.logger.Warn("global prover message queue full, dropping message")
return nil
}
},
); err != nil {
return errors.Wrap(err, "subscribe to prover messages")
}
	// Register prover message validator
if err := e.pubsub.RegisterValidator(
GLOBAL_PROVER_BITMASK,
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validateProverMessage(peerID, message)
},
true,
); err != nil {
return errors.Wrap(err, "subscribe to prover messages")
}
return nil
}

func (e *GlobalConsensusEngine) subscribeToPeerInfoMessages() error {
if err := e.pubsub.Subscribe(
GLOBAL_PEER_INFO_BITMASK,
func(message *pb.Message) error {
select {
case <-e.haltCtx.Done():
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
case e.globalPeerInfoMessageQueue <- message:
return nil
default:
e.logger.Warn("peer info message queue full, dropping message")
return nil
}
},
); err != nil {
return errors.Wrap(err, "subscribe to peer info messages")
}
	// Register peer info message validator
if err := e.pubsub.RegisterValidator(
GLOBAL_PEER_INFO_BITMASK,
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validatePeerInfoMessage(peerID, message)
},
true,
); err != nil {
return errors.Wrap(err, "subscribe to peer info messages")
}
return nil
}

func (e *GlobalConsensusEngine) subscribeToAlertMessages() error {
if err := e.pubsub.Subscribe(
GLOBAL_ALERT_BITMASK,
func(message *pb.Message) error {
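			// Unlike the other handlers, this select does not check haltCtx;
			// only the shutdown signal or a full queue prevents an alert from
			// being enqueued.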
select {
case e.globalAlertMessageQueue <- message:
return nil
case <-e.ShutdownSignal():
return errors.New("context cancelled")
default:
e.logger.Warn("alert message queue full, dropping message")
return nil
}
},
); err != nil {
return errors.Wrap(err, "subscribe to alert messages")
}
	// Register alert message validator
if err := e.pubsub.RegisterValidator(
GLOBAL_ALERT_BITMASK,
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
return e.validateAlertMessage(peerID, message)
},
true,
); err != nil {
return errors.Wrap(err, "subscribe to alert messages")
}
return nil
}