mirror of
https://github.com/QuilibriumNetwork/ceremonyclient.git
synced 2026-03-07 01:07:44 +08:00
* v2.1.0.19 * enhanced error logging, fix seniority marker join blocker, fix sync message size limit defaults * resolve signature failure * additional error logging for merge-related signatures * fix: one-shot sync message size, app shard TC signature size, collector/hotstuff race condition, expired joins blocking new joins due to pruning disable * remove compat with old 2.0.0 blossomsub * fix: resolve abandoned prover joins * reload prover registry * fix stale worker proposal edge * add full sanity check on join before submitting to identify bug * resolve non-fallthrough condition that should be fallthrough * fix: resolve rare SIGFPE, fix orphan expired joins blocking workers from reallocating * add reconnect fallback if no peers are found with variable reconnect time (#511) Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com> * update base peer count to 1 (#513) * fix: expired prover join frames, starting port ranges, proposer getting stuck, and seniority on joins * fix: panic on shutdown, libp2p discovery picking inaccessible peers, coverage event check not in shutdown logic, amend app shard worker behavior to mirror global for prover root reconciliation * fix: shutdown scenario quirks, reload hanging * fix: do not bailout early on shutdown of coverage check * fix: force registry refresh on worker waiting for registration * add more logging to wait for prover * fix: worker manager refreshes the filter on allocation, snapshots blocking close on shutdown * tweak: force shutdown after five seconds for app worker * fix: don't loop when shutting down * fix: slight reordering, also added named workers to trace hanging shutdowns * use deterministic key for peer id of workers to stop flagging workers as sybil attacks * fix: remove pubsub stop from app consensus engine as it shouldn't manage pubsub lifecycle, integrate shutdown context to PerformSync to prevent stuck syncs from halting respawn * fix: blossomsub pubsub interface does not properly track subscription status * fix: subscribe order to avoid nil panic * switch from dnsaddr to dns4 * add missing quic-v1 * additional logging to isolate respawn quirks * fix: dnsaddr -> dns4 for blossomsub * sort-of fix: apply sledgehammer to restart logic * fix: restore proper respawn logic, fix frozen hypergraph post respawn, unsubscribe from bitmask previously missing --------- Co-authored-by: winged-pegasus <55340199+winged-pegasus@users.noreply.github.com> Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>
274 lines
6.8 KiB
Go
274 lines
6.8 KiB
Go
package global
|
|
|
|
import (
|
|
"bytes"
|
|
"slices"
|
|
|
|
"github.com/libp2p/go-libp2p/core/peer"
|
|
"github.com/pkg/errors"
|
|
"go.uber.org/zap"
|
|
"source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb"
|
|
"source.quilibrium.com/quilibrium/monorepo/rpm"
|
|
tp2p "source.quilibrium.com/quilibrium/monorepo/types/p2p"
|
|
)
|
|
|
|
func (e *GlobalConsensusEngine) subscribeToGlobalConsensus() error {
|
|
if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
|
|
return nil
|
|
}
|
|
|
|
provingKey, _, _, _ := e.GetProvingKey(e.config.Engine)
|
|
e.mixnet = rpm.NewRPMMixnet(e.logger, provingKey, e.proverRegistry, nil)
|
|
|
|
if err := e.pubsub.Subscribe(
|
|
GLOBAL_CONSENSUS_BITMASK,
|
|
func(message *pb.Message) error {
|
|
select {
|
|
case <-e.haltCtx.Done():
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
case e.globalConsensusMessageQueue <- message:
|
|
return nil
|
|
default:
|
|
e.logger.Warn("global message queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to global consensus")
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
GLOBAL_CONSENSUS_BITMASK,
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validateGlobalConsensusMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to global consensus")
|
|
}
|
|
|
|
// Initiate a bulk subscribe to entire bitmask
|
|
if err := e.pubsub.Subscribe(
|
|
bytes.Repeat([]byte{0xff}, 32),
|
|
func(message *pb.Message) error {
|
|
select {
|
|
case <-e.haltCtx.Done():
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
case e.appFramesMessageQueue <- message:
|
|
return nil
|
|
default:
|
|
e.logger.Warn("app frames message queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
e.logger.Error(
|
|
"error while subscribing to app shard consensus channels",
|
|
zap.Error(err),
|
|
)
|
|
return nil
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
bytes.Repeat([]byte{0xff}, 32),
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validateAppFrameMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return nil
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (e *GlobalConsensusEngine) subscribeToShardConsensusMessages() error {
|
|
if err := e.pubsub.Subscribe(
|
|
slices.Concat(
|
|
[]byte{0},
|
|
bytes.Repeat([]byte{0xff}, 32),
|
|
),
|
|
func(message *pb.Message) error {
|
|
select {
|
|
case <-e.haltCtx.Done():
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
case e.shardConsensusMessageQueue <- message:
|
|
return nil
|
|
default:
|
|
e.logger.Warn("shard consensus queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to shard consensus messages")
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
slices.Concat(
|
|
[]byte{0},
|
|
bytes.Repeat([]byte{0xff}, 32),
|
|
),
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validateShardConsensusMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to shard consensus messages")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (e *GlobalConsensusEngine) subscribeToFrameMessages() error {
|
|
if err := e.pubsub.Subscribe(
|
|
GLOBAL_FRAME_BITMASK,
|
|
func(message *pb.Message) error {
|
|
// Don't subscribe if running in consensus, the time reel shouldn't have
|
|
// the frame ahead of time
|
|
if e.config.P2P.Network == 99 || e.config.Engine.ArchiveMode {
|
|
return nil
|
|
}
|
|
select {
|
|
case <-e.haltCtx.Done():
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
case e.globalFrameMessageQueue <- message:
|
|
return nil
|
|
default:
|
|
e.logger.Warn("global frame queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to frame messages")
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
GLOBAL_FRAME_BITMASK,
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validateFrameMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to frame messages")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (e *GlobalConsensusEngine) subscribeToProverMessages() error {
|
|
if err := e.pubsub.Subscribe(
|
|
GLOBAL_PROVER_BITMASK,
|
|
func(message *pb.Message) error {
|
|
if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
|
|
return nil
|
|
}
|
|
|
|
select {
|
|
case <-e.haltCtx.Done():
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
case e.globalProverMessageQueue <- message:
|
|
e.logger.Debug("received prover message")
|
|
return nil
|
|
default:
|
|
e.logger.Warn("global prover message queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to prover messages")
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
GLOBAL_PROVER_BITMASK,
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validateProverMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to prover messages")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (e *GlobalConsensusEngine) subscribeToPeerInfoMessages() error {
|
|
if err := e.pubsub.Subscribe(
|
|
GLOBAL_PEER_INFO_BITMASK,
|
|
func(message *pb.Message) error {
|
|
select {
|
|
case <-e.haltCtx.Done():
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
case e.globalPeerInfoMessageQueue <- message:
|
|
return nil
|
|
default:
|
|
e.logger.Warn("peer info message queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to peer info messages")
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
GLOBAL_PEER_INFO_BITMASK,
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validatePeerInfoMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to peer info messages")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (e *GlobalConsensusEngine) subscribeToAlertMessages() error {
|
|
if err := e.pubsub.Subscribe(
|
|
GLOBAL_ALERT_BITMASK,
|
|
func(message *pb.Message) error {
|
|
select {
|
|
case e.globalAlertMessageQueue <- message:
|
|
return nil
|
|
case <-e.ShutdownSignal():
|
|
return errors.New("context cancelled")
|
|
default:
|
|
e.logger.Warn("alert message queue full, dropping message")
|
|
return nil
|
|
}
|
|
},
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to alert messages")
|
|
}
|
|
|
|
// Register frame validator
|
|
if err := e.pubsub.RegisterValidator(
|
|
GLOBAL_ALERT_BITMASK,
|
|
func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
|
|
return e.validateAlertMessage(peerID, message)
|
|
},
|
|
true,
|
|
); err != nil {
|
|
return errors.Wrap(err, "subscribe to alert messages")
|
|
}
|
|
|
|
return nil
|
|
}
|