ceremonyclient/node/consensus/global/message_subscription.go
Cassandra Heart 53f7c2b5c9
v2.1.0.2 (#442)
* v2.1.0.2

* restore tweaks to simlibp2p

* fix: nil ref on size calc

* fix: panic should induce shutdown from event_distributor

* fix: friendlier initialization that requires less manual kickstarting for test/devnets

* fix: fewer available shards than provers should choose shard length

* fix: update stored worker registry, improve logging for debug mode

* fix: silence overly chatty peer log

* qol: log value should be snake cased

* fix: non-archive snap sync issues

* fix: separate X448/Decaf448 signed keys, add onion key to registry

* fix: overflow arithmetic on frame number comparison

* fix: worker registration should be idempotent if inputs are same, otherwise permit updated records

* fix: remove global prover state from size calculation

* fix: divide by zero case

* fix: eager prover

* fix: broadcast listener default

* qol: diagnostic data for peer authenticator

* fix: master/worker connectivity issue in sparse networks

Tight coupling of the peer identity and its workers can interfere when the mesh is sparse, so give workers a pseudo-identity but publish messages with the proper peer key.

* fix: reorder steps of join creation

* fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency)

* fix: add delegate to protobuf <-> reified join conversion

* fix: preempt prover from planning with no workers

* fix: use the unallocated workers to generate a proof

* qol: underflow causes join fail in first ten frames on test/devnets

* qol: small logging tweaks for easier log correlation in debug mode

* qol: use fisher-yates shuffle to ensure prover allocations are evenly distributed when scores are equal

* qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring

* reuse shard descriptors for both join planning and confirm/reject decisions

* fix: add missing interface method and amend test blossomsub to use new peer id basis

* fix: only check allocations if they exist

* fix: pomw mint proof data needs to be hierarchically under global intrinsic domain

* staging temporary state under diagnostics

* fix: first phase of distributed lock refactoring

* fix: compute intrinsic locking

* fix: hypergraph intrinsic locking

* fix: token intrinsic locking

* fix: update execution engines to support new locking model

* fix: adjust tests with new execution shape

* fix: weave in lock/unlock semantics to liveness provider

* fix: lock fallthrough, add missing allocation update

* qol: additional logging for diagnostics, also testnet/devnet handling for confirmations

* fix: establish grace period on halt scenario to permit recovery

* fix: support test/devnet defaults for coverage scenarios

* fix: nil ref on consensus halts for non-archive nodes

* fix: remove unnecessary prefix from prover ref

* add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved

* fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly

* rollup of pre-liveness check lock interactions

* ahead of tests, get the protobuf/metrics-related changes out so teams can prepare

* add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved

* fix: blocker (3)

* Dev docs improvements (#445)

* Make install deps script more robust

* Improve testing instructions

* Worker node should stop upon OS SIGINT/SIGTERM signal (#447)

* move pebble close to Stop()

* move deferred Stop() to Start()

* add core id to worker stop log message

* create done os signal channel and stop worker upon message to it

---------

Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com>

---------

Co-authored-by: Daz <daz_the_corgi@proton.me>
Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
2025-10-23 01:03:06 -05:00


package global

import (
	"bytes"
	"slices"

	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/pkg/errors"
	"go.uber.org/zap"

	"source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb"
	"source.quilibrium.com/quilibrium/monorepo/rpm"
	tp2p "source.quilibrium.com/quilibrium/monorepo/types/p2p"
)
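
// subscribeToGlobalConsensus wires up the global consensus bitmask: a
// subscription handler that enqueues messages without blocking, plus a
// validator. Only nodes in archive mode (or on network 99) participate.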
func (e *GlobalConsensusEngine) subscribeToGlobalConsensus() error {
	if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
		return nil
	}

	provingKey, _, _, _ := e.GetProvingKey(e.config.Engine)
	e.mixnet = rpm.NewRPMMixnet(e.logger, provingKey, e.proverRegistry, nil)

	if err := e.pubsub.Subscribe(
		GLOBAL_CONSENSUS_BITMASK,
		func(message *pb.Message) error {
			// Non-blocking enqueue: the default case drops the message rather
			// than stalling the pubsub delivery path when the queue is full.
			select {
			case <-e.haltCtx.Done():
				return nil
			case e.globalConsensusMessageQueue <- message:
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("global message queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		return errors.Wrap(err, "subscribe to global consensus")
	}

	// Register global consensus message validator
	if err := e.pubsub.RegisterValidator(
		GLOBAL_CONSENSUS_BITMASK,
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validateGlobalConsensusMessage(peerID, message)
		},
		true,
	); err != nil {
		return errors.Wrap(err, "register global consensus validator")
	}

	// Initiate a bulk subscribe to the entire bitmask
	if err := e.pubsub.Subscribe(
		bytes.Repeat([]byte{0xff}, 32),
		func(message *pb.Message) error {
			select {
			case <-e.haltCtx.Done():
				return nil
			case e.appFramesMessageQueue <- message:
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("app frames message queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		// Non-fatal: the global consensus subscription above is already in
		// place, so log and continue.
		e.logger.Error(
			"error while subscribing to app shard consensus channels",
			zap.Error(err),
		)
		return nil
	}

	// Register app frame validator; failures here are likewise non-fatal.
	if err := e.pubsub.RegisterValidator(
		bytes.Repeat([]byte{0xff}, 32),
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validateAppFrameMessage(peerID, message)
		},
		true,
	); err != nil {
		return nil
	}

	return nil
}
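
// subscribeToShardConsensusMessages subscribes to the shard consensus
// bitmask (a zero byte prefixing the full 32-byte mask) and registers the
// corresponding validator.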
func (e *GlobalConsensusEngine) subscribeToShardConsensusMessages() error {
	if err := e.pubsub.Subscribe(
		slices.Concat(
			[]byte{0},
			bytes.Repeat([]byte{0xff}, 32),
		),
		func(message *pb.Message) error {
			select {
			case <-e.haltCtx.Done():
				return nil
			case e.shardConsensusMessageQueue <- message:
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("shard consensus queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		return errors.Wrap(err, "subscribe to shard consensus messages")
	}

	// Register shard consensus message validator
	if err := e.pubsub.RegisterValidator(
		slices.Concat(
			[]byte{0},
			bytes.Repeat([]byte{0xff}, 32),
		),
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validateShardConsensusMessage(peerID, message)
		},
		true,
	); err != nil {
		return errors.Wrap(err, "register shard consensus validator")
	}

	return nil
}
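
// subscribeToFrameMessages subscribes to GLOBAL_FRAME_BITMASK and registers
// the frame validator.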
func (e *GlobalConsensusEngine) subscribeToFrameMessages() error {
	if err := e.pubsub.Subscribe(
		GLOBAL_FRAME_BITMASK,
		func(message *pb.Message) error {
			select {
			case <-e.haltCtx.Done():
				return nil
			case e.globalFrameMessageQueue <- message:
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("global frame queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		return errors.Wrap(err, "subscribe to frame messages")
	}

	// Register frame validator
	if err := e.pubsub.RegisterValidator(
		GLOBAL_FRAME_BITMASK,
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validateFrameMessage(peerID, message)
		},
		true,
	); err != nil {
		return errors.Wrap(err, "register frame validator")
	}

	return nil
}
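
// subscribeToProverMessages subscribes to GLOBAL_PROVER_BITMASK. Non-archive
// nodes outside network 99 remain subscribed but drop prover messages in the
// handler.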
func (e *GlobalConsensusEngine) subscribeToProverMessages() error {
	if err := e.pubsub.Subscribe(
		GLOBAL_PROVER_BITMASK,
		func(message *pb.Message) error {
			// Non-archive nodes outside network 99 drop prover messages here
			// rather than unsubscribing.
			if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
				e.logger.Debug("dropping prover message, not in archive mode")
				return nil
			}
			select {
			case <-e.haltCtx.Done():
				return nil
			case e.globalProverMessageQueue <- message:
				e.logger.Debug("received prover message")
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("global prover message queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		return errors.Wrap(err, "subscribe to prover messages")
	}

	// Register prover message validator
	if err := e.pubsub.RegisterValidator(
		GLOBAL_PROVER_BITMASK,
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validateProverMessage(peerID, message)
		},
		true,
	); err != nil {
		return errors.Wrap(err, "register prover validator")
	}

	return nil
}
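
// subscribeToPeerInfoMessages subscribes to GLOBAL_PEER_INFO_BITMASK and
// registers the peer info validator.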
func (e *GlobalConsensusEngine) subscribeToPeerInfoMessages() error {
	if err := e.pubsub.Subscribe(
		GLOBAL_PEER_INFO_BITMASK,
		func(message *pb.Message) error {
			select {
			case <-e.haltCtx.Done():
				return nil
			case e.globalPeerInfoMessageQueue <- message:
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("peer info message queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		return errors.Wrap(err, "subscribe to peer info messages")
	}

	// Register peer info validator
	if err := e.pubsub.RegisterValidator(
		GLOBAL_PEER_INFO_BITMASK,
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validatePeerInfoMessage(peerID, message)
		},
		true,
	); err != nil {
		return errors.Wrap(err, "register peer info validator")
	}

	return nil
}
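
// subscribeToAlertMessages subscribes to GLOBAL_ALERT_BITMASK and registers
// the alert validator.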
func (e *GlobalConsensusEngine) subscribeToAlertMessages() error {
	if err := e.pubsub.Subscribe(
		GLOBAL_ALERT_BITMASK,
		func(message *pb.Message) error {
			// Unlike the other handlers, this select is not gated on haltCtx,
			// so alerts are still enqueued during a consensus halt.
			select {
			case e.globalAlertMessageQueue <- message:
				return nil
			case <-e.ctx.Done():
				return errors.New("context cancelled")
			default:
				e.logger.Warn("alert message queue full, dropping message")
				return nil
			}
		},
	); err != nil {
		return errors.Wrap(err, "subscribe to alert messages")
	}

	// Register alert validator
	if err := e.pubsub.RegisterValidator(
		GLOBAL_ALERT_BITMASK,
		func(peerID peer.ID, message *pb.Message) tp2p.ValidationResult {
			return e.validateAlertMessage(peerID, message)
		},
		true,
	); err != nil {
		return errors.Wrap(err, "register alert validator")
	}

	return nil
}