mirror of
https://github.com/QuilibriumNetwork/ceremonyclient.git
synced 2026-03-02 23:07:33 +08:00
* v2.1.0.2 * restore tweaks to simlibp2p * fix: nil ref on size calc * fix: panic should induce shutdown from event_distributor * fix: friendlier initialization that requires less manual kickstarting for test/devnets * fix: fewer available shards than provers should choose shard length * fix: update stored worker registry, improve logging for debug mode * fix: shut the fuck up, peer log * qol: log value should be snake cased * fix:non-archive snap sync issues * fix: separate X448/Decaf448 signed keys, add onion key to registry * fix: overflow arithmetic on frame number comparison * fix: worker registration should be idempotent if inputs are same, otherwise permit updated records * fix: remove global prover state from size calculation * fix: divide by zero case * fix: eager prover * fix: broadcast listener default * qol: diagnostic data for peer authenticator * fix: master/worker connectivity issue in sparse networks tight coupling of peer and workers can sometimes interfere if mesh is sparse, so give workers a pseudoidentity but publish messages with the proper peer key * fix: reorder steps of join creation * fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency) * fix: add delegate to protobuf <-> reified join conversion * fix: preempt prover from planning with no workers * fix: use the unallocated workers to generate a proof * qol: underflow causes join fail in first ten frames on test/devnets * qol: small logging tweaks for easier log correlation in debug mode * qol: use fisher-yates shuffle to ensure prover allocations are evenly distributed when scores are equal * qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring * reuse shard descriptors for both join planning and confirm/reject decisions * fix: add missing interface method and amend test blossomsub to use new peer id basis * fix: only check allocations if they exist * fix: pomw mint proof data needs to be hierarchically under global intrinsic domain * staging temporary state under diagnostics * fix: first phase of distributed lock refactoring * fix: compute intrinsic locking * fix: hypergraph intrinsic locking * fix: token intrinsic locking * fix: update execution engines to support new locking model * fix: adjust tests with new execution shape * fix: weave in lock/unlock semantics to liveness provider * fix lock fallthrough, add missing allocation update * qol: additional logging for diagnostics, also testnet/devnet handling for confirmations * fix: establish grace period on halt scenario to permit recovery * fix: support test/devnet defaults for coverage scenarios * fix: nil ref on consensus halts for non-archive nodes * fix: remove unnecessary prefix from prover ref * add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved * fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly * rollup of pre-liveness check lock interactions * ahead of tests, get the protobuf/metrics-related changes out so teams can prepare * add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved * fix: blocker (3) * Dev docs improvements (#445) * Make install deps script more robust * Improve testing instructions * Worker node should stop upon OS SIGINT/SIGTERM signal (#447) * move pebble close to Stop() * move deferred Stop() to Start() * add core id to worker stop log message * create done os signal channel and stop worker upon message to it --------- Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com> --------- Co-authored-by: Daz <daz_the_corgi@proton.me> Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
270 lines
7.1 KiB
Go
270 lines
7.1 KiB
Go
package app
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
"go.uber.org/zap"
|
|
"source.quilibrium.com/quilibrium/monorepo/node/consensus/global"
|
|
consensustime "source.quilibrium.com/quilibrium/monorepo/node/consensus/time"
|
|
globalintrinsics "source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/global"
|
|
typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus"
|
|
"source.quilibrium.com/quilibrium/monorepo/types/schema"
|
|
)
|
|
|
|
func (e *AppConsensusEngine) eventDistributorLoop() {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
e.logger.Error("fatal error encountered", zap.Any("panic", r))
|
|
if e.cancel != nil {
|
|
e.cancel()
|
|
}
|
|
// Avoid blocking on quit channel during panic recovery
|
|
select {
|
|
case e.quit <- struct{}{}:
|
|
default:
|
|
}
|
|
}
|
|
}()
|
|
defer e.wg.Done()
|
|
|
|
// Subscribe to events from the event distributor
|
|
eventCh := e.eventDistributor.Subscribe(hex.EncodeToString(e.appAddress))
|
|
defer e.eventDistributor.Unsubscribe(hex.EncodeToString(e.appAddress))
|
|
|
|
for {
|
|
select {
|
|
case <-e.ctx.Done():
|
|
return
|
|
case <-e.quit:
|
|
return
|
|
case event, ok := <-eventCh:
|
|
if !ok {
|
|
e.logger.Error("event channel closed unexpectedly")
|
|
return
|
|
}
|
|
|
|
switch event.Type {
|
|
case typesconsensus.ControlEventAppNewHead:
|
|
if data, ok := event.Data.(*consensustime.AppEvent); ok &&
|
|
data.Frame != nil {
|
|
e.logger.Debug(
|
|
"received new app head event",
|
|
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
|
|
)
|
|
|
|
// Record the fee vote from the accepted frame
|
|
if err := e.dynamicFeeManager.AddFrameFeeVote(
|
|
e.appAddress,
|
|
data.Frame.Header.FrameNumber,
|
|
data.Frame.Header.FeeMultiplierVote,
|
|
); err != nil {
|
|
e.logger.Error(
|
|
"failed to add frame fee vote",
|
|
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
|
|
zap.Uint64("fee_vote", data.Frame.Header.FeeMultiplierVote),
|
|
zap.Error(err),
|
|
)
|
|
}
|
|
}
|
|
case typesconsensus.ControlEventAppEquivocation:
|
|
// Handle equivocation by constructing and publishing a ProverKick
|
|
// message
|
|
if data, ok := event.Data.(*consensustime.AppEvent); ok &&
|
|
data.Frame != nil && data.OldHead != nil {
|
|
e.logger.Warn(
|
|
"received equivocating frame",
|
|
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
|
|
)
|
|
|
|
// The equivocating prover is the one who signed the new frame
|
|
if data.Frame.Header != nil &&
|
|
data.Frame.Header.PublicKeySignatureBls48581 != nil &&
|
|
data.Frame.Header.PublicKeySignatureBls48581.PublicKey != nil {
|
|
|
|
kickedProverPublicKey :=
|
|
data.Frame.Header.PublicKeySignatureBls48581.PublicKey.KeyValue
|
|
|
|
// Serialize both conflicting frame headers
|
|
conflictingFrame1, err := data.OldHead.Header.ToCanonicalBytes()
|
|
if err != nil {
|
|
e.logger.Error(
|
|
"failed to marshal old frame header",
|
|
zap.Error(err),
|
|
)
|
|
continue
|
|
}
|
|
|
|
conflictingFrame2, err := data.Frame.Header.ToCanonicalBytes()
|
|
if err != nil {
|
|
e.logger.Error(
|
|
"failed to marshal new frame header",
|
|
zap.Error(err),
|
|
)
|
|
continue
|
|
}
|
|
|
|
// Create the ProverKick message using the intrinsic struct
|
|
proverKick, err := globalintrinsics.NewProverKick(
|
|
data.Frame.Header.FrameNumber,
|
|
kickedProverPublicKey,
|
|
conflictingFrame1,
|
|
conflictingFrame2,
|
|
e.blsConstructor,
|
|
e.frameProver,
|
|
e.hypergraph,
|
|
schema.NewRDFMultiprover(
|
|
&schema.TurtleRDFParser{},
|
|
e.inclusionProver,
|
|
),
|
|
e.proverRegistry,
|
|
e.clockStore,
|
|
)
|
|
if err != nil {
|
|
e.logger.Error(
|
|
"failed to construct prover kick",
|
|
zap.Error(err),
|
|
)
|
|
continue
|
|
}
|
|
|
|
err = proverKick.Prove(data.Frame.Header.FrameNumber)
|
|
if err != nil {
|
|
e.logger.Error(
|
|
"failed to prove prover kick",
|
|
zap.Error(err),
|
|
)
|
|
continue
|
|
}
|
|
|
|
// Serialize the ProverKick to the request form
|
|
kickBytes, err := proverKick.ToRequestBytes()
|
|
if err != nil {
|
|
e.logger.Error(
|
|
"failed to serialize prover kick",
|
|
zap.Error(err),
|
|
)
|
|
continue
|
|
}
|
|
|
|
// Publish the kick message
|
|
if err := e.pubsub.PublishToBitmask(
|
|
global.GLOBAL_PROVER_BITMASK,
|
|
kickBytes,
|
|
); err != nil {
|
|
e.logger.Error("failed to publish prover kick", zap.Error(err))
|
|
} else {
|
|
e.logger.Info(
|
|
"published prover kick for equivocation",
|
|
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
|
|
zap.String(
|
|
"kicked_prover",
|
|
hex.EncodeToString(kickedProverPublicKey),
|
|
),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
case typesconsensus.ControlEventCoverageHalt:
|
|
data, ok := event.Data.(*typesconsensus.CoverageEventData)
|
|
if ok && data.Message != "" {
|
|
e.logger.Error(data.Message)
|
|
e.halt()
|
|
if err := e.stateMachine.Stop(); err != nil {
|
|
e.logger.Error(
|
|
"error occurred while halting consensus",
|
|
zap.Error(err),
|
|
)
|
|
}
|
|
go func() {
|
|
for {
|
|
select {
|
|
case <-e.ctx.Done():
|
|
return
|
|
case <-time.After(10 * time.Second):
|
|
e.logger.Error(
|
|
"full halt detected, leaving system in halted state until recovery",
|
|
)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
case typesconsensus.ControlEventHalt:
|
|
data, ok := event.Data.(*typesconsensus.ErrorEventData)
|
|
if ok && data.Error != nil {
|
|
e.logger.Error(
|
|
"full halt detected, leaving system in halted state",
|
|
zap.Error(data.Error),
|
|
)
|
|
e.halt()
|
|
if err := e.stateMachine.Stop(); err != nil {
|
|
e.logger.Error(
|
|
"error occurred while halting consensus",
|
|
zap.Error(err),
|
|
)
|
|
}
|
|
go func() {
|
|
for {
|
|
select {
|
|
case <-e.ctx.Done():
|
|
return
|
|
case <-time.After(10 * time.Second):
|
|
e.logger.Error(
|
|
"full halt detected, leaving system in halted state",
|
|
zap.Error(data.Error),
|
|
)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
case typesconsensus.ControlEventAppFork:
|
|
if data, ok := event.Data.(*consensustime.AppEvent); ok &&
|
|
data.Frame != nil {
|
|
e.logger.Debug(
|
|
"received new app fork event",
|
|
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
|
|
)
|
|
|
|
// Remove the forked fee votes
|
|
removed, err := e.dynamicFeeManager.RewindToFrame(
|
|
e.appAddress,
|
|
data.Frame.Header.FrameNumber,
|
|
)
|
|
if err != nil {
|
|
e.logger.Error(
|
|
"failed to rewind frame fee vote",
|
|
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
|
|
zap.Error(err),
|
|
)
|
|
}
|
|
|
|
e.logger.Info("rewound fee votes", zap.Int("removed_votes", removed))
|
|
}
|
|
|
|
default:
|
|
e.logger.Debug(
|
|
"received unhandled event type",
|
|
zap.Int("event_type", int(event.Type)),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (e *AppConsensusEngine) emitAlertEvent(alertMessage string) {
|
|
event := typesconsensus.ControlEvent{
|
|
Type: typesconsensus.ControlEventHalt,
|
|
Data: &typesconsensus.ErrorEventData{
|
|
Error: errors.New(alertMessage),
|
|
},
|
|
}
|
|
|
|
go e.eventDistributor.Publish(event)
|
|
|
|
e.logger.Info("emitted alert message")
|
|
}
|