ceremonyclient/node/app/node.go
Cassandra Heart 53f7c2b5c9
v2.1.0.2 (#442)
* v2.1.0.2

* restore tweaks to simlibp2p

* fix: nil ref on size calc

* fix: panic should induce shutdown from event_distributor

* fix: friendlier initialization that requires less manual kickstarting for test/devnets

* fix: fewer available shards than provers should choose shard length

* fix: update stored worker registry, improve logging for debug mode

* fix: shut the fuck up, peer log

* qol: log value should be snake-cased

* fix: non-archive snap sync issues

* fix: separate X448/Decaf448 signed keys, add onion key to registry

* fix: overflow arithmetic on frame number comparison
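
The commit view does not include the fix itself; as a rough sketch of the problem class, comparing unsigned frame numbers by raw subtraction wraps around when the older frame is larger, so the comparison has to branch first (the function and variable names below are illustrative only, not the actual code):

```go
// Illustrative only: a naive "frameA - frameB > threshold" on uint64 values
// underflows to a huge number whenever frameB > frameA.
func frameDelta(frameA, frameB uint64) (aAhead bool, delta uint64) {
	if frameA >= frameB {
		return true, frameA - frameB
	}
	return false, frameB - frameA
}
```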

* fix: worker registration should be idempotent if inputs are same, otherwise permit updated records
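
A minimal sketch of that idempotency rule, with a hypothetical Record type and an in-memory map standing in for the real stored worker registry:

```go
// Sketch only: identical re-registration is a no-op; a changed record for the
// same worker replaces the stored entry.
type Record struct {
	WorkerID string
	Endpoint string
}

func register(registry map[string]Record, incoming Record) {
	if existing, ok := registry[incoming.WorkerID]; ok && existing == incoming {
		return // same inputs: keep the stored record as-is
	}
	registry[incoming.WorkerID] = incoming // new worker or updated record
}
```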

* fix: remove global prover state from size calculation

* fix: divide by zero case

* fix: eager prover

* fix: broadcast listener default

* qol: diagnostic data for peer authenticator

* fix: master/worker connectivity issue in sparse networks

Tight coupling of the peer and its workers can sometimes interfere when the mesh is sparse, so workers are given a pseudoidentity while messages are still published with the proper peer key.
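
A rough illustration of that split, with placeholder names (workerIdentity, signPublish) rather than the actual monorepo API; ed25519 is used here purely for brevity, not because it is the node's key type:

```go
package sketch

import (
	"crypto/ed25519"
	"crypto/rand"
)

// Illustrative only: the worker joins the mesh under a throwaway pseudoidentity
// so a sparse mesh does not couple it to its peer, but everything it publishes
// is still signed with the proper peer key.
type workerIdentity struct {
	pseudoKey ed25519.PrivateKey // mesh membership / routing identity
	peerKey   ed25519.PrivateKey // canonical peer key used when publishing
}

func newWorkerIdentity(peerKey ed25519.PrivateKey) (*workerIdentity, error) {
	_, pseudo, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return nil, err
	}
	return &workerIdentity{pseudoKey: pseudo, peerKey: peerKey}, nil
}

func (w *workerIdentity) signPublish(payload []byte) []byte {
	// Attribution stays with the peer, not the pseudoidentity.
	return ed25519.Sign(w.peerKey, payload)
}
```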

* fix: reorder steps of join creation

* fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency)

* fix: add delegate to protobuf <-> reified join conversion

* fix: preempt prover from planning with no workers

* fix: use the unallocated workers to generate a proof

* qol: underflow causes join fail in first ten frames on test/devnets

* qol: small logging tweaks for easier log correlation in debug mode

* qol: use fisher-yates shuffle to ensure prover allocations are evenly distributed when scores are equal
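
A minimal sketch of the shuffle over a group of tied candidates (the candidate slice and RNG source are placeholders for the real prover allocation inputs):

```go
package sketch

import "math/rand"

// Fisher-Yates shuffle over equally scored candidates, so ties are not always
// broken in insertion order when allocating provers.
func shuffleTies(candidates []string, rng *rand.Rand) {
	for i := len(candidates) - 1; i > 0; i-- {
		j := rng.Intn(i + 1)
		candidates[i], candidates[j] = candidates[j], candidates[i]
	}
}
```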

* qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring

* reuse shard descriptors for both join planning and confirm/reject decisions

* fix: add missing interface method and amend test blossomsub to use new peer id basis

* fix: only check allocations if they exist

* fix: pomw mint proof data needs to be hierarchically under global intrinsic domain

* staging temporary state under diagnostics

* fix: first phase of distributed lock refactoring

* fix: compute intrinsic locking

* fix: hypergraph intrinsic locking

* fix: token intrinsic locking

* fix: update execution engines to support new locking model

* fix: adjust tests with new execution shape

* fix: weave in lock/unlock semantics to liveness provider

* fix: lock fallthrough, add missing allocation update
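
The shape implied by the locking changes above, sketched as a hypothetical interface (DistributedLock and withLock are illustrative names, not the actual refactored types):

```go
package sketch

import "context"

// Hypothetical shape: acquire the distributed lock for the keys an intrinsic
// will touch, run the execution, and release via a deferred unlock so failure
// paths cannot fall through with the lock still held.
type DistributedLock interface {
	Lock(ctx context.Context, keys [][]byte) (unlock func(), err error)
}

func withLock(ctx context.Context, l DistributedLock, keys [][]byte, run func() error) error {
	unlock, err := l.Lock(ctx, keys)
	if err != nil {
		return err
	}
	defer unlock()
	return run()
}
```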

* qol: additional logging for diagnostics, also testnet/devnet handling for confirmations

* fix: establish grace period on halt scenario to permit recovery

* fix: support test/devnet defaults for coverage scenarios

* fix: nil ref on consensus halts for non-archive nodes

* fix: remove unnecessary prefix from prover ref

* add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved

* fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly

* rollup of pre-liveness check lock interactions

* ahead of tests, get the protobuf/metrics-related changes out so teams can prepare

* add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved

* fix: blocker (3)

* Dev docs improvements (#445)

* Make install deps script more robust

* Improve testing instructions

* Worker node should stop upon OS SIGINT/SIGTERM signal (#447)

* move pebble close to Stop()

* move deferred Stop() to Start()

* add core id to worker stop log message

* create a done OS-signal channel and stop the worker upon a message to it
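
The pattern described above, as a minimal sketch (runUntilSignal and stop are placeholder names for the worker wiring):

```go
package sketch

import (
	"os"
	"os/signal"
	"syscall"
)

// Trap SIGINT/SIGTERM into a done channel and stop the worker when it fires.
func runUntilSignal(stop func()) {
	done := make(chan os.Signal, 1)
	signal.Notify(done, syscall.SIGINT, syscall.SIGTERM)
	<-done
	stop()
}
```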

---------

Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com>

---------

Co-authored-by: Daz <daz_the_corgi@proton.me>
Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
2025-10-23 01:03:06 -05:00

package app

import (
	"go.uber.org/zap"
	"source.quilibrium.com/quilibrium/monorepo/node/consensus/global"
	consensustime "source.quilibrium.com/quilibrium/monorepo/node/consensus/time"
	"source.quilibrium.com/quilibrium/monorepo/node/execution/manager"
	"source.quilibrium.com/quilibrium/monorepo/node/rpc"
	"source.quilibrium.com/quilibrium/monorepo/types/consensus"
	"source.quilibrium.com/quilibrium/monorepo/types/keys"
	"source.quilibrium.com/quilibrium/monorepo/types/p2p"
	"source.quilibrium.com/quilibrium/monorepo/types/store"
	"source.quilibrium.com/quilibrium/monorepo/types/worker"
)

type DHTNode struct {
	pubSub p2p.PubSub
	quit   chan struct{}
}

type MasterNode struct {
	logger          *zap.Logger
	dataProofStore  store.DataProofStore
	clockStore      store.ClockStore
	coinStore       store.TokenStore
	keyManager      keys.KeyManager
	pubSub          p2p.PubSub
	globalConsensus *global.GlobalConsensusEngine
	globalTimeReel  *consensustime.GlobalTimeReel
	pebble          store.KVDB
	coreId          uint
	quit            chan struct{}
}

func newDHTNode(
	pubSub p2p.PubSub,
) (*DHTNode, error) {
	return &DHTNode{
		pubSub: pubSub,
		quit:   make(chan struct{}),
	}, nil
}

func newMasterNode(
	logger *zap.Logger,
	dataProofStore store.DataProofStore,
	clockStore store.ClockStore,
	coinStore store.TokenStore,
	keyManager keys.KeyManager,
	pubSub p2p.PubSub,
	globalConsensus *global.GlobalConsensusEngine,
	globalTimeReel *consensustime.GlobalTimeReel,
	pebble store.KVDB,
	coreId uint,
) (*MasterNode, error) {
	logger = logger.With(zap.String("process", "master"))

	return &MasterNode{
		logger:          logger,
		dataProofStore:  dataProofStore,
		clockStore:      clockStore,
		coinStore:       coinStore,
		keyManager:      keyManager,
		pubSub:          pubSub,
		globalConsensus: globalConsensus,
		globalTimeReel:  globalTimeReel,
		pebble:          pebble,
		coreId:          coreId,
		quit:            make(chan struct{}),
	}, nil
}

func (d *DHTNode) Start() {
	<-d.quit
}

func (d *DHTNode) Stop() {
	go func() {
		d.quit <- struct{}{}
	}()
}

func (m *MasterNode) Start(quitCh chan struct{}) error {
	// Start the global consensus engine
	m.quit = quitCh
	errChan := m.globalConsensus.Start(quitCh)
	select {
	case err := <-errChan:
		if err != nil {
			return err
		}
	}

	m.logger.Info("master node started", zap.Uint("core_id", m.coreId))

	// Wait for shutdown signal
	<-m.quit
	return nil
}

func (m *MasterNode) Stop() {
	m.logger.Info("stopping master node")

	// Stop the global consensus engine
	if err := <-m.globalConsensus.Stop(false); err != nil {
		m.logger.Error("error stopping global consensus", zap.Error(err))
	}

	defer func() {
		// Close database
		if m.pebble != nil {
			err := m.pebble.Close()
			if err != nil {
				m.logger.Error("database shut down with errors", zap.Error(err))
			} else {
				m.logger.Info("database stopped cleanly")
			}
		}
	}()
}

func (m *MasterNode) GetLogger() *zap.Logger {
	return m.logger
}

func (m *MasterNode) GetClockStore() store.ClockStore {
	return m.clockStore
}

func (m *MasterNode) GetCoinStore() store.TokenStore {
	return m.coinStore
}

func (m *MasterNode) GetDataProofStore() store.DataProofStore {
	return m.dataProofStore
}

func (m *MasterNode) GetKeyManager() keys.KeyManager {
	return m.keyManager
}

func (m *MasterNode) GetPubSub() p2p.PubSub {
	return m.pubSub
}

func (m *MasterNode) GetGlobalConsensusEngine() *global.GlobalConsensusEngine {
	return m.globalConsensus
}

func (m *MasterNode) GetGlobalTimeReel() *consensustime.GlobalTimeReel {
	return m.globalTimeReel
}

func (m *MasterNode) GetCoreId() uint {
	return m.coreId
}

func (m *MasterNode) GetPeerInfoProvider() rpc.PeerInfoProvider {
	return m.globalConsensus
}

func (m *MasterNode) GetWorkerManager() worker.WorkerManager {
	return m.globalConsensus.GetWorkerManager()
}

func (m *MasterNode) GetProverRegistry() consensus.ProverRegistry {
	return m.globalConsensus.GetProverRegistry()
}

func (
	m *MasterNode,
) GetExecutionEngineManager() *manager.ExecutionEngineManager {
	return m.globalConsensus.GetExecutionEngineManager()
}