mirror of
https://github.com/QuilibriumNetwork/ceremonyclient.git
synced 2026-02-21 18:37:26 +08:00
* v2.1.0.2 * restore tweaks to simlibp2p * fix: nil ref on size calc * fix: panic should induce shutdown from event_distributor * fix: friendlier initialization that requires less manual kickstarting for test/devnets * fix: fewer available shards than provers should choose shard length * fix: update stored worker registry, improve logging for debug mode * fix: shut the fuck up, peer log * qol: log value should be snake cased * fix:non-archive snap sync issues * fix: separate X448/Decaf448 signed keys, add onion key to registry * fix: overflow arithmetic on frame number comparison * fix: worker registration should be idempotent if inputs are same, otherwise permit updated records * fix: remove global prover state from size calculation * fix: divide by zero case * fix: eager prover * fix: broadcast listener default * qol: diagnostic data for peer authenticator * fix: master/worker connectivity issue in sparse networks tight coupling of peer and workers can sometimes interfere if mesh is sparse, so give workers a pseudoidentity but publish messages with the proper peer key * fix: reorder steps of join creation * fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency) * fix: add delegate to protobuf <-> reified join conversion * fix: preempt prover from planning with no workers * fix: use the unallocated workers to generate a proof * qol: underflow causes join fail in first ten frames on test/devnets * qol: small logging tweaks for easier log correlation in debug mode * qol: use fisher-yates shuffle to ensure prover allocations are evenly distributed when scores are equal * qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring * reuse shard descriptors for both join planning and confirm/reject decisions * fix: add missing interface method and amend test blossomsub to use new peer id basis * fix: only check allocations if they 
exist * fix: pomw mint proof data needs to be hierarchically under global intrinsic domain * staging temporary state under diagnostics * fix: first phase of distributed lock refactoring * fix: compute intrinsic locking * fix: hypergraph intrinsic locking * fix: token intrinsic locking * fix: update execution engines to support new locking model * fix: adjust tests with new execution shape * fix: weave in lock/unlock semantics to liveness provider * fix lock fallthrough, add missing allocation update * qol: additional logging for diagnostics, also testnet/devnet handling for confirmations * fix: establish grace period on halt scenario to permit recovery * fix: support test/devnet defaults for coverage scenarios * fix: nil ref on consensus halts for non-archive nodes * fix: remove unnecessary prefix from prover ref * add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved * fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly * rollup of pre-liveness check lock interactions * ahead of tests, get the protobuf/metrics-related changes out so teams can prepare * add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved * fix: blocker (3) * Dev docs improvements (#445) * Make install deps script more robust * Improve testing instructions * Worker node should stop upon OS SIGINT/SIGTERM signal (#447) * move pebble close to Stop() * move deferred Stop() to Start() * add core id to worker stop log message * create done os signal channel and stop worker upon message to it --------- Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com> --------- Co-authored-by: Daz <daz_the_corgi@proton.me> Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
149 lines
3.3 KiB
Go
149 lines
3.3 KiB
Go
package app
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
|
|
"go.uber.org/zap"
|
|
consensustime "source.quilibrium.com/quilibrium/monorepo/node/consensus/time"
|
|
"source.quilibrium.com/quilibrium/monorepo/node/datarpc"
|
|
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
|
|
"source.quilibrium.com/quilibrium/monorepo/types/keys"
|
|
"source.quilibrium.com/quilibrium/monorepo/types/store"
|
|
)
|
|
|
|
// DataWorkerNode encapsulates the state for a single data-worker
// process: its stores, key material, prover, time reel, and the IPC
// server bound to one worker core, plus the channel used to coordinate
// shutdown.
type DataWorkerNode struct {
	logger         *zap.Logger
	dataProofStore store.DataProofStore
	clockStore     store.ClockStore
	coinStore      store.TokenStore
	keyManager     keys.KeyManager

	// pebble is the underlying KV database; it is closed on the
	// shutdown path in Start.
	pebble store.KVDB

	// coreId identifies which worker core this node serves; it is
	// attached to every log line via the constructor.
	coreId uint

	ipcServer      *datarpc.DataWorkerIPCServer
	frameProver    crypto.FrameProver
	globalTimeReel *consensustime.GlobalTimeReel

	// parentProcess — NOTE(review): stored but not read anywhere in
	// this file; presumably the supervising process id, consumed
	// elsewhere. Confirm before relying on it.
	parentProcess int

	// quit signals shutdown: closed by Stop(), or sent to by the IPC
	// goroutine in Start when the server fails to come up.
	quit chan struct{}
}
|
|
|
|
func newDataWorkerNode(
|
|
logger *zap.Logger,
|
|
dataProofStore store.DataProofStore,
|
|
clockStore store.ClockStore,
|
|
coinStore store.TokenStore,
|
|
keyManager keys.KeyManager,
|
|
pebble store.KVDB,
|
|
frameProver crypto.FrameProver,
|
|
ipcServer *datarpc.DataWorkerIPCServer,
|
|
globalTimeReel *consensustime.GlobalTimeReel,
|
|
coreId uint,
|
|
parentProcess int,
|
|
) (*DataWorkerNode, error) {
|
|
logger = logger.With(zap.String("process", fmt.Sprintf("worker %d", coreId)))
|
|
return &DataWorkerNode{
|
|
logger: logger,
|
|
dataProofStore: dataProofStore,
|
|
clockStore: clockStore,
|
|
coinStore: coinStore,
|
|
keyManager: keyManager,
|
|
pebble: pebble,
|
|
coreId: coreId,
|
|
ipcServer: ipcServer,
|
|
frameProver: frameProver,
|
|
globalTimeReel: globalTimeReel,
|
|
parentProcess: parentProcess,
|
|
quit: make(chan struct{}),
|
|
}, nil
|
|
}
|
|
|
|
func (n *DataWorkerNode) Start(
|
|
done chan os.Signal,
|
|
quitCh chan struct{},
|
|
) error {
|
|
go func() {
|
|
err := n.ipcServer.Start()
|
|
if err != nil {
|
|
n.logger.Error(
|
|
"error while starting ipc server for core",
|
|
zap.Uint64("core", uint64(n.coreId)),
|
|
)
|
|
n.quit <- struct{}{}
|
|
}
|
|
}()
|
|
|
|
n.logger.Info("data worker node started", zap.Uint("core_id", n.coreId))
|
|
|
|
select {
|
|
case <-n.quit:
|
|
case <-done:
|
|
}
|
|
|
|
n.ipcServer.Stop()
|
|
err := n.pebble.Close()
|
|
if err != nil {
|
|
n.logger.Error(
|
|
"database shut down with errors",
|
|
zap.Error(err),
|
|
zap.Uint("core_id", n.coreId),
|
|
)
|
|
} else {
|
|
n.logger.Info(
|
|
"database stopped cleanly",
|
|
zap.Uint("core_id", n.coreId),
|
|
)
|
|
}
|
|
|
|
quitCh <- struct{}{}
|
|
return nil
|
|
}
|
|
|
|
// Stop requests shutdown by closing the quit channel, which unblocks
// the select in Start and lets the node tear down its IPC server and
// database.
func (n *DataWorkerNode) Stop() {
	n.logger.Info("stopping data worker node")

	// quit is always created by newDataWorkerNode; the nil check only
	// guards a zero-value DataWorkerNode.
	// NOTE(review): calling Stop twice would close an already-closed
	// channel and panic — confirm callers invoke Stop at most once.
	if n.quit != nil {
		close(n.quit)
	}
}
|
|
|
|
// GetQuitChannel returns the quit channel for external signaling; a
// send (or close) on it unblocks the select in Start and begins
// shutdown.
func (n *DataWorkerNode) GetQuitChannel() chan struct{} {
	return n.quit
}
|
|
|
|
// GetLogger returns the node's logger, pre-tagged with the worker's
// core id by the constructor.
func (n *DataWorkerNode) GetLogger() *zap.Logger {
	return n.logger
}
|
|
|
|
// GetClockStore returns the node's clock store.
func (n *DataWorkerNode) GetClockStore() store.ClockStore {
	return n.clockStore
}
|
|
|
|
// GetCoinStore returns the node's token store.
func (n *DataWorkerNode) GetCoinStore() store.TokenStore {
	return n.coinStore
}
|
|
|
|
// GetDataProofStore returns the node's data proof store.
func (n *DataWorkerNode) GetDataProofStore() store.DataProofStore {
	return n.dataProofStore
}
|
|
|
|
// GetKeyManager returns the node's key manager.
func (n *DataWorkerNode) GetKeyManager() keys.KeyManager {
	return n.keyManager
}
|
|
|
|
// GetGlobalTimeReel returns the node's global time reel.
func (n *DataWorkerNode) GetGlobalTimeReel() *consensustime.GlobalTimeReel {
	return n.globalTimeReel
}
|
|
|
|
// GetCoreId returns the worker core id this node serves.
func (n *DataWorkerNode) GetCoreId() uint {
	return n.coreId
}
|
|
|
|
// GetFrameProver returns the node's frame prover.
func (n *DataWorkerNode) GetFrameProver() crypto.FrameProver {
	return n.frameProver
}
|
|
|
|
// GetIPCServer returns the node's data worker IPC server.
func (n *DataWorkerNode) GetIPCServer() *datarpc.DataWorkerIPCServer {
	return n.ipcServer
}
|