v2.1.0.19 (#515)

* v2.1.0.19

* enhanced error logging, fix seniority marker join blocker, fix sync message size limit defaults

* resolve signature failure

* additional error logging for merge-related signatures

* fix: one-shot sync message size, app shard TC signature size, collector/hotstuff race condition, expired joins blocking new joins due to pruning disable

* remove compat with old 2.0.0 blossomsub

* fix: resolve abandoned prover joins

* reload prover registry

* fix stale worker proposal edge

* add full sanity check on join before submitting to identify bug

* resolve non-fallthrough condition that should be fallthrough

* fix: resolve rare SIGFPE, fix orphan expired joins blocking workers from reallocating

* add reconnect fallback if no peers are found with variable reconnect time (#511)

Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>

* update base peer count to 1 (#513)

* fix: expired prover join frames, starting port ranges, proposer getting stuck, and seniority on joins

* fix: panic on shutdown, libp2p discovery picking inaccessible peers, coverage event check not in shutdown logic, amend app shard worker behavior to mirror global for prover root reconciliation

* fix: shutdown scenario quirks, reload hanging

* fix: do not bailout early on shutdown of coverage check

* fix: force registry refresh on worker waiting for registration

* add more logging to wait for prover

* fix: worker manager refreshes the filter on allocation, snapshots blocking close on shutdown

* tweak: force shutdown after five seconds for app worker

* fix: don't loop when shutting down

* fix: slight reordering, also added named workers to trace hanging shutdowns

* use deterministic key for peer id of workers to stop flagging workers as sybil attacks

* fix: remove pubsub stop from app consensus engine as it shouldn't manage pubsub lifecycle, integrate shutdown context to PerformSync to prevent stuck syncs from halting respawn

* fix: blossomsub pubsub interface does not properly track subscription status

* fix: subscribe order to avoid nil panic

* switch from dnsaddr to dns4

* add missing quic-v1

* additional logging to isolate respawn quirks

* fix: dnsaddr -> dns4 for blossomsub

* sort-of fix: apply sledgehammer to restart logic

* fix: restore proper respawn logic, fix frozen hypergraph post respawn, unsubscribe from bitmask previously missing

---------

Co-authored-by: winged-pegasus <55340199+winged-pegasus@users.noreply.github.com>
Co-authored-by: Tyler Sturos <55340199+tjsturos@users.noreply.github.com>
This commit is contained in:
Cassandra Heart 2026-02-26 04:20:13 -06:00 committed by GitHub
parent 92c1f07562
commit ce4f77b140
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
74 changed files with 5818 additions and 1232 deletions

View File

@ -85,9 +85,9 @@ func NewConfig(configPath string) (*Config, error) {
}
var BootstrapPeers = []string{
"/dnsaddr/quinoa.quilibrium.com/udp/8339/p2p/QmP9NNzAzRjCL8gdQBkKHwyBCWJGVb3jPrQzTveYdU24kH",
"/dnsaddr/qualia.quilibrium.com/udp/8339/p2p/QmRP1UPiDg1enHgN6wEL1Y4uUh1XKg7V3QExdBKV9BUUQf",
"/dnsaddr/quetzalcoatl.quilibrium.com/udp/8339/p2p/QmNq4xSqrxTKKtK7J6UFEa4unjsoULP2G4qWwwH5EKmoJj",
"/dns4/quinoa.quilibrium.com/udp/8336/quic-v1/p2p/QmP9NNzAzRjCL8gdQBkKHwyBCWJGVb3jPrQzTveYdU24kH",
"/dns4/qualia.quilibrium.com/udp/8336/quic-v1/p2p/QmRP1UPiDg1enHgN6wEL1Y4uUh1XKg7V3QExdBKV9BUUQf",
"/dns4/quetzalcoatl.quilibrium.com/udp/8336/quic-v1/p2p/QmNq4xSqrxTKKtK7J6UFEa4unjsoULP2G4qWwwH5EKmoJj",
// "/ip4/204.186.74.46/udp/8316/quic-v1/p2p/QmeqBjm3iX7sdTieyto1gys5ruQrQNPKfaTGcVQQWJPYDV",
"/ip4/65.109.17.13/udp/8336/quic-v1/p2p/Qmc35n99eojSvW3PkbfBczJoSX92WmnnKh3Fg114ok3oo4",
"/ip4/65.108.194.84/udp/8336/quic-v1/p2p/QmP8C7g9ZRiWzhqN2AgFu5onS6HwHzR6Vv1TCHxAhnCSnq",

View File

@ -6,12 +6,12 @@ const (
defaultMinimumPeersRequired = 3
priorDefaultDataWorkerBaseListenMultiaddr = "/ip4/127.0.0.1/tcp/%d"
defaultDataWorkerBaseListenMultiaddr = "/ip4/0.0.0.0/tcp/%d"
defaultDataWorkerBaseP2PPort = uint16(50000)
defaultDataWorkerBaseStreamPort = uint16(60000)
defaultDataWorkerBaseP2PPort = uint16(25000)
defaultDataWorkerBaseStreamPort = uint16(32500)
defaultDataWorkerMemoryLimit = int64(1792 * 1024 * 1024) // 1.75 GiB
defaultSyncTimeout = 4 * time.Second
defaultSyncCandidates = 8
defaultSyncMessageReceiveLimit = 1 * 1024 * 1024
defaultSyncMessageReceiveLimit = 600 * 1024 * 1024
defaultSyncMessageSendLimit = 600 * 1024 * 1024
defaultRewardStrategy = "reward-greedy"
)

View File

@ -18,6 +18,7 @@ const (
defaultPingTimeout = 5 * time.Second
defaultPingPeriod = 30 * time.Second
defaultPingAttempts = 3
defaultPeerReconnectInterval = 60 * time.Second
defaultStreamListenMultiaddr = "/ip4/0.0.0.0/tcp/8340"
)
@ -76,6 +77,7 @@ type P2PConfig struct {
ValidateWorkers int `yaml:"validateWorkers"`
SubscriptionQueueSize int `yaml:"subscriptionQueueSize"`
PeerOutboundQueueSize int `yaml:"peerOutboundQueueSize"`
PeerReconnectCheckInterval time.Duration `yaml:"peerReconnectCheckInterval"`
}
// WithDefaults returns a copy of the P2PConfig with any missing fields set to
@ -220,5 +222,8 @@ func (c P2PConfig) WithDefaults() P2PConfig {
if cpy.PeerOutboundQueueSize == 0 {
cpy.PeerOutboundQueueSize = blossomsub.DefaultPeerOutboundQueueSize
}
if cpy.PeerReconnectCheckInterval == 0 {
cpy.PeerReconnectCheckInterval = defaultPeerReconnectInterval
}
return cpy
}

View File

@ -43,7 +43,7 @@ func FormatVersion(version []byte) string {
}
func GetPatchNumber() byte {
return 0x12
return 0x13
}
func GetRCNumber() byte {

View File

@ -23,15 +23,15 @@ const (
)
// BlossomSubDefaultProtocols is the default BlossomSub router protocol list
var BlossomSubDefaultProtocols = []protocol.ID{BlossomSubID_v21, BlossomSubID_v2}
var BlossomSubDefaultProtocols = []protocol.ID{BlossomSubID_v21}
// BlossomSubDefaultFeatures is the feature test function for the default BlossomSub protocols
func BlossomSubDefaultFeatures(feat BlossomSubFeature, proto protocol.ID) bool {
switch feat {
case BlossomSubFeatureMesh:
return proto == BlossomSubID_v21 || proto == BlossomSubID_v2
return proto == BlossomSubID_v21
case BlossomSubFeaturePX:
return proto == BlossomSubID_v21 || proto == BlossomSubID_v2
return proto == BlossomSubID_v21
case BlossomSubFeatureIdontwant:
return proto == BlossomSubID_v21
default:

View File

@ -18,7 +18,7 @@ var (
var (
ResolvableProtocols = []ma.Protocol{dnsaddrProtocol, dns4Protocol, dns6Protocol, dnsProtocol}
DefaultResolver = &Resolver{def: net.DefaultResolver}
DefaultResolver = &Resolver{def: &net.Resolver{PreferGo: true}}
)
const maxResolvedAddrs = 100
@ -44,7 +44,7 @@ var _ BasicResolver = (*Resolver)(nil)
// NewResolver creates a new Resolver instance with the specified options
func NewResolver(opts ...Option) (*Resolver, error) {
r := &Resolver{def: net.DefaultResolver}
r := &Resolver{def: &net.Resolver{PreferGo: true}}
for _, opt := range opts {
err := opt(r)
if err != nil {

View File

@ -132,14 +132,24 @@ func (hg *HypergraphCRDT) SetSelfPeerID(peerID string) {
func (hg *HypergraphCRDT) SetShutdownContext(ctx context.Context) {
hg.shutdownCtx = ctx
// Reopen the snapshot manager in case it was closed by a previous
// shutdown context (i.e. during in-process engine respawn).
hg.snapshotMgr.reopen()
go func() {
select {
case <-hg.shutdownCtx.Done():
hg.snapshotMgr.publish(nil)
hg.snapshotMgr.close()
}
}()
}
// CloseSnapshots synchronously releases all snapshot generations and their DB
// snapshots. This must be called before closing the underlying Pebble database
// to avoid dangling snapshot warnings. It is idempotent.
func (hg *HypergraphCRDT) CloseSnapshots() {
hg.snapshotMgr.close()
}
func (hg *HypergraphCRDT) contextWithShutdown(
parent context.Context,
) (context.Context, context.CancelFunc) {
@ -159,6 +169,34 @@ func (hg *HypergraphCRDT) contextWithShutdown(
return ctx, cancel
}
// lockWithShutdown tries to acquire hg.mu exclusively. If the shutdown context
// fires before the lock is acquired, it returns false and the caller must not
// proceed. A background goroutine ensures the lock is released if it is
// eventually acquired after shutdown.
func (hg *HypergraphCRDT) lockWithShutdown() bool {
if hg.shutdownCtx == nil {
hg.mu.Lock()
return true
}
locked := make(chan struct{})
go func() {
hg.mu.Lock()
close(locked)
}()
select {
case <-locked:
return true
case <-hg.shutdownCtx.Done():
go func() {
<-locked
hg.mu.Unlock()
}()
return false
}
}
func (hg *HypergraphCRDT) snapshotSet(
shardKey tries.ShardKey,
targetStore tries.TreeBackingStore,
@ -696,7 +734,9 @@ func (hg *HypergraphCRDT) GetMetadataAtKey(pathKey []byte) (
[]hypergraph.ShardMetadata,
error,
) {
hg.mu.Lock()
if !hg.lockWithShutdown() {
return nil, errors.New("shutting down")
}
defer hg.mu.Unlock()
if len(pathKey) < 32 {
return nil, errors.Wrap(

View File

@ -298,6 +298,11 @@ func (hg *HypergraphCRDT) CommitShard(
L2: [32]byte(shardAddress[:32]),
}
txn, err := hg.store.NewTransaction(false)
if err != nil {
return nil, errors.Wrap(err, "commit shard")
}
vertexAddSet, vertexRemoveSet := hg.getOrCreateIdSet(
shardKey,
hg.vertexAdds,
@ -306,9 +311,9 @@ func (hg *HypergraphCRDT) CommitShard(
hg.getCoveredPrefix(),
)
vertexAddTree := vertexAddSet.GetTree()
vertexAddTree.Commit(nil, false)
vertexAddTree.Commit(txn, false)
vertexRemoveTree := vertexRemoveSet.GetTree()
vertexRemoveTree.Commit(nil, false)
vertexRemoveTree.Commit(txn, false)
path := tries.GetFullPath(shardAddress[:32])
for _, p := range shardAddress[32:] {
@ -333,25 +338,20 @@ func (hg *HypergraphCRDT) CommitShard(
hg.getCoveredPrefix(),
)
hyperedgeAddTree := hyperedgeAddSet.GetTree()
hyperedgeAddTree.Commit(nil, false)
hyperedgeAddTree.Commit(txn, false)
hyperedgeRemoveTree := hyperedgeRemoveSet.GetTree()
hyperedgeRemoveTree.Commit(nil, false)
hyperedgeRemoveTree.Commit(txn, false)
hyperedgeAddNode, err := vertexAddTree.GetByPath(path)
hyperedgeAddNode, err := hyperedgeAddTree.GetByPath(path)
if err != nil && !strings.Contains(err.Error(), "not found") {
return nil, errors.Wrap(err, "commit shard")
}
hyperedgeRemoveNode, err := vertexRemoveTree.GetByPath(path)
hyperedgeRemoveNode, err := hyperedgeRemoveTree.GetByPath(path)
if err != nil && !strings.Contains(err.Error(), "not found") {
return nil, errors.Wrap(err, "commit shard")
}
txn, err := hg.store.NewTransaction(false)
if err != nil {
return nil, errors.Wrap(err, "commit shard")
}
vertexAddCommit := make([]byte, 64)
if vertexAddNode != nil {
switch n := vertexAddNode.(type) {

View File

@ -170,6 +170,7 @@ type snapshotManager struct {
logger *zap.Logger
store tries.TreeBackingStore
mu sync.Mutex
closed bool
// generations holds snapshot generations ordered from newest to oldest.
// generations[0] is the current/latest generation.
generations []*snapshotGeneration
@ -190,6 +191,10 @@ func (m *snapshotManager) publish(root []byte) {
m.mu.Lock()
defer m.mu.Unlock()
if m.closed {
return
}
rootHex := ""
if len(root) != 0 {
rootHex = hex.EncodeToString(root)
@ -287,7 +292,7 @@ func (m *snapshotManager) acquire(
m.mu.Lock()
defer m.mu.Unlock()
if len(m.generations) == 0 {
if m.closed || len(m.generations) == 0 {
m.logger.Warn("no snapshot generations available")
return nil
}
@ -425,6 +430,53 @@ func (m *snapshotManager) release(handle *snapshotHandle) {
}
}
// close releases all snapshot generations and their DB snapshots. After close,
// publish and acquire become no-ops. Shard snapshot handles held by active sync
// sessions remain valid (they are self-contained in-memory DBs) and will be
// released when the session ends.
func (m *snapshotManager) close() {
m.mu.Lock()
defer m.mu.Unlock()
if m.closed {
return
}
m.closed = true
for _, gen := range m.generations {
for key, handle := range gen.handles {
delete(gen.handles, key)
if handle != nil {
handle.releaseRef(m.logger)
}
}
if gen.dbSnapshot != nil {
if err := gen.dbSnapshot.Close(); err != nil {
m.logger.Warn("failed to close DB snapshot during shutdown", zap.Error(err))
}
gen.dbSnapshot = nil
}
}
m.generations = nil
m.logger.Debug("snapshot manager closed")
}
// reopen resets the closed flag so the snapshot manager can accept new
// snapshots after a respawn. Any previously held snapshots were already
// released by close(), so we start with an empty generation list.
func (m *snapshotManager) reopen() {
m.mu.Lock()
defer m.mu.Unlock()
if !m.closed {
return
}
m.closed = false
m.generations = make([]*snapshotGeneration, 0, maxSnapshotGenerations)
m.logger.Debug("snapshot manager reopened")
}
func shardKeyString(sk tries.ShardKey) string {
buf := make([]byte, 0, len(sk.L1)+len(sk.L2))
buf = append(buf, sk.L1[:]...)

View File

@ -177,6 +177,17 @@ func (hg *HypergraphCRDT) Sync(
path := hg.getCoveredPrefix()
// Commit tree state through a transaction before sending initial query
initTxn, err := hg.store.NewTransaction(false)
if err != nil {
return nil, err
}
initCommitment := set.GetTree().Commit(initTxn, false)
if err := initTxn.Commit(); err != nil {
initTxn.Abort()
return nil, err
}
// Send initial query for path
sendStart := time.Now()
if err := stream.Send(&protobufs.HypergraphComparison{
@ -185,7 +196,7 @@ func (hg *HypergraphCRDT) Sync(
ShardKey: slices.Concat(shardKey.L1[:], shardKey.L2[:]),
PhaseSet: phaseSet,
Path: toInt32Slice(path),
Commitment: set.GetTree().Commit(nil, false),
Commitment: initCommitment,
IncludeLeafData: false,
ExpectedRoot: expectedRoot,
},
@ -336,7 +347,15 @@ func (hg *HypergraphCRDT) Sync(
wg.Wait()
root = set.GetTree().Commit(nil, false)
finalTxn, err := hg.store.NewTransaction(false)
if err != nil {
return nil, err
}
root = set.GetTree().Commit(finalTxn, false)
if err := finalTxn.Commit(); err != nil {
finalTxn.Abort()
return nil, err
}
hg.logger.Info(
"hypergraph root commit",
zap.String("root", hex.EncodeToString(root)),

View File

@ -68,7 +68,8 @@ func isGlobalProverShardBytes(shardKeyBytes []byte) bool {
func (hg *HypergraphCRDT) PerformSync(
stream protobufs.HypergraphComparisonService_PerformSyncServer,
) error {
ctx := stream.Context()
ctx, shutdownCancel := hg.contextWithShutdown(stream.Context())
defer shutdownCancel()
logger := hg.logger.With(zap.String("method", "PerformSync"))
sessionStart := time.Now()
@ -404,7 +405,6 @@ func (hg *HypergraphCRDT) handleGetLeaves(
Size: leaf.Size.FillBytes(make([]byte, 32)),
}
// Load underlying vertex tree if available (use snapshot store for consistency)
vtree, err := session.store.LoadVertexTree(leaf.Key)
if err == nil && vtree != nil {
data, err := tries.SerializeNonLazyTree(vtree)

View File

@ -86,7 +86,11 @@ func (c *ConsensusSignatureAggregatorWrapper) Aggregate(
}
// TODO: remove direct reference
if len(c.filter) != 0 {
// Only append extra bytes when extProofs were actually present (adj > 0).
// Timeout votes produce 74-byte signatures with no extProof, so appending
// 516*(n-1) zero bytes would bloat the TC aggregate signature beyond the
// deserialization limit (711 bytes) in TimeoutCertificate.FromCanonicalBytes.
if len(c.filter) != 0 && adj > 0 {
output.(*bls48581.BlsAggregateOutput).AggregateSignature =
append(output.(*bls48581.BlsAggregateOutput).AggregateSignature, extra...)
}

View File

@ -740,11 +740,18 @@ func NewAppConsensusEngine(
appSyncHooks,
)
// Add sync provider
componentBuilder.AddWorker(engine.syncProvider.Start)
// namedWorker wraps a worker function with shutdown logging so we can
// identify which worker(s) hang during shutdown.
namedWorker := func(name string, fn func(lifecycle.SignalerContext, lifecycle.ReadyFunc)) lifecycle.ComponentWorker {
return func(ctx lifecycle.SignalerContext, ready lifecycle.ReadyFunc) {
engine.logger.Debug("worker starting", zap.String("worker", name))
defer engine.logger.Debug("worker stopped", zap.String("worker", name))
fn(ctx, ready)
}
}
// Add consensus
componentBuilder.AddWorker(func(
componentBuilder.AddWorker(namedWorker("consensus", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
@ -802,17 +809,120 @@ func NewAppConsensusEngine(
<-ctx.Done()
<-lifecycle.AllDone(engine.voteAggregator, engine.timeoutAggregator)
})
}))
// Start app shard proposal queue processor
componentBuilder.AddWorker(func(
componentBuilder.AddWorker(namedWorker("proposalQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processAppShardProposalQueue(ctx)
})
}))
// NOTE: subscribe calls are deferred until after ComponentManager is built
// (see below). The handler closures reference e.ShutdownSignal() which
// panics if ComponentManager is nil. Since Subscribe spawns goroutines
// immediately, a message arriving before Build() would hit a nil receiver.
// Add sync provider
componentBuilder.AddWorker(namedWorker("syncProvider", engine.syncProvider.Start))
// Start message queue processors
componentBuilder.AddWorker(namedWorker("consensusMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processConsensusMessageQueue(ctx)
}))
componentBuilder.AddWorker(namedWorker("proverMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processProverMessageQueue(ctx)
}))
componentBuilder.AddWorker(namedWorker("frameMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processFrameMessageQueue(ctx)
}))
componentBuilder.AddWorker(namedWorker("globalFrameMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processGlobalFrameMessageQueue(ctx)
}))
componentBuilder.AddWorker(namedWorker("alertMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processAlertMessageQueue(ctx)
}))
componentBuilder.AddWorker(namedWorker("peerInfoMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processPeerInfoMessageQueue(ctx)
}))
componentBuilder.AddWorker(namedWorker("dispatchMsgQueue", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processDispatchMessageQueue(ctx)
}))
// Start event distributor event loop
componentBuilder.AddWorker(namedWorker("eventDistributor", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.eventDistributorLoop(ctx)
}))
// Start metrics update goroutine
componentBuilder.AddWorker(namedWorker("metricsLoop", func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.updateMetricsLoop(ctx)
}))
engine.ComponentManager = componentBuilder.Build()
if hgWithShutdown, ok := engine.hyperSync.(interface {
SetShutdownContext(context.Context)
}); ok {
hgWithShutdown.SetShutdownContext(
contextFromShutdownSignal(engine.ShutdownSignal()),
)
}
// Set self peer ID on hypergraph to allow unlimited self-sync sessions
if hgWithSelfPeer, ok := engine.hyperSync.(interface {
SetSelfPeerID(string)
}); ok {
hgWithSelfPeer.SetSelfPeerID(peer.ID(ps.GetPeerID()).String())
}
// Subscribe to pubsub bitmasks. These calls spawn handler goroutines
// immediately, and the handlers reference e.ShutdownSignal() which
// requires ComponentManager to be non-nil. That's why subscriptions
// must happen after componentBuilder.Build() above.
err = engine.subscribeToConsensusMessages()
if err != nil {
return nil, err
@ -853,104 +963,11 @@ func NewAppConsensusEngine(
return nil, err
}
// Add sync provider
componentBuilder.AddWorker(engine.syncProvider.Start)
// Start message queue processors
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processConsensusMessageQueue(ctx)
})
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processProverMessageQueue(ctx)
})
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processFrameMessageQueue(ctx)
})
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processGlobalFrameMessageQueue(ctx)
})
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processAlertMessageQueue(ctx)
})
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processPeerInfoMessageQueue(ctx)
})
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.processDispatchMessageQueue(ctx)
})
// Start event distributor event loop
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.eventDistributorLoop(ctx)
})
// Start metrics update goroutine
componentBuilder.AddWorker(func(
ctx lifecycle.SignalerContext,
ready lifecycle.ReadyFunc,
) {
ready()
engine.updateMetricsLoop(ctx)
})
engine.ComponentManager = componentBuilder.Build()
if hgWithShutdown, ok := engine.hyperSync.(interface {
SetShutdownContext(context.Context)
}); ok {
hgWithShutdown.SetShutdownContext(
contextFromShutdownSignal(engine.ShutdownSignal()),
)
}
// Set self peer ID on hypergraph to allow unlimited self-sync sessions
if hgWithSelfPeer, ok := engine.hyperSync.(interface {
SetSelfPeerID(string)
}); ok {
hgWithSelfPeer.SetSelfPeerID(peer.ID(ps.GetPeerID()).String())
}
return engine, nil
}
func (e *AppConsensusEngine) Stop(force bool) <-chan error {
e.logger.Info("app engine stopping", zap.Bool("force", force))
errChan := make(chan error, 1)
// First, cancel context to signal all goroutines to stop
@ -967,6 +984,8 @@ func (e *AppConsensusEngine) Stop(force bool) <-chan error {
e.pubsub.UnregisterValidator(e.getFrameMessageBitmask())
e.pubsub.Unsubscribe(e.getGlobalFrameMessageBitmask(), false)
e.pubsub.UnregisterValidator(e.getGlobalFrameMessageBitmask())
e.pubsub.Unsubscribe(e.getGlobalProverMessageBitmask(), false)
e.pubsub.UnregisterValidator(e.getGlobalProverMessageBitmask())
e.pubsub.Unsubscribe(e.getGlobalAlertMessageBitmask(), false)
e.pubsub.UnregisterValidator(e.getGlobalAlertMessageBitmask())
e.pubsub.Unsubscribe(e.getGlobalPeerInfoMessageBitmask(), false)
@ -974,6 +993,19 @@ func (e *AppConsensusEngine) Stop(force bool) <-chan error {
e.pubsub.Unsubscribe(e.getDispatchMessageBitmask(), false)
e.pubsub.UnregisterValidator(e.getDispatchMessageBitmask())
// Wait for component workers to finish. The IPC server owns the pubsub
// lifecycle, so we don't close it here (doing so would break respawns).
e.logger.Info("app engine shutdown: waiting for workers")
select {
case <-e.Done():
e.logger.Info("app engine shutdown: all workers stopped")
case <-time.After(30 * time.Second):
e.logger.Error("app engine shutdown: timed out after 30s")
if !force {
errChan <- errors.New("timeout waiting for app engine shutdown")
}
}
close(errChan)
return errChan
}
@ -988,6 +1020,22 @@ func (e *AppConsensusEngine) handleGlobalProverRoot(
frameNumber := frame.Header.FrameNumber
expectedProverRoot := frame.Header.ProverTreeCommitment
if len(expectedProverRoot) == 0 {
return
}
// Match the GlobalConsensusEngine's ordering: commit the tree first as a
// standalone step, then extract and verify the prover root. The global
// engine calls Commit(N) at the start of materialize(N) before checking
// the root. We mirror this by committing first, then extracting.
if _, err := e.hypergraph.Commit(frameNumber); err != nil {
e.logger.Warn(
"failed to commit hypergraph for global prover root check",
zap.Uint64("frame_number", frameNumber),
zap.Error(err),
)
}
localRoot, err := e.computeLocalGlobalProverRoot(frameNumber)
if err != nil {
e.logger.Warn(
@ -997,12 +1045,11 @@ func (e *AppConsensusEngine) handleGlobalProverRoot(
)
e.globalProverRootSynced.Store(false)
e.globalProverRootVerifiedFrame.Store(0)
// Use blocking hypersync to ensure we're synced before continuing
e.performBlockingGlobalHypersync(frame.Header.Prover, expectedProverRoot)
return
}
if len(localRoot) == 0 || len(expectedProverRoot) == 0 {
if len(localRoot) == 0 {
return
}
@ -1015,8 +1062,35 @@ func (e *AppConsensusEngine) handleGlobalProverRoot(
)
e.globalProverRootSynced.Store(false)
e.globalProverRootVerifiedFrame.Store(0)
// Use blocking hypersync to ensure we're synced before continuing
e.performBlockingGlobalHypersync(frame.Header.Prover, expectedProverRoot)
// Re-compute local root after sync to verify convergence, matching
// the global engine's post-sync verification pattern.
newLocalRoot, newRootErr := e.computeLocalGlobalProverRoot(frameNumber)
if newRootErr != nil {
e.logger.Warn(
"failed to compute local global prover root after sync",
zap.Uint64("frame_number", frameNumber),
zap.Error(newRootErr),
)
} else if bytes.Equal(newLocalRoot, expectedProverRoot) {
e.logger.Info(
"global prover root converged after sync",
zap.Uint64("frame_number", frameNumber),
)
e.globalProverRootSynced.Store(true)
e.globalProverRootVerifiedFrame.Store(frameNumber)
if err := e.proverRegistry.Refresh(); err != nil {
e.logger.Warn("failed to refresh prover registry", zap.Error(err))
}
} else {
e.logger.Warn(
"global prover root still mismatched after sync",
zap.Uint64("frame_number", frameNumber),
zap.String("expected_root", hex.EncodeToString(expectedProverRoot)),
zap.String("post_sync_root", hex.EncodeToString(newLocalRoot)),
)
}
return
}
@ -1082,6 +1156,16 @@ func (e *AppConsensusEngine) triggerGlobalHypersync(proposer []byte, expectedRoo
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Cancel sync when the engine shuts down so this goroutine doesn't
// outlive the engine and hold resources (locks, connections).
go func() {
select {
case <-e.ShutdownSignal():
cancel()
case <-ctx.Done():
}
}()
shardKey := tries.ShardKey{
L1: [3]byte{0x00, 0x00, 0x00},
L2: intrinsics.GLOBAL_INTRINSIC_ADDRESS,
@ -1112,15 +1196,22 @@ func (e *AppConsensusEngine) performBlockingGlobalHypersync(proposer []byte, exp
// Wait for any existing sync to complete first
for e.globalProverSyncInProgress.Load() {
e.logger.Debug("blocking hypersync: waiting for existing sync to complete")
time.Sleep(100 * time.Millisecond)
select {
case <-e.ShutdownSignal():
return
case <-time.After(100 * time.Millisecond):
}
}
// Mark sync as in progress
if !e.globalProverSyncInProgress.CompareAndSwap(false, true) {
// Another sync started, wait for it
for e.globalProverSyncInProgress.Load() {
time.Sleep(100 * time.Millisecond)
select {
case <-e.ShutdownSignal():
return
case <-time.After(100 * time.Millisecond):
}
}
return
}
@ -1162,8 +1253,11 @@ func (e *AppConsensusEngine) performBlockingGlobalHypersync(proposer []byte, exp
)
}
e.globalProverRootSynced.Store(true)
e.logger.Info("blocking global hypersync completed")
// Don't unconditionally set synced=true. Commit(N-1) is cached with the
// pre-sync root, so we can't re-verify here. The next frame's deferred
// check will call Commit(N) fresh and verify convergence — matching the
// global engine's pattern where convergence happens on the next materialize.
e.logger.Info("blocking global hypersync completed, convergence will be verified on next frame")
}
func (e *AppConsensusEngine) GetFrame() *protobufs.AppShardFrame {
@ -1915,7 +2009,10 @@ func (e *AppConsensusEngine) awaitFirstGlobalFrame(
func (e *AppConsensusEngine) waitForProverRegistration(
ctx lifecycle.SignalerContext,
) error {
logger := e.logger.With(zap.String("shard_address", e.appAddressHex))
logger := e.logger.With(
zap.String("shard_address", e.appAddressHex),
zap.String("prover_address", hex.EncodeToString(e.proverAddress)),
)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
@ -1936,7 +2033,16 @@ func (e *AppConsensusEngine) waitForProverRegistration(
return nil
}
}
logger.Info("waiting for prover registration")
proverAddrs := make([]string, 0, len(provers))
for _, p := range provers {
proverAddrs = append(proverAddrs, hex.EncodeToString(p.Address))
}
logger.Info(
"waiting for prover registration",
zap.Int("active_provers_for_filter", len(provers)),
zap.String("filter", hex.EncodeToString(e.appAddress)),
zap.Strings("registry_addresses", proverAddrs),
)
}
select {
@ -2313,6 +2419,8 @@ func (e *AppConsensusEngine) startConsensus(
e.timeoutAggregator.Start(ctx)
<-lifecycle.AllReady(e.voteAggregator, e.timeoutAggregator)
e.consensusParticipant.Start(ctx)
e.logger.Info("consensus started successfully",
zap.String("shard_address", e.appAddressHex))
return nil
}
@ -2767,6 +2875,9 @@ func (e *AppConsensusEngine) OnQuorumCertificateTriggeredRankChange(
// OnRankChange implements consensus.Consumer.
func (e *AppConsensusEngine) OnRankChange(oldRank uint64, newRank uint64) {
if e.currentRank == newRank {
return
}
e.currentRank = newRank
err := e.ensureGlobalClient()
if err != nil {

View File

@ -44,11 +44,17 @@ func (p *AppLivenessProvider) Collect(
var collectorRecords []*sequencedAppMessage
var collector keyedaggregator.Collector[sequencedAppMessage]
alreadyCollected := false
if p.engine.messageCollectors != nil {
var err error
var found bool
collector, found, err = p.engine.getAppMessageCollector(rank)
if err != nil && !errors.Is(err, keyedaggregator.ErrSequenceBelowRetention) {
if err != nil && errors.Is(err, keyedaggregator.ErrSequenceBelowRetention) {
// Collector was already pruned by a prior Collect call for this
// rank. We must not overwrite collectedMessages with an empty
// slice or the previously-collected messages will be lost.
alreadyCollected = true
} else if err != nil {
p.engine.logger.Warn(
"could not fetch collector for rank",
zap.Uint64("rank", rank),
@ -133,9 +139,14 @@ func (p *AppLivenessProvider) Collect(
}
pendingMessagesCount.WithLabelValues(p.engine.appAddressHex).Set(0)
p.engine.collectedMessagesMu.Lock()
p.engine.collectedMessages = finalizedMessages
p.engine.collectedMessagesMu.Unlock()
// If we already collected for this rank (collector was pruned) and found no
// new messages, preserve the previously-collected messages rather than
// overwriting them with an empty slice.
if !alreadyCollected || len(finalizedMessages) > 0 {
p.engine.collectedMessagesMu.Lock()
p.engine.collectedMessages = finalizedMessages
p.engine.collectedMessagesMu.Unlock()
}
return CollectedCommitments{
frameNumber: frameNumber,

View File

@ -112,6 +112,15 @@ func NewAppConsensusEngineFactory(
}
}
// CloseSnapshots synchronously closes the hypergraph snapshot manager. Call
// this before closing the underlying database to ensure no Pebble snapshots
// remain open.
func (f *AppConsensusEngineFactory) CloseSnapshots() {
if closer, ok := f.hypergraph.(interface{ CloseSnapshots() }); ok {
closer.CloseSnapshots()
}
}
// CreateAppConsensusEngine creates a new AppConsensusEngine
func (f *AppConsensusEngineFactory) CreateAppConsensusEngine(
appAddress []byte,

View File

@ -212,7 +212,7 @@ func (e *AppConsensusEngine) startAppMessageAggregator(
}
func (e *AppConsensusEngine) addAppMessage(message *protobufs.Message) {
if e.messageAggregator == nil || message == nil {
if e.messageCollectors == nil || message == nil {
return
}
if len(message.Hash) == 0 {
@ -224,7 +224,29 @@ func (e *AppConsensusEngine) addAppMessage(message *protobufs.Message) {
if record == nil {
return
}
e.messageAggregator.Add(record)
// Add directly to the collector synchronously rather than going through
// the aggregator's async worker queue. The async path loses messages
// because OnSequenceChange advances the retention window before workers
// finish processing queued items, causing them to be silently pruned.
collector, _, err := e.messageCollectors.GetOrCreateCollector(rank)
if err != nil {
e.logger.Debug(
"could not get collector for app message",
zap.Uint64("rank", rank),
zap.Error(err),
)
return
}
if err := collector.Add(record); err != nil {
e.logger.Debug(
"could not add app message to collector",
zap.Uint64("rank", rank),
zap.Error(err),
)
return
}
}
func (e *AppConsensusEngine) nextRank() uint64 {
@ -298,7 +320,7 @@ func (e *AppConsensusEngine) deferAppMessage(
}
func (e *AppConsensusEngine) flushDeferredAppMessages(targetRank uint64) {
if e == nil || e.messageAggregator == nil || targetRank == 0 {
if e == nil || e.messageCollectors == nil || targetRank == 0 {
return
}
@ -313,8 +335,36 @@ func (e *AppConsensusEngine) flushDeferredAppMessages(targetRank uint64) {
return
}
collector, _, err := e.messageCollectors.GetOrCreateCollector(targetRank)
if err != nil {
if e.logger != nil {
e.logger.Debug(
"could not get collector for deferred app messages",
zap.String("app_address", e.appAddressHex),
zap.Uint64("target_rank", targetRank),
zap.Error(err),
)
}
return
}
added := 0
for _, msg := range messages {
e.messageAggregator.Add(newSequencedAppMessage(targetRank, msg))
record := newSequencedAppMessage(targetRank, msg)
if record == nil {
continue
}
if err := collector.Add(record); err != nil {
if e.logger != nil {
e.logger.Debug(
"could not add deferred app message to collector",
zap.Uint64("target_rank", targetRank),
zap.Error(err),
)
}
continue
}
added++
}
if e.logger != nil {
@ -322,7 +372,7 @@ func (e *AppConsensusEngine) flushDeferredAppMessages(targetRank uint64) {
"replayed deferred app messages",
zap.String("app_address", e.appAddressHex),
zap.Uint64("target_rank", targetRank),
zap.Int("count", len(messages)),
zap.Int("count", added),
)
}
}

View File

@ -41,7 +41,7 @@ func (e *AppConsensusEngine) validateConsensusMessage(
proposal := &protobufs.AppShardProposal{}
if err := proposal.FromCanonicalBytes(message.Data); err != nil {
e.logger.Debug("failed to unmarshal frame", zap.Error(err))
e.logger.Debug("failed to unmarshal proposal", zap.Error(err))
proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
return p2p.ValidationResultReject
}
@ -53,11 +53,21 @@ func (e *AppConsensusEngine) validateConsensusMessage(
}
if !bytes.Equal(proposal.State.Header.Address, e.appAddress) {
e.logger.Debug(
"ignoring proposal",
zap.String("reason", "address mismatch"),
)
proposalValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
return p2p.ValidationResultIgnore
}
if e.forks.FinalizedRank() > proposal.GetRank() {
e.logger.Debug(
"ignoring proposal",
zap.String("reason", "stale rank"),
zap.Uint64("current_rank", e.forks.FinalizedRank()),
zap.Uint64("proposal_rank", proposal.GetRank()),
)
proposalValidationTotal.WithLabelValues(e.appAddressHex, "ignore").Inc()
return p2p.ValidationResultIgnore
}
@ -70,7 +80,10 @@ func (e *AppConsensusEngine) validateConsensusMessage(
}
if !valid {
e.logger.Debug("invalid frame")
e.logger.Debug(
"invalid proposal",
zap.String("reason", "frame validator returned false"),
)
proposalValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
return p2p.ValidationResultReject
}
@ -158,6 +171,11 @@ func (e *AppConsensusEngine) validateConsensusMessage(
}
default:
e.logger.Debug(
"rejecting consensus message",
zap.String("reason", "unknown type prefix"),
zap.Uint32("type", typePrefix),
)
return p2p.ValidationResultReject
}
@ -170,7 +188,7 @@ func (e *AppConsensusEngine) validateProverMessage(
) p2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -197,6 +215,12 @@ func (e *AppConsensusEngine) validateProverMessage(
now := time.Now().UnixMilli()
if messageBundle.Timestamp > now+5000 || messageBundle.Timestamp < now-5000 {
e.logger.Debug(
"ignoring prover message",
zap.String("reason", "timestamp out of window"),
zap.Int64("timestamp", messageBundle.Timestamp),
zap.Int64("now", now),
)
return p2p.ValidationResultIgnore
}
@ -214,7 +238,7 @@ func (e *AppConsensusEngine) validateGlobalProverMessage(
) p2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -241,6 +265,12 @@ func (e *AppConsensusEngine) validateGlobalProverMessage(
now := time.Now().UnixMilli()
if messageBundle.Timestamp > now+5000 || messageBundle.Timestamp < now-5000 {
e.logger.Debug(
"ignoring global prover message",
zap.String("reason", "timestamp out of window"),
zap.Int64("timestamp", messageBundle.Timestamp),
zap.Int64("now", now),
)
return p2p.ValidationResultIgnore
}
@ -307,18 +337,30 @@ func (e *AppConsensusEngine) validateFrameMessage(
}
if !valid {
e.logger.Debug(
"invalid app frame",
zap.String("reason", "frame validator returned false"),
)
frameValidationTotal.WithLabelValues(e.appAddressHex, "reject").Inc()
e.logger.Debug("invalid frame")
return p2p.ValidationResultReject
}
if frametime.AppFrameSince(frame) > 20*time.Second {
e.logger.Debug(
"ignoring app frame",
zap.String("reason", "frame too old"),
)
return p2p.ValidationResultIgnore
}
frameValidationTotal.WithLabelValues(e.appAddressHex, "accept").Inc()
default:
e.logger.Debug(
"rejecting frame message",
zap.String("reason", "unknown type prefix"),
zap.Uint32("type", typePrefix),
)
return p2p.ValidationResultReject
}
@ -367,18 +409,30 @@ func (e *AppConsensusEngine) validateGlobalFrameMessage(
}
if !valid {
e.logger.Debug("invalid frame")
e.logger.Debug(
"invalid global frame",
zap.String("reason", "frame validator returned false"),
)
globalFrameValidationTotal.WithLabelValues("reject").Inc()
return p2p.ValidationResultReject
}
if frametime.GlobalFrameSince(frame) > 20*time.Second {
e.logger.Debug(
"ignoring global frame",
zap.String("reason", "frame too old"),
)
return p2p.ValidationResultIgnore
}
globalFrameValidationTotal.WithLabelValues("accept").Inc()
default:
e.logger.Debug(
"rejecting global frame message",
zap.String("reason", "unknown type prefix"),
zap.Uint32("type", typePrefix),
)
return p2p.ValidationResultReject
}
@ -391,7 +445,7 @@ func (e *AppConsensusEngine) validateAlertMessage(
) p2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -441,7 +495,7 @@ func (e *AppConsensusEngine) validatePeerInfoMessage(
) p2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -536,7 +590,7 @@ func (e *AppConsensusEngine) validateDispatchMessage(
) p2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -562,6 +616,11 @@ func (e *AppConsensusEngine) validateDispatchMessage(
if envelope.Timestamp < uint64(time.Now().UnixMilli())-2000 ||
envelope.Timestamp > uint64(time.Now().UnixMilli())+5000 {
e.logger.Debug(
"ignoring dispatch message",
zap.String("reason", "timestamp out of window"),
zap.Uint64("timestamp", envelope.Timestamp),
)
return p2p.ValidationResultIgnore
}
case protobufs.HubAddInboxType:

View File

@ -295,11 +295,17 @@ func (p *GlobalLeaderProvider) ProveNextState(
requestRoot := requestTree.Commit(p.engine.inclusionProver, false)
// Copy shared state under lock to avoid data race with materialize()
p.engine.shardCommitmentMu.Lock()
shardCommitments := p.engine.shardCommitments
proverRoot := p.engine.proverRoot
p.engine.shardCommitmentMu.Unlock()
// Prove the global frame header
newHeader, err := p.engine.frameProver.ProveGlobalFrameHeader(
(*prior).Header,
p.engine.shardCommitments,
p.engine.proverRoot,
shardCommitments,
proverRoot,
requestRoot,
signer,
timestamp,

View File

@ -40,11 +40,17 @@ func (p *GlobalLivenessProvider) Collect(
var collector keyedaggregator.Collector[sequencedGlobalMessage]
var collectorRecords []*sequencedGlobalMessage
alreadyCollected := false
if p.engine.messageCollectors != nil {
var err error
var found bool
collector, found, err = p.engine.getMessageCollector(rank)
if err != nil && !errors.Is(err, keyedaggregator.ErrSequenceBelowRetention) {
if err != nil && errors.Is(err, keyedaggregator.ErrSequenceBelowRetention) {
// Collector was already pruned by a prior Collect call for this
// rank. We must not overwrite collectedMessages with an empty
// slice or the previously-collected messages will be lost.
alreadyCollected = true
} else if err != nil {
p.engine.logger.Warn(
"could not fetch collector for rank",
zap.Uint64("rank", rank),
@ -53,6 +59,15 @@ func (p *GlobalLivenessProvider) Collect(
} else if found {
collectorRecords = collector.Records()
}
p.engine.logger.Debug(
"collector lookup for rank",
zap.Uint64("rank", rank),
zap.Uint64("frame_number", frameNumber),
zap.Bool("found", found),
zap.Bool("already_collected", alreadyCollected),
zap.Int("records", len(collectorRecords)),
zap.Uint64("current_rank", p.engine.currentRank),
)
}
acceptedMessages := make(
@ -62,8 +77,10 @@ func (p *GlobalLivenessProvider) Collect(
)
if collector != nil {
nilMsgCount := 0
for _, record := range collectorRecords {
if record == nil || record.message == nil {
nilMsgCount++
continue
}
if err := p.lockCollectorMessage(
@ -80,6 +97,13 @@ func (p *GlobalLivenessProvider) Collect(
}
acceptedMessages = append(acceptedMessages, record.message)
}
if nilMsgCount > 0 {
p.engine.logger.Debug(
"collector records with nil message (failed validation)",
zap.Int("nil_msg_count", nilMsgCount),
zap.Int("total_records", len(collectorRecords)),
)
}
}
messages := append([]*protobufs.Message{}, mixnetMessages...)
@ -115,12 +139,17 @@ func (p *GlobalLivenessProvider) Collect(
return GlobalCollectedCommitments{}, errors.Wrap(err, "collect")
}
// Store the accepted messages as canonical bytes for inclusion in the frame
collectedMsgs := make([][]byte, 0, len(acceptedMessages))
for _, msg := range acceptedMessages {
collectedMsgs = append(collectedMsgs, msg.Payload)
// Store the accepted messages as canonical bytes for inclusion in the frame.
// If we already collected for this rank (collector was pruned) and found no
// new messages, preserve the previously-collected messages rather than
// overwriting them with an empty slice.
if !alreadyCollected || len(acceptedMessages) > 0 {
collectedMsgs := make([][]byte, 0, len(acceptedMessages))
for _, msg := range acceptedMessages {
collectedMsgs = append(collectedMsgs, msg.Payload)
}
p.engine.collectedMessages = collectedMsgs
}
p.engine.collectedMessages = collectedMsgs
return GlobalCollectedCommitments{
frameNumber: frameNumber,

View File

@ -46,7 +46,12 @@ func (e *GlobalConsensusEngine) ensureCoverageThresholds() {
// triggerCoverageCheckAsync starts a coverage check in a goroutine if one is
// not already in progress. This prevents blocking the event processing loop.
func (e *GlobalConsensusEngine) triggerCoverageCheckAsync(frameNumber uint64) {
// frameProver is the address of the prover who produced the triggering frame;
// only that prover will emit split/merge messages.
func (e *GlobalConsensusEngine) triggerCoverageCheckAsync(
frameNumber uint64,
frameProver []byte,
) {
// Skip if a coverage check is already in progress
if !e.coverageCheckInProgress.CompareAndSwap(false, true) {
e.logger.Debug(
@ -56,17 +61,31 @@ func (e *GlobalConsensusEngine) triggerCoverageCheckAsync(frameNumber uint64) {
return
}
e.coverageWg.Add(1)
go func() {
defer e.coverageWg.Done()
defer e.coverageCheckInProgress.Store(false)
if err := e.checkShardCoverage(frameNumber); err != nil {
// Bail immediately if shutdown is already in progress to avoid
// blocking Stop() on hg.mu (which may be held by a sync or commit).
select {
case <-e.ShutdownSignal():
return
default:
}
if err := e.checkShardCoverage(frameNumber, frameProver); err != nil {
e.logger.Error("failed to check shard coverage", zap.Error(err))
}
}()
}
// checkShardCoverage verifies coverage levels for all active shards
func (e *GlobalConsensusEngine) checkShardCoverage(frameNumber uint64) error {
// checkShardCoverage verifies coverage levels for all active shards.
// frameProver is the address of the prover who produced the triggering frame.
func (e *GlobalConsensusEngine) checkShardCoverage(
frameNumber uint64,
frameProver []byte,
) error {
e.ensureCoverageThresholds()
// Get shard coverage information from prover registry
@ -218,13 +237,13 @@ func (e *GlobalConsensusEngine) checkShardCoverage(frameNumber uint64) error {
// Check for high coverage (potential split)
if proverCount > maxProvers {
e.handleHighCoverage([]byte(shardAddress), coverage, maxProvers)
e.handleHighCoverage([]byte(shardAddress), coverage, maxProvers, frameProver)
}
}
// Emit a single bulk merge event if there are any merge-eligible shards
if len(allMergeGroups) > 0 {
e.emitBulkMergeEvent(allMergeGroups)
e.emitBulkMergeEvent(allMergeGroups, frameProver)
}
return nil
@ -344,6 +363,7 @@ func (e *GlobalConsensusEngine) handleHighCoverage(
shardAddress []byte,
coverage *ShardCoverage,
maxProvers uint64,
frameProver []byte,
) {
addressLen := len(shardAddress)
@ -369,6 +389,7 @@ func (e *GlobalConsensusEngine) handleHighCoverage(
ProverCount: coverage.ProverCount,
AttestedStorage: coverage.AttestedStorage,
ProposedShards: proposedShards,
FrameProver: frameProver,
})
} else {
// Case 3.a.ii: No space to split, do nothing

View File

@ -82,7 +82,10 @@ func (e *GlobalConsensusEngine) eventDistributorLoop(
e.flushDeferredGlobalMessages(data.Frame.GetRank() + 1)
// Check shard coverage asynchronously to avoid blocking event processing
e.triggerCoverageCheckAsync(data.Frame.Header.FrameNumber)
e.triggerCoverageCheckAsync(
data.Frame.Header.FrameNumber,
data.Frame.Header.Prover,
)
// Update global coordination metrics
globalCoordinationTotal.Inc()
@ -119,6 +122,7 @@ func (e *GlobalConsensusEngine) eventDistributorLoop(
// prover allocations in the registry.
e.reconcileWorkerAllocations(data.Frame.Header.FrameNumber, self)
e.checkExcessPendingJoins(self, data.Frame.Header.FrameNumber)
e.checkAndSubmitSeniorityMerge(self, data.Frame.Header.FrameNumber)
e.logAllocationStatusOnly(ctx, data, self, effectiveSeniority)
}
}
@ -266,6 +270,16 @@ func (e *GlobalConsensusEngine) eventDistributorLoop(
}()
}
case typesconsensus.ControlEventShardSplitEligible:
if data, ok := event.Data.(*typesconsensus.ShardSplitEventData); ok {
e.handleShardSplitEvent(data)
}
case typesconsensus.ControlEventShardMergeEligible:
if data, ok := event.Data.(*typesconsensus.BulkShardMergeEventData); ok {
e.handleShardMergeEvent(data)
}
default:
e.logger.Debug(
"received unhandled event type",
@ -306,6 +320,7 @@ func (e *GlobalConsensusEngine) emitCoverageEvent(
func (e *GlobalConsensusEngine) emitBulkMergeEvent(
mergeGroups []typesconsensus.ShardMergeEventData,
frameProver []byte,
) {
if len(mergeGroups) == 0 {
return
@ -314,6 +329,7 @@ func (e *GlobalConsensusEngine) emitBulkMergeEvent(
// Combine all merge groups into a single bulk event
data := &typesconsensus.BulkShardMergeEventData{
MergeGroups: mergeGroups,
FrameProver: frameProver,
}
event := typesconsensus.ControlEvent{
@ -370,6 +386,156 @@ func (e *GlobalConsensusEngine) emitAlertEvent(alertMessage string) {
e.logger.Info("emitted alert message")
}
// shardActionCooldownFrames is the minimum number of frames between shard
// split/merge actions targeting the same shard (or parent) address.
const shardActionCooldownFrames = 360

// handleShardSplitEvent proves and publishes a shard split operation in
// response to a ControlEventShardSplitEligible event. Only the prover that
// produced the triggering frame acts, and a per-shard cooldown
// (shardActionCooldownFrames) prevents repeatedly splitting the same shard.
func (e *GlobalConsensusEngine) handleShardSplitEvent(
	data *typesconsensus.ShardSplitEventData,
) {
	// Only the prover who produced the triggering frame should emit
	if !bytes.Equal(data.FrameProver, e.getProverAddress()) {
		return
	}

	// A zero frame means no frame has been observed yet; there is nothing
	// to anchor the proof or the cooldown to.
	frameNumber := e.lastObservedFrame.Load()
	if frameNumber == 0 {
		return
	}

	// Enforce the per-shard cooldown under the action-frame lock. The frame
	// is recorded before proving so a concurrent event for the same shard
	// cannot also pass the check.
	// NOTE(review): frameNumber-last is uint64 arithmetic — assumes
	// lastObservedFrame never regresses below a recorded action frame;
	// confirm, otherwise the subtraction wraps and the cooldown is skipped.
	addrKey := string(data.ShardAddress)
	e.lastShardActionFrameMu.Lock()
	if last, ok := e.lastShardActionFrame[addrKey]; ok &&
		frameNumber-last < shardActionCooldownFrames {
		e.lastShardActionFrameMu.Unlock()
		e.logger.Debug(
			"skipping shard split, cooldown active",
			zap.String("shard_address", hex.EncodeToString(data.ShardAddress)),
			zap.Uint64("last_action_frame", last),
			zap.Uint64("current_frame", frameNumber),
		)
		return
	}
	e.lastShardActionFrame[addrKey] = frameNumber
	e.lastShardActionFrameMu.Unlock()

	// Build and prove the split operation against the current frame.
	op := globalintrinsics.NewShardSplitOp(
		data.ShardAddress,
		data.ProposedShards,
		e.keyManager,
		e.shardsStore,
		e.proverRegistry,
	)
	if err := op.Prove(frameNumber); err != nil {
		e.logger.Error(
			"failed to prove shard split",
			zap.Error(err),
		)
		return
	}

	splitBytes, err := op.ToRequestBytes()
	if err != nil {
		e.logger.Error(
			"failed to serialize shard split",
			zap.Error(err),
		)
		return
	}

	// Broadcast the proven split to the global prover bitmask.
	if err := e.pubsub.PublishToBitmask(
		GLOBAL_PROVER_BITMASK,
		splitBytes,
	); err != nil {
		e.logger.Error("failed to publish shard split", zap.Error(err))
	} else {
		e.logger.Info(
			"published shard split",
			zap.String("shard_address", hex.EncodeToString(data.ShardAddress)),
			zap.Int("proposed_shards", len(data.ProposedShards)),
			zap.Uint64("frame_number", frameNumber),
		)
	}
}
// handleShardMergeEvent proves and publishes a shard merge operation for each
// eligible merge group in a ControlEventShardMergeEligible event. Only the
// prover that produced the triggering frame acts, and each merge is gated by
// a per-parent-address cooldown (shardActionCooldownFrames).
func (e *GlobalConsensusEngine) handleShardMergeEvent(
	data *typesconsensus.BulkShardMergeEventData,
) {
	// Only the prover who produced the triggering frame should emit
	if !bytes.Equal(data.FrameProver, e.getProverAddress()) {
		return
	}

	// A zero frame means no frame has been observed yet; nothing to anchor
	// the proofs or cooldowns to.
	frameNumber := e.lastObservedFrame.Load()
	if frameNumber == 0 {
		return
	}

	for _, group := range data.MergeGroups {
		// A merge needs at least two shards to combine.
		if len(group.ShardAddresses) < 2 {
			continue
		}

		// Use first shard's first 32 bytes as parent address
		// NOTE(review): assumes every shard address is at least 32 bytes —
		// confirm upstream, otherwise this slice expression panics.
		parentAddress := group.ShardAddresses[0][:32]

		// Check cooldown for the parent address
		// NOTE(review): frameNumber-last is uint64 arithmetic — assumes
		// lastObservedFrame never regresses below a recorded action frame.
		parentKey := string(parentAddress)
		e.lastShardActionFrameMu.Lock()
		if last, ok := e.lastShardActionFrame[parentKey]; ok &&
			frameNumber-last < shardActionCooldownFrames {
			e.lastShardActionFrameMu.Unlock()
			e.logger.Debug(
				"skipping shard merge, cooldown active",
				zap.String("parent_address", hex.EncodeToString(parentAddress)),
				zap.Uint64("last_action_frame", last),
				zap.Uint64("current_frame", frameNumber),
			)
			continue
		}
		// Record the action frame before proving so a concurrent event for
		// the same parent cannot also pass the cooldown check.
		e.lastShardActionFrame[parentKey] = frameNumber
		e.lastShardActionFrameMu.Unlock()

		// Build and prove the merge operation against the current frame.
		op := globalintrinsics.NewShardMergeOp(
			group.ShardAddresses,
			parentAddress,
			e.keyManager,
			e.shardsStore,
			e.proverRegistry,
		)
		if err := op.Prove(frameNumber); err != nil {
			e.logger.Error(
				"failed to prove shard merge",
				zap.Error(err),
			)
			continue
		}

		mergeBytes, err := op.ToRequestBytes()
		if err != nil {
			e.logger.Error(
				"failed to serialize shard merge",
				zap.Error(err),
			)
			continue
		}

		// Broadcast the proven merge to the global prover bitmask.
		if err := e.pubsub.PublishToBitmask(
			GLOBAL_PROVER_BITMASK,
			mergeBytes,
		); err != nil {
			e.logger.Error("failed to publish shard merge", zap.Error(err))
		} else {
			e.logger.Info(
				"published shard merge",
				zap.String("parent_address", hex.EncodeToString(parentAddress)),
				zap.Int("shard_count", len(group.ShardAddresses)),
				zap.Uint64("frame_number", frameNumber),
			)
		}
	}
}
func (e *GlobalConsensusEngine) estimateSeniorityFromConfig() uint64 {
peerIds := []string{}
peerIds = append(peerIds, peer.ID(e.pubsub.GetPeerID()).String())
@ -414,6 +580,21 @@ func (e *GlobalConsensusEngine) evaluateForProposals(
) {
self, effectiveSeniority := e.allocationContext()
e.reconcileWorkerAllocations(data.Frame.Header.FrameNumber, self)
// Re-check after reconciliation — stale filters may have been cleared,
// making workers available for new proposals.
if !allowProposals {
workers, err := e.workerManager.RangeWorkers()
if err == nil {
for _, w := range workers {
if w != nil && len(w.Filter) == 0 {
allowProposals = true
break
}
}
}
}
e.checkExcessPendingJoins(self, data.Frame.Header.FrameNumber)
canPropose, skipReason := e.joinProposalReady(data.Frame.Header.FrameNumber)
@ -433,6 +614,7 @@ func (e *GlobalConsensusEngine) evaluateForProposals(
decideDescriptors := snapshot.decideDescriptors
worldBytes := snapshot.worldBytes
joinProposedThisCycle := false
if len(proposalDescriptors) != 0 && allowProposals {
if canPropose {
proposals, err := e.proposer.PlanAndAllocate(
@ -446,6 +628,7 @@ func (e *GlobalConsensusEngine) evaluateForProposals(
e.logger.Error("could not plan shard allocations", zap.Error(err))
} else {
if len(proposals) > 0 {
joinProposedThisCycle = true
e.lastJoinAttemptFrame.Store(data.Frame.Header.FrameNumber)
}
expectedRewardSum := big.NewInt(0)
@ -485,6 +668,11 @@ func (e *GlobalConsensusEngine) evaluateForProposals(
zap.Uint64("frame_number", data.Frame.Header.FrameNumber),
)
}
if !joinProposedThisCycle {
e.checkAndSubmitSeniorityMerge(self, data.Frame.Header.FrameNumber)
}
if len(pendingFilters) != 0 {
if err := e.proposer.DecideJoins(
uint64(data.Frame.Header.Difficulty),
@ -586,6 +774,21 @@ func (e *GlobalConsensusEngine) reconcileWorkerAllocations(
continue
}
// Expired joins (implicitly rejected) and expired leaves
// (implicitly confirmed) should also be cleared immediately —
// the allocation will never be confirmed/completed and the
// worker is stuck waiting for a state change that cannot come.
if alloc.Status == typesconsensus.ProverStatusJoining &&
frameNumber > alloc.JoinFrameNumber+pendingFilterGraceFrames {
rejectedFilters[string(alloc.ConfirmationFilter)] = struct{}{}
continue
}
if alloc.Status == typesconsensus.ProverStatusLeaving &&
frameNumber > alloc.LeaveFrameNumber+pendingFilterGraceFrames {
rejectedFilters[string(alloc.ConfirmationFilter)] = struct{}{}
continue
}
key := string(alloc.ConfirmationFilter)
worker, ok := filtersToWorkers[key]
if !ok {
@ -841,12 +1044,23 @@ func (e *GlobalConsensusEngine) collectAllocationSnapshot(
decideDescriptors := []provers.ShardDescriptor{}
for _, shardInfo := range shards {
resp, err := e.getAppShardsFromProver(
client,
slices.Concat(shardInfo.L1, shardInfo.L2),
)
shardKey := slices.Concat(shardInfo.L1, shardInfo.L2)
var resp *protobufs.GetAppShardsResponse
var err error
for attempt := 0; attempt < 3; attempt++ {
resp, err = e.getAppShardsFromProver(client, shardKey)
if err == nil {
break
}
e.logger.Debug(
"retrying app shard retrieval",
zap.Int("attempt", attempt+1),
zap.Error(err),
)
time.Sleep(time.Duration(attempt+1) * 500 * time.Millisecond)
}
if err != nil {
e.logger.Debug("could not get app shards from prover", zap.Error(err))
e.logger.Debug("could not get app shards from prover after retries", zap.Error(err))
return nil, false
}
@ -869,8 +1083,22 @@ func (e *GlobalConsensusEngine) collectAllocationSnapshot(
if self != nil {
for _, allocation := range self.Allocations {
if bytes.Equal(allocation.ConfirmationFilter, bp) {
allocated = allocation.Status != 4
if allocation.Status == typesconsensus.ProverStatusJoining {
allocated = allocation.Status != typesconsensus.ProverStatusLeaving
// Treat expired joins and leaves as unallocated so the
// proposer will submit a fresh join instead of sitting
// in limbo.
if allocation.Status == typesconsensus.ProverStatusJoining &&
data.Frame.Header.FrameNumber > allocation.JoinFrameNumber+pendingFilterGraceFrames {
allocated = false
}
if allocation.Status == typesconsensus.ProverStatusLeaving &&
data.Frame.Header.FrameNumber > allocation.LeaveFrameNumber+pendingFilterGraceFrames {
allocated = false
}
if allocation.Status == typesconsensus.ProverStatusJoining &&
data.Frame.Header.FrameNumber <= allocation.JoinFrameNumber+pendingFilterGraceFrames {
shardsPending++
awaitingFrame[allocation.JoinFrameNumber+360] = struct{}{}
}
@ -887,7 +1115,8 @@ func (e *GlobalConsensusEngine) collectAllocationSnapshot(
data.Frame.Header.FrameNumber > token.FRAME_2_1_EXTENDED_ENROLL_END {
pending = allocation.Status ==
typesconsensus.ProverStatusJoining &&
allocation.JoinFrameNumber+360 <= data.Frame.Header.FrameNumber
allocation.JoinFrameNumber+360 <= data.Frame.Header.FrameNumber &&
data.Frame.Header.FrameNumber <= allocation.JoinFrameNumber+pendingFilterGraceFrames
}
}
}
@ -1006,6 +1235,56 @@ func (e *GlobalConsensusEngine) logAllocationStatusOnly(
e.logAllocationStatus(snapshot)
}
// checkAndSubmitSeniorityMerge submits a seniority merge if the prover exists
// with incorrect seniority and cooldowns have elapsed. This is called both from
// evaluateForProposals (when no join was proposed) and from the "all workers
// allocated" path, ensuring seniority is corrected regardless of allocation state.
func (e *GlobalConsensusEngine) checkAndSubmitSeniorityMerge(
	self *typesconsensus.ProverInfo,
	frameNumber uint64,
) {
	// No registered prover record — nothing to correct.
	if self == nil {
		return
	}

	// Only act when the config-derived seniority exceeds what the registry
	// currently records for this prover.
	mergeSeniority := e.estimateSeniorityFromConfig()
	if mergeSeniority <= self.Seniority {
		return
	}

	// Both the join and the merge cooldowns (10 frames each) must have
	// elapsed so a merge never races a recent join attempt or a recent
	// merge submission.
	// NOTE(review): frameNumber-lastJoin/lastMerge is uint64 arithmetic —
	// assumes frameNumber >= the recorded attempt frames; confirm,
	// otherwise the subtraction wraps and the cooldown reads as elapsed.
	lastJoin := e.lastJoinAttemptFrame.Load()
	lastMerge := e.lastSeniorityMergeFrame.Load()
	joinCooldownOk := lastJoin == 0 || frameNumber-lastJoin >= 10
	mergeCooldownOk := lastMerge == 0 || frameNumber-lastMerge >= 10
	if joinCooldownOk && mergeCooldownOk {
		frame := e.GetFrame()
		if frame != nil {
			helpers, peerIds := e.buildMergeHelpers()
			err := e.submitSeniorityMerge(
				frame, helpers, mergeSeniority, peerIds,
			)
			if err != nil {
				e.logger.Error(
					"could not submit seniority merge",
					zap.Error(err),
				)
			} else {
				// Record the submission frame only on success so a failed
				// attempt retries on the next cycle.
				e.lastSeniorityMergeFrame.Store(frameNumber)
			}
		}
	} else {
		e.logger.Debug(
			"seniority merge deferred due to cooldown",
			zap.Uint64("merge_seniority", mergeSeniority),
			zap.Uint64("existing_seniority", self.Seniority),
			zap.Uint64("last_join_frame", lastJoin),
			zap.Uint64("last_merge_frame", lastMerge),
			zap.Uint64("current_frame", frameNumber),
		)
	}
}
func (e *GlobalConsensusEngine) allocationContext() (
*typesconsensus.ProverInfo,
uint64,
@ -1021,7 +1300,7 @@ func (e *GlobalConsensusEngine) checkExcessPendingJoins(
self *typesconsensus.ProverInfo,
frameNumber uint64,
) {
excessFilters := e.selectExcessPendingFilters(self)
excessFilters := e.selectExcessPendingFilters(self, frameNumber)
if len(excessFilters) != 0 {
e.logger.Debug(
"identified excess pending joins",

View File

@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"source.quilibrium.com/quilibrium/monorepo/config"
typesconsensus "source.quilibrium.com/quilibrium/monorepo/types/consensus"
"source.quilibrium.com/quilibrium/monorepo/types/store"
"source.quilibrium.com/quilibrium/monorepo/types/worker"
@ -65,6 +66,9 @@ func (m *mockWorkerManager) ProposeAllocations(coreIds []uint, filters [][]byte)
func (m *mockWorkerManager) DecideAllocations(reject [][]byte, confirm [][]byte) error {
return nil
}
// RespawnWorker satisfies the worker manager interface for tests; the mock
// treats every respawn as an unconditional success.
func (m *mockWorkerManager) RespawnWorker(coreId uint, filter []byte) error {
	return nil
}
func (m *mockWorkerManager) RangeWorkers() ([]*store.WorkerInfo, error) {
result := make([]*store.WorkerInfo, 0, len(m.workers))
for _, w := range m.workers {
@ -411,3 +415,133 @@ func TestReconcileWorkerAllocations_UnconfirmedProposalWithNilSelf(t *testing.T)
require.Len(t, workers, 1)
assert.Nil(t, workers[0].Filter, "filter should be cleared after timeout even with nil self")
}
func TestSelectExcessPendingFilters_ExpiredJoinsNotCounted(t *testing.T) {
	// Capacity of two workers with no active allocations.
	eng := &GlobalConsensusEngine{
		logger: zap.NewNop(),
		config: &config.Config{
			Engine: &config.EngineConfig{
				DataWorkerCount: 2,
			},
		},
	}

	joinFrame := uint64(260000)

	// Three joining allocations: the first two are past the grace window
	// (expired), the third is recent. Before the fix all three counted as
	// pending, so allowedPending = 2 left excess = 1 and the valid join
	// risked random rejection; after the fix only the valid join counts
	// and nothing is excess.
	prover := &typesconsensus.ProverInfo{
		Address: []byte("prover-address"),
		Allocations: []typesconsensus.ProverAllocationInfo{
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-1"),
				JoinFrameNumber:    joinFrame,
			},
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-2"),
				JoinFrameNumber:    joinFrame,
			},
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-3"),
				JoinFrameNumber:    joinFrame + 500, // recent, not expired
			},
		},
	}

	// Evaluate just past the grace period of the first two joins.
	evalFrame := joinFrame + pendingFilterGraceFrames + 1

	excess := eng.selectExcessPendingFilters(prover, evalFrame)
	assert.Empty(t, excess, "expired joins should not count toward pending limit")
}
func TestSelectExcessPendingFilters_ValidJoinsStillLimited(t *testing.T) {
	// Capacity of a single worker with no active allocations.
	eng := &GlobalConsensusEngine{
		logger: zap.NewNop(),
		config: &config.Config{
			Engine: &config.EngineConfig{
				DataWorkerCount: 1,
			},
		},
	}

	joinFrame := uint64(260000)

	// Two joins, both still inside the grace window. With capacity 1 and
	// nothing active, exactly one of them exceeds the pending limit.
	prover := &typesconsensus.ProverInfo{
		Address: []byte("prover-address"),
		Allocations: []typesconsensus.ProverAllocationInfo{
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-1"),
				JoinFrameNumber:    joinFrame,
			},
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-2"),
				JoinFrameNumber:    joinFrame,
			},
		},
	}

	// Evaluate well before either join expires.
	evalFrame := joinFrame + 100

	excess := eng.selectExcessPendingFilters(prover, evalFrame)
	assert.Len(t, excess, 1, "should identify 1 excess pending join")
}
func TestSelectExcessPendingFilters_MixedActiveAndExpired(t *testing.T) {
	// Two worker slots available in total.
	eng := &GlobalConsensusEngine{
		logger: zap.NewNop(),
		config: &config.Config{
			Engine: &config.EngineConfig{
				DataWorkerCount: 2,
			},
		},
	}

	// One active allocation (occupies a slot), one expired join (must be
	// excluded), and one valid join. With capacity 2 the active allocation
	// leaves one pending slot, which the valid join fills exactly — so no
	// excess should be reported.
	prover := &typesconsensus.ProverInfo{
		Address: []byte("prover-address"),
		Allocations: []typesconsensus.ProverAllocationInfo{
			{
				Status:             typesconsensus.ProverStatusActive,
				ConfirmationFilter: []byte("shard-filter-1"),
				JoinFrameNumber:    200000,
			},
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-2"),
				JoinFrameNumber:    250000, // expired
			},
			{
				Status:             typesconsensus.ProverStatusJoining,
				ConfirmationFilter: []byte("shard-filter-3"),
				JoinFrameNumber:    260000, // valid
			},
		},
	}

	// Past the expired join's grace window, inside the valid join's.
	evalFrame := uint64(260500)

	excess := eng.selectExcessPendingFilters(prover, evalFrame)
	assert.Empty(t, excess, "expired joins should be excluded; 1 active + 1 valid pending fits capacity 2")
}

View File

@ -152,8 +152,9 @@ type GlobalConsensusEngine struct {
alertPublicKey []byte
hasSentKeyBundle bool
proverSyncInProgress atomic.Bool
lastJoinAttemptFrame atomic.Uint64
lastObservedFrame atomic.Uint64
lastJoinAttemptFrame atomic.Uint64
lastSeniorityMergeFrame atomic.Uint64
lastObservedFrame atomic.Uint64
lastRejectFrame atomic.Uint64
proverRootVerifiedFrame atomic.Uint64
proverRootSynced atomic.Bool
@ -200,9 +201,12 @@ type GlobalConsensusEngine struct {
activeProveRanksMu sync.Mutex
appFrameStore map[string]*protobufs.AppShardFrame
appFrameStoreMu sync.RWMutex
lowCoverageStreak map[string]*coverageStreak
proverOnlyMode atomic.Bool
lowCoverageStreak map[string]*coverageStreak
proverOnlyMode atomic.Bool
lastShardActionFrame map[string]uint64
lastShardActionFrameMu sync.Mutex
coverageCheckInProgress atomic.Bool
coverageWg sync.WaitGroup
peerInfoDigestCache map[string]struct{}
peerInfoDigestCacheMu sync.Mutex
keyRegistryDigestCache map[string]struct{}
@ -337,6 +341,7 @@ func NewGlobalConsensusEngine(
currentDifficulty: config.Engine.Difficulty,
lastProvenFrameTime: time.Now(),
blacklistMap: make(map[string]bool),
lastShardActionFrame: make(map[string]uint64),
peerInfoDigestCache: make(map[string]struct{}),
keyRegistryDigestCache: make(map[string]struct{}),
peerAuthCache: make(map[string]time.Time),
@ -561,6 +566,9 @@ func NewGlobalConsensusEngine(
}
ready()
<-ctx.Done()
if err := engine.workerManager.Stop(); err != nil {
engine.logger.Warn("error stopping worker manager", zap.Error(err))
}
})
}
@ -847,41 +855,10 @@ func NewGlobalConsensusEngine(
componentBuilder.AddWorker(engine.peerInfoManager.Start)
// Subscribe to global consensus if participating
err = engine.subscribeToGlobalConsensus()
if err != nil {
return nil, err
}
// Subscribe to shard consensus messages to broker lock agreement
err = engine.subscribeToShardConsensusMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to frames
err = engine.subscribeToFrameMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to prover messages
err = engine.subscribeToProverMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to peer info messages
err = engine.subscribeToPeerInfoMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to alert messages
err = engine.subscribeToAlertMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// NOTE: subscribe calls are deferred until after ComponentManager is built
// (see below). The handler closures reference e.ShutdownSignal() which
// panics if ComponentManager is nil. Since Subscribe spawns goroutines
// immediately, a message arriving before Build() would hit a nil receiver.
// Start consensus message queue processor
componentBuilder.AddWorker(func(
@ -1041,6 +1018,47 @@ func NewGlobalConsensusEngine(
hgWithSelfPeer.SetSelfPeerID(peer.ID(ps.GetPeerID()).String())
}
// Subscribe to pubsub bitmasks. These calls spawn handler goroutines
// immediately, and the handlers reference e.ShutdownSignal() which
// requires ComponentManager to be non-nil. That's why subscriptions
// must happen after componentBuilder.Build() above.
// Subscribe to global consensus if participating
err = engine.subscribeToGlobalConsensus()
if err != nil {
return nil, err
}
// Subscribe to shard consensus messages to broker lock agreement
err = engine.subscribeToShardConsensusMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to frames
err = engine.subscribeToFrameMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to prover messages
err = engine.subscribeToProverMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to peer info messages
err = engine.subscribeToPeerInfoMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
// Subscribe to alert messages
err = engine.subscribeToAlertMessages()
if err != nil {
return nil, errors.Wrap(err, "start")
}
return engine, nil
}
@ -1137,8 +1155,8 @@ func (e *GlobalConsensusEngine) setupGRPCServer() error {
grpc.Creds(tlsCreds),
grpc.ChainUnaryInterceptor(e.authProvider.UnaryInterceptor),
grpc.ChainStreamInterceptor(e.authProvider.StreamInterceptor),
grpc.MaxRecvMsgSize(10*1024*1024),
grpc.MaxSendMsgSize(10*1024*1024),
grpc.MaxRecvMsgSize(e.config.Engine.SyncMessageLimits.MaxRecvMsgSize),
grpc.MaxSendMsgSize(e.config.Engine.SyncMessageLimits.MaxSendMsgSize),
)
// Create TCP listener
@ -1202,6 +1220,20 @@ func (e *GlobalConsensusEngine) Stop(force bool) <-chan error {
}
}
// Wait for any in-flight coverage check goroutine to finish before
// returning, so callers can safely close the Pebble DB. This is safe
// to wait on unboundedly because GetMetadataAtKey (the only hg.mu
// caller in the coverage path) bails immediately once shutdownCtx
// fires, so the goroutine will always complete after shutdown.
e.coverageWg.Wait()
// Synchronously close the snapshot manager so no Pebble snapshots remain
// open when the database is closed. The async goroutine chain from
// SetShutdownContext may not have completed yet.
if closer, ok := e.hyperSync.(interface{ CloseSnapshots() }); ok {
closer.CloseSnapshots()
}
close(errChan)
return errChan
}
@ -2094,6 +2126,16 @@ func (e *GlobalConsensusEngine) performBlockingProverHypersync(
close(done)
e.logger.Info("blocking hypersync completed")
if !e.config.Engine.ArchiveMode {
if err := e.proverRegistry.Refresh(); err != nil {
e.logger.Warn(
"failed to refresh prover registry after blocking hypersync",
zap.Error(err),
)
}
}
if len(newRoots) == 0 {
return nil
}
@ -2238,6 +2280,7 @@ func (e *GlobalConsensusEngine) joinProposalReady(
func (e *GlobalConsensusEngine) selectExcessPendingFilters(
self *typesconsensus.ProverInfo,
frameNumber uint64,
) [][]byte {
if self == nil || e.config == nil || e.config.Engine == nil {
e.logger.Debug("excess pending evaluation skipped: missing config or prover info")
@ -2261,6 +2304,12 @@ func (e *GlobalConsensusEngine) selectExcessPendingFilters(
case typesconsensus.ProverStatusActive:
active++
case typesconsensus.ProverStatusJoining:
// Skip expired joins — they are implicitly rejected and should
// not count toward the pending limit or be candidates for
// explicit rejection.
if frameNumber > allocation.JoinFrameNumber+pendingFilterGraceFrames {
continue
}
filterCopy := make([]byte, len(allocation.ConfirmationFilter))
copy(filterCopy, allocation.ConfirmationFilter)
pending = append(pending, filterCopy)
@ -3340,23 +3389,15 @@ func (e *GlobalConsensusEngine) ProposeWorkerJoin(
mergeSeniority = mergeSeniorityBI.Uint64()
}
// If prover already exists, check if we should submit a seniority merge
// Always include merge targets in the join — Materialize handles
// seniority for both new and existing provers. A separate seniority
// merge is not submitted because it would double-count with the join.
if proverExists {
if mergeSeniority > info.Seniority {
e.logger.Info(
"existing prover has lower seniority than merge would provide, submitting seniority merge",
zap.Uint64("existing_seniority", info.Seniority),
zap.Uint64("merge_seniority", mergeSeniority),
zap.Strings("peer_ids", peerIds),
)
return e.submitSeniorityMerge(frame, helpers, mergeSeniority, peerIds)
}
e.logger.Debug(
"prover already exists with sufficient seniority, skipping join",
"prover already exists, merge targets will be included in join",
zap.Uint64("existing_seniority", info.Seniority),
zap.Uint64("merge_seniority", mergeSeniority),
)
return nil
}
e.logger.Info(
@ -3508,16 +3549,32 @@ func (e *GlobalConsensusEngine) buildMergeHelpers() ([]*global.SeniorityMerge, [
helpers := []*global.SeniorityMerge{}
peerIds := []string{}
peerPrivKey, err := hex.DecodeString(e.config.P2P.PeerPrivKey)
peerPrivKeyBytes, err := hex.DecodeString(e.config.P2P.PeerPrivKey)
if err != nil {
e.logger.Debug("cannot decode peer key for merge helpers", zap.Error(err))
return helpers, peerIds
}
oldProver, err := keys.Ed448KeyFromBytes(
peerPrivKey,
e.pubsub.GetPublicKey(),
)
peerPrivKey, err := pcrypto.UnmarshalEd448PrivateKey(peerPrivKeyBytes)
if err != nil {
e.logger.Debug("cannot unmarshal peer key for merge helpers", zap.Error(err))
return helpers, peerIds
}
peerPub := peerPrivKey.GetPublic()
peerPubBytes, err := peerPub.Raw()
if err != nil {
e.logger.Debug("cannot get peer public key for merge helpers", zap.Error(err))
return helpers, peerIds
}
peerPrivRaw, err := peerPrivKey.Raw()
if err != nil {
e.logger.Debug("cannot get peer private key for merge helpers", zap.Error(err))
return helpers, peerIds
}
oldProver, err := keys.Ed448KeyFromBytes(peerPrivRaw, peerPubBytes)
if err != nil {
e.logger.Debug("cannot get peer key for merge helpers", zap.Error(err))
return helpers, peerIds
@ -3527,7 +3584,13 @@ func (e *GlobalConsensusEngine) buildMergeHelpers() ([]*global.SeniorityMerge, [
crypto.KeyTypeEd448,
oldProver,
))
peerIds = append(peerIds, peer.ID(e.pubsub.GetPeerID()).String())
peerId, err := peer.IDFromPublicKey(peerPub)
if err != nil {
e.logger.Debug("cannot get peer ID for merge helpers", zap.Error(err))
return helpers, peerIds
}
peerIds = append(peerIds, peerId.String())
if len(e.config.Engine.MultisigProverEnrollmentPaths) != 0 {
e.logger.Debug("loading old configs for merge helpers")

View File

@ -215,7 +215,7 @@ func (e *GlobalConsensusEngine) startGlobalMessageAggregator(
}
func (e *GlobalConsensusEngine) addGlobalMessage(data []byte) {
if e.messageAggregator == nil || len(data) == 0 {
if e.messageCollectors == nil || len(data) == 0 {
return
}
@ -270,8 +270,39 @@ func (e *GlobalConsensusEngine) addGlobalMessage(data []byte) {
}
}
record := newSequencedGlobalMessage(e.currentRank+1, payload)
e.messageAggregator.Add(record)
seq := e.currentRank + 1
record := newSequencedGlobalMessage(seq, payload)
// Add directly to the collector synchronously rather than going through
// the aggregator's async worker queue. The async path loses messages
// because OnSequenceChange advances the retention window before workers
// finish processing queued items, causing them to be silently pruned.
collector, _, err := e.messageCollectors.GetOrCreateCollector(seq)
if err != nil {
e.logger.Debug(
"could not get collector for global message",
zap.Uint64("sequence", seq),
zap.Uint64("current_rank", e.currentRank),
zap.Error(err),
)
return
}
if err := collector.Add(record); err != nil {
e.logger.Debug(
"could not add global message to collector",
zap.Uint64("sequence", seq),
zap.Error(err),
)
return
}
e.logger.Debug(
"added global message to collector",
zap.Uint64("sequence", seq),
zap.Uint64("current_rank", e.currentRank),
zap.Int("payload_len", len(payload)),
)
}
// filterProverOnlyRequests filters a list of message requests to only include

View File

@ -70,9 +70,12 @@ func (e *GlobalConsensusEngine) processProverMessageQueue(
ctx lifecycle.SignalerContext,
) {
if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
e.logger.Debug("prover message queue processor disabled (not archive mode)")
return
}
e.logger.Info("prover message queue processor started")
for {
select {
case <-e.haltCtx.Done():
@ -1722,7 +1725,10 @@ func (e *GlobalConsensusEngine) addCertifiedState(
}
// Trigger coverage check asynchronously to avoid blocking message processing
e.triggerCoverageCheckAsync(parent.State.GetFrameNumber())
e.triggerCoverageCheckAsync(
parent.State.GetFrameNumber(),
parent.State.Header.Prover,
)
}
func (e *GlobalConsensusEngine) handleProposal(message *pb.Message) {

View File

@ -172,7 +172,6 @@ func (e *GlobalConsensusEngine) subscribeToProverMessages() error {
GLOBAL_PROVER_BITMASK,
func(message *pb.Message) error {
if e.config.P2P.Network != 99 && !e.config.Engine.ArchiveMode {
e.logger.Debug("dropping prover message, not in archive mode")
return nil
}

View File

@ -20,7 +20,7 @@ func (e *GlobalConsensusEngine) validateGlobalConsensusMessage(
) tp2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -39,7 +39,7 @@ func (e *GlobalConsensusEngine) validateGlobalConsensusMessage(
proposal := &protobufs.GlobalProposal{}
if err := proposal.FromCanonicalBytes(message.Data); err != nil {
e.logger.Debug("failed to unmarshal frame", zap.Error(err))
e.logger.Debug("failed to unmarshal proposal", zap.Error(err))
proposalValidationTotal.WithLabelValues("reject").Inc()
return tp2p.ValidationResultReject
}
@ -68,7 +68,10 @@ func (e *GlobalConsensusEngine) validateGlobalConsensusMessage(
}
if !valid {
e.logger.Debug("invalid global frame")
e.logger.Debug(
"invalid global frame",
zap.String("reason", "frame validator returned false"),
)
proposalValidationTotal.WithLabelValues("reject").Inc()
return tp2p.ValidationResultReject
}
@ -193,6 +196,10 @@ func (e *GlobalConsensusEngine) validateShardConsensusMessage(
}
if frametime.AppFrameSince(frame) > 20*time.Second {
e.logger.Debug(
"ignoring shard proposal",
zap.String("reason", "frame too old"),
)
shardProposalValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore
}
@ -211,7 +218,10 @@ func (e *GlobalConsensusEngine) validateShardConsensusMessage(
}
if !valid {
e.logger.Debug("invalid app frame")
e.logger.Debug(
"invalid app frame",
zap.String("reason", "frame validator returned false"),
)
shardProposalValidationTotal.WithLabelValues("reject").Inc()
return tp2p.ValidationResultReject
}
@ -233,6 +243,12 @@ func (e *GlobalConsensusEngine) validateShardConsensusMessage(
now := uint64(time.Now().UnixMilli())
if vote.Timestamp > now+5000 || vote.Timestamp < now-5000 {
e.logger.Debug(
"ignoring shard vote",
zap.String("reason", "timestamp out of window"),
zap.Uint64("timestamp", vote.Timestamp),
zap.Uint64("now", now),
)
shardVoteValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore
}
@ -260,6 +276,12 @@ func (e *GlobalConsensusEngine) validateShardConsensusMessage(
now := uint64(time.Now().UnixMilli())
if timeoutState.Timestamp > now+5000 || timeoutState.Timestamp < now-5000 {
e.logger.Debug(
"ignoring shard timeout",
zap.String("reason", "timestamp out of window"),
zap.Uint64("timestamp", timeoutState.Timestamp),
zap.Uint64("now", now),
)
shardTimeoutStateValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore
}
@ -285,6 +307,11 @@ func (e *GlobalConsensusEngine) validateShardConsensusMessage(
}
default:
e.logger.Debug(
"rejecting shard consensus message",
zap.String("reason", "unknown type prefix"),
zap.Uint32("type", typePrefix),
)
return tp2p.ValidationResultReject
}
@ -301,7 +328,7 @@ func (e *GlobalConsensusEngine) validateProverMessage(
)
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -327,6 +354,10 @@ func (e *GlobalConsensusEngine) validateProverMessage(
for _, r := range messageBundle.Requests {
if r.GetKick() != nil {
e.logger.Debug(
"ignoring prover message",
zap.String("reason", "bundle contains kick request"),
)
return tp2p.ValidationResultIgnore
}
}
@ -396,12 +427,19 @@ func (e *GlobalConsensusEngine) validateAppFrameMessage(
}
if !valid {
e.logger.Debug("invalid frame")
e.logger.Debug(
"invalid app frame",
zap.String("reason", "frame validator returned false"),
)
shardFrameValidationTotal.WithLabelValues("reject").Inc()
return tp2p.ValidationResultReject
}
if frametime.AppFrameSince(frame) > 120*time.Second {
e.logger.Debug(
"ignoring app frame",
zap.String("reason", "frame too old"),
)
shardFrameValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore
}
@ -409,6 +447,11 @@ func (e *GlobalConsensusEngine) validateAppFrameMessage(
shardFrameValidationTotal.WithLabelValues("accept").Inc()
default:
e.logger.Debug(
"rejecting app frame message",
zap.String("reason", "unknown type prefix"),
zap.Uint32("type", typePrefix),
)
return tp2p.ValidationResultReject
}
@ -421,7 +464,7 @@ func (e *GlobalConsensusEngine) validateFrameMessage(
) tp2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -454,11 +497,21 @@ func (e *GlobalConsensusEngine) validateFrameMessage(
}
if e.currentRank > frame.GetRank()+2 {
e.logger.Debug(
"ignoring global frame",
zap.String("reason", "rank too old"),
zap.Uint64("current_rank", e.currentRank),
zap.Uint64("frame_rank", frame.GetRank()),
)
frameValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore
}
if frametime.GlobalFrameSince(frame) > 120*time.Second {
e.logger.Debug(
"ignoring global frame",
zap.String("reason", "frame too old"),
)
frameValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore
}
@ -478,7 +531,7 @@ func (e *GlobalConsensusEngine) validatePeerInfoMessage(
) tp2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)
@ -567,7 +620,7 @@ func (e *GlobalConsensusEngine) validateAlertMessage(
) tp2p.ValidationResult {
// Check if data is long enough to contain type prefix
if len(message.Data) < 4 {
e.logger.Error(
e.logger.Debug(
"message too short",
zap.Int("data_length", len(message.Data)),
)

View File

@ -234,15 +234,6 @@ func (m *Manager) PlanAndAllocate(
})
}
workerLookup := make(map[uint]*store.WorkerInfo, len(all))
for _, w := range all {
workerLookup[w.CoreId] = w
}
if len(proposals) > 0 {
m.persistPlannedFilters(proposals, workerLookup, frameNumber)
}
// Perform allocations
workerIds := []uint{}
filters := [][]byte{}
@ -258,9 +249,19 @@ func (m *Manager) PlanAndAllocate(
m.logger.Warn("allocate worker failed",
zap.Error(err),
)
return proposals, errors.Wrap(err, "plan and allocate")
}
return proposals, errors.Wrap(err, "plan and allocate")
// Persist filters only after successful publication — if the join
// fails to publish, we don't want workers stuck with filters that
// block them for proposalTimeoutFrames.
workerLookup := make(map[uint]*store.WorkerInfo, len(all))
for _, w := range all {
workerLookup[w.CoreId] = w
}
m.persistPlannedFilters(proposals, workerLookup, frameNumber)
return proposals, nil
}
func (m *Manager) persistPlannedFilters(
@ -300,6 +301,20 @@ func (m *Manager) persistPlannedFilters(
zap.Uint("core_id", info.CoreId),
zap.Error(err),
)
continue
}
m.logger.Info(
"reassigning worker to new filter",
zap.Uint("core_id", info.CoreId),
zap.String("filter", hex.EncodeToString(filterCopy)),
)
if err := m.workerMgr.RespawnWorker(info.CoreId, filterCopy); err != nil {
m.logger.Warn(
"failed to respawn worker with new filter",
zap.Uint("core_id", info.CoreId),
zap.Error(err),
)
}
}
}

View File

@ -66,6 +66,9 @@ func (m *mockWorkerManager) Start(ctx context.Context) error {
func (m *mockWorkerManager) Stop() error {
panic("unimplemented")
}
func (m *mockWorkerManager) RespawnWorker(coreId uint, filter []byte) error {
return nil
}
func (m *mockWorkerManager) RangeWorkers() ([]*store.WorkerInfo, error) {
out := make([]*store.WorkerInfo, len(m.workers))

View File

@ -38,31 +38,26 @@ func (b *BLSAppFrameValidator) Validate(
frame *protobufs.AppShardFrame,
) (bool, error) {
if frame == nil || frame.Header == nil {
b.logger.Debug("frame or header is nil")
return false, nil
return false, errors.New("frame or header is nil")
}
if len(frame.Header.Address) == 0 {
b.logger.Debug("address is empty")
return false, nil
return false, errors.New("address is empty")
}
if frame.Header.StateRoots == nil || len(frame.Header.StateRoots) != 4 {
b.logger.Debug(
"invalid state roots",
zap.Int("roots_len", len(frame.Header.StateRoots)),
return false, errors.Errorf(
"invalid state roots count: %d",
len(frame.Header.StateRoots),
)
return false, nil
}
for i, stateRoot := range frame.Header.StateRoots {
if len(stateRoot) != 74 && len(stateRoot) != 64 {
b.logger.Debug(
"invalid state root",
zap.Int("root_index", i),
zap.Int("root_len", len(stateRoot)),
return false, errors.Errorf(
"invalid state root length at index %d: %d",
i, len(stateRoot),
)
return false, nil
}
}
@ -75,8 +70,7 @@ func (b *BLSAppFrameValidator) Validate(
if !isValid {
b.logger.Debug(
"frame verification result",
zap.Bool("is_valid", isValid),
"frame verification failed",
zap.Error(err),
zap.Uint64("frame_number", frame.Header.FrameNumber),
zap.String("address", hex.EncodeToString(frame.Header.Address)),
@ -85,7 +79,7 @@ func (b *BLSAppFrameValidator) Validate(
hex.EncodeToString(frame.Header.ParentSelector),
),
)
return false, nil
return false, errors.Wrap(err, "frame header verification")
}
if frame.Header.PublicKeySignatureBls48581 != nil {

View File

@ -38,16 +38,14 @@ func (b *BLSGlobalFrameValidator) Validate(
frame *protobufs.GlobalFrame,
) (bool, error) {
if frame == nil || frame.Header == nil {
b.logger.Debug("frame or header is nil")
return false, nil
return false, errors.New("frame or header is nil")
}
if len(frame.Header.Output) != 516 {
b.logger.Debug(
"invalid output length",
zap.Int("output_len", len(frame.Header.Output)),
return false, errors.Errorf(
"invalid output length: %d",
len(frame.Header.Output),
)
return false, nil
}
if frame.Header.FrameNumber == 0 {
@ -56,19 +54,16 @@ func (b *BLSGlobalFrameValidator) Validate(
}
if frame.Header.PublicKeySignatureBls48581 == nil {
b.logger.Debug("no bls signature")
return false, nil
return false, errors.New("no bls signature")
}
sig := frame.Header.PublicKeySignatureBls48581
if sig.Signature == nil || sig.PublicKey == nil {
b.logger.Debug("signature or public key is nil")
return false, nil
return false, errors.New("signature or public key is nil")
}
if sig.Bitmask == nil {
b.logger.Debug("bitmask is nil")
return false, nil
return false, errors.New("bitmask is nil")
}
bits, err := b.frameProver.VerifyGlobalFrameHeader(
@ -79,8 +74,7 @@ func (b *BLSGlobalFrameValidator) Validate(
if !isValid {
b.logger.Debug(
"frame verification result",
zap.Bool("is_valid", isValid),
"frame verification failed",
zap.Error(err),
zap.Uint64("frame_number", frame.Header.FrameNumber),
zap.String(
@ -88,7 +82,7 @@ func (b *BLSGlobalFrameValidator) Validate(
hex.EncodeToString(frame.Header.ParentSelector),
),
)
return false, err
return false, errors.Wrap(err, "global frame header verification")
}
provers, err := b.proverRegistry.GetActiveProvers(nil)

View File

@ -132,9 +132,33 @@ func (r *DataWorkerIPCServer) Start() error {
func (r *DataWorkerIPCServer) Stop() error {
r.logger.Info("stopping server gracefully")
// Stop the app consensus engine first, then synchronously close the
// snapshot manager so no Pebble snapshots remain when the database closes.
if r.appConsensusEngine != nil {
if r.cancel != nil {
r.cancel()
}
<-r.appConsensusEngine.Stop(false)
r.appConsensusEngine = nil
}
r.appConsensusEngineFactory.CloseSnapshots()
r.pubsub.Close()
if r.server != nil {
r.server.GracefulStop()
stopped := make(chan struct{})
srv := r.server
go func() {
srv.GracefulStop()
close(stopped)
}()
select {
case <-stopped:
case <-time.After(5 * time.Second):
r.logger.Warn("server graceful stop timed out during shutdown, forcing")
srv.Stop()
<-stopped
}
}
if r.peerInfoCancel != nil {
r.peerInfoCancel()
@ -155,17 +179,31 @@ func (r *DataWorkerIPCServer) Respawn(
}
func (r *DataWorkerIPCServer) RespawnServer(filter []byte) error {
if r.server != nil {
r.logger.Info("stopping server for respawn")
r.server.GracefulStop()
r.server = nil
}
if r.appConsensusEngine != nil {
r.logger.Info("respawning worker: stopping old engine")
if r.cancel != nil {
r.cancel()
}
<-r.appConsensusEngine.Stop(false)
r.appConsensusEngine = nil
r.logger.Info("respawning worker: old engine stopped")
}
if r.server != nil {
r.logger.Info("stopping server for respawn")
stopped := make(chan struct{})
srv := r.server
go func() {
srv.GracefulStop()
close(stopped)
}()
select {
case <-stopped:
case <-time.After(5 * time.Second):
r.logger.Warn("server graceful stop timed out, forcing stop")
srv.Stop()
<-stopped
}
r.server = nil
}
// Establish an auth provider
@ -245,16 +283,26 @@ func (r *DataWorkerIPCServer) RespawnServer(filter []byte) error {
return errors.Wrap(err, "respawn server")
}
r.ctx, r.cancel, _ = lifecycle.WithSignallerAndCancel(context.Background())
var errCh <-chan error
r.ctx, r.cancel, errCh = lifecycle.WithSignallerAndCancel(context.Background())
// Capture engine and ctx in local variables to avoid race with subsequent RespawnServer calls
engine := r.appConsensusEngine
ctx := r.ctx
go func() {
if err, ok := <-errCh; ok && err != nil {
r.logger.Error("app engine fatal error during respawn",
zap.Error(err))
}
}()
r.logger.Info("respawning worker: engine created, starting")
go func() {
if engine == nil {
return
}
if err = engine.Start(ctx); err != nil {
r.logger.Error("error while running", zap.Error(err))
r.logger.Error("respawning worker: engine start failed", zap.Error(err))
} else {
r.logger.Info("respawning worker: engine started successfully")
}
}()
}

View File

@ -208,7 +208,9 @@ func (e *GlobalExecutionEngine) validateBundle(
op.GetKick() != nil ||
op.GetUpdate() != nil ||
op.GetShard() != nil ||
op.GetSeniorityMerge() != nil
op.GetSeniorityMerge() != nil ||
op.GetShardSplit() != nil ||
op.GetShardMerge() != nil
if !isGlobalOp {
if e.config.Network == 0 &&
@ -526,6 +528,7 @@ func (e *GlobalExecutionEngine) tryGetIntrinsic(address []byte) (
e.rewardIssuance,
e.proverRegistry,
e.blsConstructor,
e.shardsStore,
)
if err != nil {
return nil, errors.Wrap(err, "try get intrinsic")

View File

@ -121,7 +121,7 @@ func (c *CodeDeployment) Verify(frameNumber uint64) (bool, error) {
buf := bytes.NewReader(c.Circuit)
err := c.compiler.ValidateCircuit(buf)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid code deployment")
}
return true, nil

View File

@ -356,14 +356,14 @@ func (c *CodeExecute) Verify(frameNumber uint64) (bool, error) {
) {
return false, errors.Wrap(
errors.New("invalid signature"),
"verify",
"verify: invalid code execute",
)
}
}
_, err := c.buildExecutionDAG()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid code execute")
}
return true, nil

View File

@ -349,7 +349,7 @@ func (c *CodeFinalize) Verify(frameNumber uint64) (bool, error) {
if len(c.Results) == 0 {
return false, errors.Wrap(
errors.New("no execution results provided"),
"verify",
"verify: invalid code finalize",
)
}
@ -358,7 +358,7 @@ func (c *CodeFinalize) Verify(frameNumber uint64) (bool, error) {
if len(change.Address) != 32 || len(change.Domain) != 32 {
return false, errors.Wrap(
errors.New("invalid address length in state change"),
"verify",
"verify: invalid code finalize",
)
}
}
@ -367,7 +367,7 @@ func (c *CodeFinalize) Verify(frameNumber uint64) (bool, error) {
clone.ProofOfExecution = nil
msg, err := clone.ToCanonicalBytes()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid code finalize")
}
valid, err := c.keyManager.ValidateSignature(
@ -379,11 +379,11 @@ func (c *CodeFinalize) Verify(frameNumber uint64) (bool, error) {
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid code finalize")
}
if !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid code finalize")
}
return true, nil

View File

@ -204,7 +204,7 @@ func (a *AltShardUpdate) Verify(frameNumber uint64) (bool, error) {
)
domain, err := poseidon.HashBytes(domainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid alt shard update")
}
message := a.getSignedMessage()
@ -216,7 +216,7 @@ func (a *AltShardUpdate) Verify(frameNumber uint64) (bool, error) {
domain.FillBytes(make([]byte, 32)),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid alt shard update")
}
if !valid {
return false, errors.New("invalid signature")

View File

@ -5,6 +5,7 @@ import (
"github.com/pkg/errors"
"source.quilibrium.com/quilibrium/monorepo/protobufs"
"source.quilibrium.com/quilibrium/monorepo/types/consensus"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/hypergraph"
"source.quilibrium.com/quilibrium/monorepo/types/keys"
@ -671,6 +672,96 @@ func (p *ProverSeniorityMerge) ToProtobuf() *protobufs.ProverSeniorityMerge {
}
}
// ShardSplitFromProtobuf converts a protobuf ShardSplit to intrinsics
func ShardSplitFromProtobuf(
pb *protobufs.ShardSplit,
hg hypergraph.Hypergraph,
keyManager keys.KeyManager,
shardsStore store.ShardsStore,
proverRegistry consensus.ProverRegistry,
) (*ShardSplitOp, error) {
if pb == nil {
return nil, nil
}
pubKeySig, err := BLS48581AddressedSignatureFromProtobuf(
pb.PublicKeySignatureBls48581,
)
if err != nil {
return nil, errors.Wrap(err, "shard split from protobuf")
}
return &ShardSplitOp{
ShardAddress: pb.ShardAddress,
ProposedShards: pb.ProposedShards,
FrameNumber: pb.FrameNumber,
PublicKeySignatureBLS48581: *pubKeySig,
hypergraph: hg,
keyManager: keyManager,
shardsStore: shardsStore,
proverRegistry: proverRegistry,
}, nil
}
// ToProtobuf converts an intrinsics ShardSplitOp to protobuf
func (op *ShardSplitOp) ToProtobuf() *protobufs.ShardSplit {
if op == nil {
return nil
}
return &protobufs.ShardSplit{
ShardAddress: op.ShardAddress,
ProposedShards: op.ProposedShards,
FrameNumber: op.FrameNumber,
PublicKeySignatureBls48581: op.PublicKeySignatureBLS48581.ToProtobuf(),
}
}
// ShardMergeFromProtobuf converts a protobuf ShardMerge to intrinsics
func ShardMergeFromProtobuf(
pb *protobufs.ShardMerge,
hg hypergraph.Hypergraph,
keyManager keys.KeyManager,
shardsStore store.ShardsStore,
proverRegistry consensus.ProverRegistry,
) (*ShardMergeOp, error) {
if pb == nil {
return nil, nil
}
pubKeySig, err := BLS48581AddressedSignatureFromProtobuf(
pb.PublicKeySignatureBls48581,
)
if err != nil {
return nil, errors.Wrap(err, "shard merge from protobuf")
}
return &ShardMergeOp{
ShardAddresses: pb.ShardAddresses,
ParentAddress: pb.ParentAddress,
FrameNumber: pb.FrameNumber,
PublicKeySignatureBLS48581: *pubKeySig,
hypergraph: hg,
keyManager: keyManager,
shardsStore: shardsStore,
proverRegistry: proverRegistry,
}, nil
}
// ToProtobuf converts an intrinsics ShardMergeOp to protobuf
func (op *ShardMergeOp) ToProtobuf() *protobufs.ShardMerge {
if op == nil {
return nil
}
return &protobufs.ShardMerge{
ShardAddresses: op.ShardAddresses,
ParentAddress: op.ParentAddress,
FrameNumber: op.FrameNumber,
PublicKeySignatureBls48581: op.PublicKeySignatureBLS48581.ToProtobuf(),
}
}
// FromProtobuf converts a protobuf MessageRequest to intrinsics types
func GlobalRequestFromProtobuf(
pb *protobufs.MessageRequest,

View File

@ -38,6 +38,7 @@ type GlobalIntrinsic struct {
rewardIssuance consensus.RewardIssuance
proverRegistry consensus.ProverRegistry
blsConstructor crypto.BlsConstructor
shardsStore store.ShardsStore
}
var GLOBAL_RDF_SCHEMA = `BASE <https://types.quilibrium.com/schema-repository/>
@ -730,6 +731,108 @@ func (a *GlobalIntrinsic) Validate(
).Inc()
return nil
case protobufs.ShardSplitType:
pb := &protobufs.ShardSplit{}
if err := pb.FromCanonicalBytes(input); err != nil {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return errors.Wrap(err, "validate")
}
op, err := ShardSplitFromProtobuf(
pb,
a.hypergraph,
a.keyManager,
a.shardsStore,
a.proverRegistry,
)
if err != nil {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return errors.Wrap(err, "validate")
}
valid, err := op.Verify(frameNumber)
if err != nil {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return errors.Wrap(err, "validate")
}
if !valid {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return errors.Wrap(
errors.New("invalid shard split"),
"validate",
)
}
observability.ValidateTotal.WithLabelValues(
"global",
"shard_split",
).Inc()
return nil
case protobufs.ShardMergeType:
pb := &protobufs.ShardMerge{}
if err := pb.FromCanonicalBytes(input); err != nil {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return errors.Wrap(err, "validate")
}
op, err := ShardMergeFromProtobuf(
pb,
a.hypergraph,
a.keyManager,
a.shardsStore,
a.proverRegistry,
)
if err != nil {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return errors.Wrap(err, "validate")
}
valid, err := op.Verify(frameNumber)
if err != nil {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return errors.Wrap(err, "validate")
}
if !valid {
observability.ValidateErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return errors.Wrap(
errors.New("invalid shard merge"),
"validate",
)
}
observability.ValidateTotal.WithLabelValues(
"global",
"shard_merge",
).Inc()
return nil
default:
observability.ValidateErrors.WithLabelValues(
"global",
@ -1268,6 +1371,110 @@ func (a *GlobalIntrinsic) InvokeStep(
).Inc()
return resultState, nil
case protobufs.ShardSplitType:
opTimer := prometheus.NewTimer(
observability.OperationDuration.WithLabelValues(
"global",
"shard_split",
),
)
defer opTimer.ObserveDuration()
pb := &protobufs.ShardSplit{}
if err := pb.FromCanonicalBytes(input); err != nil {
observability.InvokeStepErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return nil, errors.Wrap(err, "invoke step")
}
op, err := ShardSplitFromProtobuf(
pb,
a.hypergraph,
a.keyManager,
a.shardsStore,
a.proverRegistry,
)
if err != nil {
observability.InvokeStepErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return nil, errors.Wrap(err, "invoke step")
}
matTimer := prometheus.NewTimer(
observability.MaterializeDuration.WithLabelValues("global"),
)
resultState, matErr := op.Materialize(frameNumber, state)
matTimer.ObserveDuration()
if matErr != nil {
observability.InvokeStepErrors.WithLabelValues(
"global",
"shard_split",
).Inc()
return nil, errors.Wrap(matErr, "invoke step")
}
observability.InvokeStepTotal.WithLabelValues(
"global",
"shard_split",
).Inc()
return resultState, nil
case protobufs.ShardMergeType:
opTimer := prometheus.NewTimer(
observability.OperationDuration.WithLabelValues(
"global",
"shard_merge",
),
)
defer opTimer.ObserveDuration()
pb := &protobufs.ShardMerge{}
if err := pb.FromCanonicalBytes(input); err != nil {
observability.InvokeStepErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return nil, errors.Wrap(err, "invoke step")
}
op, err := ShardMergeFromProtobuf(
pb,
a.hypergraph,
a.keyManager,
a.shardsStore,
a.proverRegistry,
)
if err != nil {
observability.InvokeStepErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return nil, errors.Wrap(err, "invoke step")
}
matTimer := prometheus.NewTimer(
observability.MaterializeDuration.WithLabelValues("global"),
)
resultState, matErr := op.Materialize(frameNumber, state)
matTimer.ObserveDuration()
if matErr != nil {
observability.InvokeStepErrors.WithLabelValues(
"global",
"shard_merge",
).Inc()
return nil, errors.Wrap(matErr, "invoke step")
}
observability.InvokeStepTotal.WithLabelValues(
"global",
"shard_merge",
).Inc()
return resultState, nil
default:
observability.InvokeStepErrors.WithLabelValues(
"global",
@ -1397,6 +1604,28 @@ func (a *GlobalIntrinsic) Lock(
"prover_seniority_merge",
).Inc()
case protobufs.ShardSplitType:
reads, writes, err = a.tryLockShardSplit(frameNumber, input)
if err != nil {
return nil, err
}
observability.LockTotal.WithLabelValues(
"global",
"shard_split",
).Inc()
case protobufs.ShardMergeType:
reads, writes, err = a.tryLockShardMerge(frameNumber, input)
if err != nil {
return nil, err
}
observability.LockTotal.WithLabelValues(
"global",
"shard_merge",
).Inc()
default:
observability.LockErrors.WithLabelValues(
"global",
@ -1914,6 +2143,112 @@ func (a *GlobalIntrinsic) tryLockSeniorityMerge(
return reads, writes, nil
}
// tryLockShardSplit decodes a canonical ShardSplit payload, constructs the
// corresponding operation, and returns the read and write address sets it
// needs locked for the given frame. Every failure path increments the
// shard_split lock-error counter and is wrapped with the "lock" context.
func (a *GlobalIntrinsic) tryLockShardSplit(
	frameNumber uint64,
	input []byte,
) (
	[][]byte,
	[][]byte,
	error,
) {
	// fail records a shard_split lock error metric and wraps the cause,
	// keeping every error exit identical.
	fail := func(err error) ([][]byte, [][]byte, error) {
		observability.LockErrors.WithLabelValues(
			"global",
			"shard_split",
		).Inc()
		return nil, nil, errors.Wrap(err, "lock")
	}

	split := &protobufs.ShardSplit{}
	if err := split.FromCanonicalBytes(input); err != nil {
		return fail(err)
	}

	op, err := ShardSplitFromProtobuf(
		split,
		a.hypergraph,
		a.keyManager,
		a.shardsStore,
		a.proverRegistry,
	)
	if err != nil {
		return fail(err)
	}

	readAddrs, err := op.GetReadAddresses(frameNumber)
	if err != nil {
		return fail(err)
	}

	writeAddrs, err := op.GetWriteAddresses(frameNumber)
	if err != nil {
		return fail(err)
	}

	return readAddrs, writeAddrs, nil
}
// tryLockShardMerge decodes a canonical ShardMerge payload, constructs the
// corresponding operation, and returns the read and write address sets it
// needs locked for the given frame. Every failure path increments the
// shard_merge lock-error counter and is wrapped with the "lock" context.
func (a *GlobalIntrinsic) tryLockShardMerge(
	frameNumber uint64,
	input []byte,
) (
	[][]byte,
	[][]byte,
	error,
) {
	// fail records a shard_merge lock error metric and wraps the cause,
	// keeping every error exit identical.
	fail := func(err error) ([][]byte, [][]byte, error) {
		observability.LockErrors.WithLabelValues(
			"global",
			"shard_merge",
		).Inc()
		return nil, nil, errors.Wrap(err, "lock")
	}

	merge := &protobufs.ShardMerge{}
	if err := merge.FromCanonicalBytes(input); err != nil {
		return fail(err)
	}

	op, err := ShardMergeFromProtobuf(
		merge,
		a.hypergraph,
		a.keyManager,
		a.shardsStore,
		a.proverRegistry,
	)
	if err != nil {
		return fail(err)
	}

	readAddrs, err := op.GetReadAddresses(frameNumber)
	if err != nil {
		return fail(err)
	}

	writeAddrs, err := op.GetWriteAddresses(frameNumber)
	if err != nil {
		return fail(err)
	}

	return readAddrs, writeAddrs, nil
}
// LoadGlobalIntrinsic loads the global intrinsic from the global intrinsic
// address. The global intrinsic is implicitly deployed and always exists at the
// global address.
@ -1928,6 +2263,7 @@ func LoadGlobalIntrinsic(
rewardIssuance consensus.RewardIssuance,
proverRegistry consensus.ProverRegistry,
blsConstructor crypto.BlsConstructor,
shardsStore store.ShardsStore,
) (*GlobalIntrinsic, error) {
// Verify the address is the global intrinsic address
if !bytes.Equal(address, intrinsics.GLOBAL_INTRINSIC_ADDRESS[:]) {
@ -1956,6 +2292,7 @@ func LoadGlobalIntrinsic(
rewardIssuance: rewardIssuance,
proverRegistry: proverRegistry,
blsConstructor: blsConstructor,
shardsStore: shardsStore,
}, nil
}

View File

@ -414,7 +414,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
)
confirmDomain, err := poseidon.HashBytes(confirmDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover confirm")
}
_, err = p.hypergraph.GetVertex([64]byte(slices.Concat(
@ -422,7 +422,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover confirm")
}
tree, err := p.hypergraph.GetVertexData([64]byte(slices.Concat(
@ -430,7 +430,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover confirm")
}
pubkey, err := p.rdfMultiprover.Get(
@ -440,7 +440,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
tree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover confirm")
}
for _, filter := range p.Filters {
@ -449,7 +449,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
slices.Concat([]byte("PROVER_ALLOCATION"), pubkey, filter),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover confirm")
}
allocationAddress := allocationAddressBI.FillBytes(make([]byte, 32))
allocationFullAddress := [64]byte{}
@ -461,7 +461,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
if err != nil || allocationTree == nil {
return false, errors.Wrap(
errors.New("allocation not found"),
"verify",
"verify: invalid prover confirm",
)
}
@ -473,7 +473,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover confirm")
}
status := uint8(0)
@ -485,7 +485,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
if status != 0 && status != 3 {
return false, errors.Wrap(
errors.New("invalid allocation state for confirmation"),
"verify",
"verify: invalid prover confirm",
)
}
@ -499,7 +499,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil || len(joinFrameBytes) != 8 {
return false, errors.Wrap(errors.New("missing join frame"), "verify")
return false, errors.Wrap(errors.New("missing join frame"), "verify: invalid prover confirm")
}
joinFrame := binary.BigEndian.Uint64(joinFrameBytes)
@ -509,7 +509,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
// If joined before frame 255840, cannot confirm until frame 255840
return false, errors.Wrap(
errors.New("cannot confirm before frame 255840"),
"verify",
"verify: invalid prover confirm",
)
}
@ -534,13 +534,13 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
"must wait 360 frames after join to confirm (%d)",
framesSinceJoin,
),
"verify",
"verify: invalid prover confirm",
)
}
if framesSinceJoin > 720 {
return false, errors.Wrap(
errors.New("confirmation window expired (720 frames)"),
"verify",
"verify: invalid prover confirm",
)
}
}
@ -554,7 +554,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil || len(leaveFrameBytes) != 8 {
return false, errors.Wrap(errors.New("missing leave frame"), "verify")
return false, errors.Wrap(errors.New("missing leave frame"), "verify: invalid prover confirm")
}
leaveFrame := binary.BigEndian.Uint64(leaveFrameBytes)
@ -562,13 +562,13 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
if framesSinceLeave < 360 {
return false, errors.Wrap(
errors.New("must wait 360 frames after leave to confirm"),
"verify",
"verify: invalid prover confirm",
)
}
if framesSinceLeave > 720 {
return false, errors.Wrap(
errors.New("leave confirmation window expired (720 frames)"),
"verify",
"verify: invalid prover confirm",
)
}
}
@ -583,7 +583,7 @@ func (p *ProverConfirm) Verify(frameNumber uint64) (bool, error) {
confirmDomain.Bytes(),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid prover confirm")
}
return true, nil

View File

@ -1,6 +1,7 @@
package global
import (
"bytes"
"encoding/binary"
"fmt"
"math/big"
@ -149,6 +150,66 @@ func (p *ProverJoin) Materialize(
}
}
// Compute seniority from merge targets before the prover-exists check,
// so it can be applied to both new and existing provers.
var computedSeniority uint64 = 0
if len(p.MergeTargets) > 0 {
var mergePeerIds []string
for _, target := range p.MergeTargets {
// Check if this merge target was already consumed
spentBI, err := poseidon.HashBytes(slices.Concat(
[]byte("PROVER_JOIN_MERGE"),
target.PublicKey,
))
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
v, vErr := hg.Get(
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
spentBI.FillBytes(make([]byte, 32)),
hgstate.VertexAddsDiscriminator,
)
if vErr == nil && v != nil {
// Spent marker exists — check who consumed it
spentTree, ok := v.(*tries.VectorCommitmentTree)
if ok && spentTree != nil {
storedAddr, getErr := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"merge:SpentMerge",
"ProverAddress",
spentTree,
)
if getErr == nil && len(storedAddr) == 32 &&
!bytes.Equal(storedAddr, proverAddress) {
continue // consumed by a different prover
}
}
// Same prover or legacy empty marker — count seniority
}
if target.KeyType == crypto.KeyTypeEd448 {
pk, err := pcrypto.UnmarshalEd448PublicKey(target.PublicKey)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
peerId, err := peer.IDFromPublicKey(pk)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
mergePeerIds = append(mergePeerIds, peerId.String())
}
}
if len(mergePeerIds) > 0 {
seniorityBig := compat.GetAggregatedSeniority(mergePeerIds)
if seniorityBig.IsUint64() {
computedSeniority = seniorityBig.Uint64()
}
}
}
if !proverExists {
// Create new prover entry
proverTree = &qcrypto.VectorCommitmentTree{}
@ -194,39 +255,9 @@ func (p *ProverJoin) Materialize(
return nil, errors.Wrap(err, "materialize")
}
// Calculate seniority from MergeTargets
var seniority uint64 = 0
if len(p.MergeTargets) > 0 {
// Convert Ed448 public keys to peer IDs
var peerIds []string
for _, target := range p.MergeTargets {
if target.KeyType == crypto.KeyTypeEd448 {
pk, err := pcrypto.UnmarshalEd448PublicKey(target.PublicKey)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
peerId, err := peer.IDFromPublicKey(pk)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
peerIds = append(peerIds, peerId.String())
}
}
// Get aggregated seniority
if len(peerIds) > 0 {
seniorityBig := compat.GetAggregatedSeniority(peerIds)
if seniorityBig.IsUint64() {
seniority = seniorityBig.Uint64()
}
}
}
// Store seniority
// Store seniority (computed above from merge targets)
seniorityBytes := make([]byte, 8)
binary.BigEndian.PutUint64(seniorityBytes, seniority)
binary.BigEndian.PutUint64(seniorityBytes, computedSeniority)
err = p.rdfMultiprover.Set(
GLOBAL_RDF_SCHEMA,
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
@ -318,6 +349,54 @@ func (p *ProverJoin) Materialize(
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
} else if computedSeniority > 0 {
// For existing provers, update seniority if merge targets provide a
// higher value than what's currently stored.
existingSeniorityData, err := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"prover:Prover",
"Seniority",
proverTree,
)
var existingSeniority uint64 = 0
if err == nil && len(existingSeniorityData) == 8 {
existingSeniority = binary.BigEndian.Uint64(existingSeniorityData)
}
if computedSeniority > existingSeniority {
seniorityBytes := make([]byte, 8)
binary.BigEndian.PutUint64(seniorityBytes, computedSeniority)
err = p.rdfMultiprover.Set(
GLOBAL_RDF_SCHEMA,
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
"prover:Prover",
"Seniority",
seniorityBytes,
proverTree,
)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
updatedVertex := hg.NewVertexAddMaterializedState(
intrinsics.GLOBAL_INTRINSIC_ADDRESS,
[32]byte(proverAddress),
frameNumber,
proverTree,
proverTree,
)
err = hg.Set(
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
proverAddress,
hgstate.VertexAddsDiscriminator,
frameNumber,
updatedVertex,
)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
}
}
// Create hyperedge for this prover
@ -457,21 +536,59 @@ func (p *ProverJoin) Materialize(
return nil, errors.Wrap(err, "materialize")
}
// confirm this has not already been used
spentAddress := [64]byte{}
copy(spentAddress[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
copy(spentAddress[32:], spentMergeBI.FillBytes(make([]byte, 32)))
spentMergeAddr := spentMergeBI.FillBytes(make([]byte, 32))
// Check existing spent marker
var prior *tries.VectorCommitmentTree
existing, existErr := hg.Get(
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
spentMergeAddr,
hgstate.VertexAddsDiscriminator,
)
if existErr == nil && existing != nil {
existingTree, ok := existing.(*tries.VectorCommitmentTree)
if ok && existingTree != nil {
storedAddr, getErr := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"merge:SpentMerge",
"ProverAddress",
existingTree,
)
if getErr == nil && len(storedAddr) == 32 {
// New format marker — already has a prover address.
// Skip regardless of whether it's ours or another's.
continue
}
// Legacy empty marker — overwrite with prover address
prior = existingTree
}
}
// Write spent marker with prover address
spentTree := &tries.VectorCommitmentTree{}
err = p.rdfMultiprover.Set(
GLOBAL_RDF_SCHEMA,
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
"merge:SpentMerge",
"ProverAddress",
proverAddress,
spentTree,
)
if err != nil {
return nil, errors.Wrap(err, "materialize")
}
spentMergeVertex := hg.NewVertexAddMaterializedState(
intrinsics.GLOBAL_INTRINSIC_ADDRESS,
[32]byte(spentMergeBI.FillBytes(make([]byte, 32))),
[32]byte(spentMergeAddr),
frameNumber,
nil,
&tries.VectorCommitmentTree{},
prior,
spentTree,
)
err = hg.Set(
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
spentMergeBI.FillBytes(make([]byte, 32)),
spentMergeAddr,
hgstate.VertexAddsDiscriminator,
frameNumber,
spentMergeVertex,
@ -528,15 +645,36 @@ func (p *ProverJoin) Prove(frameNumber uint64) error {
return errors.Wrap(err, "prove")
}
// Set the public key before signing merge targets, since merge target
// signatures are over the BLS public key and Verify() checks against it.
blsPublicKey := prover.Public().([]byte)
for _, mt := range p.MergeTargets {
if mt.signer != nil {
mt.Signature, err = mt.signer.SignWithDomain(
p.PublicKeySignatureBLS48581.PublicKey,
blsPublicKey,
[]byte("PROVER_JOIN_MERGE"),
)
if err != nil {
return errors.Wrap(err, "prove")
}
// Self-verify: catch key material issues before publishing
valid, verifyErr := p.keyManager.ValidateSignature(
mt.KeyType,
mt.PublicKey,
blsPublicKey,
mt.Signature,
[]byte("PROVER_JOIN_MERGE"),
)
if verifyErr != nil || !valid {
return fmt.Errorf(
"prove: merge target self-verify failed "+
"(key_type=%d, pub_key_len=%d, sig_len=%d, bls_pub_len=%d, err=%v)",
mt.KeyType, len(mt.PublicKey), len(mt.Signature),
len(blsPublicKey), verifyErr,
)
}
}
}
@ -573,7 +711,7 @@ func (p *ProverJoin) Prove(frameNumber uint64) error {
// Create the proof of possession signature over the public key with the POP
// domain
popSignature, err := prover.SignWithDomain(
prover.Public().([]byte),
blsPublicKey,
popDomain,
)
if err != nil {
@ -583,7 +721,7 @@ func (p *ProverJoin) Prove(frameNumber uint64) error {
// Create the BLS48581SignatureWithProofOfPossession
p.PublicKeySignatureBLS48581 = BLS48581SignatureWithProofOfPossession{
Signature: signature,
PublicKey: prover.Public().([]byte),
PublicKey: blsPublicKey,
PopSignature: popSignature,
}
@ -648,9 +786,36 @@ func (p *ProverJoin) GetWriteAddresses(frameNumber uint64) ([][]byte, error) {
return nil, errors.Wrap(err, "get write addresses")
}
spentAddr := spentMergeBI.FillBytes(make([]byte, 32))
// Skip merge targets whose spent markers already contain a prover
// address (new format). These won't be written to — either they
// belong to this prover (already recorded) or a different one.
// Legacy empty markers and new markers need a write lock since
// Materialize will write them.
if p.hypergraph != nil {
spentFullAddr := [64]byte{}
copy(spentFullAddr[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
copy(spentFullAddr[32:], spentAddr)
spentData, dataErr := p.hypergraph.GetVertexData(spentFullAddr)
if dataErr == nil && spentData != nil {
storedAddr, getErr := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"merge:SpentMerge",
"ProverAddress",
spentData,
)
if getErr == nil && len(storedAddr) == 32 {
// New format — won't be written to
continue
}
// Legacy empty — will be overwritten, need write lock
}
}
addresses[string(slices.Concat(
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
spentMergeBI.FillBytes(make([]byte, 32)),
spentAddr,
))] = struct{}{}
}
@ -673,23 +838,23 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
// First check if prover can join (not in tree or in left state)
addressBI, err := poseidon.HashBytes(p.PublicKeySignatureBLS48581.PublicKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover join")
}
address := addressBI.FillBytes(make([]byte, 32))
for _, filter := range p.Filters {
if len(filter) < 32 {
return false, errors.Wrap(errors.New("invalid filter size"), "verify")
return false, errors.Wrap(errors.New("invalid filter size"), "verify: invalid prover join")
}
}
if len(p.Proof)%516 != 0 || len(p.Proof)/516 != len(p.Filters) {
return false, errors.Wrap(errors.New("proof size mismatch"), "verify")
return false, errors.Wrap(errors.New("proof size mismatch"), "verify: invalid prover join")
}
// Disallow too old of a request
if p.FrameNumber+10 < frameNumber {
return false, errors.Wrap(errors.New("outdated request"), "verify")
return false, errors.Wrap(errors.New("outdated request"), "verify: invalid prover join")
}
frame, err := p.frameStore.GetGlobalClockFrame(p.FrameNumber)
@ -702,13 +867,13 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
return false, errors.Wrap(errors.Wrap(
err,
fmt.Sprintf("frame number: %d", p.FrameNumber),
), "verify")
), "verify: invalid prover join")
}
if !frames.First() || !frames.Valid() {
return false, errors.Wrap(errors.Wrap(
errors.New("not found"),
fmt.Sprintf("frame number: %d", p.FrameNumber),
), "verify")
), "verify: invalid prover join")
}
frame, err = frames.Value()
frames.Close()
@ -716,7 +881,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
return false, errors.Wrap(errors.Wrap(
err,
fmt.Sprintf("frame number: %d", p.FrameNumber),
), "verify")
), "verify: invalid prover join")
}
}
@ -742,10 +907,42 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
solutions,
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid multi proof"), "verify")
return false, errors.Wrap(errors.New("invalid multi proof"), "verify: invalid prover join")
}
for _, mt := range p.MergeTargets {
spentMergeBI, err := poseidon.HashBytes(slices.Concat(
[]byte("PROVER_JOIN_MERGE"),
mt.PublicKey,
))
if err != nil {
return false, errors.Wrap(err, "verify: invalid prover join")
}
spentFullAddr := [64]byte{}
copy(spentFullAddr[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
copy(spentFullAddr[32:], spentMergeBI.FillBytes(make([]byte, 32)))
v, err := p.hypergraph.GetVertex(spentFullAddr)
if err == nil && v != nil {
// Spent marker exists — check if consumed by a different prover
spentData, dataErr := p.hypergraph.GetVertexData(spentFullAddr)
if dataErr == nil && spentData != nil {
storedAddr, getErr := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"merge:SpentMerge",
"ProverAddress",
spentData,
)
if getErr == nil && len(storedAddr) == 32 &&
!bytes.Equal(storedAddr, address) {
// Consumed by a different prover — skip
continue
}
}
// Same prover or legacy empty — validate signature below
}
valid, err := p.keyManager.ValidateSignature(
mt.KeyType,
mt.PublicKey,
@ -754,27 +951,13 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
[]byte("PROVER_JOIN_MERGE"),
)
if err != nil || !valid {
return false, errors.Wrap(err, "verify")
}
spentMergeBI, err := poseidon.HashBytes(slices.Concat(
[]byte("PROVER_JOIN_MERGE"),
mt.PublicKey,
))
if err != nil {
return false, errors.Wrap(err, "verify")
}
// confirm this has not already been used
spentAddress := [64]byte{}
copy(spentAddress[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
copy(spentAddress[32:], spentMergeBI.FillBytes(make([]byte, 32)))
v, err := p.hypergraph.GetVertex(spentAddress)
if err == nil && v != nil {
return false, errors.Wrap(
errors.New("merge target already used"),
"verify",
fmt.Errorf(
"invalid merge target signature (key_type=%d, pub_key_len=%d, sig_len=%d, bls_pub_len=%d)",
mt.KeyType, len(mt.PublicKey), len(mt.Signature),
len(p.PublicKeySignatureBLS48581.PublicKey),
),
"verify: invalid prover join",
)
}
}
@ -798,7 +981,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
// Prover has been kicked for malicious behavior
return false, errors.Wrap(
errors.New("prover has been previously kicked"),
"verify",
"verify: invalid prover join",
)
}
}
@ -813,7 +996,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover join")
}
allocationAddress := allocationAddressBI.FillBytes(make([]byte, 32))
// Create composite address: GLOBAL_INTRINSIC_ADDRESS + prover address
@ -837,11 +1020,49 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
if err == nil && len(statusData) > 0 {
status := statusData[0]
if status != 4 {
// Prover is in some other state - cannot join
return false, errors.Wrap(
errors.New("prover already exists in non-left state"),
"verify",
)
// Check if the previous join/leave has implicitly expired
// (720 frames), making the prover effectively "left"
expired := false
if status == 0 {
// Joining: check if join expired
joinFrameBytes, jErr := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"allocation:ProverAllocation",
"JoinFrameNumber",
tree,
)
if jErr == nil && len(joinFrameBytes) == 8 {
joinFrame := binary.BigEndian.Uint64(joinFrameBytes)
if joinFrame >= token.FRAME_2_1_EXTENDED_ENROLL_END &&
frameNumber > joinFrame+720 {
expired = true
}
}
} else if status == 3 {
// Leaving: check if leave expired
leaveFrameBytes, lErr := p.rdfMultiprover.Get(
GLOBAL_RDF_SCHEMA,
"allocation:ProverAllocation",
"LeaveFrameNumber",
tree,
)
if lErr == nil && len(leaveFrameBytes) == 8 {
leaveFrame := binary.BigEndian.Uint64(leaveFrameBytes)
if frameNumber > leaveFrame+720 {
expired = true
}
}
}
if !expired {
return false, errors.Wrap(
fmt.Errorf(
"prover already exists in non-left state (status=%d, frame=%d)",
status, frameNumber,
),
"verify: invalid prover join",
)
}
}
}
}
@ -854,7 +1075,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
joinClone.PublicKeySignatureBls48581 = nil
joinMessage, err := joinClone.ToCanonicalBytes()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover join")
}
// Create the domain for the first signature
@ -865,7 +1086,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
)
joinDomain, err := poseidon.HashBytes(joinDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover join")
}
// Create the domain for the proof of possession
@ -880,7 +1101,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
joinDomain.FillBytes(make([]byte, 32)),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid prover join")
}
// Verify the proof of possession
@ -892,11 +1113,26 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
popDomain,
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid pop signature"), "verify")
return false, errors.Wrap(errors.New("invalid pop signature"), "verify: invalid prover join")
}
// Verify any merge signatures
// Verify any merge signatures (skip already-consumed targets)
for _, mt := range p.MergeTargets {
spentBI, err := poseidon.HashBytes(slices.Concat(
[]byte("PROVER_JOIN_MERGE"),
mt.PublicKey,
))
if err != nil {
return false, errors.Wrap(err, "verify: invalid prover join")
}
spentAddr := [64]byte{}
copy(spentAddr[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
copy(spentAddr[32:], spentBI.FillBytes(make([]byte, 32)))
v, vErr := p.hypergraph.GetVertex(spentAddr)
if vErr == nil && v != nil {
continue
}
valid, err := p.keyManager.ValidateSignature(
mt.KeyType,
mt.PublicKey,
@ -905,7 +1141,7 @@ func (p *ProverJoin) Verify(frameNumber uint64) (valid bool, err error) {
[]byte("PROVER_JOIN_MERGE"),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid merge signature"), "verify")
return false, errors.Wrap(errors.New("invalid merge signature"), "verify: invalid prover join")
}
}

View File

@ -393,7 +393,7 @@ func (p *ProverKick) Verify(frameNumber uint64) (bool, error) {
if !p.verifyEquivocation(p.KickedProverPublicKey) {
return false, errors.Wrap(
errors.New("no equivocation detected"),
"verify",
"verify: invalid prover kick",
)
}
@ -407,13 +407,13 @@ func (p *ProverKick) Verify(frameNumber uint64) (bool, error) {
return false, errors.Wrap(errors.Wrap(
err,
fmt.Sprintf("frame number: %d", p.FrameNumber),
), "verify")
), "verify: invalid prover kick")
}
if !frames.First() || !frames.Valid() {
return false, errors.Wrap(errors.Wrap(
errors.New("not found"),
fmt.Sprintf("frame number: %d", p.FrameNumber),
), "verify")
), "verify: invalid prover kick")
}
frame, err = frames.Value()
frames.Close()
@ -421,7 +421,7 @@ func (p *ProverKick) Verify(frameNumber uint64) (bool, error) {
return false, errors.Wrap(errors.Wrap(
err,
fmt.Sprintf("frame number: %d", p.FrameNumber),
), "verify")
), "verify: invalid prover kick")
}
}
@ -433,17 +433,17 @@ func (p *ProverKick) Verify(frameNumber uint64) (bool, error) {
p.TraversalProof,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover kick")
}
if !validTraversal || len(p.Proof) == 0 {
return false, errors.Wrap(errors.New("invalid multiproof"), "verify")
return false, errors.Wrap(errors.New("invalid multiproof"), "verify: invalid prover kick")
}
// Parse the multiproof
multiproof := p.hypergraph.GetProver().NewMultiproof()
if err := multiproof.FromBytes(p.Proof); err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover kick")
}
// Verify the proof against the tree
@ -459,10 +459,10 @@ func (p *ProverKick) Verify(frameNumber uint64) (bool, error) {
nil, // No type index needed for global intrinsic
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover kick")
}
if !valid {
return false, errors.Wrap(errors.New("invalid multiproof"), "verify")
return false, errors.Wrap(errors.New("invalid multiproof"), "verify: invalid prover kick")
}
return true, nil

View File

@ -363,7 +363,7 @@ func (p *ProverLeave) Verify(frameNumber uint64) (bool, error) {
)
leaveDomain, err := poseidon.HashBytes(leaveDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover leave")
}
_, err = p.hypergraph.GetVertex([64]byte(slices.Concat(
@ -371,7 +371,7 @@ func (p *ProverLeave) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover leave")
}
tree, err := p.hypergraph.GetVertexData([64]byte(slices.Concat(
@ -379,7 +379,7 @@ func (p *ProverLeave) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover leave")
}
pubkey, err := p.rdfMultiprover.Get(
@ -389,7 +389,7 @@ func (p *ProverLeave) Verify(frameNumber uint64) (bool, error) {
tree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover leave")
}
// Check that at least one allocation exists and is active
@ -431,7 +431,7 @@ func (p *ProverLeave) Verify(frameNumber uint64) (bool, error) {
if !hasActiveAllocation {
return false, errors.Wrap(
errors.New("no active allocations found for specified filters"),
"verify",
"verify: invalid prover leave",
)
}
@ -444,7 +444,7 @@ func (p *ProverLeave) Verify(frameNumber uint64) (bool, error) {
leaveDomain.Bytes(),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid prover leave")
}
return true, nil

View File

@ -341,7 +341,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
)
pauseDomain, err := poseidon.HashBytes(pauseDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover pause")
}
_, err = p.hypergraph.GetVertex([64]byte(slices.Concat(
@ -349,7 +349,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover pause")
}
tree, err := p.hypergraph.GetVertexData([64]byte(slices.Concat(
@ -357,7 +357,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover pause")
}
pubkey, err := p.rdfMultiprover.Get(
@ -367,7 +367,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
tree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover pause")
}
// Calculate allocation address to verify it exists and is active
@ -375,7 +375,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
slices.Concat([]byte("PROVER_ALLOCATION"), pubkey, p.Filter),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover pause")
}
allocationAddress := allocationAddressBI.FillBytes(make([]byte, 32))
allocationFullAddress := [64]byte{}
@ -387,7 +387,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
if err != nil || allocationTree == nil {
return false, errors.Wrap(
errors.New("allocation not found"),
"verify",
"verify: invalid prover pause",
)
}
@ -399,7 +399,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover pause")
}
status := uint8(0)
@ -411,7 +411,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
if status != 1 {
return false, errors.Wrap(
errors.New("can only pause when allocation is active"),
"verify",
"verify: invalid prover pause",
)
}
@ -424,7 +424,7 @@ func (p *ProverPause) Verify(frameNumber uint64) (bool, error) {
pauseDomain.Bytes(),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid prover pause")
}
return true, nil

View File

@ -392,7 +392,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
)
rejectDomain, err := poseidon.HashBytes(rejectDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover reject")
}
_, err = p.hypergraph.GetVertex([64]byte(slices.Concat(
@ -400,7 +400,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover reject")
}
tree, err := p.hypergraph.GetVertexData([64]byte(slices.Concat(
@ -408,7 +408,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover reject")
}
pubkey, err := p.rdfMultiprover.Get(
@ -418,7 +418,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
tree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover reject")
}
for _, filter := range p.Filters {
@ -427,7 +427,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
slices.Concat([]byte("PROVER_ALLOCATION"), pubkey, filter),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover reject")
}
allocationAddress := allocationAddressBI.FillBytes(make([]byte, 32))
allocationFullAddress := [64]byte{}
@ -439,7 +439,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
if err != nil || allocationTree == nil {
return false, errors.Wrap(
errors.New("allocation not found"),
"verify",
"verify: invalid prover reject",
)
}
@ -451,7 +451,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover reject")
}
status := uint8(0)
@ -463,7 +463,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
if status != 0 && status != 3 {
return false, errors.Wrap(
errors.New("invalid allocation state for rejection"),
"verify",
"verify: invalid prover reject",
)
}
@ -477,7 +477,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil || len(joinFrameBytes) != 8 {
return false, errors.Wrap(errors.New("missing join frame"), "verify")
return false, errors.Wrap(errors.New("missing join frame"), "verify: invalid prover reject")
}
joinFrame := binary.BigEndian.Uint64(joinFrameBytes)
@ -488,7 +488,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
if framesSinceJoin > 720 {
return false, errors.Wrap(
errors.New("join already implicitly rejected after 720 frames"),
"verify",
"verify: invalid prover reject",
)
}
}
@ -502,7 +502,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil || len(leaveFrameBytes) != 8 {
return false, errors.Wrap(errors.New("missing leave frame"), "verify")
return false, errors.Wrap(errors.New("missing leave frame"), "verify: invalid prover reject")
}
leaveFrame := binary.BigEndian.Uint64(leaveFrameBytes)
@ -510,13 +510,13 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
if framesSinceLeave < 360 {
return false, errors.Wrap(
errors.New("must wait 360 frames after leave to reject"),
"verify",
"verify: invalid prover reject",
)
}
if framesSinceLeave > 720 {
return false, errors.Wrap(
errors.New("leave already implicitly confirmed after 720 frames"),
"verify",
"verify: invalid prover reject",
)
}
}
@ -531,7 +531,7 @@ func (p *ProverReject) Verify(frameNumber uint64) (bool, error) {
rejectDomain.Bytes(),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid prover reject")
}
return true, nil

View File

@ -340,7 +340,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
)
resumeDomain, err := poseidon.HashBytes(resumeDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover resume")
}
_, err = p.hypergraph.GetVertex([64]byte(slices.Concat(
@ -348,7 +348,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover resume")
}
tree, err := p.hypergraph.GetVertexData([64]byte(slices.Concat(
@ -356,7 +356,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
p.PublicKeySignatureBLS48581.Address,
)))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover resume")
}
pubkey, err := p.rdfMultiprover.Get(
@ -366,7 +366,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
tree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover resume")
}
// Calculate allocation address to verify it exists and is paused
@ -374,7 +374,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
slices.Concat([]byte("PROVER_ALLOCATION"), pubkey, p.Filter),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover resume")
}
allocationAddress := allocationAddressBI.FillBytes(make([]byte, 32))
allocationFullAddress := [64]byte{}
@ -386,7 +386,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
if err != nil || allocationTree == nil {
return false, errors.Wrap(
errors.New("allocation not found"),
"verify",
"verify: invalid prover resume",
)
}
@ -398,7 +398,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover resume")
}
status := uint8(0)
@ -410,7 +410,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
if status != 2 {
return false, errors.Wrap(
errors.New("can only resume when allocation is paused"),
"verify",
"verify: invalid prover resume",
)
}
@ -422,7 +422,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
allocationTree,
)
if err != nil || len(pauseFrameBytes) != 8 {
return false, errors.Wrap(errors.New("missing pause frame"), "verify")
return false, errors.Wrap(errors.New("missing pause frame"), "verify: invalid prover resume")
}
pauseFrame := binary.BigEndian.Uint64(pauseFrameBytes)
@ -431,7 +431,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
if framesSincePause > 360 {
return false, errors.Wrap(
errors.New("pause timeout exceeded, allocation has implicitly left"),
"verify",
"verify: invalid prover resume",
)
}
@ -444,7 +444,7 @@ func (p *ProverResume) Verify(frameNumber uint64) (bool, error) {
resumeDomain.Bytes(),
)
if err != nil || !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid prover resume")
}
return true, nil

View File

@ -2,6 +2,7 @@ package global
import (
"encoding/binary"
"fmt"
"math/big"
"slices"
@ -235,23 +236,38 @@ func (p *ProverSeniorityMerge) Prove(frameNumber uint64) error {
}
// Sign merge target signatures
blsPublicKey := signingKey.Public().([]byte)
for _, mt := range p.MergeTargets {
if mt.signer != nil {
mt.Signature, err = mt.signer.SignWithDomain(
signingKey.Public().([]byte),
blsPublicKey,
[]byte("PROVER_SENIORITY_MERGE"),
)
if err != nil {
return errors.Wrap(err, "prove")
}
// Self-verify: catch key material issues before publishing
valid, verifyErr := p.keyManager.ValidateSignature(
mt.KeyType,
mt.PublicKey,
blsPublicKey,
mt.Signature,
[]byte("PROVER_SENIORITY_MERGE"),
)
if verifyErr != nil || !valid {
return fmt.Errorf(
"prove: merge target self-verify failed "+
"(key_type=%d, pub_key_len=%d, sig_len=%d, bls_pub_len=%d, err=%v)",
mt.KeyType, len(mt.PublicKey), len(mt.Signature),
len(blsPublicKey), verifyErr,
)
}
}
}
// Get the public key
pubKey := signingKey.Public()
// Compute address from public key
addressBI, err := poseidon.HashBytes(pubKey.([]byte))
addressBI, err := poseidon.HashBytes(blsPublicKey)
if err != nil {
return errors.Wrap(err, "prove")
}
@ -333,34 +349,34 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
if p.hypergraph == nil {
return false, errors.Wrap(
errors.New("hypergraph not initialized"),
"verify",
"verify: invalid prover seniority merge",
)
}
if p.keyManager == nil {
return false, errors.Wrap(
errors.New("key manager not initialized"),
"verify",
"verify: invalid prover seniority merge",
)
}
if p.rdfMultiprover == nil {
return false, errors.Wrap(
errors.New("rdf multiprover not initialized"),
"verify",
"verify: invalid prover seniority merge",
)
}
if len(p.MergeTargets) == 0 {
return false, errors.Wrap(errors.New("no merge targets"), "verify")
return false, errors.Wrap(errors.New("no merge targets"), "verify: invalid prover seniority merge")
}
if len(p.PublicKeySignatureBLS48581.Address) != 32 {
return false, errors.Wrap(
errors.New("invalid addressed prover address"),
"verify",
"verify: invalid prover seniority merge",
)
}
// Disallow too old of a request
if p.FrameNumber+10 < frameNumber {
return false, errors.Wrap(errors.New("outdated request"), "verify")
return false, errors.Wrap(errors.New("outdated request"), "verify: invalid prover seniority merge")
}
// Resolve the prover vertex
@ -370,7 +386,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
vertexData, err := p.hypergraph.GetVertexData(proverFullAddr)
if err != nil || vertexData == nil {
return false, errors.Wrap(errors.New("prover not found"), "verify")
return false, errors.Wrap(errors.New("prover not found"), "verify: invalid prover seniority merge")
}
// Fetch the registered PublicKey
@ -381,19 +397,19 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
vertexData,
)
if err != nil || len(pubKeyBytes) == 0 {
return false, errors.Wrap(errors.New("prover public key missing"), "verify")
return false, errors.Wrap(errors.New("prover public key missing"), "verify: invalid prover seniority merge")
}
// Check poseidon(pubKey) == addressed.Address
addrBI, err := poseidon.HashBytes(pubKeyBytes)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover seniority merge")
}
addrCheck := addrBI.FillBytes(make([]byte, 32))
if !slices.Equal(addrCheck, p.PublicKeySignatureBLS48581.Address) {
return false, errors.Wrap(
errors.New("address does not match registered pubkey"),
"verify",
"verify: invalid prover seniority merge",
)
}
@ -408,7 +424,10 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
[]byte("PROVER_SENIORITY_MERGE"),
)
if err != nil || !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(
errors.New("invalid merge target signature"),
"verify: invalid prover seniority merge",
)
}
// Confirm this merge target has not already been used
@ -417,7 +436,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
mt.PublicKey,
))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover seniority merge")
}
spentAddress := [64]byte{}
@ -428,7 +447,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
if err == nil && v != nil {
return false, errors.Wrap(
errors.New("merge target already used"),
"verify",
"verify: invalid prover seniority merge",
)
}
@ -438,7 +457,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
mt.PublicKey,
))
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover seniority merge")
}
joinSpentAddress := [64]byte{}
@ -449,7 +468,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
if err == nil && v != nil {
return false, errors.Wrap(
errors.New("merge target already used in join"),
"verify",
"verify: invalid prover seniority merge",
)
}
@ -457,12 +476,12 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
if mt.KeyType == crypto.KeyTypeEd448 {
pk, err := pcrypto.UnmarshalEd448PublicKey(mt.PublicKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover seniority merge")
}
peerId, err := peer.IDFromPublicKey(pk)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover seniority merge")
}
peerIds = append(peerIds, peerId.String())
@ -494,7 +513,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
if mergeSeniority <= existingSeniority {
return false, errors.Wrap(
errors.New("merge would not increase seniority"),
"verify",
"verify: invalid prover seniority merge",
)
}
@ -505,7 +524,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
)
mergeDomain, err := poseidon.HashBytes(mergeDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover seniority merge")
}
// Recreate the message that was signed
@ -523,7 +542,7 @@ func (p *ProverSeniorityMerge) Verify(frameNumber uint64) (bool, error) {
mergeDomain.Bytes(),
)
if err != nil || !ok {
return false, errors.Wrap(errors.New("invalid seniority merge signature"), "verify")
return false, errors.Wrap(errors.New("invalid seniority merge signature"), "verify: invalid prover seniority merge")
}
return true, nil

View File

@ -96,11 +96,11 @@ func (p *ProverShardUpdate) Prove(uint64) error {
func (p *ProverShardUpdate) Verify(frameNumber uint64) (bool, error) {
_, err := p.buildContext()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover shard update")
}
if frameNumber != p.FrameHeader.FrameNumber+1 {
return false, errors.Wrap(errors.New("invalid update"), "verify")
return false, errors.Wrap(errors.New("invalid update"), "verify: invalid prover shard update")
}
return true, nil

View File

@ -307,34 +307,34 @@ func (p *ProverUpdate) Verify(frameNumber uint64) (bool, error) {
if p.hypergraph == nil {
return false, errors.Wrap(
errors.New("hypergraph not initialized"),
"verify",
"verify: invalid prover update",
)
}
if p.keyManager == nil {
return false, errors.Wrap(
errors.New("key manager not initialized"),
"verify",
"verify: invalid prover update",
)
}
if p.rdfMultiprover == nil {
return false, errors.Wrap(
errors.New("rdf multiprover not initialized"),
"verify",
"verify: invalid prover update",
)
}
if p.PublicKeySignatureBLS48581 == nil {
return false, errors.Wrap(errors.New("missing signature"), "verify")
return false, errors.Wrap(errors.New("missing signature"), "verify: invalid prover update")
}
if len(p.DelegateAddress) != 32 {
return false, errors.Wrap(
errors.New("missing delegate address"),
"verify",
"verify: invalid prover update",
)
}
if len(p.PublicKeySignatureBLS48581.Address) != 32 {
return false, errors.Wrap(
errors.New("invalid addressed prover address"),
"verify",
"verify: invalid prover update",
)
}
@ -345,7 +345,7 @@ func (p *ProverUpdate) Verify(frameNumber uint64) (bool, error) {
vertexData, err := p.hypergraph.GetVertexData(proverFullAddr)
if err != nil || vertexData == nil {
return false, errors.Wrap(errors.New("prover not found"), "verify")
return false, errors.Wrap(errors.New("prover not found"), "verify: invalid prover update")
}
// Fetch the registered PublicKey to verify the address binding and the
@ -357,20 +357,20 @@ func (p *ProverUpdate) Verify(frameNumber uint64) (bool, error) {
vertexData,
)
if err != nil || len(pubKeyBytes) == 0 {
return false, errors.Wrap(errors.New("prover public key missing"), "verify")
return false, errors.Wrap(errors.New("prover public key missing"), "verify: invalid prover update")
}
pubKey := pubKeyBytes
// Check poseidon(pubKey) == addressed.Address
addrBI, err := poseidon.HashBytes(pubKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover update")
}
addrCheck := addrBI.FillBytes(make([]byte, 32))
if !slices.Equal(addrCheck, p.PublicKeySignatureBLS48581.Address) {
return false, errors.Wrap(
errors.New("address does not match registered pubkey"),
"verify",
"verify: invalid prover update",
)
}
@ -381,7 +381,7 @@ func (p *ProverUpdate) Verify(frameNumber uint64) (bool, error) {
)
updateDomain, err := poseidon.HashBytes(updateDomainPreimage)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid prover update")
}
// Validate signature over the new DelegateAddress
@ -394,13 +394,13 @@ func (p *ProverUpdate) Verify(frameNumber uint64) (bool, error) {
updateDomain.Bytes(),
)
if err != nil || !ok {
return false, errors.Wrap(errors.New("invalid update signature"), "verify")
return false, errors.Wrap(errors.New("invalid update signature"), "verify: invalid prover update")
}
if len(p.DelegateAddress) != 32 {
return false, errors.Wrap(
errors.New("delegate address must be 32 bytes"),
"verify",
"verify: invalid prover update",
)
}

View File

@ -418,6 +418,30 @@ func GlobalRequestFromBytes(
)
}
// ToRequestBytes encodes the ShardSplitOp as a canonical protobuf
// MessageRequest payload.
func (op *ShardSplitOp) ToRequestBytes() ([]byte, error) {
	msg := &protobufs.MessageRequest{
		Request: &protobufs.MessageRequest_ShardSplit{
			ShardSplit: op.ToProtobuf(),
		},
	}
	return msg.ToCanonicalBytes()
}
// ToRequestBytes serializes a ShardMergeOp to MessageRequest bytes using
// protobuf
func (op *ShardMergeOp) ToRequestBytes() ([]byte, error) {
pb := op.ToProtobuf()
req := &protobufs.MessageRequest{
Request: &protobufs.MessageRequest_ShardMerge{
ShardMerge: pb,
},
}
return req.ToCanonicalBytes()
}
// ToBytes serializes a ProverUpdate to bytes using protobuf
func (p *ProverUpdate) ToBytes() ([]byte, error) {
pb := p.ToProtobuf()

View File

@ -0,0 +1,254 @@
package global
import (
"bytes"
"math/big"
"slices"
"github.com/iden3/go-iden3-crypto/poseidon"
"github.com/pkg/errors"
"source.quilibrium.com/quilibrium/monorepo/types/consensus"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/execution/intrinsics"
"source.quilibrium.com/quilibrium/monorepo/types/execution/state"
"source.quilibrium.com/quilibrium/monorepo/types/hypergraph"
"source.quilibrium.com/quilibrium/monorepo/types/keys"
"source.quilibrium.com/quilibrium/monorepo/types/store"
)
// ShardMergeOp is the global intrinsic operation that collapses a set of
// sibling sub-shards back into their shared parent shard once the shards'
// prover counts fall below the merge threshold (see Verify).
type ShardMergeOp struct {
	// ShardAddresses are the sub-shard addresses (each longer than 32
	// bytes) to be removed by the merge.
	ShardAddresses [][]byte
	// ParentAddress is the 32-byte prefix every entry of ShardAddresses
	// must share.
	ParentAddress []byte
	// FrameNumber is the frame at which the op was proven; it is part of
	// the signed payload.
	FrameNumber uint64
	// PublicKeySignatureBLS48581 binds the request to a registered global
	// prover (populated by Prove, checked by Verify).
	PublicKeySignatureBLS48581 BLS48581AddressedSignature
	// NOTE(review): hypergraph is declared but never read or set in this
	// file (NewShardMergeOp does not populate it) — confirm it is needed.
	hypergraph     hypergraph.Hypergraph
	keyManager     keys.KeyManager
	shardsStore    store.ShardsStore
	proverRegistry consensus.ProverRegistry
}
// NewShardMergeOp constructs a ShardMergeOp over the given sub-shard
// addresses and their shared parent address. The frame number and the
// addressed signature are filled in later by Prove.
func NewShardMergeOp(
	shardAddresses [][]byte,
	parentAddress []byte,
	keyManager keys.KeyManager,
	shardsStore store.ShardsStore,
	proverRegistry consensus.ProverRegistry,
) *ShardMergeOp {
	op := &ShardMergeOp{}
	op.ShardAddresses = shardAddresses
	op.ParentAddress = parentAddress
	op.keyManager = keyManager
	op.shardsStore = shardsStore
	op.proverRegistry = proverRegistry
	return op
}
// GetCost reports the execution cost of a shard merge; merges are free.
func (op *ShardMergeOp) GetCost() (*big.Int, error) {
	return new(big.Int), nil
}
// Verify checks that a shard merge request is structurally valid, signed
// by an active global prover, and that every target shard is below the
// merge eligibility threshold. It returns (true, nil) only when all
// checks pass.
//
// NOTE(review): unlike sibling global ops in this package, Verify never
// compares op.FrameNumber against the frameNumber argument, so stale
// merge requests are not rejected here — confirm whether an "outdated
// request" check was intended.
func (op *ShardMergeOp) Verify(frameNumber uint64) (bool, error) {
	if op.proverRegistry == nil {
		return false, errors.New("prover registry not initialized")
	}
	// Validate shard addresses
	// A merge must cover between 2 and 8 sibling shards.
	if len(op.ShardAddresses) < 2 || len(op.ShardAddresses) > 8 {
		return false, errors.New("shard_addresses must have 2-8 entries")
	}
	if len(op.ParentAddress) != 32 {
		return false, errors.New("parent_address must be 32 bytes")
	}
	for _, addr := range op.ShardAddresses {
		// Base shards are exactly 32 bytes; only sub-shards (parent plus a
		// path suffix) are mergeable.
		if len(addr) <= 32 {
			return false, errors.New(
				"cannot merge base shards (must be > 32 bytes)",
			)
		}
		// Every merged shard must descend from the declared parent.
		if !bytes.HasPrefix(addr, op.ParentAddress) {
			return false, errors.New(
				"all shard addresses must share the parent address prefix",
			)
		}
	}
	// Look up the public key from the prover registry using the address
	address := op.PublicKeySignatureBLS48581.Address
	if len(address) != 32 {
		return false, errors.New("invalid address length")
	}
	info, err := op.proverRegistry.GetProverInfo(address)
	if err != nil || info == nil {
		return false, errors.New("signer is not a registered prover")
	}
	// The signer must hold an active allocation on the global shard
	// (a nil ConfirmationFilter denotes the global allocation).
	hasGlobal := false
	for _, alloc := range info.Allocations {
		if alloc.ConfirmationFilter == nil &&
			alloc.Status == consensus.ProverStatusActive {
			hasGlobal = true
			break
		}
	}
	if !hasGlobal {
		return false, errors.New("signer is not an active global prover")
	}
	pubKey := info.PublicKey
	// Verify BLS signature using the looked-up public key
	// Signed payload: op.FrameNumber (8 bytes, big-endian) || ParentAddress,
	// domain-separated by poseidon(GLOBAL_INTRINSIC_ADDRESS || "SHARD_MERGE").
	signedData := slices.Concat(
		big.NewInt(int64(op.FrameNumber)).FillBytes(make([]byte, 8)),
		op.ParentAddress,
	)
	mergeDomainPreimage := slices.Concat(
		intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
		[]byte("SHARD_MERGE"),
	)
	mergeDomain, err := poseidon.HashBytes(mergeDomainPreimage)
	if err != nil {
		return false, errors.Wrap(err, "verify: invalid shard merge")
	}
	ok, err := op.keyManager.ValidateSignature(
		crypto.KeyTypeBLS48581G1,
		pubKey,
		signedData,
		op.PublicKeySignatureBLS48581.Signature,
		mergeDomain.Bytes(),
	)
	if err != nil || !ok {
		return false, errors.Wrap(
			errors.New("invalid BLS signature"),
			"verify: invalid shard merge",
		)
	}
	// Verify all shards have fewer provers than minimum threshold
	globalProvers, err := op.proverRegistry.GetActiveProvers(nil)
	if err != nil {
		return false, errors.Wrap(err, "verify: invalid shard merge: min provers")
	}
	// Merge threshold: two thirds of the active global prover count,
	// capped at 6.
	minP := uint64(len(globalProvers)) * 2 / 3
	if minP > 6 {
		minP = 6
	}
	for _, addr := range op.ShardAddresses {
		count, err := op.proverRegistry.GetProverCount(addr)
		if err != nil {
			return false, errors.Wrap(err, "verify: invalid shard merge: prover count")
		}
		// A shard at or above the threshold still has enough provers and
		// must not be merged away.
		if uint64(count) >= minP {
			return false, errors.Errorf(
				"shard has %d provers (min threshold %d), merge not eligible",
				count, minP,
			)
		}
	}
	return true, nil
}
// Materialize applies the merge by deleting each sub-shard from the
// shards store. The input state s is returned unchanged; the only side
// effect is on the shards store.
func (op *ShardMergeOp) Materialize(
	frameNumber uint64,
	s state.State,
) (state.State, error) {
	if op.shardsStore == nil {
		return nil, errors.New("shards store not initialized")
	}
	// Remove each sub-shard address from the shards store
	for _, shardAddr := range op.ShardAddresses {
		// Extract L2 (first 32 bytes) and Path (remaining bytes as uint32s)
		shardKey := shardAddr[:32]
		path := make([]uint32, 0, len(shardAddr)-32)
		for _, b := range shardAddr[32:] {
			path = append(path, uint32(b))
		}
		// NOTE(review): a nil transaction is passed here, so deletes are
		// not batched atomically across shards — confirm DeleteAppShard
		// accepts a nil txn and that partial failure is acceptable.
		err := op.shardsStore.DeleteAppShard(nil, shardKey, path)
		if err != nil {
			return nil, errors.Wrap(err, "materialize shard merge")
		}
	}
	return s, nil
}
// Prove signs the merge request with the local prover key and records the
// frame number and addressed signature on the op.
func (op *ShardMergeOp) Prove(frameNumber uint64) error {
	if op.keyManager == nil {
		return errors.New("key manager not initialized")
	}
	key, err := op.keyManager.GetSigningKey("q-prover-key")
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	publicKey := key.Public().([]byte)
	// The prover address is poseidon(publicKey), left-padded to 32 bytes.
	addrBI, err := poseidon.HashBytes(publicKey)
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	proverAddress := addrBI.FillBytes(make([]byte, 32))
	// Domain separator: poseidon(GLOBAL_INTRINSIC_ADDRESS || "SHARD_MERGE").
	domain, err := poseidon.HashBytes(slices.Concat(
		intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
		[]byte("SHARD_MERGE"),
	))
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	// Signed payload: frame number (8 bytes, big-endian) || parent address.
	message := slices.Concat(
		big.NewInt(int64(frameNumber)).FillBytes(make([]byte, 8)),
		op.ParentAddress,
	)
	sig, err := key.SignWithDomain(message, domain.Bytes())
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	op.FrameNumber = frameNumber
	op.PublicKeySignatureBLS48581 = BLS48581AddressedSignature{
		Address:   proverAddress,
		Signature: sig,
	}
	return nil
}
// GetReadAddresses reports the state addresses this op reads; shard
// merges declare no read set.
func (op *ShardMergeOp) GetReadAddresses(
	frameNumber uint64,
) ([][]byte, error) {
	var reads [][]byte
	return reads, nil
}
// GetWriteAddresses reports the state addresses the merge mutates: one
// 64-byte address per removed sub-shard, prefixed with the global
// intrinsic address.
func (op *ShardMergeOp) GetWriteAddresses(
	frameNumber uint64,
) ([][]byte, error) {
	writes := make([][]byte, 0, len(op.ShardAddresses))
	for _, shardAddr := range op.ShardAddresses {
		var full [64]byte
		copy(full[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
		copy(full[32:], shardAddr)
		writes = append(writes, full[:])
	}
	return writes, nil
}
// Compile-time assertion that ShardMergeOp satisfies IntrinsicOperation.
var _ intrinsics.IntrinsicOperation = (*ShardMergeOp)(nil)

View File

@ -0,0 +1,543 @@
package global_test
import (
"math/big"
"slices"
"testing"
"github.com/iden3/go-iden3-crypto/poseidon"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/global"
"source.quilibrium.com/quilibrium/monorepo/types/consensus"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/execution/intrinsics"
"source.quilibrium.com/quilibrium/monorepo/types/mocks"
)
// TestShardMergeOp_Prove checks that Prove derives the prover address as
// poseidon(pubKey), signs frameNumber||parentAddress under the
// SHARD_MERGE domain, and records frame number plus addressed signature
// on the op.
func TestShardMergeOp_Prove(t *testing.T) {
	mockKeyManager := new(mocks.MockKeyManager)
	mockSigner := new(mocks.MockBLSSigner)
	// Deterministic 32-byte parent address.
	parentAddress := make([]byte, 32)
	for i := range parentAddress {
		parentAddress[i] = byte(i % 256)
	}
	// Two sibling sub-shards under the parent prefix.
	shardAddresses := [][]byte{
		append(slices.Clone(parentAddress), 0x00),
		append(slices.Clone(parentAddress), 0x01),
	}
	frameNumber := uint64(12345)
	// 585-byte BLS48-581 public key fixture.
	pubKey := make([]byte, 585)
	for i := range pubKey {
		pubKey[i] = byte(i % 256)
	}
	// Derive expected 32-byte address from pubKey
	addressBI, err := poseidon.HashBytes(pubKey)
	require.NoError(t, err)
	expectedAddress := addressBI.FillBytes(make([]byte, 32))
	// Expected domain: poseidon(GLOBAL_INTRINSIC_ADDRESS || "SHARD_MERGE").
	mergeDomainPreimage := slices.Concat(
		intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
		[]byte("SHARD_MERGE"),
	)
	mergeDomain, err := poseidon.HashBytes(mergeDomainPreimage)
	require.NoError(t, err)
	// Expected signed payload: frame number (8 bytes, big-endian) || parent.
	expectedMessage := slices.Concat(
		big.NewInt(int64(frameNumber)).FillBytes(make([]byte, 8)),
		parentAddress,
	)
	// The mock signer pins the exact message and domain Prove must sign.
	mockSigner.On("Public").Return(pubKey)
	mockSigner.On("SignWithDomain", expectedMessage, mergeDomain.Bytes()).
		Return([]byte("signature"), nil)
	mockKeyManager.On("GetSigningKey", "q-prover-key").Return(mockSigner, nil)
	op := global.NewShardMergeOp(
		shardAddresses,
		parentAddress,
		mockKeyManager,
		nil,
		nil,
	)
	err = op.Prove(frameNumber)
	require.NoError(t, err)
	assert.Equal(t, expectedAddress, op.PublicKeySignatureBLS48581.Address)
	assert.Equal(t, []byte("signature"), op.PublicKeySignatureBLS48581.Signature)
	assert.Equal(t, frameNumber, op.FrameNumber)
	mockSigner.AssertExpectations(t)
	mockKeyManager.AssertExpectations(t)
}
func TestShardMergeOp_Verify(t *testing.T) {
t.Run("prover registry required", func(t *testing.T) {
parentAddress := make([]byte, 32)
op := global.NewShardMergeOp(
[][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
},
parentAddress,
nil,
nil,
nil, // no registry
)
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "prover registry not initialized")
})
t.Run("too few shard addresses", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
op := global.NewShardMergeOp(
[][]byte{append(slices.Clone(parentAddress), 0x00)}, // only 1
parentAddress,
nil,
nil,
mockRegistry,
)
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: make([]byte, 32),
}
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "2-8")
})
t.Run("parent address wrong length", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
op := global.NewShardMergeOp(
[][]byte{make([]byte, 34), make([]byte, 34)},
make([]byte, 31), // wrong length
nil,
nil,
mockRegistry,
)
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: make([]byte, 32),
}
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "32 bytes")
})
t.Run("base shard cannot be merged", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
op := global.NewShardMergeOp(
[][]byte{
make([]byte, 32), // exactly 32 bytes = base shard
append(slices.Clone(parentAddress), 0x01),
},
parentAddress,
nil,
nil,
mockRegistry,
)
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: make([]byte, 32),
}
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "base shards")
})
t.Run("shard does not share parent prefix", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
badShard := make([]byte, 33)
badShard[0] = 0xFF
op := global.NewShardMergeOp(
[][]byte{
append(slices.Clone(parentAddress), 0x00),
badShard,
},
parentAddress,
nil,
nil,
mockRegistry,
)
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: make([]byte, 32),
}
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "parent address prefix")
})
t.Run("invalid address length", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
op := global.NewShardMergeOp(
[][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
},
parentAddress,
nil,
nil,
mockRegistry,
)
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: make([]byte, 585), // wrong length
}
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "invalid address length")
})
t.Run("signer not a registered prover", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
shardAddresses := [][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
}
address := make([]byte, 32)
address[0] = 0x42
mockRegistry.On("GetProverInfo", address).Return(nil, nil)
op := global.NewShardMergeOp(
shardAddresses, parentAddress, nil, nil, mockRegistry,
)
op.FrameNumber = 100
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: address,
Signature: make([]byte, 74),
}
valid, err := op.Verify(100)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "not a registered prover")
})
t.Run("signer not a global prover", func(t *testing.T) {
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
shardAddresses := [][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
}
address := make([]byte, 32)
address[0] = 0x42
mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
Allocations: []consensus.ProverAllocationInfo{
{
ConfirmationFilter: []byte("some-app-shard"),
Status: consensus.ProverStatusActive,
},
},
}, nil)
op := global.NewShardMergeOp(
shardAddresses, parentAddress, nil, nil, mockRegistry,
)
op.FrameNumber = 100
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: address,
Signature: make([]byte, 74),
}
valid, err := op.Verify(100)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "not an active global prover")
})
t.Run("invalid signature fails", func(t *testing.T) {
mockKeyManager := new(mocks.MockKeyManager)
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
shardAddresses := [][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
}
pubKey := make([]byte, 585)
address := make([]byte, 32)
address[0] = 0x42
mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
PublicKey: pubKey,
Allocations: []consensus.ProverAllocationInfo{
{
ConfirmationFilter: nil,
Status: consensus.ProverStatusActive,
},
},
}, nil)
mergeDomainPreimage := slices.Concat(
intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
[]byte("SHARD_MERGE"),
)
mergeDomain, err := poseidon.HashBytes(mergeDomainPreimage)
require.NoError(t, err)
mockKeyManager.On("ValidateSignature",
crypto.KeyTypeBLS48581G1,
pubKey,
mock.Anything,
[]byte("bad-sig"),
mergeDomain.Bytes(),
).Return(false, nil)
op := global.NewShardMergeOp(
shardAddresses, parentAddress, mockKeyManager, nil, mockRegistry,
)
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: address,
Signature: []byte("bad-sig"),
}
valid, err := op.Verify(0)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "invalid BLS signature")
})
t.Run("shard prover count above merge threshold", func(t *testing.T) {
mockKeyManager := new(mocks.MockKeyManager)
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
shardAddresses := [][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
}
pubKey := make([]byte, 585)
for i := range pubKey {
pubKey[i] = byte(i % 256)
}
address := make([]byte, 32)
address[0] = 0x42
mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
PublicKey: pubKey,
Allocations: []consensus.ProverAllocationInfo{
{
ConfirmationFilter: nil,
Status: consensus.ProverStatusActive,
},
},
}, nil)
setupMergeSignatureValidation(mockKeyManager, pubKey)
// 10 global provers -> minP = 10*2/3 = 6
globalProvers := make([]*consensus.ProverInfo, 10)
mockRegistry.On("GetActiveProvers", []byte(nil)).Return(globalProvers, nil)
// First shard has 10 provers (>= minP=6), merge not eligible
mockRegistry.On("GetProverCount", shardAddresses[0]).Return(10, nil)
op := global.NewShardMergeOp(
shardAddresses, parentAddress, mockKeyManager, nil, mockRegistry,
)
op.FrameNumber = 100
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: address,
Signature: make([]byte, 74),
}
valid, err := op.Verify(100)
require.Error(t, err)
assert.False(t, valid)
assert.Contains(t, err.Error(), "merge not eligible")
})
t.Run("valid merge with eligibility checks", func(t *testing.T) {
mockKeyManager := new(mocks.MockKeyManager)
mockRegistry := new(mocks.MockProverRegistry)
parentAddress := make([]byte, 32)
for i := range parentAddress {
parentAddress[i] = byte(i % 256)
}
shardAddresses := [][]byte{
append(slices.Clone(parentAddress), 0x00),
append(slices.Clone(parentAddress), 0x01),
}
pubKey := make([]byte, 585)
for i := range pubKey {
pubKey[i] = byte(i % 256)
}
address := make([]byte, 32)
address[0] = 0x42
mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
PublicKey: pubKey,
Allocations: []consensus.ProverAllocationInfo{
{
ConfirmationFilter: nil,
Status: consensus.ProverStatusActive,
},
},
}, nil)
setupMergeSignatureValidation(mockKeyManager, pubKey)
// 10 global provers -> minP = 10*2/3 = 6
globalProvers := make([]*consensus.ProverInfo, 10)
mockRegistry.On("GetActiveProvers", []byte(nil)).Return(globalProvers, nil)
// Both shards have 2 provers (< minP=6), merge eligible
mockRegistry.On("GetProverCount", shardAddresses[0]).Return(2, nil)
mockRegistry.On("GetProverCount", shardAddresses[1]).Return(2, nil)
op := global.NewShardMergeOp(
shardAddresses, parentAddress, mockKeyManager, nil, mockRegistry,
)
op.FrameNumber = 100
op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
Address: address,
Signature: make([]byte, 74),
}
valid, err := op.Verify(100)
require.NoError(t, err)
assert.True(t, valid)
})
}
// TestShardMergeOp_Materialize verifies that materializing a merge deletes
// every sub-shard from the shards store, and that a missing store is
// reported as an error.
func TestShardMergeOp_Materialize(t *testing.T) {
	t.Run("deletes sub-shards", func(t *testing.T) {
		mockShardsStore := new(mocks.MockShardsStore)
		parentAddress := make([]byte, 32)
		for i := range parentAddress {
			parentAddress[i] = byte(i % 256)
		}
		shardAddresses := [][]byte{
			append(slices.Clone(parentAddress), 0x00),
			append(slices.Clone(parentAddress), 0x01),
		}
		// Each sub-shard must be deleted exactly once: the key is the
		// 32-byte L2 prefix, the path is the remaining bytes widened to
		// uint32.
		for _, shard := range shardAddresses {
			shardKey := slices.Clone(shard[:32])
			var path []uint32
			for _, segment := range shard[32:] {
				path = append(path, uint32(segment))
			}
			mockShardsStore.On("DeleteAppShard",
				mock.Anything, // txn
				shardKey,
				path,
			).Return(nil).Once()
		}
		op := global.NewShardMergeOp(
			shardAddresses, parentAddress, nil, mockShardsStore, nil,
		)
		newState, err := op.Materialize(100, nil)
		require.NoError(t, err)
		assert.Nil(t, newState)
		mockShardsStore.AssertExpectations(t)
	})
	t.Run("shardsStore nil returns error", func(t *testing.T) {
		op := global.NewShardMergeOp(
			[][]byte{make([]byte, 33), make([]byte, 33)},
			make([]byte, 32),
			nil,
			nil, // no store
			nil,
		)
		_, err := op.Materialize(100, nil)
		require.Error(t, err)
		assert.Contains(t, err.Error(), "shards store not initialized")
	})
}
// TestShardMergeOp_GetCost verifies that merge operations are free.
func TestShardMergeOp_GetCost(t *testing.T) {
	op := global.NewShardMergeOp(nil, nil, nil, nil, nil)
	cost, err := op.GetCost()
	require.NoError(t, err)
	assert.Zero(t, cost.Int64())
}
// TestShardMergeOp_GetWriteAddresses verifies that each sub-shard yields a
// 64-byte write address prefixed with the global intrinsic address.
func TestShardMergeOp_GetWriteAddresses(t *testing.T) {
	parent := make([]byte, 32)
	shards := [][]byte{
		append(slices.Clone(parent), 0x00),
		append(slices.Clone(parent), 0x01),
	}
	op := global.NewShardMergeOp(shards, parent, nil, nil, nil)
	addrs, err := op.GetWriteAddresses(0)
	require.NoError(t, err)
	require.Len(t, addrs, 2)
	for _, a := range addrs {
		assert.Len(t, a, 64)
		assert.Equal(t, intrinsics.GLOBAL_INTRINSIC_ADDRESS[:], a[:32])
	}
}
// TestShardMergeOp_GetReadAddresses verifies that merges declare no read
// addresses.
func TestShardMergeOp_GetReadAddresses(t *testing.T) {
	op := global.NewShardMergeOp(nil, nil, nil, nil, nil)
	got, err := op.GetReadAddresses(0)
	require.NoError(t, err)
	assert.Nil(t, got)
}
// TestShardMergeOp_ToRequestBytes verifies that a fully-populated merge op
// serializes without error to a non-empty payload.
func TestShardMergeOp_ToRequestBytes(t *testing.T) {
	parent := make([]byte, 32)
	for i := range parent {
		parent[i] = byte(i % 256)
	}
	shards := [][]byte{
		append(slices.Clone(parent), 0x00),
		append(slices.Clone(parent), 0x01),
	}
	op := global.NewShardMergeOp(shards, parent, nil, nil, nil)
	op.FrameNumber = 12345
	op.PublicKeySignatureBLS48581 = MockAddressedSignature()
	data, err := op.ToRequestBytes()
	require.NoError(t, err)
	require.NotEmpty(t, data)
}
// setupMergeSignatureValidation sets up mock expectations for BLS signature
// validation on a merge operation. The mock accepts any message, signature,
// and domain for the given public key and always reports success.
func setupMergeSignatureValidation(
	mockKeyManager *mocks.MockKeyManager,
	pubKey []byte,
) {
	mockKeyManager.On("ValidateSignature",
		crypto.KeyTypeBLS48581G1,
		pubKey,
		mock.Anything, // message
		mock.Anything, // signature
		mock.Anything, // domain
	).Return(true, nil)
}

View File

@ -0,0 +1,249 @@
package global
import (
"bytes"
"math/big"
"slices"
"github.com/iden3/go-iden3-crypto/poseidon"
"github.com/pkg/errors"
"source.quilibrium.com/quilibrium/monorepo/types/consensus"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/execution/intrinsics"
"source.quilibrium.com/quilibrium/monorepo/types/execution/state"
"source.quilibrium.com/quilibrium/monorepo/types/hypergraph"
"source.quilibrium.com/quilibrium/monorepo/types/keys"
"source.quilibrium.com/quilibrium/monorepo/types/store"
)
// maxProversThreshold is the prover count a shard must exceed before it is
// eligible to be split into sub-shards (see ShardSplitOp.Verify).
const maxProversThreshold = 32
// ShardSplitOp is the global intrinsic operation that splits an
// over-subscribed shard into multiple proposed sub-shards.
type ShardSplitOp struct {
	// ShardAddress is the parent shard being split (32-63 bytes).
	ShardAddress []byte
	// ProposedShards are the child shard addresses; each must extend
	// ShardAddress by one or two bytes (see Verify).
	ProposedShards [][]byte
	// FrameNumber is the frame the split was signed for.
	FrameNumber uint64
	// PublicKeySignatureBLS48581 is the proposing prover's addressed
	// signature over the frame number and parent shard address.
	PublicKeySignatureBLS48581 BLS48581AddressedSignature
	// hypergraph is never assigned by NewShardSplitOp — presumably
	// reserved for future use; TODO confirm.
	hypergraph hypergraph.Hypergraph
	keyManager keys.KeyManager
	shardsStore store.ShardsStore
	proverRegistry consensus.ProverRegistry
}
// NewShardSplitOp constructs a ShardSplitOp for the given parent shard and
// proposed sub-shards. keyManager, shardsStore, and proverRegistry may be
// nil when the corresponding capability (Prove, Materialize, Verify) is not
// needed; the hypergraph dependency is left unset.
func NewShardSplitOp(
	shardAddress []byte,
	proposedShards [][]byte,
	keyManager keys.KeyManager,
	shardsStore store.ShardsStore,
	proverRegistry consensus.ProverRegistry,
) *ShardSplitOp {
	op := new(ShardSplitOp)
	op.ShardAddress = shardAddress
	op.ProposedShards = proposedShards
	op.keyManager = keyManager
	op.shardsStore = shardsStore
	op.proverRegistry = proverRegistry
	return op
}
// GetCost implements intrinsics.IntrinsicOperation. Shard splits are
// administrative and carry no fee.
func (op *ShardSplitOp) GetCost() (*big.Int, error) {
	return big.NewInt(0), nil
}
// Verify implements intrinsics.IntrinsicOperation. It checks structural
// validity of the split request, confirms the signer is an active global
// prover, validates the BLS signature over the frame number and parent
// shard address, and finally requires the shard's prover count to exceed
// maxProversThreshold.
func (op *ShardSplitOp) Verify(frameNumber uint64) (bool, error) {
	if op.proverRegistry == nil {
		return false, errors.New("prover registry not initialized")
	}
	// Validate shard address length
	if len(op.ShardAddress) < 32 || len(op.ShardAddress) > 63 {
		return false, errors.New("shard_address must be 32-63 bytes")
	}
	// Validate proposed shards
	if len(op.ProposedShards) < 2 || len(op.ProposedShards) > 8 {
		return false, errors.New("proposed_shards must have 2-8 entries")
	}
	for _, shard := range op.ProposedShards {
		// Each child must extend the parent by one or two path bytes and
		// share the parent's full prefix.
		if len(shard) != len(op.ShardAddress)+1 &&
			len(shard) != len(op.ShardAddress)+2 {
			return false, errors.Errorf(
				"proposed shard length %d invalid for parent length %d",
				len(shard), len(op.ShardAddress),
			)
		}
		if !bytes.HasPrefix(shard, op.ShardAddress) {
			return false, errors.New("proposed shard must share parent prefix")
		}
	}
	// Look up the public key from the prover registry using the address
	address := op.PublicKeySignatureBLS48581.Address
	if len(address) != 32 {
		return false, errors.New("invalid address length")
	}
	info, err := op.proverRegistry.GetProverInfo(address)
	if err != nil || info == nil {
		return false, errors.New("signer is not a registered prover")
	}
	// The signer must hold at least one active allocation on the global
	// shard (denoted by a nil confirmation filter).
	hasGlobal := false
	for _, alloc := range info.Allocations {
		if alloc.ConfirmationFilter == nil &&
			alloc.Status == consensus.ProverStatusActive {
			hasGlobal = true
			break
		}
	}
	if !hasGlobal {
		return false, errors.New("signer is not an active global prover")
	}
	pubKey := info.PublicKey
	// Verify BLS signature using the looked-up public key
	// NOTE(review): the signed payload binds op.FrameNumber rather than the
	// frameNumber argument, and the two are never cross-checked — confirm
	// this is intentional (tests pass matching values). Also note the
	// int64 cast: FillBytes encodes the absolute value, so frame numbers
	// >= 2^63 would encode differently than their uint64 value — confirm
	// this is unreachable in practice.
	signedData := slices.Concat(
		big.NewInt(int64(op.FrameNumber)).FillBytes(make([]byte, 8)),
		op.ShardAddress,
	)
	splitDomainPreimage := slices.Concat(
		intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
		[]byte("SHARD_SPLIT"),
	)
	splitDomain, err := poseidon.HashBytes(splitDomainPreimage)
	if err != nil {
		return false, errors.Wrap(err, "verify: invalid shard split")
	}
	ok, err := op.keyManager.ValidateSignature(
		crypto.KeyTypeBLS48581G1,
		pubKey,
		signedData,
		op.PublicKeySignatureBLS48581.Signature,
		splitDomain.Bytes(),
	)
	if err != nil || !ok {
		return false, errors.Wrap(
			errors.New("invalid BLS signature"),
			"verify: invalid shard split",
		)
	}
	// Verify shard has enough provers to warrant split (> maxProvers)
	count, err := op.proverRegistry.GetProverCount(op.ShardAddress)
	if err != nil {
		return false, errors.Wrap(err, "verify: invalid shard split: prover count")
	}
	if count <= maxProversThreshold {
		return false, errors.Errorf(
			"shard has %d provers, split requires > %d",
			count, maxProversThreshold,
		)
	}
	return true, nil
}
// Materialize implements intrinsics.IntrinsicOperation. It registers every
// proposed sub-shard in the shards store and passes the state through
// unchanged.
func (op *ShardSplitOp) Materialize(
	frameNumber uint64,
	s state.State,
) (state.State, error) {
	if op.shardsStore == nil {
		return nil, errors.New("shards store not initialized")
	}
	for _, shard := range op.ProposedShards {
		// A shard address is the 32-byte L2 key followed by a path of
		// single-byte segments widened to uint32.
		info := store.ShardInfo{
			L2:   slices.Clone(shard[:32]),
			Path: make([]uint32, 0, len(shard)-32),
		}
		for _, segment := range shard[32:] {
			info.Path = append(info.Path, uint32(segment))
		}
		if err := op.shardsStore.PutAppShard(nil, info); err != nil {
			return nil, errors.Wrap(err, "materialize shard split")
		}
	}
	return s, nil
}
// Prove implements intrinsics.IntrinsicOperation. It signs the frame number
// and parent shard address with the local prover key under the SHARD_SPLIT
// domain and attaches the resulting addressed signature to the operation.
func (op *ShardSplitOp) Prove(frameNumber uint64) error {
	if op.keyManager == nil {
		return errors.New("key manager not initialized")
	}
	signingKey, err := op.keyManager.GetSigningKey("q-prover-key")
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	// NOTE(review): this assertion panics if Public() does not return a
	// []byte — confirm the key manager guarantees that for BLS keys.
	pubKey := signingKey.Public().([]byte)
	// Derive the address from the public key
	addressBI, err := poseidon.HashBytes(pubKey)
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	address := addressBI.FillBytes(make([]byte, 32))
	// The signed payload mirrors Verify: 8-byte frame number followed by
	// the parent shard address.
	signedData := slices.Concat(
		big.NewInt(int64(frameNumber)).FillBytes(make([]byte, 8)),
		op.ShardAddress,
	)
	splitDomainPreimage := slices.Concat(
		intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
		[]byte("SHARD_SPLIT"),
	)
	splitDomain, err := poseidon.HashBytes(splitDomainPreimage)
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	signature, err := signingKey.SignWithDomain(
		signedData,
		splitDomain.Bytes(),
	)
	if err != nil {
		return errors.Wrap(err, "prove")
	}
	op.FrameNumber = frameNumber
	op.PublicKeySignatureBLS48581 = BLS48581AddressedSignature{
		Address: address,
		Signature: signature,
	}
	return nil
}
// GetReadAddresses implements intrinsics.IntrinsicOperation. A split
// declares no read addresses.
func (op *ShardSplitOp) GetReadAddresses(
	frameNumber uint64,
) ([][]byte, error) {
	return nil, nil
}
// GetWriteAddresses implements intrinsics.IntrinsicOperation. Each proposed
// sub-shard yields one 64-byte write address: the global intrinsic address
// followed by up to 32 bytes of the shard address (copy truncates shard
// bytes beyond the 64-byte boundary; NOTE(review): confirm this truncation
// is intended for parents longer than 31 bytes).
func (op *ShardSplitOp) GetWriteAddresses(
	frameNumber uint64,
) ([][]byte, error) {
	addresses := make([][]byte, 0, len(op.ProposedShards))
	for _, shard := range op.ProposedShards {
		addr := make([]byte, 64)
		copy(addr[:32], intrinsics.GLOBAL_INTRINSIC_ADDRESS[:])
		copy(addr[32:], shard)
		addresses = append(addresses, addr)
	}
	return addresses, nil
}
// Compile-time check that ShardSplitOp satisfies the intrinsic operation
// interface.
var _ intrinsics.IntrinsicOperation = (*ShardSplitOp)(nil)

View File

@ -0,0 +1,527 @@
package global_test
import (
"math/big"
"slices"
"testing"
"github.com/iden3/go-iden3-crypto/poseidon"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"source.quilibrium.com/quilibrium/monorepo/node/execution/intrinsics/global"
"source.quilibrium.com/quilibrium/monorepo/types/consensus"
"source.quilibrium.com/quilibrium/monorepo/types/crypto"
"source.quilibrium.com/quilibrium/monorepo/types/execution/intrinsics"
"source.quilibrium.com/quilibrium/monorepo/types/mocks"
"source.quilibrium.com/quilibrium/monorepo/types/store"
)
// TestShardSplitOp_Prove verifies that Prove derives the signer address
// from the prover public key, signs the frame number and shard address
// under the SHARD_SPLIT domain, and stores the resulting addressed
// signature on the op.
func TestShardSplitOp_Prove(t *testing.T) {
	mockKeyManager := new(mocks.MockKeyManager)
	mockSigner := new(mocks.MockBLSSigner)
	shardAddress := make([]byte, 33)
	for i := range shardAddress {
		shardAddress[i] = byte(i % 256)
	}
	proposedShards := [][]byte{
		append(slices.Clone(shardAddress), 0x00),
		append(slices.Clone(shardAddress), 0x01),
	}
	frameNumber := uint64(12345)
	pubKey := make([]byte, 585)
	for i := range pubKey {
		pubKey[i] = byte(i % 256)
	}
	// Derive expected 32-byte address from pubKey
	addressBI, err := poseidon.HashBytes(pubKey)
	require.NoError(t, err)
	expectedAddress := addressBI.FillBytes(make([]byte, 32))
	splitDomainPreimage := slices.Concat(
		intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
		[]byte("SHARD_SPLIT"),
	)
	splitDomain, err := poseidon.HashBytes(splitDomainPreimage)
	require.NoError(t, err)
	// Expected message: 8-byte frame number followed by the parent shard.
	expectedMessage := slices.Concat(
		big.NewInt(int64(frameNumber)).FillBytes(make([]byte, 8)),
		shardAddress,
	)
	mockSigner.On("Public").Return(pubKey)
	mockSigner.On("SignWithDomain", expectedMessage, splitDomain.Bytes()).
		Return([]byte("signature"), nil)
	mockKeyManager.On("GetSigningKey", "q-prover-key").Return(mockSigner, nil)
	op := global.NewShardSplitOp(
		shardAddress,
		proposedShards,
		mockKeyManager,
		nil,
		nil,
	)
	err = op.Prove(frameNumber)
	require.NoError(t, err)
	assert.Equal(t, expectedAddress, op.PublicKeySignatureBLS48581.Address)
	assert.Equal(t, []byte("signature"), op.PublicKeySignatureBLS48581.Signature)
	assert.Equal(t, frameNumber, op.FrameNumber)
	mockSigner.AssertExpectations(t)
	mockKeyManager.AssertExpectations(t)
}
// TestShardSplitOp_Verify exercises every rejection path of
// ShardSplitOp.Verify — structural checks, registry lookups, signature
// validation, and the prover-count threshold — plus the fully valid case.
func TestShardSplitOp_Verify(t *testing.T) {
	t.Run("prover registry required", func(t *testing.T) {
		op := global.NewShardSplitOp(
			make([]byte, 33),
			[][]byte{make([]byte, 34), make([]byte, 34)},
			nil,
			nil,
			nil, // no registry
		)
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "prover registry not initialized")
	})
	t.Run("shard address too short", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		op := global.NewShardSplitOp(
			make([]byte, 31), // too short
			[][]byte{make([]byte, 33), make([]byte, 33)},
			nil,
			nil,
			mockRegistry,
		)
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: make([]byte, 32),
		}
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "32-63 bytes")
	})
	t.Run("shard address too long", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		op := global.NewShardSplitOp(
			make([]byte, 64), // too long
			[][]byte{make([]byte, 65), make([]byte, 65)},
			nil,
			nil,
			mockRegistry,
		)
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: make([]byte, 32),
		}
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "32-63 bytes")
	})
	t.Run("too few proposed shards", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		op := global.NewShardSplitOp(
			shardAddress,
			[][]byte{append(slices.Clone(shardAddress), 0x00)}, // only 1
			nil,
			nil,
			mockRegistry,
		)
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: make([]byte, 32),
		}
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "2-8")
	})
	t.Run("proposed shard wrong prefix", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		badShard := make([]byte, 34)
		badShard[0] = 0xFF // does not match parent prefix
		op := global.NewShardSplitOp(
			shardAddress,
			[][]byte{
				append(slices.Clone(shardAddress), 0x00),
				badShard,
			},
			nil,
			nil,
			mockRegistry,
		)
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: make([]byte, 32),
		}
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "prefix")
	})
	t.Run("invalid address length", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		op := global.NewShardSplitOp(
			shardAddress,
			[][]byte{
				append(slices.Clone(shardAddress), 0x00),
				append(slices.Clone(shardAddress), 0x01),
			},
			nil,
			nil,
			mockRegistry,
		)
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: make([]byte, 585), // wrong length
		}
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "invalid address length")
	})
	t.Run("signer not a registered prover", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		proposedShards := [][]byte{
			append(slices.Clone(shardAddress), 0x00),
			append(slices.Clone(shardAddress), 0x01),
		}
		address := make([]byte, 32)
		address[0] = 0x42
		mockRegistry.On("GetProverInfo", address).Return(nil, nil)
		op := global.NewShardSplitOp(
			shardAddress, proposedShards, nil, nil, mockRegistry,
		)
		op.FrameNumber = 100
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: address,
			Signature: make([]byte, 74),
		}
		valid, err := op.Verify(100)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "not a registered prover")
	})
	t.Run("signer not a global prover", func(t *testing.T) {
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		proposedShards := [][]byte{
			append(slices.Clone(shardAddress), 0x00),
			append(slices.Clone(shardAddress), 0x01),
		}
		address := make([]byte, 32)
		address[0] = 0x42
		// A non-nil confirmation filter means an app-shard allocation, so
		// the signer has no active global allocation.
		mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
			Allocations: []consensus.ProverAllocationInfo{
				{
					ConfirmationFilter: []byte("some-app-shard"),
					Status: consensus.ProverStatusActive,
				},
			},
		}, nil)
		op := global.NewShardSplitOp(
			shardAddress, proposedShards, nil, nil, mockRegistry,
		)
		op.FrameNumber = 100
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: address,
			Signature: make([]byte, 74),
		}
		valid, err := op.Verify(100)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "not an active global prover")
	})
	t.Run("invalid signature fails", func(t *testing.T) {
		mockKeyManager := new(mocks.MockKeyManager)
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		proposedShards := [][]byte{
			append(slices.Clone(shardAddress), 0x00),
			append(slices.Clone(shardAddress), 0x01),
		}
		pubKey := make([]byte, 585)
		address := make([]byte, 32)
		address[0] = 0x42
		mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
			PublicKey: pubKey,
			Allocations: []consensus.ProverAllocationInfo{
				{
					ConfirmationFilter: nil,
					Status: consensus.ProverStatusActive,
				},
			},
		}, nil)
		splitDomainPreimage := slices.Concat(
			intrinsics.GLOBAL_INTRINSIC_ADDRESS[:],
			[]byte("SHARD_SPLIT"),
		)
		splitDomain, err := poseidon.HashBytes(splitDomainPreimage)
		require.NoError(t, err)
		// The key manager reports the signature as invalid.
		mockKeyManager.On("ValidateSignature",
			crypto.KeyTypeBLS48581G1,
			pubKey,
			mock.Anything,
			[]byte("bad-sig"),
			splitDomain.Bytes(),
		).Return(false, nil)
		op := global.NewShardSplitOp(
			shardAddress, proposedShards, mockKeyManager, nil, mockRegistry,
		)
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: address,
			Signature: []byte("bad-sig"),
		}
		valid, err := op.Verify(0)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "invalid BLS signature")
	})
	t.Run("shard prover count below split threshold", func(t *testing.T) {
		mockKeyManager := new(mocks.MockKeyManager)
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		proposedShards := [][]byte{
			append(slices.Clone(shardAddress), 0x00),
			append(slices.Clone(shardAddress), 0x01),
		}
		pubKey := make([]byte, 585)
		for i := range pubKey {
			pubKey[i] = byte(i % 256)
		}
		address := make([]byte, 32)
		address[0] = 0x42
		mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
			PublicKey: pubKey,
			Allocations: []consensus.ProverAllocationInfo{
				{
					ConfirmationFilter: nil,
					Status: consensus.ProverStatusActive,
				},
			},
		}, nil)
		setupSplitSignatureValidation(mockKeyManager, pubKey)
		// 30 provers <= maxProversThreshold (32), so the split is refused.
		mockRegistry.On("GetProverCount", shardAddress).Return(30, nil)
		op := global.NewShardSplitOp(
			shardAddress, proposedShards, mockKeyManager, nil, mockRegistry,
		)
		op.FrameNumber = 100
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: address,
			Signature: make([]byte, 74),
		}
		valid, err := op.Verify(100)
		require.Error(t, err)
		assert.False(t, valid)
		assert.Contains(t, err.Error(), "split requires")
	})
	t.Run("valid split with eligibility checks", func(t *testing.T) {
		mockKeyManager := new(mocks.MockKeyManager)
		mockRegistry := new(mocks.MockProverRegistry)
		shardAddress := make([]byte, 33)
		for i := range shardAddress {
			shardAddress[i] = byte(i % 256)
		}
		proposedShards := [][]byte{
			append(slices.Clone(shardAddress), 0x00),
			append(slices.Clone(shardAddress), 0x01),
		}
		pubKey := make([]byte, 585)
		for i := range pubKey {
			pubKey[i] = byte(i % 256)
		}
		address := make([]byte, 32)
		address[0] = 0x42
		mockRegistry.On("GetProverInfo", address).Return(&consensus.ProverInfo{
			PublicKey: pubKey,
			Allocations: []consensus.ProverAllocationInfo{
				{
					ConfirmationFilter: nil,
					Status: consensus.ProverStatusActive,
				},
			},
		}, nil)
		setupSplitSignatureValidation(mockKeyManager, pubKey)
		// 35 provers > maxProversThreshold (32), so the split is eligible.
		mockRegistry.On("GetProverCount", shardAddress).Return(35, nil)
		op := global.NewShardSplitOp(
			shardAddress, proposedShards, mockKeyManager, nil, mockRegistry,
		)
		op.FrameNumber = 100
		op.PublicKeySignatureBLS48581 = global.BLS48581AddressedSignature{
			Address: address,
			Signature: make([]byte, 74),
		}
		valid, err := op.Verify(100)
		require.NoError(t, err)
		assert.True(t, valid)
	})
}
// TestShardSplitOp_Materialize verifies that materializing a split registers
// each proposed sub-shard in the shards store, and that a missing store is
// reported as an error.
func TestShardSplitOp_Materialize(t *testing.T) {
	t.Run("registers proposed shards", func(t *testing.T) {
		mockShardsStore := new(mocks.MockShardsStore)
		// 33-byte shard address: 32 bytes L2 + 1 byte path
		shardAddress := make([]byte, 33)
		for i := range shardAddress {
			shardAddress[i] = byte(i % 256)
		}
		proposedShards := [][]byte{
			append(slices.Clone(shardAddress), 0x00),
			append(slices.Clone(shardAddress), 0x01),
		}
		// Expect one PutAppShard call per proposed shard, keyed by the
		// 32-byte L2 prefix with the remaining bytes widened to uint32.
		for _, ps := range proposedShards {
			l2 := slices.Clone(ps[:32])
			path := make([]uint32, 0, len(ps)-32)
			for _, b := range ps[32:] {
				path = append(path, uint32(b))
			}
			mockShardsStore.On("PutAppShard",
				mock.Anything, // txn
				store.ShardInfo{L2: l2, Path: path},
			).Return(nil).Once()
		}
		op := global.NewShardSplitOp(
			shardAddress,
			proposedShards,
			nil,
			mockShardsStore,
			nil,
		)
		newState, err := op.Materialize(100, nil)
		require.NoError(t, err)
		assert.Nil(t, newState) // state passthrough (nil in)
		mockShardsStore.AssertExpectations(t)
	})
	t.Run("shardsStore nil returns error", func(t *testing.T) {
		op := global.NewShardSplitOp(
			make([]byte, 33),
			[][]byte{make([]byte, 34), make([]byte, 34)},
			nil,
			nil, // no store
			nil,
		)
		_, err := op.Materialize(100, nil)
		require.Error(t, err)
		assert.Contains(t, err.Error(), "shards store not initialized")
	})
}
// TestShardSplitOp_GetCost verifies that split operations are free.
func TestShardSplitOp_GetCost(t *testing.T) {
	op := global.NewShardSplitOp(nil, nil, nil, nil, nil)
	cost, err := op.GetCost()
	require.NoError(t, err)
	assert.Zero(t, cost.Int64())
}
// TestShardSplitOp_GetWriteAddresses verifies that each proposed sub-shard
// yields a 64-byte write address prefixed with the global intrinsic address.
func TestShardSplitOp_GetWriteAddresses(t *testing.T) {
	parent := make([]byte, 33)
	proposed := [][]byte{
		append(slices.Clone(parent), 0x00),
		append(slices.Clone(parent), 0x01),
	}
	op := global.NewShardSplitOp(parent, proposed, nil, nil, nil)
	addrs, err := op.GetWriteAddresses(0)
	require.NoError(t, err)
	require.Len(t, addrs, 2)
	for _, a := range addrs {
		assert.Len(t, a, 64)
		assert.Equal(t, intrinsics.GLOBAL_INTRINSIC_ADDRESS[:], a[:32])
	}
}
// TestShardSplitOp_GetReadAddresses verifies that splits declare no read
// addresses.
func TestShardSplitOp_GetReadAddresses(t *testing.T) {
	op := global.NewShardSplitOp(nil, nil, nil, nil, nil)
	got, err := op.GetReadAddresses(0)
	require.NoError(t, err)
	assert.Nil(t, got)
}
// TestShardSplitOp_ToRequestBytes verifies that a fully-populated split op
// serializes without error to a non-empty payload.
func TestShardSplitOp_ToRequestBytes(t *testing.T) {
	parent := make([]byte, 33)
	for i := range parent {
		parent[i] = byte(i % 256)
	}
	proposed := [][]byte{
		append(slices.Clone(parent), 0x00),
		append(slices.Clone(parent), 0x01),
	}
	op := global.NewShardSplitOp(parent, proposed, nil, nil, nil)
	op.FrameNumber = 12345
	op.PublicKeySignatureBLS48581 = MockAddressedSignature()
	data, err := op.ToRequestBytes()
	require.NoError(t, err)
	require.NotEmpty(t, data)
}
// setupSplitSignatureValidation sets up mock expectations for BLS signature
// validation on a split operation. The mock accepts any message, signature,
// and domain for the given public key and always reports success.
func setupSplitSignatureValidation(
	mockKeyManager *mocks.MockKeyManager,
	pubKey []byte,
) {
	mockKeyManager.On("ValidateSignature",
		crypto.KeyTypeBLS48581G1,
		pubKey,
		mock.Anything, // message
		mock.Anything, // signature
		mock.Anything, // domain
	).Return(true, nil)
}

View File

@ -147,27 +147,27 @@ func (h *HyperedgeAdd) GetWriteAddresses(
// Verify implements intrinsics.IntrinsicOperation.
func (h *HyperedgeAdd) Verify(frameNumber uint64) (bool, error) {
if h.Value == nil {
return false, errors.Wrap(errors.New("missing hyperedge value"), "verify")
return false, errors.Wrap(errors.New("missing hyperedge value"), "verify: invalid hyperedge add")
}
conns := h.Value.GetSize()
if conns.Cmp(big.NewInt(0)) == 0 {
return false, errors.Wrap(
errors.New("hyperedge must connect at least one atom"),
"verify",
"verify: invalid hyperedge add",
)
}
hyperedgeID := h.Value.GetID()
if !bytes.Equal(hyperedgeID[:32], h.Domain[:]) {
return false, errors.Wrap(errors.New("hyperedge domain mismatch"), "verify")
return false, errors.Wrap(errors.New("hyperedge domain mismatch"), "verify: invalid hyperedge add")
}
commit := h.Value.Commit(h.inclusionProver)
if len(commit) == 0 {
return false, errors.Wrap(
errors.New("invalid commitment for hyperedge"),
"verify",
"verify: invalid hyperedge add",
)
}
@ -183,11 +183,11 @@ func (h *HyperedgeAdd) Verify(frameNumber uint64) (bool, error) {
slices.Concat(h.Domain[:], []byte("HYPEREDGE_ADD")),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid hyperedge add")
}
if !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid hyperedge add")
}
return true, nil

View File

@ -115,21 +115,21 @@ func (h *HyperedgeRemove) GetWriteAddresses(
func (h *HyperedgeRemove) Verify(frameNumber uint64) (bool, error) {
// Verify that the hyperedge is valid
if h.Value == nil {
return false, errors.Wrap(errors.New("missing hyperedge value"), "verify")
return false, errors.Wrap(errors.New("missing hyperedge value"), "verify: invalid hyperedge remove")
}
hyperedgeID := h.Value.GetID()
if len(hyperedgeID) != 64 {
return false, errors.Wrap(
errors.New("invalid hyperedge id length"),
"verify",
"verify: invalid hyperedge remove",
)
}
if !bytes.Equal(hyperedgeID[:32], h.Domain[:]) {
return false, errors.Wrap(
errors.New("hyperedge domain mismatch"),
"verify",
"verify: invalid hyperedge remove",
)
}
@ -144,11 +144,11 @@ func (h *HyperedgeRemove) Verify(frameNumber uint64) (bool, error) {
slices.Concat(h.Domain[:], []byte("HYPEREDGE_REMOVE")),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid hyperedge remove")
}
if !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid hyperedge remove")
}
return true, nil

View File

@ -179,14 +179,14 @@ func (h *VertexAdd) GetWriteAddresses(
func (h *VertexAdd) Verify(frameNumber uint64) (bool, error) {
// Check if data is valid and can be committed
if len(h.Data) == 0 {
return false, errors.Wrap(errors.New("missing data for vertex"), "verify")
return false, errors.Wrap(errors.New("missing data for vertex"), "verify: invalid vertex add")
}
for _, d := range h.Data {
if !d.Verify() {
return false, errors.Wrap(
errors.New("invalid proof for data"),
"verify",
"verify: invalid vertex add",
)
}
}
@ -202,7 +202,7 @@ func (h *VertexAdd) Verify(frameNumber uint64) (bool, error) {
}
if diskSize > 1024*1024*5 {
return false, errors.Wrap(errors.New("data too large"), "verify")
return false, errors.Wrap(errors.New("data too large"), "verify: invalid vertex add")
}
valid, err := h.keyManager.ValidateSignature(
@ -213,11 +213,11 @@ func (h *VertexAdd) Verify(frameNumber uint64) (bool, error) {
slices.Concat(h.Domain[:], []byte("VERTEX_ADD")),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid vertex add")
}
if !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid vertex add")
}
return true, nil

View File

@ -109,11 +109,11 @@ func (h *VertexRemove) Verify(frameNumber uint64) (bool, error) {
slices.Concat(h.Domain[:], []byte("VERTEX_REMOVE")),
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid vertex remove")
}
if !valid {
return false, errors.Wrap(errors.New("invalid signature"), "verify")
return false, errors.Wrap(errors.New("invalid signature"), "verify: invalid vertex remove")
}
return true, nil

View File

@ -2698,7 +2698,7 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
len(tx.Inputs) > 100 || len(tx.Outputs) > 100 {
return false, errors.Wrap(
errors.New("invalid quantity of inputs, outputs, or proofs"),
"verify",
"verify: invalid mint transaction",
)
}
@ -2706,20 +2706,20 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
if fee == nil ||
new(big.Int).Lsh(big.NewInt(1), uint(128)).Cmp(fee) < 0 ||
new(big.Int).Cmp(fee) > 0 {
return false, errors.Wrap(errors.New("invalid fees"), "verify")
return false, errors.Wrap(errors.New("invalid fees"), "verify: invalid mint transaction")
}
}
if tx.config.Behavior&Divisible == 0 && len(tx.Inputs) != len(tx.Outputs) {
return false, errors.Wrap(
errors.New("non-divisible token has mismatching inputs and outputs"),
"verify",
"verify: invalid mint transaction",
)
}
challenge, err := tx.GetChallenge()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid mint transaction")
}
inputs := [][]byte{}
@ -2731,13 +2731,13 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
challenge,
tx,
); !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid mint transaction")
}
if _, ok := check[string(input.Signature[(56*4):(56*5)])]; ok {
return false, errors.Wrap(
errors.New("attempted double-spend"),
"verify",
"verify: invalid mint transaction",
)
}
check[string(input.Signature[(56*4):(56*5)])] = struct{}{}
@ -2748,12 +2748,12 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
commitments := [][]byte{}
for i, o := range tx.Outputs {
if valid, err := o.Verify(frameNumber, i, tx); !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid mint transaction")
}
spendCheckBI, err := poseidon.HashBytes(o.RecipientOutput.VerificationKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid mint transaction")
}
_, err = tx.hypergraph.GetVertex([64]byte(
@ -2762,7 +2762,7 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
if err == nil {
return false, errors.Wrap(
errors.New("invalid verification key"),
"verify",
"verify: invalid mint transaction",
)
}
@ -2771,7 +2771,7 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
}
if !tx.bulletproofProver.VerifyRangeProof(tx.RangeProof, commitment, 128) {
return false, errors.Wrap(errors.New("invalid range proof"), "verify")
return false, errors.Wrap(errors.New("invalid range proof"), "verify: invalid mint transaction")
}
// There are no fees in the sumcheck, either because QUIL token native mint
@ -2783,7 +2783,7 @@ func (tx *MintTransaction) Verify(frameNumber uint64) (bool, error) {
commitments,
[]*big.Int{},
) {
return false, errors.Wrap(errors.New("invalid sum check"), "verify")
return false, errors.Wrap(errors.New("invalid sum check"), "verify: invalid mint transaction")
}
return true, nil

View File

@ -1566,7 +1566,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
len(tx.Inputs) != len(tx.TraversalProof.SubProofs) {
return false, errors.Wrap(
errors.New("invalid quantity of inputs, outputs, or proofs"),
"verify",
"verify: invalid pending transaction",
)
}
@ -1574,20 +1574,20 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
if fee == nil ||
new(big.Int).Lsh(big.NewInt(1), uint(128)).Cmp(fee) < 0 ||
new(big.Int).Cmp(fee) > 0 {
return false, errors.Wrap(errors.New("invalid fees"), "verify")
return false, errors.Wrap(errors.New("invalid fees"), "verify: invalid pending transaction")
}
}
if tx.config.Behavior&Divisible == 0 && len(tx.Inputs) != len(tx.Outputs) {
return false, errors.Wrap(
errors.New("non-divisible token has mismatching inputs and outputs"),
"verify",
"verify: invalid pending transaction",
)
}
challenge, err := tx.GetChallenge()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid pending transaction")
}
inputs := [][]byte{}
@ -1601,7 +1601,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
tx.TraversalProof,
i,
); !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid pending transaction")
}
if bytes.Equal(tx.Domain[:], QUIL_TOKEN_ADDRESS) &&
@ -1609,7 +1609,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
if _, ok := check[string(input.Signature[:32])]; ok {
return false, errors.Wrap(
errors.New("attempted double-spend"),
"verify",
"verify: invalid pending transaction",
)
}
check[string(input.Signature[:32])] = struct{}{}
@ -1618,7 +1618,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
if _, ok := check[string(input.Signature[(56*4):(56*5)])]; ok {
return false, errors.Wrap(
errors.New("attempted double-spend"),
"verify",
"verify: invalid pending transaction",
)
}
check[string(input.Signature[(56*4):(56*5)])] = struct{}{}
@ -1630,12 +1630,12 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
commitments := [][]byte{}
for i, o := range tx.Outputs {
if valid, err := o.Verify(frameNumber, tx.config); !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid pending transaction")
}
spendCheckBI, err := poseidon.HashBytes(o.RefundOutput.VerificationKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid pending transaction")
}
_, err = tx.hypergraph.GetVertex([64]byte(
@ -1644,13 +1644,13 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
if err == nil {
return false, errors.Wrap(
errors.New("invalid refund verification key"),
"verify",
"verify: invalid pending transaction",
)
}
spendCheckBI, err = poseidon.HashBytes(o.ToOutput.VerificationKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid pending transaction")
}
_, err = tx.hypergraph.GetVertex([64]byte(
@ -1659,7 +1659,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
if err == nil {
return false, errors.Wrap(
errors.New("invalid to verification key"),
"verify",
"verify: invalid pending transaction",
)
}
@ -1672,7 +1672,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
tx.Domain[:],
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid pending transaction")
}
valid, err := tx.hypergraph.VerifyTraversalProof(
@ -1685,11 +1685,11 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
if err != nil || !valid {
return false, errors.Wrap(errors.New(
fmt.Sprintf("invalid traversal proof: %v", err),
), "verify")
), "verify: invalid pending transaction")
}
if !tx.bulletproofProver.VerifyRangeProof(tx.RangeProof, commitment, 128) {
return false, errors.Wrap(errors.New("invalid range proof"), "verify")
return false, errors.Wrap(errors.New("invalid range proof"), "verify: invalid pending transaction")
}
sumcheckFees := []*big.Int{}
@ -1703,7 +1703,7 @@ func (tx *PendingTransaction) Verify(frameNumber uint64) (bool, error) {
commitments,
sumcheckFees,
) {
return false, errors.Wrap(errors.New("invalid sum check"), "verify")
return false, errors.Wrap(errors.New("invalid sum check"), "verify: invalid pending transaction")
}
return true, nil

View File

@ -1474,7 +1474,7 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
len(tx.Inputs) != len(tx.TraversalProof.SubProofs) {
return false, errors.Wrap(
errors.New("invalid quantity of inputs, outputs, or proofs"),
"verify",
"verify: invalid transaction",
)
}
@ -1482,20 +1482,20 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
if fee == nil ||
new(big.Int).Lsh(big.NewInt(1), uint(128)).Cmp(fee) < 0 ||
new(big.Int).Cmp(fee) > 0 {
return false, errors.Wrap(errors.New("invalid fees"), "verify")
return false, errors.Wrap(errors.New("invalid fees"), "verify: invalid transaction")
}
}
if tx.config.Behavior&Divisible == 0 && len(tx.Inputs) != len(tx.Outputs) {
return false, errors.Wrap(
errors.New("non-divisible token has mismatching inputs and outputs"),
"verify",
"verify: invalid transaction",
)
}
challenge, err := tx.GetChallenge()
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid transaction")
}
inputs := [][]byte{}
@ -1509,13 +1509,13 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
tx.TraversalProof,
i,
); !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid transaction")
}
if _, ok := check[string(input.Signature[(56*4):(56*5)])]; ok {
return false, errors.Wrap(
errors.New("attempted double-spend"),
"verify",
"verify: invalid transaction",
)
}
check[string(input.Signature[(56*4):(56*5)])] = struct{}{}
@ -1526,7 +1526,7 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
commitments := [][]byte{}
for i, o := range tx.Outputs {
if valid, err := o.Verify(frameNumber, tx.config); !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid transaction")
}
if tx.config.Behavior&Divisible == 0 {
@ -1537,13 +1537,13 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
o.RecipientOutput.AdditionalReferenceKey,
tx.Inputs[i].Proofs[len(tx.Inputs[i].Proofs)-1][64:],
) {
return false, errors.Wrap(errors.New("invalid reference"), "verify")
return false, errors.Wrap(errors.New("invalid reference"), "verify: invalid transaction")
}
}
spendCheckBI, err := poseidon.HashBytes(o.RecipientOutput.VerificationKey)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid transaction")
}
_, err = tx.hypergraph.GetVertex([64]byte(
@ -1552,7 +1552,7 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
if err == nil {
return false, errors.Wrap(
errors.New("invalid verification key"),
"verify",
"verify: invalid transaction",
)
}
@ -1565,7 +1565,7 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
tx.Domain[:],
)
if err != nil {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid transaction")
}
valid, err := tx.hypergraph.VerifyTraversalProof(
@ -1576,11 +1576,11 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
tx.TraversalProof,
)
if err != nil || !valid {
return false, errors.Wrap(err, "verify")
return false, errors.Wrap(err, "verify: invalid transaction")
}
if !tx.bulletproofProver.VerifyRangeProof(tx.RangeProof, commitment, 128) {
return false, errors.Wrap(errors.New("invalid range proof"), "verify")
return false, errors.Wrap(errors.New("invalid range proof"), "verify: invalid transaction")
}
sumcheckFees := []*big.Int{}
@ -1594,7 +1594,7 @@ func (tx *Transaction) Verify(frameNumber uint64) (bool, error) {
commitments,
sumcheckFees,
) {
return false, errors.Wrap(errors.New("invalid sum check"), "verify")
return false, errors.Wrap(errors.New("invalid sum check"), "verify: invalid transaction")
}
return true, nil

View File

@ -12,6 +12,7 @@ import (
"fmt"
"log"
"math/big"
"net"
"net/http"
npprof "net/http/pprof"
"os"
@ -46,6 +47,12 @@ import (
qruntime "source.quilibrium.com/quilibrium/monorepo/utils/runtime"
)
func init() {
// Use the pure-Go DNS resolver to avoid SIGFPE crashes in cgo-based
// system resolvers (observed on some glibc/musl configurations).
net.DefaultResolver = &net.Resolver{PreferGo: true}
}
var (
configDirectory = flag.String(
"config",
@ -631,6 +638,9 @@ func main() {
errCh := masterNode.Start(ctx)
defer masterNode.Stop()
done := make(chan os.Signal, 1)
signal.Notify(done, syscall.SIGINT, syscall.SIGTERM)
if nodeConfig.ListenGRPCMultiaddr != "" {
srv, err := rpc.NewRPCServer(
nodeConfig,
@ -662,6 +672,9 @@ func main() {
monitor.Start(ctx)
select {
case <-done:
logger.Info("received shutdown signal")
quit()
case <-diskFullCh:
quit()
case err := <-errCh:

View File

@ -44,6 +44,7 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
"golang.org/x/crypto/sha3"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
grpcpeer "google.golang.org/grpc/peer"
@ -84,19 +85,23 @@ type BlossomSub struct {
// Track which bit slices belong to which original bitmasks, used to reference
// count bitmasks for closed subscriptions
subscriptionTracker map[string][][]byte
subscriptions []*blossomsub.Subscription
subscriptionMutex sync.RWMutex
h host.Host
signKey crypto.PrivKey
peerScore map[string]*appScore
peerScoreMx sync.Mutex
bootstrap internal.PeerConnector
discovery internal.PeerConnector
manualReachability atomic.Pointer[bool]
p2pConfig config.P2PConfig
dht *dht.IpfsDHT
coreId uint
configDir ConfigDir
// Track subscriptions per bitmask key so Unsubscribe can cancel them
// before closing the bitmask (blossomsub refuses to close a bitmask
// with open subscriptions).
subscriptionsByBitmask map[string][]*blossomsub.Subscription
subscriptionMutex sync.RWMutex
h host.Host
signKey crypto.PrivKey
peerScore map[string]*appScore
peerScoreMx sync.Mutex
bootstrap internal.PeerConnector
discovery internal.PeerConnector
manualReachability atomic.Pointer[bool]
p2pConfig config.P2PConfig
dht *dht.IpfsDHT
routingDiscovery *routing.RoutingDiscovery
coreId uint
configDir ConfigDir
}
var _ p2p.PubSub = (*BlossomSub)(nil)
@ -150,15 +155,16 @@ func NewBlossomSubWithHost(
}
bs := &BlossomSub{
ctx: ctx,
cancel: cancel,
logger: logger,
bitmaskMap: make(map[string]*blossomsub.Bitmask),
subscriptionTracker: make(map[string][][]byte),
signKey: privKey,
peerScore: make(map[string]*appScore),
p2pConfig: *p2pConfig,
coreId: coreId,
ctx: ctx,
cancel: cancel,
logger: logger,
bitmaskMap: make(map[string]*blossomsub.Bitmask),
subscriptionTracker: make(map[string][][]byte),
subscriptionsByBitmask: make(map[string][]*blossomsub.Subscription),
signKey: privKey,
peerScore: make(map[string]*appScore),
p2pConfig: *p2pConfig,
coreId: coreId,
}
idService := internal.IDServiceFromHost(host)
@ -317,6 +323,7 @@ func NewBlossomSubWithHost(
peerID := host.ID()
bs.dht = kademliaDHT
bs.routingDiscovery = routingDiscovery
bs.ps = pubsub
bs.peerID = peerID
bs.h = host
@ -457,7 +464,24 @@ func NewBlossomSub(
if coreId == 0 {
opts = append(opts, libp2p.Identity(privKey))
} else {
workerKey, _, err := crypto.GenerateEd448Key(rand.Reader)
// Derive a deterministic worker key from the peer key + core ID.
// This gives each worker a stable, unique peer ID across restarts
// (avoiding sybil detection) while still using the original peer
// key for message signing.
rawPriv, err := privKey.Raw()
if err != nil {
logger.Panic("error getting private key bytes", zap.Error(err))
}
shake := sha3.NewShake256()
shake.Write(rawPriv)
shake.Write([]byte(fmt.Sprintf("/worker/%d", coreId)))
seed := make([]byte, 64)
if _, err := shake.Read(seed); err != nil {
logger.Panic("error deriving worker key seed", zap.Error(err))
}
workerKey, _, err := crypto.GenerateEd448Key(
bytes.NewReader(seed),
)
if err != nil {
logger.Panic("error generating worker peerkey", zap.Error(err))
}
@ -515,17 +539,18 @@ func NewBlossomSub(
ctx, cancel := context.WithCancel(ctx)
bs := &BlossomSub{
ctx: ctx,
cancel: cancel,
logger: logger,
bitmaskMap: make(map[string]*blossomsub.Bitmask),
subscriptionTracker: make(map[string][][]byte),
signKey: privKey,
peerScore: make(map[string]*appScore),
p2pConfig: *p2pConfig,
derivedPeerID: derivedPeerId,
coreId: coreId,
configDir: configDir,
ctx: ctx,
cancel: cancel,
logger: logger,
bitmaskMap: make(map[string]*blossomsub.Bitmask),
subscriptionTracker: make(map[string][][]byte),
subscriptionsByBitmask: make(map[string][]*blossomsub.Subscription),
signKey: privKey,
peerScore: make(map[string]*appScore),
p2pConfig: *p2pConfig,
derivedPeerID: derivedPeerId,
coreId: coreId,
configDir: configDir,
}
h, err := libp2p.New(opts...)
@ -767,6 +792,7 @@ func NewBlossomSub(
peerID := h.ID()
bs.dht = kademliaDHT
bs.routingDiscovery = routingDiscovery
bs.ps = pubsub
bs.peerID = peerID
bs.h = h
@ -872,16 +898,81 @@ func (b *BlossomSub) background(ctx context.Context) {
refreshScores := time.NewTicker(DecayInterval)
defer refreshScores.Stop()
peerReconnectInterval := b.p2pConfig.PeerReconnectCheckInterval
peerReconnect := time.NewTicker(peerReconnectInterval)
defer peerReconnect.Stop()
for {
select {
case <-refreshScores.C:
b.refreshScores()
case <-peerReconnect.C:
b.checkAndReconnectPeers(ctx)
case <-ctx.Done():
return
}
}
}
func (b *BlossomSub) checkAndReconnectPeers(ctx context.Context) {
peerCount := len(b.h.Network().Peers())
if peerCount >= b.p2pConfig.MinBootstrapPeers {
return
}
b.logger.Warn(
"low peer count, attempting to re-bootstrap and discover",
zap.Int("current_peers", peerCount),
zap.Int("min_bootstrap_peers", b.p2pConfig.MinBootstrapPeers),
)
// Re-bootstrap the DHT to refresh the routing table. At startup,
// kademliaDHT.Bootstrap() populates the routing table by connecting to
// bootstrap peers. Without calling it again here, the routing table can
// go empty after all peers disconnect, making FindPeers unable to
// discover anyone — leaving the node permanently stuck.
if b.dht != nil {
if err := b.dht.Bootstrap(ctx); err != nil {
b.logger.Error("DHT re-bootstrap failed", zap.Error(err))
}
}
// Re-advertise so other peers can find us through the DHT.
if b.routingDiscovery != nil {
util.Advertise(
ctx,
b.routingDiscovery,
getNetworkNamespace(b.p2pConfig.Network),
)
}
// Clear peerstore addresses for disconnected peers so we don't keep
// dialing stale/invalid addresses that were added in previous attempts.
for _, p := range b.h.Peerstore().Peers() {
if p == b.h.ID() {
continue
}
if b.h.Network().Connectedness(p) != network.Connected &&
b.h.Network().Connectedness(p) != network.Limited {
b.h.Peerstore().ClearAddrs(p)
}
}
if err := b.DiscoverPeers(ctx); err != nil {
b.logger.Error("peer reconnect failed", zap.Error(err))
}
newCount := len(b.h.Network().Peers())
if newCount >= b.p2pConfig.MinBootstrapPeers {
b.logger.Info("peer reconnect succeeded", zap.Int("peers", newCount))
} else {
b.logger.Warn(
"peer reconnect: still low peer count, will retry at next interval",
zap.Int("peers", newCount),
)
}
}
func (b *BlossomSub) refreshScores() {
b.peerScoreMx.Lock()
@ -983,9 +1074,9 @@ func (b *BlossomSub) Subscribe(
zap.String("bitmask", hex.EncodeToString(bitmask)),
)
// Track subscriptions for cleanup on Close
// Track subscriptions per bitmask for cleanup
b.subscriptionMutex.Lock()
b.subscriptions = append(b.subscriptions, subs...)
b.subscriptionsByBitmask[string(bitmask)] = subs
b.subscriptionMutex.Unlock()
for _, sub := range subs {
@ -1068,6 +1159,15 @@ func (b *BlossomSub) Unsubscribe(bitmask []byte, raw bool) {
zap.String("bitmask", hex.EncodeToString(bitmask)),
)
// Cancel the subscription objects so the bitmask can be closed and the
// subscription goroutines exit.
if subs, ok := b.subscriptionsByBitmask[bitmaskKey]; ok {
for _, sub := range subs {
sub.Cancel()
}
delete(b.subscriptionsByBitmask, bitmaskKey)
}
// Check each bit slice to see if it's still needed by other subscriptions
for _, bitSlice := range bitSlices {
bitSliceKey := string(bitSlice)
@ -1288,7 +1388,7 @@ func (b *BlossomSub) runConnectivityTest(
if info.ID == b.h.ID() {
continue
}
if strings.Contains(info.Addrs[0].String(), "dnsaddr") {
if strings.Contains(info.Addrs[0].String(), "dns4") {
candidates = append(candidates, info)
}
}
@ -1316,7 +1416,7 @@ func (b *BlossomSub) invokeConnectivityTest(
if err != nil {
host, err = addr.ValueForProtocol(ma.P_IP6)
if err != nil {
host, err = addr.ValueForProtocol(ma.P_DNSADDR)
host, err = addr.ValueForProtocol(ma.P_DNS4)
if err != nil {
continue
}
@ -2031,10 +2131,12 @@ func (b *BlossomSub) Close() error {
// Cancel all subscriptions to unblock any pending Next() calls
b.subscriptionMutex.Lock()
for _, sub := range b.subscriptions {
sub.Cancel()
for _, subs := range b.subscriptionsByBitmask {
for _, sub := range subs {
sub.Cancel()
}
}
b.subscriptions = nil
b.subscriptionsByBitmask = nil
b.subscriptionMutex.Unlock()
return nil

170
node/p2p/blossomsub_test.go Normal file
View File

@ -0,0 +1,170 @@
package p2p
import (
"context"
"sync/atomic"
"testing"
"time"
"github.com/libp2p/go-libp2p"
"github.com/libp2p/go-libp2p/core/network"
"go.uber.org/zap"
blossomsub "source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub"
"source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb"
)
// newTestBlossomSub creates a minimal BlossomSub wrapper suitable for testing
// Subscribe/Unsubscribe without the full DHT/discovery/bootstrap setup.
func newTestBlossomSub(t *testing.T) *BlossomSub {
t.Helper()
ctx, cancel := context.WithCancel(context.Background())
h, err := libp2p.New(
libp2p.ResourceManager(&network.NullResourceManager{}),
)
if err != nil {
t.Fatal(err)
}
ps, err := blossomsub.NewBlossomSub(ctx, h)
if err != nil {
h.Close()
cancel()
t.Fatal(err)
}
bs := &BlossomSub{
ctx: ctx,
cancel: cancel,
logger: zap.NewNop(),
ps: ps,
h: h,
bitmaskMap: make(map[string]*blossomsub.Bitmask),
subscriptionTracker: make(map[string][][]byte),
subscriptionsByBitmask: make(map[string][]*blossomsub.Subscription),
}
bs.p2pConfig.SubscriptionQueueSize = 128
t.Cleanup(func() {
cancel()
h.Close()
})
return bs
}
func noopHandler(*pb.Message) error { return nil }
// TestUnsubscribeAllowsResubscribe is the critical regression test. It verifies
// that after Unsubscribe, the same bitmask can be subscribed to again. Before
// the fix, bm.Close() silently failed because subscriptions were still open,
// and the subsequent ps.Join() returned an error because the bitmask was still
// registered.
func TestUnsubscribeAllowsResubscribe(t *testing.T) {
bs := newTestBlossomSub(t)
bitmask := []byte{0x01}
// First subscribe
if err := bs.Subscribe(bitmask, noopHandler); err != nil {
t.Fatalf("first Subscribe failed: %v", err)
}
// Unsubscribe must cancel subs before Close so Close succeeds
bs.Unsubscribe(bitmask, false)
// Re-subscribe this fails without the fix because the bitmask is still
// registered in the pubsub (Close was a silent no-op).
if err := bs.Subscribe(bitmask, noopHandler); err != nil {
t.Fatalf("re-Subscribe after Unsubscribe failed: %v", err)
}
// Clean up
bs.Unsubscribe(bitmask, false)
}
// TestUnsubscribeTracksPerBitmask verifies that subscribing to multiple
// bitmasks tracks them independently and unsubscribing one doesn't affect
// the other.
func TestUnsubscribeTracksPerBitmask(t *testing.T) {
bs := newTestBlossomSub(t)
bitmaskA := []byte{0x01}
bitmaskB := []byte{0x02}
// Subscribe to both
if err := bs.Subscribe(bitmaskA, noopHandler); err != nil {
t.Fatalf("Subscribe A failed: %v", err)
}
if err := bs.Subscribe(bitmaskB, noopHandler); err != nil {
t.Fatalf("Subscribe B failed: %v", err)
}
// Both should be tracked
bs.subscriptionMutex.RLock()
if _, ok := bs.subscriptionsByBitmask[string(bitmaskA)]; !ok {
t.Error("bitmask A not tracked in subscriptionsByBitmask")
}
if _, ok := bs.subscriptionsByBitmask[string(bitmaskB)]; !ok {
t.Error("bitmask B not tracked in subscriptionsByBitmask")
}
bs.subscriptionMutex.RUnlock()
// Unsubscribe A only
bs.Unsubscribe(bitmaskA, false)
bs.subscriptionMutex.RLock()
if _, ok := bs.subscriptionsByBitmask[string(bitmaskA)]; ok {
t.Error("bitmask A still tracked after Unsubscribe")
}
if _, ok := bs.subscriptionsByBitmask[string(bitmaskB)]; !ok {
t.Error("bitmask B should still be tracked")
}
bs.subscriptionMutex.RUnlock()
// A should be re-subscribable (Close succeeded)
if err := bs.Subscribe(bitmaskA, noopHandler); err != nil {
t.Fatalf("re-Subscribe A after Unsubscribe failed: %v", err)
}
// Unsubscribe both
bs.Unsubscribe(bitmaskA, false)
bs.Unsubscribe(bitmaskB, false)
bs.subscriptionMutex.RLock()
if len(bs.subscriptionsByBitmask) != 0 {
t.Errorf("subscriptionsByBitmask should be empty, got %d entries",
len(bs.subscriptionsByBitmask))
}
bs.subscriptionMutex.RUnlock()
}
// TestUnsubscribeHandlerExits verifies that after Unsubscribe, the handler
// goroutine actually stops. sub.Cancel() unblocks the sub.Next() call in the
// goroutine, causing it to return false and exit.
func TestUnsubscribeHandlerExits(t *testing.T) {
bs := newTestBlossomSub(t)
bitmask := []byte{0x01}
var calls atomic.Int32
handler := func(*pb.Message) error {
calls.Add(1)
return nil
}
if err := bs.Subscribe(bitmask, handler); err != nil {
t.Fatalf("Subscribe failed: %v", err)
}
bs.Unsubscribe(bitmask, false)
// Give the goroutine time to observe the cancellation and exit.
time.Sleep(100 * time.Millisecond)
snapshot := calls.Load()
// Wait again and verify no further increments.
time.Sleep(100 * time.Millisecond)
if got := calls.Load(); got != snapshot {
t.Errorf("handler still running after Unsubscribe: calls went from %d to %d",
snapshot, got)
}
}

View File

@ -11,6 +11,8 @@ import (
"github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/peerstore"
"github.com/libp2p/go-libp2p/p2p/protocol/identify"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"
"go.uber.org/zap"
)
@ -84,7 +86,16 @@ func (pc *peerConnector) connectToPeer(
return
}
pc.host.Peerstore().AddAddrs(p.ID, p.Addrs, peerstore.AddressTTL)
routable := ma.FilterAddrs(p.Addrs, func(a ma.Multiaddr) bool {
pub, err := manet.IsPublicAddr(a)
return pub && err == nil
})
if len(routable) == 0 {
atomic.AddUint32(failure, 1)
return
}
pc.host.Peerstore().AddAddrs(p.ID, routable, peerstore.AddressTTL)
conn, err := pc.host.Network().DialPeer(ctx, p.ID)
if err != nil {

View File

@ -96,6 +96,7 @@ var pebbleMigrations = []func(*pebble.Batch, *pebble.DB, *config.Config) error{
migration_2_1_0_1820,
migration_2_1_0_1821,
migration_2_1_0_1822,
migration_2_1_0_1823,
}
func NewPebbleDB(
@ -1133,6 +1134,12 @@ func migration_2_1_0_1822(b *pebble.Batch, db *pebble.DB, cfg *config.Config) er
return doMigration1818(db, cfg)
}
// migration_2_1_0_1823 rebuilds the global prover shard tree to fix potential
// corruption from transaction bypass bugs in SaveRoot and Commit.
func migration_2_1_0_1823(b *pebble.Batch, db *pebble.DB, cfg *config.Config) error {
return doMigration1818(db, cfg)
}
// pebbleBatchDB wraps a *pebble.Batch to implement store.KVDB for use in migrations
type pebbleBatchDB struct {
b *pebble.Batch

View File

@ -466,6 +466,10 @@ func (w *WorkerManager) AllocateWorker(coreId uint, filter []byte) error {
return nil
}
func (w *WorkerManager) RespawnWorker(coreId uint, filter []byte) error {
return w.respawnWorker(coreId, filter)
}
func (w *WorkerManager) DeallocateWorker(coreId uint) error {
timer := prometheus.NewTimer(
workerOperationDuration.WithLabelValues("deallocate"),

View File

@ -66,6 +66,8 @@ const (
ProverSeniorityMergeType uint32 = 0x031A
TimeoutStateType uint32 = 0x031C
TimeoutCertificateType uint32 = 0x031D
ShardSplitType uint32 = 0x031E
ShardMergeType uint32 = 0x031F
// Hypergraph types (0x0400 - 0x04FF)
HypergraphConfigurationType uint32 = 0x0401

View File

@ -2078,6 +2078,386 @@ func (a *AltShardUpdate) FromCanonicalBytes(data []byte) error {
return nil
}
// ShardSplit serialization methods
func (s *ShardSplit) ToCanonicalBytes() ([]byte, error) {
buf := new(bytes.Buffer)
// Write type prefix
if err := binary.Write(buf, binary.BigEndian, ShardSplitType); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write shard_address (length-prefixed)
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(s.ShardAddress)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if _, err := buf.Write(s.ShardAddress); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write number of proposed_shards
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(s.ProposedShards)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write each proposed shard (length-prefixed)
for _, shard := range s.ProposedShards {
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(shard)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if _, err := buf.Write(shard); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
}
// Write frame_number
if err := binary.Write(buf, binary.BigEndian, s.FrameNumber); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write public_key_signature_bls48581
if s.PublicKeySignatureBls48581 != nil {
sigBytes, err := s.PublicKeySignatureBls48581.ToCanonicalBytes()
if err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(sigBytes)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if _, err := buf.Write(sigBytes); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
} else {
if err := binary.Write(buf, binary.BigEndian, uint32(0)); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
}
return buf.Bytes(), nil
}
func (s *ShardSplit) FromCanonicalBytes(data []byte) error {
buf := bytes.NewBuffer(data)
// Read and verify type prefix
var typePrefix uint32
if err := binary.Read(buf, binary.BigEndian, &typePrefix); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
if typePrefix != ShardSplitType {
return errors.Wrap(
errors.New("invalid type prefix"),
"from canonical bytes",
)
}
// Read shard_address
var addrLen uint32
if err := binary.Read(buf, binary.BigEndian, &addrLen); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
if addrLen > 64 {
return errors.Wrap(
errors.New("invalid shard address length"),
"from canonical bytes",
)
}
s.ShardAddress = make([]byte, addrLen)
if _, err := buf.Read(s.ShardAddress); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
// Read number of proposed_shards
var numShards uint32
if err := binary.Read(buf, binary.BigEndian, &numShards); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
if numShards > 8 {
return errors.Wrap(
errors.New("too many proposed shards"),
"from canonical bytes",
)
}
// Read each proposed shard
s.ProposedShards = make([][]byte, numShards)
for i := uint32(0); i < numShards; i++ {
var shardLen uint32
if err := binary.Read(buf, binary.BigEndian, &shardLen); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
if shardLen > 66 {
return errors.Wrap(
errors.New("invalid proposed shard length"),
"from canonical bytes",
)
}
s.ProposedShards[i] = make([]byte, shardLen)
if _, err := buf.Read(s.ProposedShards[i]); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
}
// Read frame_number
if err := binary.Read(buf, binary.BigEndian, &s.FrameNumber); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
// Read public_key_signature_bls48581
var sigLen uint32
if err := binary.Read(buf, binary.BigEndian, &sigLen); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
if sigLen > 0 {
sigBytes := make([]byte, sigLen)
if _, err := buf.Read(sigBytes); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
s.PublicKeySignatureBls48581 = &BLS48581AddressedSignature{}
if err := s.PublicKeySignatureBls48581.FromCanonicalBytes(
sigBytes,
); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
}
return nil
}
func (s *ShardSplit) Validate() error {
if len(s.ShardAddress) < 32 || len(s.ShardAddress) > 63 {
return errors.New("shard_address must be 32-63 bytes")
}
if len(s.ProposedShards) < 2 || len(s.ProposedShards) > 8 {
return errors.New("proposed_shards must have 2-8 entries")
}
for _, shard := range s.ProposedShards {
if len(shard) != len(s.ShardAddress)+1 &&
len(shard) != len(s.ShardAddress)+2 {
return errors.Errorf(
"proposed shard length %d invalid for parent length %d",
len(shard), len(s.ShardAddress),
)
}
if !bytes.HasPrefix(shard, s.ShardAddress) {
return errors.New("proposed shard must share parent prefix")
}
}
if s.PublicKeySignatureBls48581 == nil {
return errors.New("BLS signature must be present")
}
return nil
}
// ShardMerge serialization methods
func (s *ShardMerge) ToCanonicalBytes() ([]byte, error) {
buf := new(bytes.Buffer)
// Write type prefix
if err := binary.Write(buf, binary.BigEndian, ShardMergeType); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write number of shard_addresses
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(s.ShardAddresses)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write each shard address (length-prefixed)
for _, addr := range s.ShardAddresses {
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(addr)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if _, err := buf.Write(addr); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
}
// Write parent_address (length-prefixed)
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(s.ParentAddress)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if _, err := buf.Write(s.ParentAddress); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write frame_number
if err := binary.Write(buf, binary.BigEndian, s.FrameNumber); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
// Write public_key_signature_bls48581
if s.PublicKeySignatureBls48581 != nil {
sigBytes, err := s.PublicKeySignatureBls48581.ToCanonicalBytes()
if err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if err := binary.Write(
buf,
binary.BigEndian,
uint32(len(sigBytes)),
); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
if _, err := buf.Write(sigBytes); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
} else {
if err := binary.Write(buf, binary.BigEndian, uint32(0)); err != nil {
return nil, errors.Wrap(err, "to canonical bytes")
}
}
return buf.Bytes(), nil
}
// FromCanonicalBytes deserializes a ShardMerge from its canonical binary
// form, mirroring ToCanonicalBytes: a uint32 type prefix, a count of
// length-prefixed shard addresses, a length-prefixed parent address, a
// uint64 frame number, and an optional length-prefixed BLS48-581
// addressed signature (a zero length means no signature).
//
// Every variable-length field is checked against both a hard cap and the
// bytes actually remaining in the buffer. The remaining-bytes check is
// essential: bytes.Buffer.Read returns a short read with a nil error when
// the buffer drains, so truncated input would otherwise decode silently
// into zero-padded fields instead of failing. Bounding sigLen by the
// remaining bytes also prevents attacker-controlled allocations of up to
// 4 GiB.
func (s *ShardMerge) FromCanonicalBytes(data []byte) error {
	buf := bytes.NewBuffer(data)
	// Read and verify type prefix
	var typePrefix uint32
	if err := binary.Read(buf, binary.BigEndian, &typePrefix); err != nil {
		return errors.Wrap(err, "from canonical bytes")
	}
	if typePrefix != ShardMergeType {
		return errors.Wrap(
			errors.New("invalid type prefix"),
			"from canonical bytes",
		)
	}
	// Read number of shard_addresses
	var numAddrs uint32
	if err := binary.Read(buf, binary.BigEndian, &numAddrs); err != nil {
		return errors.Wrap(err, "from canonical bytes")
	}
	if numAddrs > 8 {
		return errors.Wrap(
			errors.New("too many shard addresses"),
			"from canonical bytes",
		)
	}
	// Read each shard address
	s.ShardAddresses = make([][]byte, numAddrs)
	for i := uint32(0); i < numAddrs; i++ {
		var addrLen uint32
		if err := binary.Read(buf, binary.BigEndian, &addrLen); err != nil {
			return errors.Wrap(err, "from canonical bytes")
		}
		if addrLen > 64 {
			return errors.Wrap(
				errors.New("invalid shard address length"),
				"from canonical bytes",
			)
		}
		// Reject truncated input before reading: Buffer.Read would
		// otherwise short-read without an error.
		if buf.Len() < int(addrLen) {
			return errors.Wrap(
				errors.New("truncated shard address"),
				"from canonical bytes",
			)
		}
		s.ShardAddresses[i] = make([]byte, addrLen)
		if _, err := buf.Read(s.ShardAddresses[i]); err != nil {
			return errors.Wrap(err, "from canonical bytes")
		}
	}
	// Read parent_address
	var parentLen uint32
	if err := binary.Read(buf, binary.BigEndian, &parentLen); err != nil {
		return errors.Wrap(err, "from canonical bytes")
	}
	if parentLen > 64 {
		return errors.Wrap(
			errors.New("invalid parent address length"),
			"from canonical bytes",
		)
	}
	if buf.Len() < int(parentLen) {
		return errors.Wrap(
			errors.New("truncated parent address"),
			"from canonical bytes",
		)
	}
	s.ParentAddress = make([]byte, parentLen)
	if _, err := buf.Read(s.ParentAddress); err != nil {
		return errors.Wrap(err, "from canonical bytes")
	}
	// Read frame_number
	if err := binary.Read(buf, binary.BigEndian, &s.FrameNumber); err != nil {
		return errors.Wrap(err, "from canonical bytes")
	}
	// Read public_key_signature_bls48581
	var sigLen uint32
	if err := binary.Read(buf, binary.BigEndian, &sigLen); err != nil {
		return errors.Wrap(err, "from canonical bytes")
	}
	if sigLen > 0 {
		// Bound the allocation by the bytes actually present; this also
		// rejects truncated signatures.
		if buf.Len() < int(sigLen) {
			return errors.Wrap(
				errors.New("truncated signature"),
				"from canonical bytes",
			)
		}
		sigBytes := make([]byte, sigLen)
		if _, err := buf.Read(sigBytes); err != nil {
			return errors.Wrap(err, "from canonical bytes")
		}
		s.PublicKeySignatureBls48581 = &BLS48581AddressedSignature{}
		if err := s.PublicKeySignatureBls48581.FromCanonicalBytes(
			sigBytes,
		); err != nil {
			return errors.Wrap(err, "from canonical bytes")
		}
	}
	return nil
}
// Validate performs structural checks on a ShardMerge proposal: it must
// name between 2 and 8 non-base shards, each extending a 32-byte parent
// address prefix, and must carry a BLS signature. Check order matches
// serialization-side expectations and is significant for error reporting.
func (s *ShardMerge) Validate() error {
	if n := len(s.ShardAddresses); n < 2 || n > 8 {
		return errors.New("shard_addresses must have 2-8 entries")
	}
	if len(s.ParentAddress) != 32 {
		return errors.New("parent_address must be 32 bytes")
	}
	for _, shard := range s.ShardAddresses {
		// Base shards are exactly 32 bytes; only longer sub-shard
		// addresses are mergeable.
		if len(shard) <= 32 {
			return errors.New("cannot merge base shards (must be > 32 bytes)")
		}
		// Every merged shard must descend from the declared parent.
		if !bytes.HasPrefix(shard, s.ParentAddress) {
			return errors.New(
				"all shard addresses must share the parent address prefix",
			)
		}
	}
	if s.PublicKeySignatureBls48581 == nil {
		return errors.New("BLS signature must be present")
	}
	return nil
}
func (m *MessageRequest) ToCanonicalBytes() ([]byte, error) {
buf := new(bytes.Buffer)
@ -2145,6 +2525,10 @@ func (m *MessageRequest) ToCanonicalBytes() ([]byte, error) {
innerBytes, err = request.AltShardUpdate.ToCanonicalBytes()
case *MessageRequest_SeniorityMerge:
innerBytes, err = request.SeniorityMerge.ToCanonicalBytes()
case *MessageRequest_ShardSplit:
innerBytes, err = request.ShardSplit.ToCanonicalBytes()
case *MessageRequest_ShardMerge:
innerBytes, err = request.ShardMerge.ToCanonicalBytes()
default:
return nil, errors.New("unknown request type")
}
@ -2419,6 +2803,24 @@ func (m *MessageRequest) FromCanonicalBytes(data []byte) error {
SeniorityMerge: seniorityMerge,
}
case ShardSplitType:
shardSplit := &ShardSplit{}
if err := shardSplit.FromCanonicalBytes(dataBytes); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
m.Request = &MessageRequest_ShardSplit{
ShardSplit: shardSplit,
}
case ShardMergeType:
shardMerge := &ShardMerge{}
if err := shardMerge.FromCanonicalBytes(dataBytes); err != nil {
return errors.Wrap(err, "from canonical bytes")
}
m.Request = &MessageRequest_ShardMerge{
ShardMerge: shardMerge,
}
default:
return errors.Errorf("unknown message type: 0x%08X", innerType)
}

File diff suppressed because it is too large Load Diff

View File

@ -111,6 +111,28 @@ message AltShardUpdate {
bytes signature = 7;
}
// ShardSplit proposes dividing one shard into multiple sub-shards.
message ShardSplit {
  // The original shard address being split (32-63 bytes)
  bytes shard_address = 1;
  // The new sub-shard addresses proposed by the split
  repeated bytes proposed_shards = 2;
  // The frame number at which the split was detected
  uint64 frame_number = 3;
  // The BLS48-581 addressed signature proving prover identity
  quilibrium.node.keys.pb.BLS48581AddressedSignature public_key_signature_bls48581 = 4;
}
// ShardMerge proposes collapsing a set of sub-shards back into their
// shared parent shard.
message ShardMerge {
  // The sub-shard addresses being merged
  repeated bytes shard_addresses = 1;
  // The parent shard address (first 32 bytes)
  bytes parent_address = 2;
  // The frame number at which the merge was detected
  uint64 frame_number = 3;
  // The BLS48-581 addressed signature proving prover identity
  quilibrium.node.keys.pb.BLS48581AddressedSignature public_key_signature_bls48581 = 4;
}
message MessageRequest {
oneof request {
quilibrium.node.global.pb.ProverJoin join = 1;
@ -140,6 +162,8 @@ message MessageRequest {
quilibrium.node.global.pb.FrameHeader shard = 25;
quilibrium.node.global.pb.AltShardUpdate alt_shard_update = 26;
quilibrium.node.global.pb.ProverSeniorityMerge seniority_merge = 27;
quilibrium.node.global.pb.ShardSplit shard_split = 28;
quilibrium.node.global.pb.ShardMerge shard_merge = 29;
}
int64 timestamp = 99;
}

View File

@ -664,6 +664,284 @@ func TestProverKick_Serialization(t *testing.T) {
}
}
// TestShardSplit_Serialization round-trips ShardSplit values through
// ToCanonicalBytes/FromCanonicalBytes and verifies every field survives.
func TestShardSplit_Serialization(t *testing.T) {
	cases := []struct {
		name  string
		split *ShardSplit
	}{
		{
			name: "complete shard split",
			split: &ShardSplit{
				ShardAddress:   make([]byte, 33),
				ProposedShards: [][]byte{make([]byte, 34), make([]byte, 34)},
				FrameNumber:    12345,
				PublicKeySignatureBls48581: &BLS48581AddressedSignature{
					Signature: make([]byte, 74),
					Address:   make([]byte, 32),
				},
			},
		},
		{
			name: "split with max proposed shards",
			split: &ShardSplit{
				ShardAddress: append([]byte{0xFF}, make([]byte, 32)...),
				ProposedShards: [][]byte{
					make([]byte, 34), make([]byte, 34),
					make([]byte, 34), make([]byte, 34),
					make([]byte, 34), make([]byte, 34),
					make([]byte, 34), make([]byte, 34),
				},
				FrameNumber: 99999,
				PublicKeySignatureBls48581: &BLS48581AddressedSignature{
					Signature: append([]byte{0xAA}, make([]byte, 73)...),
					Address:   append([]byte{0xCC}, make([]byte, 31)...),
				},
			},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			encoded, err := tc.split.ToCanonicalBytes()
			require.NoError(t, err)
			require.NotNil(t, encoded)

			decoded := &ShardSplit{}
			require.NoError(t, decoded.FromCanonicalBytes(encoded))

			assert.Equal(t, tc.split.ShardAddress, decoded.ShardAddress)
			assert.Equal(t, tc.split.ProposedShards, decoded.ProposedShards)
			assert.Equal(t, tc.split.FrameNumber, decoded.FrameNumber)
			require.NotNil(t, decoded.PublicKeySignatureBls48581)
			sig := decoded.PublicKeySignatureBls48581
			assert.Equal(t, tc.split.PublicKeySignatureBls48581.Signature, sig.Signature)
			assert.Equal(t, tc.split.PublicKeySignatureBls48581.Address, sig.Address)
		})
	}
}
// TestShardMerge_Serialization round-trips ShardMerge values through
// ToCanonicalBytes/FromCanonicalBytes and verifies every field survives.
func TestShardMerge_Serialization(t *testing.T) {
	cases := []struct {
		name  string
		merge *ShardMerge
	}{
		{
			name: "complete shard merge",
			merge: &ShardMerge{
				ShardAddresses: [][]byte{make([]byte, 33), make([]byte, 33)},
				ParentAddress:  make([]byte, 32),
				FrameNumber:    12345,
				PublicKeySignatureBls48581: &BLS48581AddressedSignature{
					Signature: make([]byte, 74),
					Address:   make([]byte, 32),
				},
			},
		},
		{
			name: "merge with max shard addresses",
			merge: &ShardMerge{
				ShardAddresses: [][]byte{
					append([]byte{0x01}, make([]byte, 32)...),
					append([]byte{0x02}, make([]byte, 32)...),
					append([]byte{0x03}, make([]byte, 32)...),
					append([]byte{0x04}, make([]byte, 32)...),
				},
				ParentAddress: append([]byte{0xFF}, make([]byte, 31)...),
				FrameNumber:   77777,
				PublicKeySignatureBls48581: &BLS48581AddressedSignature{
					Signature: append([]byte{0xDD}, make([]byte, 73)...),
					Address:   append([]byte{0xFF}, make([]byte, 31)...),
				},
			},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			encoded, err := tc.merge.ToCanonicalBytes()
			require.NoError(t, err)
			require.NotNil(t, encoded)

			decoded := &ShardMerge{}
			require.NoError(t, decoded.FromCanonicalBytes(encoded))

			assert.Equal(t, tc.merge.ShardAddresses, decoded.ShardAddresses)
			assert.Equal(t, tc.merge.ParentAddress, decoded.ParentAddress)
			assert.Equal(t, tc.merge.FrameNumber, decoded.FrameNumber)
			require.NotNil(t, decoded.PublicKeySignatureBls48581)
			sig := decoded.PublicKeySignatureBls48581
			assert.Equal(t, tc.merge.PublicKeySignatureBls48581.Signature, sig.Signature)
			assert.Equal(t, tc.merge.PublicKeySignatureBls48581.Address, sig.Address)
		})
	}
}
func TestShardSplit_Validate(t *testing.T) {
t.Run("valid split passes", func(t *testing.T) {
parent := make([]byte, 33)
split := &ShardSplit{
ShardAddress: parent,
ProposedShards: [][]byte{append(parent, 0x00), append(parent, 0x01)},
FrameNumber: 100,
PublicKeySignatureBls48581: &BLS48581AddressedSignature{
Signature: make([]byte, 74),
Address: make([]byte, 32),
},
}
err := split.Validate()
assert.NoError(t, err)
})
t.Run("shard address too short", func(t *testing.T) {
split := &ShardSplit{
ShardAddress: make([]byte, 31),
ProposedShards: [][]byte{make([]byte, 33), make([]byte, 33)},
}
err := split.Validate()
assert.Error(t, err)
})
t.Run("too few proposed shards", func(t *testing.T) {
split := &ShardSplit{
ShardAddress: make([]byte, 33),
ProposedShards: [][]byte{make([]byte, 34)},
}
err := split.Validate()
assert.Error(t, err)
})
t.Run("nil signature", func(t *testing.T) {
parent := make([]byte, 33)
split := &ShardSplit{
ShardAddress: parent,
ProposedShards: [][]byte{append(parent, 0x00), append(parent, 0x01)},
FrameNumber: 100,
PublicKeySignatureBls48581: nil,
}
err := split.Validate()
assert.Error(t, err)
})
}
func TestShardMerge_Validate(t *testing.T) {
t.Run("valid merge passes", func(t *testing.T) {
parent := make([]byte, 32)
merge := &ShardMerge{
ShardAddresses: [][]byte{append(parent, 0x00), append(parent, 0x01)},
ParentAddress: parent,
FrameNumber: 100,
PublicKeySignatureBls48581: &BLS48581AddressedSignature{
Signature: make([]byte, 74),
Address: make([]byte, 32),
},
}
err := merge.Validate()
assert.NoError(t, err)
})
t.Run("too few shard addresses", func(t *testing.T) {
merge := &ShardMerge{
ShardAddresses: [][]byte{make([]byte, 33)},
ParentAddress: make([]byte, 32),
}
err := merge.Validate()
assert.Error(t, err)
})
t.Run("parent address wrong length", func(t *testing.T) {
merge := &ShardMerge{
ShardAddresses: [][]byte{make([]byte, 33), make([]byte, 33)},
ParentAddress: make([]byte, 31),
}
err := merge.Validate()
assert.Error(t, err)
})
t.Run("base shard rejected", func(t *testing.T) {
parent := make([]byte, 32)
merge := &ShardMerge{
ShardAddresses: [][]byte{make([]byte, 32), append(parent, 0x01)},
ParentAddress: parent,
}
err := merge.Validate()
assert.Error(t, err)
})
t.Run("nil signature", func(t *testing.T) {
parent := make([]byte, 32)
merge := &ShardMerge{
ShardAddresses: [][]byte{append(parent, 0x00), append(parent, 0x01)},
ParentAddress: parent,
FrameNumber: 100,
PublicKeySignatureBls48581: nil,
}
err := merge.Validate()
assert.Error(t, err)
})
}
// TestMessageRequest_ShardSplit_Serialization verifies that a ShardSplit
// wrapped in the MessageRequest oneof survives a canonical round trip.
func TestMessageRequest_ShardSplit_Serialization(t *testing.T) {
	parent := make([]byte, 33)
	original := &MessageRequest{
		Request: &MessageRequest_ShardSplit{
			ShardSplit: &ShardSplit{
				ShardAddress:   parent,
				ProposedShards: [][]byte{append(parent, 0x00), append(parent, 0x01)},
				FrameNumber:    12345,
				PublicKeySignatureBls48581: &BLS48581AddressedSignature{
					Signature: make([]byte, 74),
					Address:   make([]byte, 32),
				},
			},
		},
	}
	encoded, err := original.ToCanonicalBytes()
	require.NoError(t, err)
	require.NotNil(t, encoded)

	decoded := &MessageRequest{}
	require.NoError(t, decoded.FromCanonicalBytes(encoded))

	wrapped, ok := decoded.Request.(*MessageRequest_ShardSplit)
	require.True(t, ok)
	assert.Equal(t, parent, wrapped.ShardSplit.ShardAddress)
	assert.Equal(t, uint64(12345), wrapped.ShardSplit.FrameNumber)
	assert.Len(t, wrapped.ShardSplit.ProposedShards, 2)
}
// TestMessageRequest_ShardMerge_Serialization verifies that a ShardMerge
// wrapped in the MessageRequest oneof survives a canonical round trip.
func TestMessageRequest_ShardMerge_Serialization(t *testing.T) {
	parent := make([]byte, 32)
	original := &MessageRequest{
		Request: &MessageRequest_ShardMerge{
			ShardMerge: &ShardMerge{
				ShardAddresses: [][]byte{append(parent, 0x00), append(parent, 0x01)},
				ParentAddress:  parent,
				FrameNumber:    67890,
				PublicKeySignatureBls48581: &BLS48581AddressedSignature{
					Signature: make([]byte, 74),
					Address:   make([]byte, 32),
				},
			},
		},
	}
	encoded, err := original.ToCanonicalBytes()
	require.NoError(t, err)
	require.NotNil(t, encoded)

	decoded := &MessageRequest{}
	require.NoError(t, decoded.FromCanonicalBytes(encoded))

	wrapped, ok := decoded.Request.(*MessageRequest_ShardMerge)
	require.True(t, ok)
	assert.Equal(t, parent, wrapped.ShardMerge.ParentAddress)
	assert.Equal(t, uint64(67890), wrapped.ShardMerge.FrameNumber)
	assert.Len(t, wrapped.ShardMerge.ShardAddresses, 2)
}
func TestProverLivenessCheck_Serialization(t *testing.T) {
tests := []struct {
name string

View File

@ -116,6 +116,7 @@ func (s *ShardMergeEventData) ControlEventData() {}
// BulkShardMergeEventData contains all merge-eligible shard groups in a single event
type BulkShardMergeEventData struct {
	// MergeGroups holds one ShardMergeEventData per merge-eligible group.
	MergeGroups []ShardMergeEventData
	// FrameProver is the prover associated with the frame that produced
	// this event — presumably its address/key bytes; confirm with the
	// event emitter.
	FrameProver []byte
}

// ControlEventData marks BulkShardMergeEventData as a control event payload.
func (b *BulkShardMergeEventData) ControlEventData() {}
@ -126,6 +127,7 @@ type ShardSplitEventData struct {
ProverCount int
AttestedStorage uint64
ProposedShards [][]byte
FrameProver []byte
}
func (s *ShardSplitEventData) ControlEventData() {}

View File

@ -18,4 +18,5 @@ type WorkerManager interface {
ProposeAllocations(coreIds []uint, filters [][]byte) error
DecideAllocations(reject [][]byte, confirm [][]byte) error
RangeWorkers() ([]*store.WorkerInfo, error)
RespawnWorker(coreId uint, filter []byte) error
}