resolve sync/reel/validation deadlock

This commit is contained in:
Cassandra Heart 2026-01-10 04:12:41 -06:00
parent 8c8fca2ab7
commit 910385ec0b
No known key found for this signature in database
GPG Key ID: 371083BFA6C240AA
4 changed files with 14 additions and 34 deletions

View File

@@ -2070,13 +2070,6 @@ func (e *GlobalConsensusEngine) performBlockingProverHypersync(
newRoots := e.syncProvider.HyperSync(ctx, proposer, shardKey, nil, expectedRoot)
close(done)
if err := e.proverRegistry.Refresh(); err != nil {
e.logger.Warn(
"failed to refresh prover registry after blocking hypersync",
zap.Error(err),
)
}
e.logger.Info("blocking hypersync completed")
if len(newRoots) == 0 {
return nil

View File

@@ -265,6 +265,19 @@ func (e *GlobalConsensusEngine) handleFrameMessage(
return
}
valid, err := e.frameValidator.Validate(frame)
if err != nil {
e.logger.Debug("global frame validation error", zap.Error(err))
framesProcessedTotal.WithLabelValues("error").Inc()
return
}
if !valid {
framesProcessedTotal.WithLabelValues("error").Inc()
e.logger.Debug("invalid global frame")
return
}
if frame.Header != nil {
e.recordFrameMessageFrameNumber(frame.Header.FrameNumber)
}
@@ -282,7 +295,7 @@ func (e *GlobalConsensusEngine) handleFrameMessage(
return
}
-frame, err := e.globalTimeReel.GetHead()
+frame, err = e.globalTimeReel.GetHead()
if err == nil && frame != nil {
e.currentRank = frame.GetRank()
}

View File

@@ -453,19 +453,6 @@ func (e *GlobalConsensusEngine) validateFrameMessage(
return tp2p.ValidationResultReject
}
valid, err := e.frameValidator.Validate(frame)
if err != nil {
e.logger.Debug("global frame validation error", zap.Error(err))
frameValidationTotal.WithLabelValues("reject").Inc()
return tp2p.ValidationResultReject
}
if !valid {
frameValidationTotal.WithLabelValues("reject").Inc()
e.logger.Debug("invalid global frame")
return tp2p.ValidationResultReject
}
if e.currentRank > frame.GetRank()+2 {
frameValidationTotal.WithLabelValues("ignore").Inc()
return tp2p.ValidationResultIgnore

View File

@@ -412,19 +412,6 @@ func (r *ProverRegistry) PruneOrphanJoins(frameNumber uint64) error {
return nil
}
// Reload prover state from hypergraph to ensure deterministic pruning
// across all nodes regardless of in-memory cache state
r.globalTrie = &tries.RollingFrecencyCritbitTrie{}
r.shardTries = make(map[string]*tries.RollingFrecencyCritbitTrie)
r.proverCache = make(map[string]*consensus.ProverInfo)
r.filterCache = make(map[string][]*consensus.ProverInfo)
r.addressToFilters = make(map[string][]string)
if err := r.extractGlobalState(); err != nil {
r.logger.Error("failed to reload global state before pruning", zap.Error(err))
return errors.Wrap(err, "prune orphan joins")
}
cutoff := frameNumber - 760
var prunedAllocations int
var prunedProvers int