diff --git a/hypergraph/hypergraph.go b/hypergraph/hypergraph.go
index 806be23..7398048 100644
--- a/hypergraph/hypergraph.go
+++ b/hypergraph/hypergraph.go
@@ -159,6 +159,51 @@ func (hg *HypergraphCRDT) contextWithShutdown(
 	return ctx, cancel
 }
 
+// lockWithShutdown acquires hg.mu exclusively unless shutdown intervenes.
+//
+// It returns true once the lock is held; the caller then owns the lock and
+// must release it with hg.mu.Unlock. It returns false, without holding the
+// lock, if the shutdown context has already fired or fires while waiting;
+// the caller must not proceed. When hg.shutdownCtx is nil there is nothing
+// to wait on, so the call simply blocks until the lock is acquired.
+func (hg *HypergraphCRDT) lockWithShutdown() bool {
+	if hg.shutdownCtx == nil {
+		hg.mu.Lock()
+		return true
+	}
+
+	// Bail out before contending for the lock if shutdown has already fired.
+	// Otherwise the select below could still report success after shutdown
+	// (it picks at random when both cases are ready), and every late call
+	// would spawn a helper goroutine just to take and drop the lock.
+	select {
+	case <-hg.shutdownCtx.Done():
+		return false
+	default:
+	}
+
+	// A blocking Lock call cannot be a select case, so acquire the lock in a
+	// helper goroutine and signal completion by closing the channel.
+	locked := make(chan struct{})
+	go func() {
+		hg.mu.Lock()
+		close(locked)
+	}()
+
+	select {
+	case <-locked:
+		return true
+	case <-hg.shutdownCtx.Done():
+		// The helper may still be blocked in Lock and cannot be cancelled;
+		// release the lock as soon as it is acquired so it is never leaked.
+		go func() {
+			<-locked
+			hg.mu.Unlock()
+		}()
+		return false
+	}
+}
+
 func (hg *HypergraphCRDT) snapshotSet(
 	shardKey tries.ShardKey,
 	targetStore tries.TreeBackingStore,
@@ -696,7 +741,9 @@ func (hg *HypergraphCRDT) GetMetadataAtKey(pathKey []byte) (
 	[]hypergraph.ShardMetadata,
 	error,
 ) {
-	hg.mu.Lock()
+	if !hg.lockWithShutdown() {
+		return nil, errors.New("shutting down")
+	}
 	defer hg.mu.Unlock()
 	if len(pathKey) < 32 {
 		return nil, errors.Wrap(
diff --git a/node/consensus/global/global_consensus_engine.go b/node/consensus/global/global_consensus_engine.go
index 9de715b..2a71e86 100644
--- a/node/consensus/global/global_consensus_engine.go
+++ b/node/consensus/global/global_consensus_engine.go
@@ -1211,19 +1211,11 @@ func (e *GlobalConsensusEngine) Stop(force bool) <-chan error {
 	}
 
 	// Wait for any in-flight coverage check goroutine to finish before
-	// returning, so callers can safely close the Pebble DB. Use a bounded
-	// wait so Stop() cannot hang indefinitely if the goroutine is blocked
-	// on hg.mu (held by a sync or commit that hasn't drained yet).
-	coverageDone := make(chan struct{})
-	go func() {
-		e.coverageWg.Wait()
-		close(coverageDone)
-	}()
-	select {
-	case <-coverageDone:
-	case <-time.After(5 * time.Second):
-		e.logger.Warn("timed out waiting for coverage check to complete")
-	}
+	// returning, so callers can safely close the Pebble DB. This is safe
+	// to wait on unboundedly because GetMetadataAtKey (the only hg.mu
+	// caller in the coverage path) bails immediately once shutdownCtx
+	// fires, so the goroutine will always complete after shutdown.
+	e.coverageWg.Wait()
 
 	close(errChan)
 	return errChan