fix: do not bail out early on shutdown of coverage check

This commit is contained in:
Cassandra Heart 2026-02-22 00:07:57 -06:00
parent 82536708e5
commit f726e44a2b
No known key found for this signature in database
GPG Key ID: 371083BFA6C240AA
2 changed files with 36 additions and 14 deletions

View File

@ -159,6 +159,34 @@ func (hg *HypergraphCRDT) contextWithShutdown(
return ctx, cancel
}
// lockWithShutdown acquires hg.mu exclusively unless shutdown intervenes.
//
// It returns true when the caller now holds hg.mu and is responsible for
// unlocking it. It returns false when hg.shutdownCtx is done; in that case the
// caller does not hold the lock and must not proceed.
//
// When hg.shutdownCtx is nil the call degrades to a plain blocking Lock and
// always returns true.
func (hg *HypergraphCRDT) lockWithShutdown() bool {
	if hg.shutdownCtx == nil {
		hg.mu.Lock()
		return true
	}

	// shuttingDown reports, without blocking, whether shutdown has fired.
	shuttingDown := func() bool {
		select {
		case <-hg.shutdownCtx.Done():
			return true
		default:
			return false
		}
	}

	// Bail out deterministically if shutdown has already fired. Without this
	// check the select below chooses at random when both channels are ready,
	// which would let a caller proceed after shutdown.
	if shuttingDown() {
		return false
	}

	// Lock cannot be interrupted, so acquire it on a separate goroutine and
	// signal completion by closing the channel.
	locked := make(chan struct{})
	go func() {
		hg.mu.Lock()
		close(locked)
	}()

	select {
	case <-locked:
		// Shutdown may have fired while the lock was being acquired and the
		// select may still have picked this case; give the lock back rather
		// than letting the caller run during shutdown.
		if shuttingDown() {
			hg.mu.Unlock()
			return false
		}
		return true
	case <-hg.shutdownCtx.Done():
		// The acquiring goroutine is still waiting for (or has just obtained)
		// the lock. Release it as soon as it is acquired so it is not leaked.
		go func() {
			<-locked
			hg.mu.Unlock()
		}()
		return false
	}
}
func (hg *HypergraphCRDT) snapshotSet(
shardKey tries.ShardKey,
targetStore tries.TreeBackingStore,
@ -696,7 +724,9 @@ func (hg *HypergraphCRDT) GetMetadataAtKey(pathKey []byte) (
[]hypergraph.ShardMetadata,
error,
) {
hg.mu.Lock()
if !hg.lockWithShutdown() {
return nil, errors.New("shutting down")
}
defer hg.mu.Unlock()
if len(pathKey) < 32 {
return nil, errors.Wrap(

View File

@ -1211,19 +1211,11 @@ func (e *GlobalConsensusEngine) Stop(force bool) <-chan error {
}
// Wait for any in-flight coverage check goroutine to finish before
// returning, so callers can safely close the Pebble DB. Use a bounded
// wait so Stop() cannot hang indefinitely if the goroutine is blocked
// on hg.mu (held by a sync or commit that hasn't drained yet).
coverageDone := make(chan struct{})
go func() {
e.coverageWg.Wait()
close(coverageDone)
}()
select {
case <-coverageDone:
case <-time.After(5 * time.Second):
e.logger.Warn("timed out waiting for coverage check to complete")
}
// returning, so callers can safely close the Pebble DB. This is safe
// to wait on unboundedly because GetMetadataAtKey (the only hg.mu
// caller in the coverage path) bails immediately once shutdownCtx
// fires, so the goroutine will always complete after shutdown.
e.coverageWg.Wait()
close(errChan)
return errChan