fix: resolve rare SIGFPE, fix orphan expired joins blocking workers from reallocating

This commit is contained in:
Cassandra Heart 2026-02-18 20:43:39 -06:00
parent db4efe35cd
commit 87f2872cc8
No known key found for this signature in database
GPG Key ID: 371083BFA6C240AA
2 changed files with 22 additions and 0 deletions

View File

@@ -766,6 +766,21 @@ func (e *GlobalConsensusEngine) reconcileWorkerAllocations(
continue
}
// Expired joins (implicitly rejected) and expired leaves
// (implicitly confirmed) should also be cleared immediately —
// the allocation will never be confirmed/completed and the
// worker is stuck waiting for a state change that cannot come.
if alloc.Status == typesconsensus.ProverStatusJoining &&
frameNumber > alloc.JoinFrameNumber+pendingFilterGraceFrames {
rejectedFilters[string(alloc.ConfirmationFilter)] = struct{}{}
continue
}
if alloc.Status == typesconsensus.ProverStatusLeaving &&
frameNumber > alloc.LeaveFrameNumber+pendingFilterGraceFrames {
rejectedFilters[string(alloc.ConfirmationFilter)] = struct{}{}
continue
}
key := string(alloc.ConfirmationFilter)
worker, ok := filtersToWorkers[key]
if !ok {

View File

@@ -12,6 +12,7 @@ import (
"fmt"
"log"
"math/big"
"net"
"net/http"
npprof "net/http/pprof"
"os"
@@ -46,6 +47,12 @@ import (
qruntime "source.quilibrium.com/quilibrium/monorepo/utils/runtime"
)
// init installs a pure-Go DNS resolver as the process-wide default at
// package initialization time.
func init() {
	// The cgo-based system resolver has been observed to crash with SIGFPE
	// on some glibc/musl configurations, so route lookups through Go's
	// built-in resolver instead.
	goResolver := new(net.Resolver)
	goResolver.PreferGo = true
	net.DefaultResolver = goResolver
}
var (
configDirectory = flag.String(
"config",