feat: add global process tracker for daemon cleanup

add process_tracker.go with daemon PID tracking
register/unregister daemons in StartDaemon/StopDaemon
track all spawned processes for later cleanup
This commit is contained in:
Marcin Rataj 2025-08-22 17:47:12 +02:00
parent 7b9e9473e5
commit 2b2e875e86
2 changed files with 97 additions and 0 deletions

View File

@ -274,6 +274,11 @@ func (n *Node) StartDaemonWithReq(req RunRequest, authorization string) *Node {
res := n.Runner.MustRun(newReq)
n.Daemon = res
// Register the daemon process for cleanup tracking
if res.Cmd != nil && res.Cmd.Process != nil {
globalProcessTracker.RegisterProcess(res.Cmd.Process)
}
log.Debugf("node %d started, checking API", n.ID)
n.WaitOnAPI(authorization)
@ -317,6 +322,10 @@ func (n *Node) StopDaemon() *Node {
log.Debugf("didn't stop node %d since no daemon present", n.ID)
return n
}
// Store PID for cleanup tracking
pid := n.Daemon.Cmd.Process.Pid
watch := make(chan struct{}, 1)
go func() {
_, _ = n.Daemon.Cmd.Process.Wait()
@ -326,6 +335,7 @@ func (n *Node) StopDaemon() *Node {
// os.Interrupt does not support interrupts on Windows https://github.com/golang/go/issues/46345
if runtime.GOOS == "windows" {
if n.signalAndWait(watch, syscall.SIGKILL, 5*time.Second) {
globalProcessTracker.UnregisterProcess(pid)
return n
}
log.Panicf("timed out stopping node %d with peer ID %s", n.ID, n.PeerID())
@ -333,18 +343,22 @@ func (n *Node) StopDaemon() *Node {
log.Debugf("signaling node %d with SIGTERM", n.ID)
if n.signalAndWait(watch, syscall.SIGTERM, 1*time.Second) {
globalProcessTracker.UnregisterProcess(pid)
return n
}
log.Debugf("signaling node %d with SIGTERM", n.ID)
if n.signalAndWait(watch, syscall.SIGTERM, 2*time.Second) {
globalProcessTracker.UnregisterProcess(pid)
return n
}
log.Debugf("signaling node %d with SIGQUIT", n.ID)
if n.signalAndWait(watch, syscall.SIGQUIT, 5*time.Second) {
globalProcessTracker.UnregisterProcess(pid)
return n
}
log.Debugf("signaling node %d with SIGKILL", n.ID)
if n.signalAndWait(watch, syscall.SIGKILL, 5*time.Second) {
globalProcessTracker.UnregisterProcess(pid)
return n
}
log.Panicf("timed out stopping node %d with peer ID %s", n.ID, n.PeerID())

View File

@ -0,0 +1,83 @@
package harness
import (
	"errors"
	"os"
	"sync"
	"syscall"
	"time"
)
// processTracker keeps track of all daemon processes started during tests.
// Entries are stored by PID so that a process can later be unregistered
// using its PID alone.
type processTracker struct {
// mu is locked by every method before it reads or writes processes.
mu sync.Mutex
// processes maps a PID to the os.Process handle registered for it.
processes map[int]*os.Process
}
// globalProcessTracker is the package-level tracker for all spawned daemons.
// It is initialised with an empty, non-nil PID map so processes can be
// registered without any further setup.
var globalProcessTracker = &processTracker{
	processes: map[int]*os.Process{},
}
// RegisterProcess records proc in the tracker under its PID.
//
// A nil proc is ignored. Registering a second process with the same PID
// replaces the earlier entry. The registration is logged at debug level.
func (pt *processTracker) RegisterProcess(proc *os.Process) {
	if proc == nil {
		return
	}

	pid := proc.Pid

	pt.mu.Lock()
	defer pt.mu.Unlock()

	pt.processes[pid] = proc
	log.Debugf("registered daemon process PID %d", pid)
}
// UnregisterProcess drops the entry stored under pid, if any.
//
// Removing a PID that is not tracked is a no-op apart from the debug log
// line, which is emitted in either case.
func (pt *processTracker) UnregisterProcess(pid int) {
	pt.mu.Lock()
	defer pt.mu.Unlock()

	delete(pt.processes, pid)

	log.Debugf("unregistered daemon process PID %d", pid)
}
// KillAll forcefully terminates every tracked process and empties the tracker.
//
// Every process is first sent SIGTERM. After a single shared 100ms grace
// period, each one is killed with Process.Kill and removed from the tracker.
// Cleanup is best-effort: an error saying the process has already exited
// (os.ErrProcessDone) is ignored silently, and any other error is only logged
// at debug level. The tracker mutex is held for the whole call, so concurrent
// Register/Unregister calls block until cleanup has finished.
func (pt *processTracker) KillAll() {
	pt.mu.Lock()
	defer pt.mu.Unlock()

	// Capture the count up front: the map is empty by the time we log.
	total := len(pt.processes)
	if total == 0 {
		return
	}

	// Ask every process to terminate gracefully first.
	for pid, proc := range pt.processes {
		log.Debugf("force killing daemon process PID %d", pid)
		if err := proc.Signal(syscall.SIGTERM); err != nil && !errors.Is(err, os.ErrProcessDone) {
			log.Debugf("error sending SIGTERM to PID %d: %v", pid, err)
		}
	}

	// One grace period for the whole batch, instead of one per process.
	time.Sleep(100 * time.Millisecond)

	// Force kill whatever is still running and drop the entries.
	for pid, proc := range pt.processes {
		if err := proc.Kill(); err != nil && !errors.Is(err, os.ErrProcessDone) {
			log.Debugf("error killing PID %d: %v", pid, err)
		}
		// Deleting the current key while ranging over a map is safe in Go.
		delete(pt.processes, pid)
	}

	log.Debugf("cleaned up %d daemon processes", total)
}
// IsProcessDone reports whether err indicates that the process has already
// exited, i.e. err is os.ErrProcessDone or wraps it. It uses errors.Is so
// that wrapped errors are recognised too. A nil error yields false.
func IsProcessDone(err error) bool {
	return errors.Is(err, os.ErrProcessDone)
}
// CleanupDaemonProcesses kills all tracked daemon processes by calling
// KillAll on the package-level globalProcessTracker.
// This should be called in test cleanup or panic recovery.
func CleanupDaemonProcesses() {
globalProcessTracker.KillAll()
}