diff --git a/test/cli/harness/node.go b/test/cli/harness/node.go
index 6403a2f1a..116dc61d6 100644
--- a/test/cli/harness/node.go
+++ b/test/cli/harness/node.go
@@ -274,6 +274,11 @@ func (n *Node) StartDaemonWithReq(req RunRequest, authorization string) *Node {
 	res := n.Runner.MustRun(newReq)
 
 	n.Daemon = res
+
+	// Register the daemon process for cleanup tracking
+	if res.Cmd != nil && res.Cmd.Process != nil {
+		globalProcessTracker.RegisterProcess(res.Cmd.Process)
+	}
 
 	log.Debugf("node %d started, checking API", n.ID)
 	n.WaitOnAPI(authorization)
@@ -317,6 +322,10 @@ func (n *Node) StopDaemon() *Node {
 		log.Debugf("didn't stop node %d since no daemon present", n.ID)
 		return n
 	}
+
+	// Store PID for cleanup tracking
+	pid := n.Daemon.Cmd.Process.Pid
+
 	watch := make(chan struct{}, 1)
 	go func() {
 		_, _ = n.Daemon.Cmd.Process.Wait()
@@ -326,6 +335,7 @@ func (n *Node) StopDaemon() *Node {
 	// os.Interrupt does not support interrupts on Windows https://github.com/golang/go/issues/46345
 	if runtime.GOOS == "windows" {
 		if n.signalAndWait(watch, syscall.SIGKILL, 5*time.Second) {
+			globalProcessTracker.UnregisterProcess(pid)
 			return n
 		}
 		log.Panicf("timed out stopping node %d with peer ID %s", n.ID, n.PeerID())
@@ -333,18 +343,22 @@ func (n *Node) StopDaemon() *Node {
 
 	log.Debugf("signaling node %d with SIGTERM", n.ID)
 	if n.signalAndWait(watch, syscall.SIGTERM, 1*time.Second) {
+		globalProcessTracker.UnregisterProcess(pid)
 		return n
 	}
 	log.Debugf("signaling node %d with SIGTERM", n.ID)
 	if n.signalAndWait(watch, syscall.SIGTERM, 2*time.Second) {
+		globalProcessTracker.UnregisterProcess(pid)
 		return n
 	}
 	log.Debugf("signaling node %d with SIGQUIT", n.ID)
 	if n.signalAndWait(watch, syscall.SIGQUIT, 5*time.Second) {
+		globalProcessTracker.UnregisterProcess(pid)
 		return n
 	}
 	log.Debugf("signaling node %d with SIGKILL", n.ID)
 	if n.signalAndWait(watch, syscall.SIGKILL, 5*time.Second) {
+		globalProcessTracker.UnregisterProcess(pid)
 		return n
 	}
 	log.Panicf("timed out stopping node %d with peer ID %s", n.ID, n.PeerID())
diff --git a/test/cli/harness/process_tracker.go b/test/cli/harness/process_tracker.go
new file mode 100644
--- /dev/null
+++ b/test/cli/harness/process_tracker.go
@@ -0,0 +1,88 @@
+package harness
+
+import (
+	"errors"
+	"os"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// processTracker keeps track of all daemon processes started during tests
+// so that any left running can be force-killed during cleanup.
+type processTracker struct {
+	mu        sync.Mutex
+	processes map[int]*os.Process // keyed by PID
+}
+
+// globalProcessTracker is a package-level tracker for all spawned daemons.
+var globalProcessTracker = &processTracker{
+	processes: make(map[int]*os.Process),
+}
+
+// RegisterProcess adds a process to the tracker. A nil process is ignored.
+func (pt *processTracker) RegisterProcess(proc *os.Process) {
+	if proc == nil {
+		return
+	}
+	pt.mu.Lock()
+	defer pt.mu.Unlock()
+	pt.processes[proc.Pid] = proc
+	log.Debugf("registered daemon process PID %d", proc.Pid)
+}
+
+// UnregisterProcess removes a process from the tracker.
+func (pt *processTracker) UnregisterProcess(pid int) {
+	pt.mu.Lock()
+	defer pt.mu.Unlock()
+	delete(pt.processes, pid)
+	log.Debugf("unregistered daemon process PID %d", pid)
+}
+
+// KillAll forcefully terminates all tracked processes and empties the tracker.
+//
+// Every process is sent SIGTERM first; after a single shared grace period,
+// each one is then killed outright.
+func (pt *processTracker) KillAll() {
+	// Take ownership of the tracked set and release the lock right away so
+	// RegisterProcess/UnregisterProcess are not blocked during the grace period.
+	pt.mu.Lock()
+	procs := pt.processes
+	pt.processes = make(map[int]*os.Process)
+	pt.mu.Unlock()
+
+	if len(procs) == 0 {
+		return
+	}
+
+	// Try SIGTERM first.
+	for pid, proc := range procs {
+		log.Debugf("force killing daemon process PID %d", pid)
+		if err := proc.Signal(syscall.SIGTERM); err != nil && !IsProcessDone(err) {
+			log.Debugf("error sending SIGTERM to PID %d: %v", pid, err)
+		}
+	}
+
+	// Give them a moment to terminate: one wait for all, not one per process.
+	time.Sleep(100 * time.Millisecond)
+
+	// Force kill anything still running; already-exited processes are skipped silently.
+	for pid, proc := range procs {
+		if err := proc.Kill(); err != nil && !IsProcessDone(err) {
+			log.Debugf("error killing PID %d: %v", pid, err)
+		}
+	}
+
+	log.Debugf("cleaned up %d daemon processes", len(procs))
+}
+
+// IsProcessDone checks if an error indicates the process has already exited.
+func IsProcessDone(err error) bool {
+	return errors.Is(err, os.ErrProcessDone)
+}
+
+// CleanupDaemonProcesses kills all tracked daemon processes.
+// This should be called in test cleanup or panic recovery.
+func CleanupDaemonProcesses() {
+	globalProcessTracker.KillAll()
+}