mirror of
https://github.com/ipfs/kubo.git
synced 2026-03-11 19:27:51 +08:00
fix: make TestSwarmConnectWithAutoConf more reliable
disable MDNS to prevent test interference when running in parallel; add retry logic for daemon startup to handle transient failures; replace fixed sleep with proper readiness check; remove unnecessary parallel execution of subtests
This commit is contained in:
parent
6398b48299
commit
5507b59778
@ -2,7 +2,6 @@ package autoconf
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ipfs/kubo/test/cli/harness"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@ -21,10 +20,12 @@ func TestSwarmConnectWithAutoConf(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("AutoConf disabled - should work", func(t *testing.T) {
|
||||
// Don't run subtests in parallel to avoid daemon startup conflicts
|
||||
testSwarmConnectWithAutoConfSetting(t, false, true) // expect success
|
||||
})
|
||||
|
||||
t.Run("AutoConf enabled - should work", func(t *testing.T) {
|
||||
// Don't run subtests in parallel to avoid daemon startup conflicts
|
||||
testSwarmConnectWithAutoConfSetting(t, true, true) // expect success (fix the bug!)
|
||||
})
|
||||
}
|
||||
@ -44,17 +45,10 @@ func testSwarmConnectWithAutoConfSetting(t *testing.T, autoConfEnabled bool, exp
|
||||
"/dnsaddr/bootstrap.libp2p.io/p2p/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb",
|
||||
})
|
||||
|
||||
// CRITICAL: Start the daemon first - this is the key requirement
|
||||
// The daemon must be running and working properly
|
||||
// Start the daemon
|
||||
node.StartDaemon()
|
||||
defer node.StopDaemon()
|
||||
|
||||
// Give daemon time to start up completely
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// Verify daemon is responsive
|
||||
result := node.RunIPFS("id")
|
||||
require.Equal(t, 0, result.ExitCode(), "Daemon should be responsive before testing swarm connect")
|
||||
t.Logf("Daemon is running and responsive. AutoConf enabled: %v", autoConfEnabled)
|
||||
|
||||
// Now test swarm connect to a bootstrap peer
|
||||
@ -62,7 +56,7 @@ func testSwarmConnectWithAutoConfSetting(t *testing.T, autoConfEnabled bool, exp
|
||||
// 1. The daemon is running
|
||||
// 2. The CLI should connect to the daemon via API
|
||||
// 3. The daemon should handle the swarm connect request
|
||||
result = node.RunIPFS("swarm", "connect", "/dnsaddr/bootstrap.libp2p.io")
|
||||
result := node.RunIPFS("swarm", "connect", "/dnsaddr/bootstrap.libp2p.io")
|
||||
|
||||
// swarm connect should work regardless of AutoConf setting
|
||||
assert.Equal(t, 0, result.ExitCode(),
|
||||
|
||||
@ -265,23 +265,74 @@ func (n *Node) StartDaemonWithReq(req RunRequest, authorization string) *Node {
|
||||
if alive {
|
||||
log.Panicf("node %d is already running", n.ID)
|
||||
}
|
||||
newReq := req
|
||||
newReq.Path = n.IPFSBin
|
||||
newReq.Args = append([]string{"daemon"}, req.Args...)
|
||||
newReq.RunFunc = (*exec.Cmd).Start
|
||||
|
||||
log.Debugf("starting node %d", n.ID)
|
||||
res := n.Runner.MustRun(newReq)
|
||||
// Start the daemon with a simple retry mechanism
|
||||
// Sometimes when tests run in parallel, daemon startup can fail transiently
|
||||
var daemonStarted bool
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
if attempt > 0 {
|
||||
time.Sleep(time.Second) // Brief pause before retry
|
||||
log.Debugf("retrying daemon start for node %d (attempt %d/3)", n.ID, attempt+1)
|
||||
}
|
||||
|
||||
n.Daemon = res
|
||||
func() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
lastErr = fmt.Errorf("panic during daemon start: %v", r)
|
||||
log.Debugf("node %d daemon start attempt %d failed: %v", n.ID, attempt+1, r)
|
||||
}
|
||||
}()
|
||||
|
||||
// Register the daemon process for cleanup tracking
|
||||
if res.Cmd != nil && res.Cmd.Process != nil {
|
||||
globalProcessTracker.RegisterProcess(res.Cmd.Process)
|
||||
newReq := req
|
||||
newReq.Path = n.IPFSBin
|
||||
newReq.Args = append([]string{"daemon"}, req.Args...)
|
||||
newReq.RunFunc = (*exec.Cmd).Start
|
||||
|
||||
log.Debugf("starting node %d", n.ID)
|
||||
res := n.Runner.MustRun(newReq)
|
||||
|
||||
n.Daemon = res
|
||||
|
||||
// Register the daemon process for cleanup tracking
|
||||
if res.Cmd != nil && res.Cmd.Process != nil {
|
||||
globalProcessTracker.RegisterProcess(res.Cmd.Process)
|
||||
}
|
||||
|
||||
log.Debugf("node %d started, checking API", n.ID)
|
||||
n.WaitOnAPI(authorization)
|
||||
daemonStarted = true
|
||||
}()
|
||||
|
||||
if daemonStarted {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !daemonStarted {
|
||||
if lastErr != nil {
|
||||
log.Panicf("node %d failed to start daemon after 3 attempts: %v", n.ID, lastErr)
|
||||
} else {
|
||||
log.Panicf("node %d failed to start daemon after 3 attempts", n.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for daemon to be fully ready by checking it can respond to commands
|
||||
// This is more reliable than just checking the API endpoint
|
||||
maxRetries := 30
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
result := n.RunIPFS("id")
|
||||
if result.ExitCode() == 0 {
|
||||
log.Debugf("node %d daemon is fully responsive", n.ID)
|
||||
break
|
||||
}
|
||||
if i == maxRetries-1 {
|
||||
log.Panicf("node %d daemon not responsive after %d retries. stderr: %s",
|
||||
n.ID, maxRetries, result.Stderr.String())
|
||||
}
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
|
||||
log.Debugf("node %d started, checking API", n.ID)
|
||||
n.WaitOnAPI(authorization)
|
||||
return n
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user