diff --git a/config/swarm.go b/config/swarm.go index 83f42a295..2cd4e7194 100644 --- a/config/swarm.go +++ b/config/swarm.go @@ -138,7 +138,7 @@ type ConnMgr struct { // ResourceMgr defines configuration options for the libp2p Network Resource Manager // type ResourceMgr struct { - // Enables the Network Resource Manager feature + // Enables the Network Resource Manager feature, defaults to on. Enabled Flag `json:",omitempty"` Limits *rcmgr.BasicLimiterConfig `json:",omitempty"` } diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index 938f5eb43..28d05a131 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -29,8 +29,7 @@ func ResourceManager(cfg config.SwarmConfig) func(fx.Lifecycle, repo.Repo) (netw var manager network.ResourceManager var opts Libp2pOpts - // Config Swarm.ResourceMgr.Enabled decides if we run a real manager - enabled := cfg.ResourceMgr.Enabled.WithDefault(false) + enabled := cfg.ResourceMgr.Enabled.WithDefault(true) /// ENV overrides Config (if present) switch os.Getenv("LIBP2P_RCMGR") { diff --git a/core/node/libp2p/rcmgr_metrics.go b/core/node/libp2p/rcmgr_metrics.go index 56ccfa9d6..48c54426c 100644 --- a/core/node/libp2p/rcmgr_metrics.go +++ b/core/node/libp2p/rcmgr_metrics.go @@ -1,6 +1,7 @@ package libp2p import ( + "errors" "strconv" "github.com/libp2p/go-libp2p-core/network" @@ -11,6 +12,17 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +func mustRegister(c prometheus.Collector) { + err := prometheus.Register(c) + are := prometheus.AlreadyRegisteredError{} + if errors.As(err, &are) { + return + } + if err != nil { + panic(err) + } +} + func createRcmgrMetrics() rcmgr.MetricsReporter { const ( direction = "direction" @@ -26,7 +38,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{direction, usesFD}, ) - prometheus.MustRegister(connAllowed) + mustRegister(connAllowed) connBlocked := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -35,7 +47,7 @@ func 
createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{direction, usesFD}, ) - prometheus.MustRegister(connBlocked) + mustRegister(connBlocked) streamAllowed := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -44,7 +56,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{direction}, ) - prometheus.MustRegister(streamAllowed) + mustRegister(streamAllowed) streamBlocked := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -53,19 +65,19 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{direction}, ) - prometheus.MustRegister(streamBlocked) + mustRegister(streamBlocked) peerAllowed := prometheus.NewCounter(prometheus.CounterOpts{ Name: "libp2p_rcmgr_peers_allowed_total", Help: "allowed peers", }) - prometheus.MustRegister(peerAllowed) + mustRegister(peerAllowed) peerBlocked := prometheus.NewCounter(prometheus.CounterOpts{ Name: "libp2p_rcmgr_peer_blocked_total", Help: "blocked peers", }) - prometheus.MustRegister(peerBlocked) + mustRegister(peerBlocked) protocolAllowed := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -74,7 +86,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{protocol}, ) - prometheus.MustRegister(protocolAllowed) + mustRegister(protocolAllowed) protocolBlocked := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -83,7 +95,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{protocol}, ) - prometheus.MustRegister(protocolBlocked) + mustRegister(protocolBlocked) protocolPeerBlocked := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -92,7 +104,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{protocol}, ) - prometheus.MustRegister(protocolPeerBlocked) + mustRegister(protocolPeerBlocked) serviceAllowed := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -101,7 +113,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{service}, ) - prometheus.MustRegister(serviceAllowed) + mustRegister(serviceAllowed) serviceBlocked := 
prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -110,7 +122,7 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{service}, ) - prometheus.MustRegister(serviceBlocked) + mustRegister(serviceBlocked) servicePeerBlocked := prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -119,19 +131,19 @@ func createRcmgrMetrics() rcmgr.MetricsReporter { }, []string{service}, ) - prometheus.MustRegister(servicePeerBlocked) + mustRegister(servicePeerBlocked) memoryAllowed := prometheus.NewCounter(prometheus.CounterOpts{ Name: "libp2p_rcmgr_memory_allocations_allowed_total", Help: "allowed memory allocations", }) - prometheus.MustRegister(memoryAllowed) + mustRegister(memoryAllowed) memoryBlocked := prometheus.NewCounter(prometheus.CounterOpts{ Name: "libp2p_rcmgr_memory_allocations_blocked_total", Help: "blocked memory allocations", }) - prometheus.MustRegister(memoryBlocked) + mustRegister(memoryBlocked) return rcmgrMetrics{ connAllowed, diff --git a/docs/config.md b/docs/config.md index 090bcdfa9..0a4b6c3c1 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1632,13 +1632,10 @@ The [libp2p Network Resource Manager](https://github.com/libp2p/go-libp2p-resour and tracking recource usage over time. #### `Swarm.ResourceMgr.Enabled` - -**EXPERIMENTAL**: this feature is disabled by default, use with caution. - Enables the libp2p Network Resource Manager and auguments the default limits using user-defined ones in `Swarm.ResourceMgr.Limits` (if present). 
-Default: `false` +Default: `true` Type: `flag` diff --git a/test/sharness/t0116-prometheus-data/prometheus_metrics b/test/sharness/t0116-prometheus-data/prometheus_metrics index dd358e82f..adffa4c1b 100644 --- a/test/sharness/t0116-prometheus-data/prometheus_metrics +++ b/test/sharness/t0116-prometheus-data/prometheus_metrics @@ -656,6 +656,10 @@ leveldb_datastore_sync_latency_seconds_bucket leveldb_datastore_sync_latency_seconds_count leveldb_datastore_sync_latency_seconds_sum leveldb_datastore_sync_total +libp2p_rcmgr_memory_allocations_allowed_total +libp2p_rcmgr_memory_allocations_blocked_total +libp2p_rcmgr_peer_blocked_total +libp2p_rcmgr_peers_allowed_total process_cpu_seconds_total process_max_fds process_open_fds diff --git a/test/sharness/t0139-swarm-rcmgr.sh b/test/sharness/t0139-swarm-rcmgr.sh index 659f508b1..895863004 100755 --- a/test/sharness/t0139-swarm-rcmgr.sh +++ b/test/sharness/t0139-swarm-rcmgr.sh @@ -17,40 +17,10 @@ test_expect_success 'disconnected: swarm stats requires running daemon' ' test_should_contain "missing ResourceMgr" actual ' -# swarm limit|stats should fail in online mode by default -# because Resource Manager is opt-in +# swarm limit|stats should succeed in online mode by default +# because Resource Manager is opt-out test_launch_ipfs_daemon -test_expect_success 'ResourceMgr disabled by default: swarm limit requires Swarm.ResourceMgr.Enabled' ' - test_expect_code 1 ipfs swarm limit system 2> actual && - test_should_contain "missing ResourceMgr" actual -' -test_expect_success 'ResourceMgr disabled by default: swarm stats requires Swarm.ResourceMgr.Enabled' ' - test_expect_code 1 ipfs swarm stats all 2> actual && - test_should_contain "missing ResourceMgr" actual -' - -test_kill_ipfs_daemon - -test_expect_success "setting an invalid limit should result in a failure" " - test_expect_code 1 ipfs config --json Swarm.ResourceMgr.Limits.System.Conns 'asdf' 2> actual && - test_should_contain 'failed to unmarshal' actual -" - -# swarm 
limit|stat should work when Swarm.ResourceMgr.Enabled -test_expect_success "test enabling resource manager" " - ipfs config --json Swarm.ResourceMgr.Enabled true && - ipfs config --json Swarm.ResourceMgr && - jq -e '.Swarm.ResourceMgr.Enabled == true' < \"$IPFS_PATH/config\" -" - -test_launch_ipfs_daemon - -test_expect_success "test setting system conns limit" " - ipfs config --json Swarm.ResourceMgr.Enabled true && - ipfs config --json Swarm.ResourceMgr.Limits.System.Conns 99999 -" - # every scope has the same fields, so we only inspect System test_expect_success 'ResourceMgr enabled: swarm limit' ' ipfs swarm limit system --enc=json | tee json && @@ -79,13 +49,18 @@ test_expect_success 'ResourceMgr enabled: swarm stats' ' # shut down the daemon, set a limit in the config, and verify that it's applied test_kill_ipfs_daemon -test_expect_success "set system conn limit" " +test_expect_success "Set system conns limit while daemon is not running" " ipfs config --json Swarm.ResourceMgr.Limits.System.Conns 99999 " +test_expect_success "Set an invalid limit, which should result in a failure" " + test_expect_code 1 ipfs config --json Swarm.ResourceMgr.Limits.System.Conns 'asdf' 2> actual && + test_should_contain 'failed to unmarshal' actual +" + test_launch_ipfs_daemon -test_expect_success 'ResourceMgr enabled: swarm limit' ' +test_expect_success 'Ensure the new system conns limit is applied' ' ipfs swarm limit system --enc=json | tee json && jq -e ".Conns == 99999" < json ' @@ -152,4 +127,23 @@ test_expect_success 'Set limit for peer scope with an invalid peer ID' ' test_kill_ipfs_daemon +# test correct behavior when resource manager is disabled +test_expect_success 'Disable resource manager' ' + ipfs config --bool Swarm.ResourceMgr.Enabled false +' + +test_launch_ipfs_daemon + +test_expect_success 'Swarm limit should fail since RM is disabled' ' + test_expect_code 1 ipfs swarm limit system 2> actual && + test_should_contain "missing ResourceMgr" actual +' + 
+test_expect_success 'Swarm stats should fail since RM is disabled' ' + test_expect_code 1 ipfs swarm stats all 2> actual && + test_should_contain "missing ResourceMgr" actual +' + +test_kill_ipfs_daemon + test_done