diff --git a/core/node/libp2p/rcmgr.go b/core/node/libp2p/rcmgr.go index 35894dc72..49c9d3823 100644 --- a/core/node/libp2p/rcmgr.go +++ b/core/node/libp2p/rcmgr.go @@ -52,22 +52,18 @@ func ResourceManager(cfg config.SwarmConfig) interface{} { return nil, opts, fmt.Errorf("opening IPFS_PATH: %w", err) } - limitConfig, err := createDefaultLimitConfig(cfg) + limits, err := createDefaultLimitConfig(cfg) if err != nil { return nil, opts, err } - // The logic for defaults and overriding with specified SwarmConfig.ResourceMgr.Limits - // is documented in docs/config.md. - // Any changes here should be reflected there. if cfg.ResourceMgr.Limits != nil { l := *cfg.ResourceMgr.Limits - // This effectively overrides the computed default LimitConfig with any vlues from cfg.ResourceMgr.Limits - l.Apply(limitConfig) - limitConfig = l + l.Apply(limits) + limits = l } - limiter := rcmgr.NewFixedLimiter(limitConfig) + limiter := rcmgr.NewFixedLimiter(limits) str, err := rcmgrObs.NewStatsTraceReporter() if err != nil { diff --git a/core/node/libp2p/rcmgr_defaults.go b/core/node/libp2p/rcmgr_defaults.go index fe50ea22a..849d7e82a 100644 --- a/core/node/libp2p/rcmgr_defaults.go +++ b/core/node/libp2p/rcmgr_defaults.go @@ -44,8 +44,51 @@ var noLimitIncrease = rcmgr.BaseLimitIncrease{ // This file defines implicit limit defaults used when Swarm.ResourceMgr.Enabled // createDefaultLimitConfig creates LimitConfig to pass to libp2p's resource manager. -// The defaults follow the documentation in docs/config.md. -// Any changes in the logic here should be reflected there. +// libp2p's resource manager provides tremendous flexibility but also adds a lot of complexity. +// The intent of the default config here is to provide good defaults, +// and where the defaults aren't good enough, +// to expose a good set of higher-level "knobs" to users to satisfy most use cases +// without requiring users to wade into all the intricacies of libp2p's resource manager. 
+// +// The inputs one can specify in SwarmConfig are: +// - cfg.ResourceMgr.MaxMemory: This is the max amount of memory in bytes to allow libp2p to use. +// libp2p's resource manager will prevent additional resource creation while this limit is hit. +// If this value isn't specified, 1/8th of the total system memory is used. +// - cfg.ResourceMgr.MaxFileDescriptors: This is the maximum number of file descriptors to allow libp2p to use. +// libp2p's resource manager will prevent additional file descriptor consumption while this limit is hit. +// If this value isn't specified, the maximum between 1/2 of system FD limit and 4096 is used. +// - Swarm.ConnMgr.HighWater: If a connection manager is specified, libp2p's resource manager +// will allow 2x more connections than the HighWater mark +// so the connection manager has "space and time" to close "least useful" connections. +// +// With these inputs defined, limits are created at the system, transient, and peer scopes. +// Other scopes are ignored (by being set to infinity). +// The reason these scopes are chosen is because: +// - system - This gives us the coarse-grained control we want so we can reason about the system as a whole. +// It is the backstop, and allows us to reason about resource consumption more easily +// since we don't have to think about the interaction of many other scopes. +// - transient - Limiting connections that are in process of being established provides backpressure so not too much work queues up. +// - peer - The peer scope doesn't protect us against intentional DoS attacks. +// It's just as easy for an attacker to send 100 requests/second with 1 peerId vs. 10 requests/second with 10 peers. +// We are reliant on the system scope for protection here in the malicious case. +// The reason for having a peer scope is to protect against unintentional DoS attacks +// (e.g., bug in a peer which is causing it to "misbehave"). 
+// In the unintentional case, we want to make sure a "misbehaving" node doesn't consume more resources than necessary. +// +// Within these scopes, limits are just set on memory, FD, and inbound connections/streams. +// Limits are set based on the inputs above. +// We trust this node to behave properly and thus ignore outbound connection/stream limits. +// We apply any limits that libp2p has for its protocols/services +// since we assume libp2p knows best here. +// +// This leaves 3 levels of resource management protection: +// 1. The user who does nothing and uses defaults - In this case they get some sane defaults +// based on the amount of memory and file descriptors their system has. +// This should protect the node from many attacks. +// 2. Slightly more advanced user - They can tweak the above by passing in config on +// maxMemory, maxFD, or maxConns with Swarm.ConnMgr.HighWater. +// 3. Power user - They specify all the limits they want set via Swarm.ResourceMgr.Limits +// and we don't do any defaults/overrides. We pass that config blindly into libp2p resource manager. func createDefaultLimitConfig(cfg config.SwarmConfig) (rcmgr.LimitConfig, error) { maxMemoryDefaultString := humanize.Bytes(uint64(memory.TotalMemory()) / 8) maxMemoryString := cfg.ResourceMgr.MaxMemory.WithDefault(maxMemoryDefaultString) @@ -62,6 +105,7 @@ func createDefaultLimitConfig(cfg config.SwarmConfig) (rcmgr.LimitConfig, error) FD: int(numFD), // By default, we just limit connections on the inbound side. + // Note that the limit gets adjusted below if "cfg.ConnMgr.HighWater" is set. 
Conns: bigEnough, ConnsInbound: rcmgr.DefaultLimits.SystemBaseLimit.ConnsInbound, // same as libp2p default ConnsOutbound: bigEnough, diff --git a/docs/changelogs/v0.17.md b/docs/changelogs/v0.17.md index 490543c5b..8cab74991 100644 --- a/docs/changelogs/v0.17.md +++ b/docs/changelogs/v0.17.md @@ -20,28 +20,6 @@ Below is an outline of all that is in this release, so you get a sense of all th -#### libp2p resource management enabled by default - -To help protect nodes from DoS (resource exhaustion) and eclipse attacks, -go-libp2p released a [Network Resource Manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manage) with a host of improvements throughout 2022. - -Kubo first [exposed this functionality in Kubo 0.13](https://github.com/ipfs/kubo/blob/master/docs/changelogs/v0.13.md#-libp2p-network-resource-manager-swarmresourcemgr), -but it was disabled by default. - -The resource manager is now enabled by default to protect nodes. -The defaults balance providing protection from various attacks while still enabling normal usecases to work as expected. - -If you want to adjust the defaults, then you can: -1. bound the amount of memory and file descriptors that libp2p will use with [Swarm.ResourceMgr.MaxMemory](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgrmaxmemory) -and Swarm.ResourceMgr.MaxFileDescriptors](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgrmaxfiledescriptors) and/or -2. override any specific resource scopes/limits with [Swarm.ResourceMgr.Limits](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgrlimits) - -See [Swarm.ResourceMgr](https://github.com/ipfs/go-ipfs/blob/master/docs/config.md#swarmresourcemgr) for -1. what limits are set by default, -2. example override configuration, -3. how to access prometheus metrics and view grafana dashboards of resource usage, and -4. how to set explicit "allow lists" to protect against eclipse attacks. 
- #### Implicit connection manager limits Starting with this release, `ipfs init` will no longer store the default diff --git a/docs/config.md b/docs/config.md index 19becd51b..da4276cd9 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1808,76 +1808,30 @@ Type: `optionalDuration` ### `Swarm.ResourceMgr` -The [libp2p Netowrk Resource Manager](https://github.com/libp2p/go-libp2p-resource-manager#readme) allows setting limits per [Resource Scope](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#resource-scopes), +The [libp2p Network Resource Manager](https://github.com/libp2p/go-libp2p-resource-manager#readme) allows setting limits per a scope, and tracking recource usage over time. -** Levels of Configuration ** -libp2p's resource manager provides tremendous flexibility but also adds a lot of complexity. -There are these levels of limit configuration for resource management protection: -1. "The user who does nothing" - In this case they get some sane defaults discussed below - based on the amount of memory and file descriptors their system has. - This should protect the node from many attacks. -2. "Slightly more advanced user" - They can tweak the default limits discussed below. - Where the defaults aren't good enough, a good set of higher-level "knobs" are exposed to satisfy most use cases - without requiring users to wade into all the intricacies of libp2p's resource manager. - The "knobs"/inputs are `Swarm.ResourceMgr.MaxMemory` and `Swarm.ResourceMgr.MaxFileDescriptors` as described below. -3. 
"Power user" - They specify all the default limits from below they want override via `Swarm.ResourceMgr.Limits`; - -** Default Limits ** -With these inputs defined, [resource manager limits](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#limits) are created at the -[system](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#the-system-scope), -[transient](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#the-transient-scope), -and [peer](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#peer-scopes) scopes. -Other scopes are ignored (by being set to "~infinity". - -The reason these scopes are chosen is because: -- system - This gives us the coarse-grained control we want so we can reason about the system as a whole. - It is the backstop, and allows us to reason about resource consumption more easily - since don't have think about the interaction of many other scopes. -- transient - Limiting connections that are in process of being established provides backpressure so not too much work queues up. -- peer - The peer scope doesn't protect us against intentional DoS attacks. - It's just as easy for an attacker to send 100 requests/second with 1 peerId vs. 10 requests/second with 10 peers. - We are reliant on the system scope for protection here in the malicious case. - The reason for having a peer scope is to protect against unintentional DoS attacks - (e.g., bug in a peer which is causing it to "misbehave"). - In the unintional case, we want to make sure a "misbehaving" node doesn't consume more resources than necessary. 
- -Within these scopes, limits are just set on -[memory](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#memory), -[file descriptors (FD)](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#file-descriptors), [*inbound* connections](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#connections), -and [*inbound* streams](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#streams). -Limits are set based on the inputs above. -We trust this node to behave properly and thus don't limit *outbound* connection/stream limits. -We apply any limits that libp2p has for its protocols/services -since we assume libp2p knows best here. - -** libp2p resource monitoring ** -For [monitoring libp2p resource usage](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#monitoring), -various `*rcmgr_*` metrics can be accessed as the prometheus endpoint at `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`). -There are also [pre-built Grafana dashboards](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager/obs/grafana-dashboards) that can be added to a Grafana instance. - #### `Swarm.ResourceMgr.Enabled` -Enables the libp2p Resource Manager using limits based on the defaults and/or other configuration as discussed above. +Enables the libp2p Network Resource Manager and augments the default limits +using user-defined ones in `Swarm.ResourceMgr.Limits` (if present). + +Various `*rcmgr_*` metrics can be accessed via the prometheus endpoint at `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`) Default: `true` + Type: `flag` #### `Swarm.ResourceMgr.MaxMemory` -This is the max amount of memory to allow libp2p to use. -libp2p's resource manager will prevent additional resource creation while this limit is reached. 
-This value is also used to scale the limit on various resources at various scopes -when the default limits (discuseed above) are used. -For example, increasing this value will increase the default limit for incoming connections. +The maximum amount of memory that the libp2p resource manager will allow. Default: `[TOTAL_SYSTEM_MEMORY]/8` Type: `optionalBytes` #### `Swarm.ResourceMgr.MaxFileDescriptors` -This is the maximum number of file descriptors to allow libp2p to use. -libp2p's resource manager will prevent additional file descriptor consumption while this limit is reached. +Define the maximum number of file descriptors that libp2p can use. This param is ignored on Windows. @@ -1888,26 +1842,21 @@ Type: `optionalInteger` Map of resource limits [per scope](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#resource-scopes). -The map supports fields from the [`LimitConfig` struct](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L111). +The map supports fields from [`ScalingLimitConfig`](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L21-L59) +struct from [go-libp2p-resource-manager](https://github.com/libp2p/go-libp2p/tree/master/p2p/host/resource-manager#readme). -[`BaseLimit`s](https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit.go#L89) can be set for any scope, and within the `BaseLimit`, all limit s are optional. - -The `Swarm.ResourceMgr.Limits` override the default limits described above. -Any override `BaseLimits` or limit s from `Swarm.ResourceMgr.Limits` -that aren't specified will use the default limits. 
- -Example #1: setting limits for a specific scope ```json { "Swarm": { "ResourceMgr": { + "Enabled": true, "Limits": { "System": { - "Memory": 1073741824, - "FD": 512, "Conns": 1024, "ConnsInbound": 256, "ConnsOutbound": 1024, + "FD": 512, + "Memory": 1073741824, "Streams": 16384, "StreamsInbound": 4096, "StreamsOutbound": 16384 @@ -1918,28 +1867,13 @@ Example #1: setting limits for a specific scope } ``` -Example #2: setting a specific limit -```json -{ - "Swarm": { - "ResourceMgr": { - "Limits": { - "Transient": { - "ConnsOutbound": 256, - } - } - } - } -} -``` - Current resource usage and a list of services, protocols, and peers can be obtained via `ipfs swarm stats --help` It is also possible to adjust some runtime limits via `ipfs swarm limit --help`. Changes made via `ipfs swarm limit` are persisted in `Swarm.ResourceMgr.Limits`. -Default: `{}` (use the safe implicit defaults described above) +Default: `{}` (use the safe implicit defaults) Type: `object[string->object]`