diff --git a/config/internal.go b/config/internal.go index 267bb250f..8341a9149 100644 --- a/config/internal.go +++ b/config/internal.go @@ -6,6 +6,14 @@ type Internal struct { UnixFSShardingSizeThreshold *OptionalString `json:",omitempty"` // moved to Import.UnixFSHAMTDirectorySizeThreshold Libp2pForceReachability *OptionalString `json:",omitempty"` BackupBootstrapInterval *OptionalDuration `json:",omitempty"` + // MFSAutoflushThreshold controls the number of entries cached in memory + // for each MFS directory before auto-flush is triggered to prevent + // unbounded memory growth when using --flush=false. + // Default: 256 (matches HAMT shard size) + // Set to 0 to disable cache limiting (old behavior, may cause high memory usage) + // This is an EXPERIMENTAL feature and may change or be removed in future releases. + // See https://github.com/ipfs/kubo/issues/10842 + MFSAutoflushThreshold OptionalInteger `json:",omitempty"` } type InternalBitswap struct { diff --git a/core/commands/files.go b/core/commands/files.go index 12a96eba2..35cdff6ee 100644 --- a/core/commands/files.go +++ b/core/commands/files.go @@ -64,13 +64,16 @@ defaults to true and ensures two things: 1) that the changes are reflected in the full MFS structure (updated CIDs) 2) that the parent-folder's cache is cleared. Use caution when setting this flag to false. It will improve performance for large numbers of file operations, but it does so at the cost -of consistency guarantees and unbound growth of the directories' in-memory -caches. If the daemon is unexpectedly killed before running 'ipfs files -flush' on the files in question, then data may be lost. This also applies to -run 'ipfs repo gc' concurrently with '--flush=false' operations. We recommend -flushing paths regularly with 'ipfs files flush', specially the folders on -which many write operations are happening, as a way to clear the directory -cache, free memory and speed up read operations.`, +of consistency guarantees. If the daemon is unexpectedly killed before running +'ipfs files flush' on the files in question, then data may be lost. This also +applies to run 'ipfs repo gc' concurrently with '--flush=false' operations. + +When using '--flush=false', directories will automatically flush when the +number of cached entries exceeds the Internal.MFSAutoflushThreshold config. +This prevents unbounded memory growth. We recommend flushing +paths regularly with 'ipfs files flush', specially the folders on which many +write operations are happening, as a way to clear the directory cache, free +memory and speed up read operations.`, }, Options: []cmds.Option{ cmds.BoolOption(filesFlushOptionName, "f", "Flush target and ancestors after write.").WithDefault(true), @@ -1258,6 +1261,13 @@ Remove files or directories. cmds.BoolOption(forceOptionName, "Forcibly remove target at path; implies -r for directories"), }, Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error { + // Check if user explicitly set --flush=false + if flushOpt, ok := req.Options[filesFlushOptionName]; ok { + if flush, ok := flushOpt.(bool); ok && !flush { + return fmt.Errorf("files rm always flushes for safety. The --flush flag cannot be set to false for this command") + } + } + nd, err := cmdenv.GetNode(env) if err != nil { return err diff --git a/core/node/core.go b/core/node/core.go index 5ad1e557b..7cafe302e 100644 --- a/core/node/core.go +++ b/core/node/core.go @@ -242,6 +242,16 @@ func Files(strategy string) func(mctx helpers.MetricsCtx, lc fx.Lifecycle, repo } root, err := mfs.NewRoot(ctx, dag, nd, pf, prov) + if err != nil { + return nil, err + } + + // Configure MFS directory cache auto-flush threshold if specified (experimental) + cfg, err := repo.Config() + if err == nil && !cfg.Internal.MFSAutoflushThreshold.IsDefault() { + threshold := int(cfg.Internal.MFSAutoflushThreshold.WithDefault(int64(mfs.DefaultMaxCacheSize))) + root.SetMaxCacheSize(threshold) + } lc.Append(fx.Hook{ OnStop: func(ctx context.Context) error { diff --git a/docs/changelogs/v0.38.md b/docs/changelogs/v0.38.md index 440ee0f9d..ccf60730d 100644 --- a/docs/changelogs/v0.38.md +++ b/docs/changelogs/v0.38.md @@ -81,6 +81,10 @@ Identity CIDs use [multihash `0x00`](https://github.com/multiformats/multicodec/ This release resolves several long-standing MFS issues: raw nodes now preserve their codec instead of being forced to dag-pb, append operations on raw nodes work correctly by converting to UnixFS when needed, and identity CIDs properly inherit the full CID prefix from parent directories. +#### MFS directory cache auto-flush + +The new [`Internal.MFSAutoflushThreshold`](https://github.com/ipfs/kubo/blob/master/docs/config.md#internalmfsautoflushthreshold) configuration option prevents unbounded memory growth when using `--flush=false` with `ipfs files` commands by automatically flushing directories when their cache exceeds the configured threshold (default: 256 entries). + ### 📦️ Important dependency updates ### 📝 Changelog diff --git a/docs/config.md b/docs/config.md index ef3d8ad63..9ef96514d 100644 --- a/docs/config.md +++ b/docs/config.md @@ -1599,6 +1599,31 @@ Type: `flag` **MOVED:** see [`Import.UnixFSHAMTDirectorySizeThreshold`](#importunixfshamtdirectorysizethreshold) +### `Internal.MFSAutoflushThreshold` + +Controls the number of entries cached in memory for each MFS directory before +auto-flush is triggered to prevent unbounded memory growth when using `--flush=false` +with `ipfs files` commands. + +When a directory's cache reaches this threshold, it will automatically flush to +the blockstore even when `--flush=false` is specified. This prevents excessive +memory usage while still allowing performance benefits of deferred flushing for +smaller operations. + +**Examples:** +* `256` - Default value. Provides a good balance between performance and memory usage. +* `0` - Disables cache limiting (behavior before Kubo 0.38). May cause high memory + usage with `--flush=false` on large directories. +* `1024` - Higher limit for systems with more available memory that need to perform + many operations before flushing. + +Default: `256` + +Type: `optionalInteger` (0 disables the limit, risky, may lead to errors) + +**Note:** This is an EXPERIMENTAL feature and may change or be removed in future releases. +See [#10842](https://github.com/ipfs/kubo/issues/10842) for more information. + ## `Ipns` ### `Ipns.RepublishPeriod` diff --git a/test/cli/files_test.go b/test/cli/files_test.go index 275261897..91589ea9c 100644 --- a/test/cli/files_test.go +++ b/test/cli/files_test.go @@ -118,3 +118,63 @@ func TestFilesCp(t *testing.T) { assert.Equal(t, data, catRes.Stdout.Trimmed()) }) } + +func TestFilesRm(t *testing.T) { + t.Parallel() + + t.Run("files rm with --flush=false returns error", func(t *testing.T) { + // Test that files rm rejects --flush=false so user does not assume disabling flush works + // (rm ignored it before, better to explicitly error) + // See https://github.com/ipfs/kubo/issues/10842 + t.Parallel() + + node := harness.NewT(t).NewNode().Init().StartDaemon() + + // Create a file to remove + node.IPFS("files", "mkdir", "/test-dir") + + // Try to remove with --flush=false, should error + res := node.RunIPFS("files", "rm", "-r", "--flush=false", "/test-dir") + assert.NotEqual(t, 0, res.ExitErr.ExitCode()) + assert.Contains(t, res.Stderr.String(), "files rm always flushes for safety") + assert.Contains(t, res.Stderr.String(), "cannot be set to false") + + // Verify the directory still exists (wasn't removed due to error) + lsRes := node.IPFS("files", "ls", "/") + assert.Contains(t, lsRes.Stdout.String(), "test-dir") + }) + + t.Run("files rm with --flush=true works", func(t *testing.T) { + t.Parallel() + + node := harness.NewT(t).NewNode().Init().StartDaemon() + + // Create a file to remove + node.IPFS("files", "mkdir", "/test-dir") + + // Remove with explicit --flush=true, should work + res := node.IPFS("files", "rm", "-r", "--flush=true", "/test-dir") + assert.NoError(t, res.Err) + + // Verify the directory was removed + lsRes := node.IPFS("files", "ls", "/") + assert.NotContains(t, lsRes.Stdout.String(), "test-dir") + }) + + t.Run("files rm without flush flag works (default behavior)", func(t *testing.T) { + t.Parallel() + + node := harness.NewT(t).NewNode().Init().StartDaemon() + + // Create a file to remove + node.IPFS("files", "mkdir", "/test-dir") + + // Remove without flush flag (should use default which is true) + res := node.IPFS("files", "rm", "-r", "/test-dir") + assert.NoError(t, res.Err) + + // Verify the directory was removed + lsRes := node.IPFS("files", "ls", "/") + assert.NotContains(t, lsRes.Stdout.String(), "test-dir") + }) +}