mirror of
https://github.com/ipfs/kubo.git
synced 2026-03-02 06:47:51 +08:00
fix(mfs): unbound cache growth with flush=false (#10971)
* fix: prevent --flush=false in 'ipfs files rm' command the 'ipfs files rm' command always flushes for safety to ensure data integrity. this change adds an explicit error when users try to pass --flush=false, improving ux and preventing confusion. related to #10842 * fix: add MFS cache size limit to prevent unbounded growth - add Internal.MFSAutoflushThreshold config (experimental) - directories auto-flush when cache exceeds threshold with --flush=false - prevents high memory usage issue from #10842 - default: 256 entries per directory (matching HAMT shard size) - set to 0 to restore old behavior (risky, may cause errors) Closes #10842
This commit is contained in:
parent
d37b92bfcd
commit
fa17b69c7d
@ -6,6 +6,14 @@ type Internal struct {
|
||||
UnixFSShardingSizeThreshold *OptionalString `json:",omitempty"` // moved to Import.UnixFSHAMTDirectorySizeThreshold
|
||||
Libp2pForceReachability *OptionalString `json:",omitempty"`
|
||||
BackupBootstrapInterval *OptionalDuration `json:",omitempty"`
|
||||
// MFSAutoflushThreshold controls the number of entries cached in memory
|
||||
// for each MFS directory before auto-flush is triggered to prevent
|
||||
// unbounded memory growth when using --flush=false.
|
||||
// Default: 256 (matches HAMT shard size)
|
||||
// Set to 0 to disable cache limiting (old behavior, may cause high memory usage)
|
||||
// This is an EXPERIMENTAL feature and may change or be removed in future releases.
|
||||
// See https://github.com/ipfs/kubo/issues/10842
|
||||
MFSAutoflushThreshold OptionalInteger `json:",omitempty"`
|
||||
}
|
||||
|
||||
type InternalBitswap struct {
|
||||
|
||||
@ -64,13 +64,16 @@ defaults to true and ensures two things: 1) that the changes are reflected in
|
||||
the full MFS structure (updated CIDs) 2) that the parent-folder's cache is
|
||||
cleared. Use caution when setting this flag to false. It will improve
|
||||
performance for large numbers of file operations, but it does so at the cost
|
||||
of consistency guarantees and unbound growth of the directories' in-memory
|
||||
caches. If the daemon is unexpectedly killed before running 'ipfs files
|
||||
flush' on the files in question, then data may be lost. This also applies to
|
||||
run 'ipfs repo gc' concurrently with '--flush=false' operations. We recommend
|
||||
flushing paths regularly with 'ipfs files flush', specially the folders on
|
||||
which many write operations are happening, as a way to clear the directory
|
||||
cache, free memory and speed up read operations.`,
|
||||
of consistency guarantees. If the daemon is unexpectedly killed before running
|
||||
'ipfs files flush' on the files in question, then data may be lost. This also
|
||||
applies to run 'ipfs repo gc' concurrently with '--flush=false' operations.
|
||||
|
||||
When using '--flush=false', directories will automatically flush when the
|
||||
number of cached entries exceeds the Internal.MFSAutoflushThreshold config.
|
||||
This prevents unbounded memory growth. We recommend flushing
|
||||
paths regularly with 'ipfs files flush', specially the folders on which many
|
||||
write operations are happening, as a way to clear the directory cache, free
|
||||
memory and speed up read operations.`,
|
||||
},
|
||||
Options: []cmds.Option{
|
||||
cmds.BoolOption(filesFlushOptionName, "f", "Flush target and ancestors after write.").WithDefault(true),
|
||||
@ -1258,6 +1261,13 @@ Remove files or directories.
|
||||
cmds.BoolOption(forceOptionName, "Forcibly remove target at path; implies -r for directories"),
|
||||
},
|
||||
Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error {
|
||||
// Check if user explicitly set --flush=false
|
||||
if flushOpt, ok := req.Options[filesFlushOptionName]; ok {
|
||||
if flush, ok := flushOpt.(bool); ok && !flush {
|
||||
return fmt.Errorf("files rm always flushes for safety. The --flush flag cannot be set to false for this command")
|
||||
}
|
||||
}
|
||||
|
||||
nd, err := cmdenv.GetNode(env)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@ -242,6 +242,16 @@ func Files(strategy string) func(mctx helpers.MetricsCtx, lc fx.Lifecycle, repo
|
||||
}
|
||||
|
||||
root, err := mfs.NewRoot(ctx, dag, nd, pf, prov)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Configure MFS directory cache auto-flush threshold if specified (experimental)
|
||||
cfg, err := repo.Config()
|
||||
if err == nil && !cfg.Internal.MFSAutoflushThreshold.IsDefault() {
|
||||
threshold := int(cfg.Internal.MFSAutoflushThreshold.WithDefault(int64(mfs.DefaultMaxCacheSize)))
|
||||
root.SetMaxCacheSize(threshold)
|
||||
}
|
||||
|
||||
lc.Append(fx.Hook{
|
||||
OnStop: func(ctx context.Context) error {
|
||||
|
||||
@ -81,6 +81,10 @@ Identity CIDs use [multihash `0x00`](https://github.com/multiformats/multicodec/
|
||||
|
||||
This release resolves several long-standing MFS issues: raw nodes now preserve their codec instead of being forced to dag-pb, append operations on raw nodes work correctly by converting to UnixFS when needed, and identity CIDs properly inherit the full CID prefix from parent directories.
|
||||
|
||||
#### MFS directory cache auto-flush
|
||||
|
||||
The new [`Internal.MFSAutoflushThreshold`](https://github.com/ipfs/kubo/blob/master/docs/config.md#internalmfsautoflushthreshold) configuration option prevents unbounded memory growth when using `--flush=false` with `ipfs files` commands by automatically flushing directories when their cache exceeds the configured threshold (default: 256 entries).
|
||||
|
||||
### 📦️ Important dependency updates
|
||||
|
||||
### 📝 Changelog
|
||||
|
||||
@ -1599,6 +1599,31 @@ Type: `flag`
|
||||
|
||||
**MOVED:** see [`Import.UnixFSHAMTDirectorySizeThreshold`](#importunixfshamtdirectorysizethreshold)
|
||||
|
||||
### `Internal.MFSAutoflushThreshold`
|
||||
|
||||
Controls the number of entries cached in memory for each MFS directory before
|
||||
auto-flush is triggered to prevent unbounded memory growth when using `--flush=false`
|
||||
with `ipfs files` commands.
|
||||
|
||||
When a directory's cache reaches this threshold, it will automatically flush to
|
||||
the blockstore even when `--flush=false` is specified. This prevents excessive
|
||||
memory usage while still allowing performance benefits of deferred flushing for
|
||||
smaller operations.
|
||||
|
||||
**Examples:**
|
||||
* `256` - Default value. Provides a good balance between performance and memory usage.
|
||||
* `0` - Disables cache limiting (behavior before Kubo 0.38). May cause high memory
|
||||
usage with `--flush=false` on large directories.
|
||||
* `1024` - Higher limit for systems with more available memory that need to perform
|
||||
many operations before flushing.
|
||||
|
||||
Default: `256`
|
||||
|
||||
Type: `optionalInteger` (0 disables the limit, risky, may lead to errors)
|
||||
|
||||
**Note:** This is an EXPERIMENTAL feature and may change or be removed in future releases.
|
||||
See [#10842](https://github.com/ipfs/kubo/issues/10842) for more information.
|
||||
|
||||
## `Ipns`
|
||||
|
||||
### `Ipns.RepublishPeriod`
|
||||
|
||||
@ -118,3 +118,63 @@ func TestFilesCp(t *testing.T) {
|
||||
assert.Equal(t, data, catRes.Stdout.Trimmed())
|
||||
})
|
||||
}
|
||||
|
||||
func TestFilesRm(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("files rm with --flush=false returns error", func(t *testing.T) {
|
||||
// Test that files rm rejects --flush=false so user does not assume disabling flush works
|
||||
// (rm ignored it before, better to explicitly error)
|
||||
// See https://github.com/ipfs/kubo/issues/10842
|
||||
t.Parallel()
|
||||
|
||||
node := harness.NewT(t).NewNode().Init().StartDaemon()
|
||||
|
||||
// Create a file to remove
|
||||
node.IPFS("files", "mkdir", "/test-dir")
|
||||
|
||||
// Try to remove with --flush=false, should error
|
||||
res := node.RunIPFS("files", "rm", "-r", "--flush=false", "/test-dir")
|
||||
assert.NotEqual(t, 0, res.ExitErr.ExitCode())
|
||||
assert.Contains(t, res.Stderr.String(), "files rm always flushes for safety")
|
||||
assert.Contains(t, res.Stderr.String(), "cannot be set to false")
|
||||
|
||||
// Verify the directory still exists (wasn't removed due to error)
|
||||
lsRes := node.IPFS("files", "ls", "/")
|
||||
assert.Contains(t, lsRes.Stdout.String(), "test-dir")
|
||||
})
|
||||
|
||||
t.Run("files rm with --flush=true works", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
node := harness.NewT(t).NewNode().Init().StartDaemon()
|
||||
|
||||
// Create a file to remove
|
||||
node.IPFS("files", "mkdir", "/test-dir")
|
||||
|
||||
// Remove with explicit --flush=true, should work
|
||||
res := node.IPFS("files", "rm", "-r", "--flush=true", "/test-dir")
|
||||
assert.NoError(t, res.Err)
|
||||
|
||||
// Verify the directory was removed
|
||||
lsRes := node.IPFS("files", "ls", "/")
|
||||
assert.NotContains(t, lsRes.Stdout.String(), "test-dir")
|
||||
})
|
||||
|
||||
t.Run("files rm without flush flag works (default behavior)", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
node := harness.NewT(t).NewNode().Init().StartDaemon()
|
||||
|
||||
// Create a file to remove
|
||||
node.IPFS("files", "mkdir", "/test-dir")
|
||||
|
||||
// Remove without flush flag (should use default which is true)
|
||||
res := node.IPFS("files", "rm", "-r", "/test-dir")
|
||||
assert.NoError(t, res.Err)
|
||||
|
||||
// Verify the directory was removed
|
||||
lsRes := node.IPFS("files", "ls", "/")
|
||||
assert.NotContains(t, lsRes.Stdout.String(), "test-dir")
|
||||
})
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user