feat(files): wire Import.UnixFSChunker and UnixFSDirectoryMaxLinks to MFS

`ipfs files` commands now respect these Import.* config options:
- UnixFSChunker: configures chunk size for `files write`
- UnixFSDirectoryMaxLinks: triggers HAMT sharding in `files mkdir`
- UnixFSHAMTDirectorySizeEstimation: controls size estimation mode

previously, MFS used hardcoded defaults ignoring user config.

changes:
- config/import.go: add UnixFSSplitterFunc() returning chunk.SplitterGen
- core/node/core.go: pass chunker, maxLinks, sizeEstimationMode to
  mfs.NewRoot() via new boxo RootOption API
- core/commands/files.go: pass maxLinks and sizeEstimationMode to
  mfs.Mkdir() and ensureContainingDirectoryExists(); document that
  UnixFSFileMaxLinks doesn't apply to files write (trickle DAG limitation)
- test/cli/files_test.go: add tests for UnixFSDirectoryMaxLinks and
  UnixFSChunker, including CID parity test with `ipfs add --trickle`

related: boxo@54e044f1b265
This commit is contained in:
Marcin Rataj 2026-02-02 02:00:02 +01:00
parent ff35575e31
commit 506cc6e70f
10 changed files with 175 additions and 26 deletions

View File

@ -2,11 +2,13 @@ package config
import (
"fmt"
"io"
"strconv"
"strings"
chunk "github.com/ipfs/boxo/chunker"
"github.com/ipfs/boxo/ipld/unixfs/importer/helpers"
"github.com/ipfs/boxo/ipld/unixfs/io"
uio "github.com/ipfs/boxo/ipld/unixfs/io"
"github.com/ipfs/boxo/verifcid"
mh "github.com/multiformats/go-multihash"
)
@ -47,7 +49,7 @@ const (
var (
DefaultUnixFSFileMaxLinks = int64(helpers.DefaultLinksPerBlock)
DefaultUnixFSDirectoryMaxLinks = int64(0)
DefaultUnixFSHAMTDirectoryMaxFanout = int64(io.DefaultShardWidth)
DefaultUnixFSHAMTDirectoryMaxFanout = int64(uio.DefaultShardWidth)
)
// Import configures the default options for ingesting data. This affects commands
@ -222,15 +224,39 @@ func isValidChunker(chunker string) bool {
}
// HAMTSizeEstimationMode returns the boxo SizeEstimationMode based on the config value.
func (i *Import) HAMTSizeEstimationMode() io.SizeEstimationMode {
func (i *Import) HAMTSizeEstimationMode() uio.SizeEstimationMode {
switch i.UnixFSHAMTDirectorySizeEstimation.WithDefault(DefaultUnixFSHAMTDirectorySizeEstimation) {
case HAMTSizeEstimationLinks:
return io.SizeEstimationLinks
return uio.SizeEstimationLinks
case HAMTSizeEstimationBlock:
return io.SizeEstimationBlock
return uio.SizeEstimationBlock
case HAMTSizeEstimationDisabled:
return io.SizeEstimationDisabled
return uio.SizeEstimationDisabled
default:
return io.SizeEstimationLinks
return uio.SizeEstimationLinks
}
}
// UnixFSSplitterFunc builds a chunk.SplitterGen from Import.UnixFSChunker,
// using DefaultUnixFSChunker when the option is unset.
//
// A well-formed "size-<N>" value with N > 0 is parsed here, once, and served
// by chunk.SizeSplitterGen. Any other value (rabin, buzhash, or a malformed
// size) is handed to chunk.FromString every time the returned generator is
// invoked; if that parse fails, the generator silently returns
// chunk.DefaultSplitter instead of reporting the error.
func (i *Import) UnixFSSplitterFunc() chunk.SplitterGen {
	spec := i.UnixFSChunker.WithDefault(DefaultUnixFSChunker)

	// Fast path: fixed-size chunking needs no per-reader parsing.
	if digits, isSize := strings.CutPrefix(spec, "size-"); isSize {
		n, parseErr := strconv.ParseInt(digits, 10, 64)
		if parseErr == nil && n > 0 {
			return chunk.SizeSplitterGen(n)
		}
	}

	// Slow path: let the chunker package parse the spec on each use.
	return func(r io.Reader) chunk.Splitter {
		if splitter, err := chunk.FromString(r, spec); err == nil {
			return splitter
		}
		return chunk.DefaultSplitter(r)
	}
}

View File

@ -28,6 +28,7 @@ import (
offline "github.com/ipfs/boxo/exchange/offline"
dag "github.com/ipfs/boxo/ipld/merkledag"
ft "github.com/ipfs/boxo/ipld/unixfs"
uio "github.com/ipfs/boxo/ipld/unixfs/io"
mfs "github.com/ipfs/boxo/mfs"
"github.com/ipfs/boxo/path"
cid "github.com/ipfs/go-cid"
@ -555,7 +556,9 @@ being GC'ed.
mkParents, _ := req.Options[filesParentsOptionName].(bool)
if mkParents {
err := ensureContainingDirectoryExists(nd.FilesRoot, dst, prefix)
maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks))
sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode()
err := ensureContainingDirectoryExists(nd.FilesRoot, dst, prefix, maxDirLinks, &sizeEstimationMode)
if err != nil {
return err
}
@ -994,9 +997,13 @@ stat' on the file or any of its ancestors.
WARNING:
The CID produced by 'files write' will be different from 'ipfs add' because
'ipfs file write' creates a trickle-dag optimized for append-only operations
'ipfs files write' creates a trickle-dag optimized for append-only operations.
See '--trickle' in 'ipfs add --help' for more information.
NOTE: The 'Import.UnixFSFileMaxLinks' config option does not apply to this command.
Trickle DAG has a fixed internal structure optimized for append operations.
To use configurable max-links, use 'ipfs add' with balanced DAG layout.
If you want to add a file without modifying an existing one,
use 'ipfs add' with '--to-files':
@ -1064,7 +1071,9 @@ See '--to-files' in 'ipfs add --help' for more information.
}
if mkParents {
err := ensureContainingDirectoryExists(nd.FilesRoot, path, prefix)
maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks))
sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode()
err := ensureContainingDirectoryExists(nd.FilesRoot, path, prefix, maxDirLinks, &sizeEstimationMode)
if err != nil {
return err
}
@ -1191,10 +1200,15 @@ Examples:
}
root := n.FilesRoot
maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks))
sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode()
err = mfs.Mkdir(root, dirtomake, mfs.MkdirOpts{
Mkparents: dashp,
Flush: flush,
CidBuilder: prefix,
Mkparents: dashp,
Flush: flush,
CidBuilder: prefix,
MaxLinks: maxDirLinks,
SizeEstimationMode: &sizeEstimationMode,
})
return err
@ -1510,7 +1524,7 @@ func getPrefix(req *cmds.Request, importCfg *config.Import) (cid.Builder, error)
return &prefix, nil
}
func ensureContainingDirectoryExists(r *mfs.Root, path string, builder cid.Builder) error {
func ensureContainingDirectoryExists(r *mfs.Root, path string, builder cid.Builder, maxLinks int, sizeEstimationMode *uio.SizeEstimationMode) error {
dirtomake := gopath.Dir(path)
if dirtomake == "/" {
@ -1518,8 +1532,10 @@ func ensureContainingDirectoryExists(r *mfs.Root, path string, builder cid.Build
}
return mfs.Mkdir(r, dirtomake, mfs.MkdirOpts{
Mkparents: true,
CidBuilder: builder,
Mkparents: true,
CidBuilder: builder,
MaxLinks: maxLinks,
SizeEstimationMode: sizeEstimationMode,
})
}

View File

@ -243,7 +243,20 @@ func Files(strategy string) func(mctx helpers.MetricsCtx, lc fx.Lifecycle, repo
prov = nil
}
root, err := mfs.NewRoot(ctx, dag, nd, pf, prov)
// Get configured settings from Import config
cfg, err := repo.Config()
if err != nil {
return nil, fmt.Errorf("failed to get config: %w", err)
}
chunkerGen := cfg.Import.UnixFSSplitterFunc()
maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks))
sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode()
root, err := mfs.NewRoot(ctx, dag, nd, pf, prov,
mfs.WithChunker(chunkerGen),
mfs.WithMaxLinks(maxDirLinks),
mfs.WithSizeEstimationMode(sizeEstimationMode),
)
if err != nil {
return nil, fmt.Errorf("failed to initialize MFS root from %s stored at %s: %w. "+
"If corrupted, use 'ipfs files chroot' to reset (see --help)", nd.Cid(), FilesRootDatastoreKey, err)

View File

@ -7,7 +7,7 @@ go 1.25
replace github.com/ipfs/kubo => ./../../..
require (
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265
github.com/ipfs/kubo v0.0.0-00010101000000-000000000000
github.com/libp2p/go-libp2p v0.47.0
github.com/multiformats/go-multiaddr v0.16.1

View File

@ -267,8 +267,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 h1:hWjjMiiu6aEDXqJmoF3opHrtT3EXwivLE2N0f87yDDU=
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k=
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 h1:zqISQlY0hN/IQsNB5adpPSpuqcgRwQnboxv6ArxXt5k=
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk=

4
go.mod
View File

@ -21,7 +21,7 @@ require (
github.com/hashicorp/go-version v1.7.0
github.com/ipfs-shipyard/nopfs v0.0.14
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265
github.com/ipfs/go-block-format v0.2.3
github.com/ipfs/go-cid v0.6.0
github.com/ipfs/go-cidutil v0.1.0
@ -279,3 +279,5 @@ exclude (
github.com/ipfs/go-ipfs-cmds v2.0.1+incompatible
github.com/libp2p/go-libp2p v6.0.23+incompatible
)
replace github.com/ipfs/boxo => ../boxo

2
go.sum
View File

@ -338,8 +338,6 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 h1:hWjjMiiu6aEDXqJmoF3opHrtT3EXwivLE2N0f87yDDU=
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk=

View File

@ -1,6 +1,7 @@
package cli
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
@ -688,4 +689,97 @@ func TestFilesMFSImportConfig(t *testing.T) {
codec := node.IPFS("cid", "format", "-f", "%c", mfsCid).Stdout.Trimmed()
require.Equal(t, "raw", codec)
})
t.Run("files mkdir respects Import.UnixFSDirectoryMaxLinks", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init()
node.UpdateConfig(func(cfg *config.Config) {
cfg.Import.CidVersion = *config.NewOptionalInteger(1)
// Set low link threshold to trigger HAMT sharding at 5 links
cfg.Import.UnixFSDirectoryMaxLinks = *config.NewOptionalInteger(5)
// Also need size estimation enabled for switching to work
cfg.Import.UnixFSHAMTDirectorySizeEstimation = *config.NewOptionalString("block")
})
node.StartDaemon()
defer node.StopDaemon()
// Create directory with 6 files (exceeds max 5 links)
node.IPFS("files", "mkdir", "/testdir")
content := "x"
tempFile := filepath.Join(node.Dir, "content.txt")
require.NoError(t, os.WriteFile(tempFile, []byte(content), 0644))
for i := 0; i < 6; i++ {
node.IPFS("files", "write", "--create", fmt.Sprintf("/testdir/file%d.txt", i), tempFile)
}
// Verify directory became HAMT sharded
cidStr := node.IPFS("files", "stat", "--hash", "/testdir").Stdout.Trimmed()
fsType, err := node.UnixFSDataType(cidStr)
require.NoError(t, err)
require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory after exceeding UnixFSDirectoryMaxLinks")
})
t.Run("files write respects Import.UnixFSChunker", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init()
node.UpdateConfig(func(cfg *config.Config) {
cfg.Import.CidVersion = *config.NewOptionalInteger(1)
cfg.Import.UnixFSRawLeaves = config.True
cfg.Import.UnixFSChunker = *config.NewOptionalString("size-1024") // 1KB chunks
})
node.StartDaemon()
defer node.StopDaemon()
// Create file larger than chunk size (3KB)
data := make([]byte, 3*1024)
for i := range data {
data[i] = byte(i % 256)
}
tempFile := filepath.Join(node.Dir, "large.bin")
require.NoError(t, os.WriteFile(tempFile, data, 0644))
node.IPFS("files", "write", "--create", "/large.bin", tempFile)
// Verify chunking: 3KB file with 1KB chunks should have multiple child blocks
cidStr := node.IPFS("files", "stat", "--hash", "/large.bin").Stdout.Trimmed()
dagStatJSON := node.IPFS("dag", "stat", "--enc=json", cidStr).Stdout.Trimmed()
var dagStat struct {
UniqueBlocks int `json:"UniqueBlocks"`
}
require.NoError(t, json.Unmarshal([]byte(dagStatJSON), &dagStat))
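// The test data repeats every 256 bytes, so the three 1KB leaves are
// byte-identical and may deduplicate to a single unique block. Only assert
// that chunking happened at all (root + at least one leaf).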
assert.Greater(t, dagStat.UniqueBlocks, 1, "expected more than 1 block with 1KB chunker on 3KB file")
})
t.Run("files write with custom chunker produces same CID as ipfs add --trickle", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init()
node.UpdateConfig(func(cfg *config.Config) {
cfg.Import.CidVersion = *config.NewOptionalInteger(1)
cfg.Import.UnixFSRawLeaves = config.True
cfg.Import.UnixFSChunker = *config.NewOptionalString("size-512")
})
node.StartDaemon()
defer node.StopDaemon()
// Create test data (2KB to get multiple chunks)
data := make([]byte, 2048)
for i := range data {
data[i] = byte(i % 256)
}
tempFile := filepath.Join(node.Dir, "test.bin")
require.NoError(t, os.WriteFile(tempFile, data, 0644))
// Add via MFS
node.IPFS("files", "write", "--create", "/test.bin", tempFile)
mfsCid := node.IPFS("files", "stat", "--hash", "/test.bin").Stdout.Trimmed()
// Add via ipfs add with same chunker and trickle (MFS always uses trickle)
addCid := node.IPFS("add", "-Q", "--chunker=size-512", "--trickle", tempFile).Stdout.Trimmed()
// CIDs should match when using same chunker + trickle layout
require.Equal(t, addCid, mfsCid, "MFS and add --trickle should produce same CID with matching chunker")
})
}

View File

@ -135,7 +135,7 @@ require (
github.com/huin/goupnp v1.3.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/ipfs/bbloom v0.0.4 // indirect
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 // indirect
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 // indirect
github.com/ipfs/go-bitfield v1.1.0 // indirect
github.com/ipfs/go-block-format v0.2.3 // indirect
github.com/ipfs/go-cid v0.6.0 // indirect

View File

@ -296,8 +296,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 h1:hWjjMiiu6aEDXqJmoF3opHrtT3EXwivLE2N0f87yDDU=
github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k=
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 h1:zqISQlY0hN/IQsNB5adpPSpuqcgRwQnboxv6ArxXt5k=
github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk=