diff --git a/config/import.go b/config/import.go index 2c88a12f3..8c40d7d1e 100644 --- a/config/import.go +++ b/config/import.go @@ -2,11 +2,13 @@ package config import ( "fmt" + "io" "strconv" "strings" + chunk "github.com/ipfs/boxo/chunker" "github.com/ipfs/boxo/ipld/unixfs/importer/helpers" - "github.com/ipfs/boxo/ipld/unixfs/io" + uio "github.com/ipfs/boxo/ipld/unixfs/io" "github.com/ipfs/boxo/verifcid" mh "github.com/multiformats/go-multihash" ) @@ -47,7 +49,7 @@ const ( var ( DefaultUnixFSFileMaxLinks = int64(helpers.DefaultLinksPerBlock) DefaultUnixFSDirectoryMaxLinks = int64(0) - DefaultUnixFSHAMTDirectoryMaxFanout = int64(io.DefaultShardWidth) + DefaultUnixFSHAMTDirectoryMaxFanout = int64(uio.DefaultShardWidth) ) // Import configures the default options for ingesting data. This affects commands @@ -222,15 +224,39 @@ func isValidChunker(chunker string) bool { } // HAMTSizeEstimationMode returns the boxo SizeEstimationMode based on the config value. -func (i *Import) HAMTSizeEstimationMode() io.SizeEstimationMode { +func (i *Import) HAMTSizeEstimationMode() uio.SizeEstimationMode { switch i.UnixFSHAMTDirectorySizeEstimation.WithDefault(DefaultUnixFSHAMTDirectorySizeEstimation) { case HAMTSizeEstimationLinks: - return io.SizeEstimationLinks + return uio.SizeEstimationLinks case HAMTSizeEstimationBlock: - return io.SizeEstimationBlock + return uio.SizeEstimationBlock case HAMTSizeEstimationDisabled: - return io.SizeEstimationDisabled + return uio.SizeEstimationDisabled default: - return io.SizeEstimationLinks + return uio.SizeEstimationLinks + } +} + +// UnixFSSplitterFunc returns a SplitterGen function based on Import.UnixFSChunker. +// The returned function creates a Splitter for the configured chunking strategy. +// A "size-N" chunker string is parsed once when this method is called; any other chunker string is parsed on each use of the returned function. 
+func (i *Import) UnixFSSplitterFunc() chunk.SplitterGen { + chunkerStr := i.UnixFSChunker.WithDefault(DefaultUnixFSChunker) + + // Parse size-based chunker (most common case) and return optimized generator + if sizeStr, ok := strings.CutPrefix(chunkerStr, "size-"); ok { + if size, err := strconv.ParseInt(sizeStr, 10, 64); err == nil && size > 0 { + return chunk.SizeSplitterGen(size) + } + } + + // For other chunker types (rabin, buzhash) or invalid config, + // fall back to parsing per-use (these are rare cases) + return func(r io.Reader) chunk.Splitter { + s, err := chunk.FromString(r, chunkerStr) + if err != nil { + return chunk.DefaultSplitter(r) + } + return s } } diff --git a/core/commands/files.go b/core/commands/files.go index 4076d0e3e..252ba467a 100644 --- a/core/commands/files.go +++ b/core/commands/files.go @@ -28,6 +28,7 @@ import ( offline "github.com/ipfs/boxo/exchange/offline" dag "github.com/ipfs/boxo/ipld/merkledag" ft "github.com/ipfs/boxo/ipld/unixfs" + uio "github.com/ipfs/boxo/ipld/unixfs/io" mfs "github.com/ipfs/boxo/mfs" "github.com/ipfs/boxo/path" cid "github.com/ipfs/go-cid" @@ -555,7 +556,9 @@ being GC'ed. mkParents, _ := req.Options[filesParentsOptionName].(bool) if mkParents { - err := ensureContainingDirectoryExists(nd.FilesRoot, dst, prefix) + maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks)) + sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode() + err := ensureContainingDirectoryExists(nd.FilesRoot, dst, prefix, maxDirLinks, &sizeEstimationMode) if err != nil { return err } @@ -994,9 +997,13 @@ stat' on the file or any of its ancestors. WARNING: The CID produced by 'files write' will be different from 'ipfs add' because -'ipfs file write' creates a trickle-dag optimized for append-only operations +'ipfs file write' creates a trickle-dag optimized for append-only operations. See '--trickle' in 'ipfs add --help' for more information. 
+NOTE: The 'Import.UnixFSFileMaxLinks' config option does not apply to this command. +Trickle DAG has a fixed internal structure optimized for append operations. +To use configurable max-links, use 'ipfs add' with balanced DAG layout. + If you want to add a file without modifying an existing one, use 'ipfs add' with '--to-files': @@ -1064,7 +1071,9 @@ See '--to-files' in 'ipfs add --help' for more information. } if mkParents { - err := ensureContainingDirectoryExists(nd.FilesRoot, path, prefix) + maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks)) + sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode() + err := ensureContainingDirectoryExists(nd.FilesRoot, path, prefix, maxDirLinks, &sizeEstimationMode) if err != nil { return err } @@ -1191,10 +1200,15 @@ Examples: } root := n.FilesRoot + maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks)) + sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode() + err = mfs.Mkdir(root, dirtomake, mfs.MkdirOpts{ - Mkparents: dashp, - Flush: flush, - CidBuilder: prefix, + Mkparents: dashp, + Flush: flush, + CidBuilder: prefix, + MaxLinks: maxDirLinks, + SizeEstimationMode: &sizeEstimationMode, }) return err @@ -1510,7 +1524,7 @@ func getPrefix(req *cmds.Request, importCfg *config.Import) (cid.Builder, error) return &prefix, nil } -func ensureContainingDirectoryExists(r *mfs.Root, path string, builder cid.Builder) error { +func ensureContainingDirectoryExists(r *mfs.Root, path string, builder cid.Builder, maxLinks int, sizeEstimationMode *uio.SizeEstimationMode) error { dirtomake := gopath.Dir(path) if dirtomake == "/" { @@ -1518,8 +1532,10 @@ func ensureContainingDirectoryExists(r *mfs.Root, path string, builder cid.Build } return mfs.Mkdir(r, dirtomake, mfs.MkdirOpts{ - Mkparents: true, - CidBuilder: builder, + Mkparents: true, + CidBuilder: builder, + MaxLinks: maxLinks, + SizeEstimationMode: sizeEstimationMode, }) } 
diff --git a/core/node/core.go b/core/node/core.go index 06e786f1f..fd23cc16b 100644 --- a/core/node/core.go +++ b/core/node/core.go @@ -243,7 +243,20 @@ func Files(strategy string) func(mctx helpers.MetricsCtx, lc fx.Lifecycle, repo prov = nil } - root, err := mfs.NewRoot(ctx, dag, nd, pf, prov) + // Get configured settings from Import config + cfg, err := repo.Config() + if err != nil { + return nil, fmt.Errorf("failed to get config: %w", err) + } + chunkerGen := cfg.Import.UnixFSSplitterFunc() + maxDirLinks := int(cfg.Import.UnixFSDirectoryMaxLinks.WithDefault(config.DefaultUnixFSDirectoryMaxLinks)) + sizeEstimationMode := cfg.Import.HAMTSizeEstimationMode() + + root, err := mfs.NewRoot(ctx, dag, nd, pf, prov, + mfs.WithChunker(chunkerGen), + mfs.WithMaxLinks(maxDirLinks), + mfs.WithSizeEstimationMode(sizeEstimationMode), + ) if err != nil { return nil, fmt.Errorf("failed to initialize MFS root from %s stored at %s: %w. "+ "If corrupted, use 'ipfs files chroot' to reset (see --help)", nd.Cid(), FilesRootDatastoreKey, err) diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index 7e33b1102..ba63471a4 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -7,7 +7,7 @@ go 1.25 replace github.com/ipfs/kubo => ./../../.. 
require ( - github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 + github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 github.com/ipfs/kubo v0.0.0-00010101000000-000000000000 github.com/libp2p/go-libp2p v0.47.0 github.com/multiformats/go-multiaddr v0.16.1 diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index fdd5ab799..241f89289 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -267,8 +267,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 h1:hWjjMiiu6aEDXqJmoF3opHrtT3EXwivLE2N0f87yDDU= -github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k= +github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 h1:zqISQlY0hN/IQsNB5adpPSpuqcgRwQnboxv6ArxXt5k= +github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= diff --git a/go.mod b/go.mod index 7bb8b8aa3..f7c91f275 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/hashicorp/go-version v1.7.0 github.com/ipfs-shipyard/nopfs v0.0.14 github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 - github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 + github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 github.com/ipfs/go-block-format v0.2.3 github.com/ipfs/go-cid v0.6.0 
github.com/ipfs/go-cidutil v0.1.0 @@ -279,3 +279,5 @@ exclude ( github.com/ipfs/go-ipfs-cmds v2.0.1+incompatible github.com/libp2p/go-libp2p v6.0.23+incompatible ) + +replace github.com/ipfs/boxo => ../boxo diff --git a/go.sum b/go.sum index 01c43962a..0c6a3b906 100644 --- a/go.sum +++ b/go.sum @@ -338,8 +338,6 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 h1:hWjjMiiu6aEDXqJmoF3opHrtT3EXwivLE2N0f87yDDU= -github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= diff --git a/test/cli/files_test.go b/test/cli/files_test.go index bb817f2c8..6af1bc63d 100644 --- a/test/cli/files_test.go +++ b/test/cli/files_test.go @@ -1,6 +1,7 @@ package cli import ( + "encoding/json" "fmt" "os" "path/filepath" @@ -688,4 +689,97 @@ func TestFilesMFSImportConfig(t *testing.T) { codec := node.IPFS("cid", "format", "-f", "%c", mfsCid).Stdout.Trimmed() require.Equal(t, "raw", codec) }) + + t.Run("files mkdir respects Import.UnixFSDirectoryMaxLinks", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init() + node.UpdateConfig(func(cfg *config.Config) { + cfg.Import.CidVersion = *config.NewOptionalInteger(1) + // Set low link threshold to trigger HAMT sharding at 5 links + cfg.Import.UnixFSDirectoryMaxLinks = *config.NewOptionalInteger(5) + // Also need size estimation enabled for switching to work 
+ cfg.Import.UnixFSHAMTDirectorySizeEstimation = *config.NewOptionalString("block") + }) + node.StartDaemon() + defer node.StopDaemon() + + // Create directory with 6 files (exceeds max 5 links) + node.IPFS("files", "mkdir", "/testdir") + + content := "x" + tempFile := filepath.Join(node.Dir, "content.txt") + require.NoError(t, os.WriteFile(tempFile, []byte(content), 0644)) + + for i := 0; i < 6; i++ { + node.IPFS("files", "write", "--create", fmt.Sprintf("/testdir/file%d.txt", i), tempFile) + } + + // Verify directory became HAMT sharded + cidStr := node.IPFS("files", "stat", "--hash", "/testdir").Stdout.Trimmed() + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory after exceeding UnixFSDirectoryMaxLinks") + }) + + t.Run("files write respects Import.UnixFSChunker", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init() + node.UpdateConfig(func(cfg *config.Config) { + cfg.Import.CidVersion = *config.NewOptionalInteger(1) + cfg.Import.UnixFSRawLeaves = config.True + cfg.Import.UnixFSChunker = *config.NewOptionalString("size-1024") // 1KB chunks + }) + node.StartDaemon() + defer node.StopDaemon() + + // Create file larger than chunk size (3KB) + data := make([]byte, 3*1024) + for i := range data { + data[i] = byte(i % 256) + } + tempFile := filepath.Join(node.Dir, "large.bin") + require.NoError(t, os.WriteFile(tempFile, data, 0644)) + + node.IPFS("files", "write", "--create", "/large.bin", tempFile) + + // Verify chunking: 3KB file with 1KB chunks should have multiple child blocks + cidStr := node.IPFS("files", "stat", "--hash", "/large.bin").Stdout.Trimmed() + dagStatJSON := node.IPFS("dag", "stat", "--enc=json", cidStr).Stdout.Trimmed() + var dagStat struct { + UniqueBlocks int `json:"UniqueBlocks"` + } + require.NoError(t, json.Unmarshal([]byte(dagStatJSON), &dagStat)) + // With 1KB chunks the 3KB file has 3 leaf chunks plus a root, but the 256-byte repeating pattern makes every 1KB chunk identical, so the leaves likely share one CID; only require more than 1 unique block 
assert.Greater(t, dagStat.UniqueBlocks, 1, "expected more than 1 block with 1KB chunker on 3KB file") + }) + + t.Run("files write with custom chunker produces same CID as ipfs add --trickle", func(t *testing.T) { + t.Parallel() + node := harness.NewT(t).NewNode().Init() + node.UpdateConfig(func(cfg *config.Config) { + cfg.Import.CidVersion = *config.NewOptionalInteger(1) + cfg.Import.UnixFSRawLeaves = config.True + cfg.Import.UnixFSChunker = *config.NewOptionalString("size-512") + }) + node.StartDaemon() + defer node.StopDaemon() + + // Create test data (2KB to get multiple chunks) + data := make([]byte, 2048) + for i := range data { + data[i] = byte(i % 256) + } + tempFile := filepath.Join(node.Dir, "test.bin") + require.NoError(t, os.WriteFile(tempFile, data, 0644)) + + // Add via MFS + node.IPFS("files", "write", "--create", "/test.bin", tempFile) + mfsCid := node.IPFS("files", "stat", "--hash", "/test.bin").Stdout.Trimmed() + + // Add via ipfs add with same chunker and trickle (MFS always uses trickle) + addCid := node.IPFS("add", "-Q", "--chunker=size-512", "--trickle", tempFile).Stdout.Trimmed() + + // CIDs should match when using same chunker + trickle layout + require.Equal(t, addCid, mfsCid, "MFS and add --trickle should produce same CID with matching chunker") + }) } diff --git a/test/dependencies/go.mod b/test/dependencies/go.mod index 7c0d5d79b..50d81099b 100644 --- a/test/dependencies/go.mod +++ b/test/dependencies/go.mod @@ -135,7 +135,7 @@ require ( github.com/huin/goupnp v1.3.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/ipfs/bbloom v0.0.4 // indirect - github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 // indirect + github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 // indirect github.com/ipfs/go-bitfield v1.1.0 // indirect github.com/ipfs/go-block-format v0.2.3 // indirect github.com/ipfs/go-cid v0.6.0 // indirect diff --git a/test/dependencies/go.sum b/test/dependencies/go.sum index 
e75a95622..67b43843a 100644 --- a/test/dependencies/go.sum +++ b/test/dependencies/go.sum @@ -296,8 +296,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= -github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779 h1:hWjjMiiu6aEDXqJmoF3opHrtT3EXwivLE2N0f87yDDU= -github.com/ipfs/boxo v0.36.1-0.20260201194832-c910c48ea779/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k= +github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265 h1:zqISQlY0hN/IQsNB5adpPSpuqcgRwQnboxv6ArxXt5k= +github.com/ipfs/boxo v0.36.1-0.20260202005650-54e044f1b265/go.mod h1:92hnRXfP5ScKEIqlq9Ns7LR1dFXEVADKWVGH0fjk83k= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk=