test(add): add CID profile tests and wire SizeEstimationMode

add comprehensive test suite for UnixFS CID determinism per IPIP-499:
- verify exact HAMT threshold boundary for both estimation modes:
  - v0-2015 (links): sum(name_len + cid_len) == 262144
  - v1-2025 (block): serialized block size == 262144
- verify HAMT triggers at threshold + 1 byte for both profiles
- add all deterministic CIDs for cross-implementation testing

also wires SizeEstimationMode through CLI/API, allowing
Import.UnixFSHAMTSizeEstimation config to take effect.

bumps boxo to ipfs/boxo@6707376 which aligns HAMT threshold with
JS implementation (uses > instead of >=), fixing CID determinism
at the exact 256 KiB boundary.
This commit is contained in:
Marcin Rataj 2026-01-19 06:13:13 +01:00
parent 01b1ce0cca
commit aaf05db536
15 changed files with 842 additions and 211 deletions

View File

@ -15,6 +15,7 @@ import (
"github.com/cheggaaa/pb"
"github.com/ipfs/boxo/files"
uio "github.com/ipfs/boxo/ipld/unixfs/io"
mfs "github.com/ipfs/boxo/mfs"
"github.com/ipfs/boxo/path"
"github.com/ipfs/boxo/verifcid"
@ -300,6 +301,7 @@ https://github.com/ipfs/kubo/blob/master/docs/config.md#import
maxFileLinks, maxFileLinksSet := req.Options[maxFileLinksOptionName].(int)
maxDirectoryLinks, maxDirectoryLinksSet := req.Options[maxDirectoryLinksOptionName].(int)
maxHAMTFanout, maxHAMTFanoutSet := req.Options[maxHAMTFanoutOptionName].(int)
var sizeEstimationMode uio.SizeEstimationMode
nocopy, _ := req.Options[noCopyOptionName].(bool)
fscache, _ := req.Options[fstoreCacheOptionName].(bool)
cidVer, cidVerSet := req.Options[cidVersionOptionName].(int)
@ -376,6 +378,9 @@ https://github.com/ipfs/kubo/blob/master/docs/config.md#import
maxHAMTFanout = int(cfg.Import.UnixFSHAMTDirectoryMaxFanout.WithDefault(config.DefaultUnixFSHAMTDirectoryMaxFanout))
}
// SizeEstimationMode is always set from config (no CLI flag)
sizeEstimationMode = cfg.Import.HAMTSizeEstimationMode()
fastProvideRoot = config.ResolveBoolFromConfig(fastProvideRoot, fastProvideRootSet, cfg.Import.FastProvideRoot, config.DefaultFastProvideRoot)
fastProvideWait = config.ResolveBoolFromConfig(fastProvideWait, fastProvideWaitSet, cfg.Import.FastProvideWait, config.DefaultFastProvideWait)
@ -471,6 +476,9 @@ https://github.com/ipfs/kubo/blob/master/docs/config.md#import
opts = append(opts, options.Unixfs.MaxHAMTFanout(maxHAMTFanout))
}
// SizeEstimationMode is always set from config
opts = append(opts, options.Unixfs.SizeEstimationMode(sizeEstimationMode))
if trickle {
opts = append(opts, options.Unixfs.Layout(options.TrickleLayout))
}

View File

@ -177,6 +177,9 @@ func (api *UnixfsAPI) Add(ctx context.Context, files files.Node, opts ...options
if settings.MaxHAMTFanoutSet {
fileAdder.MaxHAMTFanout = settings.MaxHAMTFanout
}
if settings.SizeEstimationModeSet {
fileAdder.SizeEstimationMode = settings.SizeEstimationMode
}
fileAdder.NoCopy = settings.NoCopy
fileAdder.CidBuilder = prefix
fileAdder.PreserveMode = settings.PreserveMode

View File

@ -24,16 +24,18 @@ type UnixfsAddSettings struct {
CidVersion int
MhType uint64
Inline bool
InlineLimit int
RawLeaves bool
RawLeavesSet bool
MaxFileLinks int
MaxFileLinksSet bool
MaxDirectoryLinks int
MaxDirectoryLinksSet bool
MaxHAMTFanout int
MaxHAMTFanoutSet bool
Inline bool
InlineLimit int
RawLeaves bool
RawLeavesSet bool
MaxFileLinks int
MaxFileLinksSet bool
MaxDirectoryLinks int
MaxDirectoryLinksSet bool
MaxHAMTFanout int
MaxHAMTFanoutSet bool
SizeEstimationMode *io.SizeEstimationMode
SizeEstimationModeSet bool
Chunker string
Layout Layout
@ -239,6 +241,15 @@ func (unixfsOpts) MaxHAMTFanout(n int) UnixfsAddOption {
}
}
// SizeEstimationMode specifies how directory size is estimated for HAMT
// sharding decisions (wired from Import.UnixFSHAMTSizeEstimation; see IPIP-499).
// The value is stored via a pointer plus a SizeEstimationModeSet flag so that
// consumers can distinguish "explicitly configured" from "left at default".
func (unixfsOpts) SizeEstimationMode(mode io.SizeEstimationMode) UnixfsAddOption {
	return func(settings *UnixfsAddSettings) error {
		// &mode takes the address of this call's own copy of the argument,
		// so the stored pointer does not alias the caller's variable.
		settings.SizeEstimationMode = &mode
		settings.SizeEstimationModeSet = true
		return nil
	}
}
// Inline tells the adder to inline small blocks into CIDs
func (unixfsOpts) Inline(enable bool) UnixfsAddOption {
return func(settings *UnixfsAddSettings) error {

View File

@ -70,28 +70,29 @@ func NewAdder(ctx context.Context, p pin.Pinner, bs bstore.GCLocker, ds ipld.DAG
// Adder holds the switches passed to the `add` command.
type Adder struct {
ctx context.Context
pinning pin.Pinner
gcLocker bstore.GCLocker
dagService ipld.DAGService
bufferedDS *ipld.BufferedDAG
Out chan<- interface{}
Progress bool
Pin bool
PinName string
Trickle bool
RawLeaves bool
MaxLinks int
MaxDirectoryLinks int
MaxHAMTFanout int
Silent bool
NoCopy bool
Chunker string
mroot *mfs.Root
unlocker bstore.Unlocker
tempRoot cid.Cid
CidBuilder cid.Builder
liveNodes uint64
ctx context.Context
pinning pin.Pinner
gcLocker bstore.GCLocker
dagService ipld.DAGService
bufferedDS *ipld.BufferedDAG
Out chan<- interface{}
Progress bool
Pin bool
PinName string
Trickle bool
RawLeaves bool
MaxLinks int
MaxDirectoryLinks int
MaxHAMTFanout int
SizeEstimationMode *uio.SizeEstimationMode
Silent bool
NoCopy bool
Chunker string
mroot *mfs.Root
unlocker bstore.Unlocker
tempRoot cid.Cid
CidBuilder cid.Builder
liveNodes uint64
PreserveMode bool
PreserveMtime bool
@ -107,9 +108,10 @@ func (adder *Adder) mfsRoot() (*mfs.Root, error) {
// Note, this adds it to DAGService already.
mr, err := mfs.NewEmptyRoot(adder.ctx, adder.dagService, nil, nil, mfs.MkdirOpts{
CidBuilder: adder.CidBuilder,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
CidBuilder: adder.CidBuilder,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
SizeEstimationMode: adder.SizeEstimationMode,
})
if err != nil {
return nil, err
@ -273,11 +275,12 @@ func (adder *Adder) addNode(node ipld.Node, path string) error {
dir := gopath.Dir(path)
if dir != "." {
opts := mfs.MkdirOpts{
Mkparents: true,
Flush: false,
CidBuilder: adder.CidBuilder,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
Mkparents: true,
Flush: false,
CidBuilder: adder.CidBuilder,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
SizeEstimationMode: adder.SizeEstimationMode,
}
if err := mfs.Mkdir(mr, dir, opts); err != nil {
return err
@ -505,11 +508,12 @@ func (adder *Adder) addDir(ctx context.Context, path string, dir files.Directory
if toplevel && (adder.FileMode != 0 || !adder.FileMtime.IsZero()) {
mr, err := mfs.NewEmptyRoot(ctx, adder.dagService, nil, nil,
mfs.MkdirOpts{
CidBuilder: adder.CidBuilder,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
ModTime: adder.FileMtime,
Mode: adder.FileMode,
CidBuilder: adder.CidBuilder,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
ModTime: adder.FileMtime,
Mode: adder.FileMode,
SizeEstimationMode: adder.SizeEstimationMode,
})
if err != nil {
return err
@ -523,13 +527,14 @@ func (adder *Adder) addDir(ctx context.Context, path string, dir files.Directory
return err
}
err = mfs.Mkdir(mr, path, mfs.MkdirOpts{
Mkparents: true,
Flush: false,
CidBuilder: adder.CidBuilder,
Mode: adder.FileMode,
ModTime: adder.FileMtime,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
Mkparents: true,
Flush: false,
CidBuilder: adder.CidBuilder,
Mode: adder.FileMode,
ModTime: adder.FileMtime,
MaxLinks: adder.MaxDirectoryLinks,
MaxHAMTFanout: adder.MaxHAMTFanout,
SizeEstimationMode: adder.SizeEstimationMode,
})
if err != nil {
return err

View File

@ -68,6 +68,10 @@ The `test-cid-v1` and `test-cid-v1-wide` profiles have been removed. Use `unixfs
- New `--hidden` / `-H` flag for `ipfs add` includes hidden files (default: false)
- The `--trickle` flag in `ipfs add` now respects `Import.UnixFSDAGLayout` config default
**HAMT Threshold Fix**
The HAMT directory sharding threshold comparison was aligned with the JS implementation ([ipfs/boxo@6707376](https://github.com/ipfs/boxo/commit/6707376002a3d4ba64895749ce9be2e00d265ed5)). The comparison changed from `>=` to `>`, meaning a directory exactly at the 256 KiB threshold now stays as a basic (flat) directory instead of converting to HAMT. This is a subtle 1-byte boundary change that improves CID determinism across implementations.
#### 🧹 Automatic cleanup of interrupted imports
If you cancel `ipfs add` or `ipfs dag import` mid-operation, Kubo now automatically cleans up incomplete data on the next daemon start. Previously, interrupted imports would leave orphan blocks in your repository that were difficult to identify and remove without pins and running explicit garbage collection.

View File

@ -7,7 +7,7 @@ go 1.25
replace github.com/ipfs/kubo => ./../../..
require (
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3
github.com/ipfs/kubo v0.0.0-00010101000000-000000000000
github.com/libp2p/go-libp2p v0.46.0
github.com/multiformats/go-multiaddr v0.16.1

View File

@ -265,8 +265,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00 h1:e9p5CizXgzPlnxt1kzDyYNoKusO4cvDjNG33UqyVhwM=
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00/go.mod h1:Abmp1if6bMQG87/0SQPIB9fkxJnZMLCt2nQw3yUZHH0=
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3 h1:X6iiSyBUwhKgQMzM57wSXVUZfivm5nWm5S/Y2SrSjhA=
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3/go.mod h1:Abmp1if6bMQG87/0SQPIB9fkxJnZMLCt2nQw3yUZHH0=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk=

2
go.mod
View File

@ -21,7 +21,7 @@ require (
github.com/hashicorp/go-version v1.7.0
github.com/ipfs-shipyard/nopfs v0.0.14
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3
github.com/ipfs/go-block-format v0.2.3
github.com/ipfs/go-cid v0.6.0
github.com/ipfs/go-cidutil v0.1.0

4
go.sum
View File

@ -336,8 +336,8 @@ github.com/ipfs-shipyard/nopfs/ipfs v0.25.0 h1:OqNqsGZPX8zh3eFMO8Lf8EHRRnSGBMqcd
github.com/ipfs-shipyard/nopfs/ipfs v0.25.0/go.mod h1:BxhUdtBgOXg1B+gAPEplkg/GpyTZY+kCMSfsJvvydqU=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00 h1:e9p5CizXgzPlnxt1kzDyYNoKusO4cvDjNG33UqyVhwM=
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00/go.mod h1:Abmp1if6bMQG87/0SQPIB9fkxJnZMLCt2nQw3yUZHH0=
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3 h1:X6iiSyBUwhKgQMzM57wSXVUZfivm5nWm5S/Y2SrSjhA=
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3/go.mod h1:Abmp1if6bMQG87/0SQPIB9fkxJnZMLCt2nQw3yUZHH0=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk=

View File

@ -8,7 +8,6 @@ import (
"testing"
"time"
"github.com/dustin/go-humanize"
"github.com/ipfs/kubo/config"
"github.com/ipfs/kubo/test/cli/harness"
"github.com/ipfs/kubo/test/cli/testutils"
@ -166,7 +165,7 @@ func TestAdd(t *testing.T) {
//
// UnixFSChunker=size-262144 (256KiB)
// Import.UnixFSFileMaxLinks=174
node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0") // legacy-cid-v0 for determinism across all params
node := harness.NewT(t).NewNode().Init("--profile=unixfs-v0-2015") // unixfs-v0-2015 for determinism across all params
node.UpdateConfig(func(cfg *config.Config) {
cfg.Import.UnixFSChunker = *config.NewOptionalString("size-262144") // 256 KiB chunks
cfg.Import.UnixFSFileMaxLinks = *config.NewOptionalInteger(174) // max 174 per level
@ -187,9 +186,9 @@ func TestAdd(t *testing.T) {
require.Equal(t, "QmbBftNHWmjSWKLC49dMVrfnY8pjrJYntiAXirFJ7oJrNk", cidStr)
})
t.Run("ipfs init --profile=legacy-cid-v0 sets config that produces legacy CIDv0", func(t *testing.T) {
t.Run("ipfs init --profile=unixfs-v0-2015 sets config that produces legacy CIDv0", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0")
node := harness.NewT(t).NewNode().Init("--profile=unixfs-v0-2015")
node.StartDaemon()
defer node.StopDaemon()
@ -197,10 +196,10 @@ func TestAdd(t *testing.T) {
require.Equal(t, shortStringCidV0, cidStr)
})
t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks", func(t *testing.T) {
t.Run("ipfs init --profile=unixfs-v0-2015 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks", func(t *testing.T) {
t.Parallel()
seed := "v0-seed"
profile := "--profile=legacy-cid-v0"
profile := "--profile=unixfs-v0-2015"
t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) {
t.Parallel()
@ -232,12 +231,15 @@ func TestAdd(t *testing.T) {
})
})
t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Run("ipfs init --profile=unixfs-v0-2015 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Parallel()
seed := "hamt-legacy-cid-v0"
profile := "--profile=legacy-cid-v0"
seed := "hamt-unixfs-v0-2015"
profile := "--profile=unixfs-v0-2015"
t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
// unixfs-v0-2015 uses links-based estimation: size = sum(nameLen + cidLen)
// Threshold is 256KiB = 262144 bytes
t.Run("at UnixFSHAMTDirectorySizeThreshold=256KiB (links estimation)", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
@ -246,18 +248,24 @@ func TestAdd(t *testing.T) {
randDir, err := os.MkdirTemp(node.Dir, seed)
require.NoError(t, err)
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
err = createDirectoryForHAMT(randDir, cidV0Length, "255KiB", seed)
// Create directory exactly at the 256KiB threshold using links estimation.
// Links estimation: size = numFiles * (nameLen + cidLen)
// 4096 * (30 + 34) = 4096 * 64 = 262144 = threshold exactly
// With > comparison: stays as basic directory
// With >= comparison: converts to HAMT
const numFiles, nameLen = 4096, 30
err = createDirectoryForHAMTLinksEstimation(randDir, cidV0Length, numFiles, nameLen, nameLen, seed)
require.NoError(t, err)
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
// Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout (indicating regular "basic" directory)
// Should remain a basic directory (threshold uses > not >=)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 903, len(root.Links))
require.Equal(t, numFiles, len(root.Links), "expected basic directory at exact threshold")
})
t.Run("above UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Run("over UnixFSHAMTDirectorySizeThreshold=256KiB (links estimation)", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
@ -266,21 +274,25 @@ func TestAdd(t *testing.T) {
randDir, err := os.MkdirTemp(node.Dir, seed)
require.NoError(t, err)
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
err = createDirectoryForHAMT(randDir, cidV0Length, "257KiB", seed)
// Create directory just over the 256KiB threshold using links estimation.
// Links estimation: size = numFiles * (nameLen + cidLen)
// 4097 * (30 + 34) = 4097 * 64 = 262208 > 262144, exceeds threshold
const numFiles, nameLen = 4097, 30
err = createDirectoryForHAMTLinksEstimation(randDir, cidV0Length, numFiles, nameLen, nameLen, seed)
require.NoError(t, err)
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
// Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout
// Should be HAMT sharded (root links <= fanout of 256)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 252, len(root.Links))
require.LessOrEqual(t, len(root.Links), 256, "expected HAMT directory when over threshold")
})
})
t.Run("ipfs init --profile=test-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) {
t.Run("ipfs init --profile=unixfs-v1-2025 produces CIDv1 with raw leaves", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init("--profile=test-cid-v1")
node := harness.NewT(t).NewNode().Init("--profile=unixfs-v1-2025")
node.StartDaemon()
defer node.StopDaemon()
@ -288,105 +300,21 @@ func TestAdd(t *testing.T) {
require.Equal(t, shortStringCidV1, cidStr) // raw leaf
})
t.Run("ipfs init --profile=test-cid-v1 applies UnixFSChunker=size-1048576", func(t *testing.T) {
t.Run("ipfs init --profile=unixfs-v1-2025 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=1024", func(t *testing.T) {
t.Parallel()
seed := "v1-seed"
profile := "--profile=test-cid-v1"
t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
defer node.StopDaemon()
// Add 174MiB file:
// 174 * 1MiB should fit in single layer
cidStr := node.IPFSAddDeterministic("174MiB", seed)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 174, len(root.Links))
// expect same CID every time
require.Equal(t, "bafybeigwduxcf2aawppv3isnfeshnimkyplvw3hthxjhr2bdeje4tdaicu", cidStr)
})
t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
defer node.StopDaemon()
// add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer
cidStr := node.IPFSAddDeterministic("175MiB", seed)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 2, len(root.Links))
// expect same CID every time
require.Equal(t, "bafybeidhd7lo2n2v7lta5yamob3xwhbxcczmmtmhquwhjesi35jntf7mpu", cidStr)
})
})
t.Run("ipfs init --profile=test-cid-v1 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Parallel()
seed := "hamt-cid-v1"
profile := "--profile=test-cid-v1"
t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
defer node.StopDaemon()
randDir, err := os.MkdirTemp(node.Dir, seed)
require.NoError(t, err)
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
err = createDirectoryForHAMT(randDir, cidV1Length, "255KiB", seed)
require.NoError(t, err)
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
// Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout (indicating regular "basic" directory)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 897, len(root.Links))
})
t.Run("above UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
defer node.StopDaemon()
randDir, err := os.MkdirTemp(node.Dir, seed)
require.NoError(t, err)
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
err = createDirectoryForHAMT(randDir, cidV1Length, "257KiB", seed)
require.NoError(t, err)
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
// Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 252, len(root.Links))
})
})
t.Run("ipfs init --profile=test-cid-v1-wide applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=1024", func(t *testing.T) {
t.Parallel()
seed := "v1-seed-1024"
profile := "--profile=test-cid-v1-wide"
seed := "v1-2025-seed"
profile := "--profile=unixfs-v1-2025"
t.Run("under UnixFSFileMaxLinks=1024", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
defer node.StopDaemon()
// Add 174MiB file:
// 1024 * 1MiB should fit in single layer
cidStr := node.IPFSAddDeterministic("1024MiB", seed)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 1024, len(root.Links))
// expect same CID every time
require.Equal(t, "bafybeiej5w63ir64oxgkr5htqmlerh5k2rqflurn2howimexrlkae64xru", cidStr)
})
t.Run("above UnixFSFileMaxLinks=1024", func(t *testing.T) {
@ -399,17 +327,19 @@ func TestAdd(t *testing.T) {
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 2, len(root.Links))
// expect same CID every time
require.Equal(t, "bafybeieilp2qx24pe76hxrxe6bpef5meuxto3kj5dd6mhb5kplfeglskdm", cidStr)
})
})
t.Run("ipfs init --profile=test-cid-v1-wide applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) {
t.Run("ipfs init --profile=unixfs-v1-2025 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
t.Parallel()
seed := "hamt-cid-v1"
profile := "--profile=test-cid-v1-wide"
seed := "hamt-unixfs-v1-2025"
profile := "--profile=unixfs-v1-2025"
t.Run("under UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) {
// unixfs-v1-2025 uses block-based size estimation: size = sum(LinkSerializedSize)
// where LinkSerializedSize includes protobuf overhead (tags, varints, wrappers).
// Threshold is 256KiB = 262144 bytes
t.Run("at UnixFSHAMTDirectorySizeThreshold=256KiB (block estimation)", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
@ -418,18 +348,25 @@ func TestAdd(t *testing.T) {
randDir, err := os.MkdirTemp(node.Dir, seed)
require.NoError(t, err)
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
err = createDirectoryForHAMT(randDir, cidV1Length, "1023KiB", seed)
// Create directory exactly at the 256KiB threshold using block estimation.
// Block estimation: size = baseOverhead + numFiles * LinkSerializedSize
// LinkSerializedSize(11, 36, 0) = 55 bytes per link
// 4766 * 55 + 14 = 262130 + 14 = 262144 = threshold exactly
// With > comparison: stays as basic directory
// With >= comparison: converts to HAMT
const numFiles, nameLen = 4766, 11
err = createDirectoryForHAMTBlockEstimation(randDir, cidV1Length, numFiles, nameLen, nameLen, seed)
require.NoError(t, err)
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
// Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout (indicating regular "basic" directory)
// Should remain a basic directory (threshold uses > not >=)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 3599, len(root.Links))
require.Equal(t, numFiles, len(root.Links), "expected basic directory at exact threshold")
})
t.Run("above UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) {
t.Run("over UnixFSHAMTDirectorySizeThreshold=256KiB (block estimation)", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init(profile)
node.StartDaemon()
@ -438,15 +375,19 @@ func TestAdd(t *testing.T) {
randDir, err := os.MkdirTemp(node.Dir, seed)
require.NoError(t, err)
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
err = createDirectoryForHAMT(randDir, cidV1Length, "1025KiB", seed)
// Create directory just over the 256KiB threshold using block estimation.
// Block estimation: size = baseOverhead + numFiles * LinkSerializedSize
// 4767 * 55 + 14 = 262185 + 14 = 262199 > 262144, exceeds threshold
const numFiles, nameLen = 4767, 11
err = createDirectoryForHAMTBlockEstimation(randDir, cidV1Length, numFiles, nameLen, nameLen, seed)
require.NoError(t, err)
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
// Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout
// Should be HAMT sharded (root links <= fanout of 256)
root, err := node.InspectPBNode(cidStr)
assert.NoError(t, err)
require.Equal(t, 992, len(root.Links))
require.LessOrEqual(t, len(root.Links), 256, "expected HAMT directory when over threshold")
})
})
@ -807,30 +748,56 @@ func TestAddFastProvide(t *testing.T) {
})
}
// createDirectoryForHAMT aims to create enough files with long names for the directory block to be close to the UnixFSHAMTDirectorySizeThreshold.
// The calculation is based on boxo's HAMTShardingSize and sizeBelowThreshold which calculates ballpark size of the block
// by adding length of link names and the binary cid length.
// See https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L491
func createDirectoryForHAMT(dirPath string, cidLength int, unixfsNodeSizeTarget, seed string) error {
hamtThreshold, err := humanize.ParseBytes(unixfsNodeSizeTarget)
if err != nil {
return err
}
// createDirectoryForHAMTLinksEstimation creates a directory with the specified number
// of files using the links-based size estimation formula (size = numFiles * (nameLen + cidLen)).
// Used by legacy profiles (unixfs-v0-2015).
//
// NOTE(review): cidLength is not used by this implementation (file creation is
// delegated to createDeterministicFiles); it is kept in the signature so call
// sites can spell out the full links-estimation formula next to their file counts.
//
// Threshold behavior: boxo uses a > comparison, so a directory whose estimated
// size equals the threshold exactly stays a basic (flat) directory.
//
// The lastNameLen parameter allows the last file to have a different name length,
// enabling exact +1 byte threshold tests.
//
// See boxo/ipld/unixfs/io/directory.go sizeBelowThreshold() for the links estimation.
func createDirectoryForHAMTLinksEstimation(dirPath string, cidLength, numFiles, nameLen, lastNameLen int, seed string) error {
	return createDeterministicFiles(dirPath, numFiles, nameLen, lastNameLen, seed)
}
// Calculate how many files with long filenames are needed to hit UnixFSHAMTDirectorySizeThreshold
nameLen := 255 // max that works across windows/macos/linux
// createDirectoryForHAMTBlockEstimation creates a directory with the specified number
// of files using the block-based size estimation formula (LinkSerializedSize with
// protobuf overhead: tags, varints, wrappers). Used by modern profiles (unixfs-v1-2025).
//
// NOTE(review): cidLength is not used by this implementation (file creation is
// delegated to createDeterministicFiles); it is kept in the signature so call
// sites can spell out the full block-estimation formula next to their file counts.
//
// Threshold behavior: boxo uses a > comparison, so a directory whose estimated
// size equals the threshold exactly stays a basic (flat) directory.
//
// The lastNameLen parameter allows the last file to have a different name length,
// enabling exact +1 byte threshold tests.
//
// See boxo/ipld/unixfs/io/directory.go estimatedBlockSize() for the block estimation.
func createDirectoryForHAMTBlockEstimation(dirPath string, cidLength, numFiles, nameLen, lastNameLen int, seed string) error {
	return createDeterministicFiles(dirPath, numFiles, nameLen, lastNameLen, seed)
}
// createDeterministicFiles creates numFiles files with deterministic names.
// Files 0 to numFiles-2 have nameLen characters, and the last file has lastNameLen characters.
// Each file contains "x" (1 byte) for non-zero tsize in directory links.
func createDeterministicFiles(dirPath string, numFiles, nameLen, lastNameLen int, seed string) error {
alphabetLen := len(testutils.AlphabetEasy)
numFiles := int(hamtThreshold) / (nameLen + cidLength)
// Deterministic pseudo-random bytes for static CID
drand, err := testutils.DeterministicRandomReader(unixfsNodeSizeTarget, seed)
// Deterministic pseudo-random bytes for static filenames
drand, err := testutils.DeterministicRandomReader("1MiB", seed)
if err != nil {
return err
}
// Create necessary files in a single, flat directory
for i := 0; i < numFiles; i++ {
buf := make([]byte, nameLen)
// Use lastNameLen for the final file
currentNameLen := nameLen
if i == numFiles-1 {
currentNameLen = lastNameLen
}
buf := make([]byte, currentNameLen)
_, err := io.ReadFull(drand, buf)
if err != nil {
return err
@ -838,21 +805,17 @@ func createDirectoryForHAMT(dirPath string, cidLength int, unixfsNodeSizeTarget,
// Convert deterministic pseudo-random bytes to ASCII
var sb strings.Builder
for _, b := range buf {
// Map byte to printable ASCII range (33-126)
char := testutils.AlphabetEasy[int(b)%alphabetLen]
sb.WriteRune(char)
}
filename := sb.String()[:nameLen]
filename := sb.String()[:currentNameLen]
filePath := filepath.Join(dirPath, filename)
// Create empty file
f, err := os.Create(filePath)
if err != nil {
// Create file with 1-byte content for non-zero tsize
if err := os.WriteFile(filePath, []byte("x"), 0644); err != nil {
return err
}
f.Close()
}
return nil
}

View File

@ -0,0 +1,592 @@
package cli
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"github.com/ipfs/kubo/test/cli/harness"
"github.com/ipfs/kubo/test/cli/testutils"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// cidProfileExpectations defines expected behaviors for a UnixFS import profile.
// This allows DRY testing of multiple profiles with the same test logic: each
// profile instance carries its config knobs plus the deterministic CIDs that
// those knobs must reproduce.
type cidProfileExpectations struct {
	// Profile identification
	Name        string   // canonical profile name from IPIP-499
	ProfileArgs []string // args to pass to ipfs init (empty for default behavior)

	// CID format expectations
	CIDVersion int    // 0 or 1
	HashFunc   string // e.g., "sha2-256"
	RawLeaves  bool   // true = raw codec for small files, false = dag-pb wrapped

	// File chunking expectations
	ChunkSize    string // e.g., "1MiB" or "256KiB"
	FileMaxLinks int    // max links before DAG rebalancing

	// HAMT directory sharding expectations.
	// Threshold behavior: boxo converts to HAMT when size > HAMTThreshold (not >=).
	// This means a directory exactly at the threshold stays as a basic (flat) directory.
	HAMTFanout         int    // max links per HAMT shard bucket (256)
	HAMTThreshold      int    // sharding threshold in bytes (262144 = 256 KiB)
	HAMTSizeEstimation string // "block" (protobuf size) or "links" (legacy name+cid)

	// Test vector parameters for threshold boundary tests.
	//   - DirBasic: estimated size == threshold (stays basic)
	//   - DirHAMT:  estimated size > threshold (converts to HAMT)
	// For block estimation, the last filename length is adjusted to hit exact thresholds.
	DirBasicNameLen     int // filename length for basic directory (files 0 to N-2)
	DirBasicLastNameLen int // filename length for last file (0 = same as DirBasicNameLen)
	DirBasicFiles       int // file count for basic directory (at exact threshold)
	DirHAMTNameLen      int // filename length for HAMT directory (files 0 to N-2)
	DirHAMTLastNameLen  int // filename length for last file (0 = same as DirHAMTNameLen)
	DirHAMTFiles        int // total file count for HAMT directory (over threshold)

	// Expected deterministic CIDs for test vectors
	SmallFileCID        string // CID for the small-file vector (NOTE(review): vector comments say "hello world", not a single byte "x" — confirm the input)
	FileAtMaxLinksCID   string // CID for file at max links
	FileOverMaxLinksCID string // CID for file triggering rebalance
	DirBasicCID         string // CID for basic directory (at exact threshold, stays flat)
	DirHAMTCID          string // CID for HAMT directory (over threshold, sharded)
}
// unixfsV02015 is the legacy profile for backward-compatible CID generation
// (CIDv0, dag-pb-wrapped leaves, links-based HAMT size estimation).
// Alias: legacy-cid-v0
var unixfsV02015 = cidProfileExpectations{
	Name:               "unixfs-v0-2015",
	ProfileArgs:        []string{"--profile=unixfs-v0-2015"},
	CIDVersion:         0,
	HashFunc:           "sha2-256",
	RawLeaves:          false,
	ChunkSize:          "256KiB",
	FileMaxLinks:       174,
	HAMTFanout:         256,
	HAMTThreshold:      262144, // 256 KiB
	HAMTSizeEstimation: "links",
	DirBasicNameLen:    30,   // links estimation: size = files * (nameLen + cidV0Length) = 4096 * (30 + 34)
	DirBasicFiles:      4096, // 4096 * 64 = 262144, exactly at threshold (stays basic with >)
	DirHAMTNameLen:     31,   // 4033 * (31 + 34) = 262145, exactly +1 over threshold
	DirHAMTLastNameLen: 0,    // 0 = same as DirHAMTNameLen (uniform filenames)
	DirHAMTFiles:       4033, // 4033 * 65 = 262145 (becomes HAMT)
	SmallFileCID:       "Qmf412jQZiuVUtdgnB36FXFX7xg5V6KEbSJ4dpQuhkLyfD", // "hello world" dag-pb wrapped
	FileAtMaxLinksCID:  "QmUbBALi174SnogsUzLpYbD4xPiBSFANF4iztWCsHbMKh2", // 44544KiB with seed "v0-seed"
	FileOverMaxLinksCID: "QmepeWtdmS1hHXx1oZXsPUv6bMrfRRKfZcoPPU4eEfjnbf", // 44800KiB with seed "v0-seed"
	DirBasicCID:        "QmX5GtRk3TSSEHtdrykgqm4eqMEn3n2XhfkFAis5fjyZmN", // 4096 files at threshold
	DirHAMTCID:         "QmeMiJzmhpJAUgynAcxTQYek5PPKgdv3qEvFsdV3XpVnvP", // 4033 files +1 over threshold
}
// unixfsV12025 is the recommended profile for cross-implementation CID determinism.
//
// Uses CIDv1, raw leaves, 1 MiB chunks, and "block" HAMT size estimation
// (the exact serialized dag-pb block size compared against HAMTThreshold).
var unixfsV12025 = cidProfileExpectations{
	Name:               "unixfs-v1-2025",
	ProfileArgs:        []string{"--profile=unixfs-v1-2025"},
	CIDVersion:         1,
	HashFunc:           "sha2-256",
	RawLeaves:          true,
	ChunkSize:          "1MiB",
	FileMaxLinks:       1024,
	HAMTFanout:         256,
	HAMTThreshold:      262144, // 256 KiB
	HAMTSizeEstimation: "block",
	// Block size = numFiles * linkSize + 4 bytes overhead
	// LinkSerializedSize(11, 36, 1) = 55, LinkSerializedSize(21, 36, 1) = 65, LinkSerializedSize(22, 36, 1) = 66
	// The last file's name length is tuned so the serialized directory block
	// lands exactly on (basic) or exactly one byte past (HAMT) the threshold.
	DirBasicNameLen:     11, // 4765 files * 55 bytes
	DirBasicLastNameLen: 21, // last file: 65 bytes; total: 4765*55 + 65 + 4 = 262144 (at threshold)
	DirBasicFiles:       4766, // stays basic with > comparison
	DirHAMTNameLen:      11, // 4765 files * 55 bytes
	DirHAMTLastNameLen:  22, // last file: 66 bytes; total: 4765*55 + 66 + 4 = 262145 (+1 over threshold)
	DirHAMTFiles:        4766, // becomes HAMT
	// Deterministic CIDs used as cross-implementation test vectors.
	SmallFileCID:        "bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e", // "hello world" raw leaf
	FileAtMaxLinksCID:   "bafybeihmf37wcuvtx4hpu7he5zl5qaf2ineo2lqlfrapokkm5zzw7zyhvm", // 1024MiB with seed "v1-2025-seed"
	FileOverMaxLinksCID: "bafybeihmzokxxjqwxjcryerhp5ezpcog2wcawfryb2xm64xiakgm4a5jue", // 1025MiB with seed "v1-2025-seed"
	DirBasicCID:         "bafybeic3h7rwruealwxkacabdy45jivq2crwz6bufb5ljwupn36gicplx4", // 4766 files at 262144 bytes (threshold)
	DirHAMTCID:          "bafybeiegvuterwurhdtkikfhbxcldohmxp566vpjdofhzmnhv6o4freidu", // 4766 files at 262145 bytes (+1 over)
}
// defaultProfile points to the profile that matches Kubo's implicit default behavior.
// Today this is unixfs-v0-2015. When Kubo changes defaults, update this pointer.
// Note: Go value semantics mean this is a copy of unixfsV02015, so tests may
// mutate their local copy (e.g. clearing ProfileArgs) without affecting it.
var defaultProfile = unixfsV02015

// Byte lengths of the binary CID as embedded in a dag-pb link's Hash field.
const (
	cidV0Length = 34 // CIDv0 sha2-256 (2-byte multihash prefix + 32-byte digest)
	cidV1Length = 36 // CIDv1 sha2-256 (adds version + codec bytes)
)
// TestCIDProfiles generates deterministic test vectors for CID profile verification.
// Set CID_PROFILES_CAR_OUTPUT environment variable to export CAR files.
// Example: CID_PROFILES_CAR_OUTPUT=/tmp/cid-profiles go test -run TestCIDProfiles -v
func TestCIDProfiles(t *testing.T) {
	t.Parallel()
	carDir := os.Getenv("CID_PROFILES_CAR_OUTPUT")
	wantCARs := carDir != ""
	if wantCARs {
		if err := os.MkdirAll(carDir, 0755); err != nil {
			t.Fatalf("failed to create CAR output directory: %v", err)
		}
		t.Logf("CAR export enabled, writing to: %s", carDir)
	}
	// Exercise both IPIP-499 profiles explicitly.
	profiles := []cidProfileExpectations{unixfsV02015, unixfsV12025}
	for _, p := range profiles {
		p := p // capture for the parallel subtest closure
		t.Run(p.Name, func(t *testing.T) {
			t.Parallel()
			runProfileTests(t, p, carDir, wantCARs)
		})
	}
	// Exercise default behavior (no profile specified); it must match
	// defaultProfile (currently unixfs-v0-2015).
	t.Run("default", func(t *testing.T) {
		t.Parallel()
		noProfile := defaultProfile
		noProfile.Name = "default"
		noProfile.ProfileArgs = nil // no profile args = default behavior
		runProfileTests(t, noProfile, carDir, wantCARs)
	})
}
// runProfileTests runs all test vectors for a given profile.
//
// Parameters:
//   - exp: the profile's expected importer parameters and deterministic CIDs
//   - carOutputDir: destination directory for exported CAR files
//   - exportCARs: when true, each subtest also exports its DAG as a CAR file
//
// Each subtest initializes and starts its own daemon with exp.ProfileArgs so
// subtests can run in parallel without sharing repo state.
func runProfileTests(t *testing.T, exp cidProfileExpectations, carOutputDir string, exportCARs bool) {
	// Per-link CID byte length depends on CID version (see cidV0Length/cidV1Length).
	cidLen := cidV0Length
	if exp.CIDVersion == 1 {
		cidLen = cidV1Length
	}
	t.Run("small-file", func(t *testing.T) {
		t.Parallel()
		node := harness.NewT(t).NewNode().Init(exp.ProfileArgs...)
		node.StartDaemon()
		defer node.StopDaemon()
		// Use "hello world" for determinism - matches CIDs in add_test.go
		cidStr := node.IPFSAddStr("hello world")
		// Verify CID version
		verifyCIDVersion(t, node, cidStr, exp.CIDVersion)
		// Verify hash function
		verifyHashFunction(t, node, cidStr, exp.HashFunc)
		// Verify raw leaves vs wrapped
		verifyRawLeaves(t, node, cidStr, exp.RawLeaves)
		// Verify deterministic CID if expected
		if exp.SmallFileCID != "" {
			require.Equal(t, exp.SmallFileCID, cidStr, "expected deterministic CID for small file")
		}
		if exportCARs {
			carPath := filepath.Join(carOutputDir, exp.Name+"_small-file.car")
			require.NoError(t, node.IPFSDagExport(cidStr, carPath))
			t.Logf("exported: %s -> %s", cidStr, carPath)
		}
	})
	t.Run("file-at-max-links", func(t *testing.T) {
		t.Parallel()
		node := harness.NewT(t).NewNode().Init(exp.ProfileArgs...)
		node.StartDaemon()
		defer node.StopDaemon()
		// Calculate file size: maxLinks * chunkSize
		fileSize := fileAtMaxLinksSize(exp)
		// Seed matches add_test.go for deterministic CIDs
		seed := seedForProfile(exp)
		cidStr := node.IPFSAddDeterministic(fileSize, seed)
		root, err := node.InspectPBNode(cidStr)
		// NOTE(review): assert.NoError does not stop the subtest on failure, so
		// the require.Equal below would then inspect a zero-value root —
		// consider require.NoError here.
		assert.NoError(t, err)
		require.Equal(t, exp.FileMaxLinks, len(root.Links),
			"expected exactly %d links at max", exp.FileMaxLinks)
		// Verify hash function on root
		verifyHashFunction(t, node, cidStr, exp.HashFunc)
		// Verify deterministic CID if expected
		if exp.FileAtMaxLinksCID != "" {
			require.Equal(t, exp.FileAtMaxLinksCID, cidStr, "expected deterministic CID for file at max links")
		}
		if exportCARs {
			carPath := filepath.Join(carOutputDir, exp.Name+"_file-at-max-links.car")
			require.NoError(t, node.IPFSDagExport(cidStr, carPath))
			t.Logf("exported: %s -> %s", cidStr, carPath)
		}
	})
	t.Run("file-over-max-links-rebalanced", func(t *testing.T) {
		t.Parallel()
		node := harness.NewT(t).NewNode().Init(exp.ProfileArgs...)
		node.StartDaemon()
		defer node.StopDaemon()
		// One more chunk triggers rebalancing
		fileSize := fileOverMaxLinksSize(exp)
		// Seed matches add_test.go for deterministic CIDs
		seed := seedForProfile(exp)
		cidStr := node.IPFSAddDeterministic(fileSize, seed)
		root, err := node.InspectPBNode(cidStr)
		// NOTE(review): see note above — require.NoError would be safer.
		assert.NoError(t, err)
		require.Equal(t, 2, len(root.Links), "expected 2 links after DAG rebalancing")
		// Verify hash function on root
		verifyHashFunction(t, node, cidStr, exp.HashFunc)
		// Verify deterministic CID if expected
		if exp.FileOverMaxLinksCID != "" {
			require.Equal(t, exp.FileOverMaxLinksCID, cidStr, "expected deterministic CID for rebalanced file")
		}
		if exportCARs {
			carPath := filepath.Join(carOutputDir, exp.Name+"_file-over-max-links.car")
			require.NoError(t, node.IPFSDagExport(cidStr, carPath))
			t.Logf("exported: %s -> %s", cidStr, carPath)
		}
	})
	t.Run("dir-basic", func(t *testing.T) {
		t.Parallel()
		node := harness.NewT(t).NewNode().Init(exp.ProfileArgs...)
		node.StartDaemon()
		defer node.StopDaemon()
		// Use consistent seed for deterministic CIDs
		seed := hamtSeedForProfile(exp)
		randDir, err := os.MkdirTemp(node.Dir, seed)
		require.NoError(t, err)
		// Create basic (flat) directory exactly at threshold.
		// With > comparison, directory at exact threshold stays basic.
		// A DirBasicLastNameLen of 0 means "same as DirBasicNameLen".
		basicLastNameLen := exp.DirBasicLastNameLen
		if basicLastNameLen == 0 {
			basicLastNameLen = exp.DirBasicNameLen
		}
		if exp.HAMTSizeEstimation == "block" {
			err = createDirectoryForHAMTBlockEstimation(randDir, cidLen, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed)
		} else {
			err = createDirectoryForHAMTLinksEstimation(randDir, cidLen, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed)
		}
		require.NoError(t, err)
		cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
		root, err := node.InspectPBNode(cidStr)
		// NOTE(review): see note above — require.NoError would be safer.
		assert.NoError(t, err)
		require.Equal(t, exp.DirBasicFiles, len(root.Links),
			"expected basic directory with %d links", exp.DirBasicFiles)
		// Verify hash function
		verifyHashFunction(t, node, cidStr, exp.HashFunc)
		// Verify size is exactly at threshold
		if exp.HAMTSizeEstimation == "block" {
			// Block estimation: verify actual serialized block size
			blockSize := getBlockSize(t, node, cidStr)
			require.Equal(t, exp.HAMTThreshold, blockSize,
				"expected basic directory block size to be exactly at threshold (%d), got %d", exp.HAMTThreshold, blockSize)
		}
		if exp.HAMTSizeEstimation == "links" {
			// Links estimation: verify sum of (name_len + cid_len) for all links
			linksSize := 0
			for _, link := range root.Links {
				linksSize += len(link.Name) + cidLen
			}
			require.Equal(t, exp.HAMTThreshold, linksSize,
				"expected basic directory links size to be exactly at threshold (%d), got %d", exp.HAMTThreshold, linksSize)
		}
		// Verify deterministic CID
		if exp.DirBasicCID != "" {
			require.Equal(t, exp.DirBasicCID, cidStr, "expected deterministic CID for basic directory")
		}
		if exportCARs {
			carPath := filepath.Join(carOutputDir, exp.Name+"_dir-basic.car")
			require.NoError(t, node.IPFSDagExport(cidStr, carPath))
			t.Logf("exported: %s (%d files) -> %s", cidStr, exp.DirBasicFiles, carPath)
		}
	})
	t.Run("dir-hamt", func(t *testing.T) {
		t.Parallel()
		node := harness.NewT(t).NewNode().Init(exp.ProfileArgs...)
		node.StartDaemon()
		defer node.StopDaemon()
		// Use consistent seed for deterministic CIDs
		seed := hamtSeedForProfile(exp)
		randDir, err := os.MkdirTemp(node.Dir, seed)
		require.NoError(t, err)
		// Create HAMT (sharded) directory exactly +1 byte over threshold.
		// With > comparison, directory over threshold becomes HAMT.
		// A DirHAMTLastNameLen of 0 means "same as DirHAMTNameLen".
		lastNameLen := exp.DirHAMTLastNameLen
		if lastNameLen == 0 {
			lastNameLen = exp.DirHAMTNameLen
		}
		if exp.HAMTSizeEstimation == "block" {
			err = createDirectoryForHAMTBlockEstimation(randDir, cidLen, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed)
		} else {
			err = createDirectoryForHAMTLinksEstimation(randDir, cidLen, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed)
		}
		require.NoError(t, err)
		cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
		root, err := node.InspectPBNode(cidStr)
		// NOTE(review): see note above — require.NoError would be safer.
		assert.NoError(t, err)
		// A sharded root has at most HAMTFanout child links.
		require.LessOrEqual(t, len(root.Links), exp.HAMTFanout,
			"expected HAMT directory with <=%d links", exp.HAMTFanout)
		// Verify hash function
		verifyHashFunction(t, node, cidStr, exp.HashFunc)
		// Verify deterministic CID
		if exp.DirHAMTCID != "" {
			require.Equal(t, exp.DirHAMTCID, cidStr, "expected deterministic CID for HAMT directory")
		}
		if exportCARs {
			carPath := filepath.Join(carOutputDir, exp.Name+"_dir-hamt.car")
			require.NoError(t, node.IPFSDagExport(cidStr, carPath))
			t.Logf("exported: %s (%d files, HAMT root links: %d) -> %s",
				cidStr, exp.DirHAMTFiles, len(root.Links), carPath)
		}
	})
}
// verifyCIDVersion checks that the CID string has the expected version by its
// textual prefix: CIDv0 is always base58 "Qm…", CIDv1 here is base32 "b…".
func verifyCIDVersion(t *testing.T, _ *harness.Node, cidStr string, expectedVersion int) {
	t.Helper()
	switch expectedVersion {
	case 0:
		require.True(t, strings.HasPrefix(cidStr, "Qm"),
			"expected CIDv0 (starts with Qm), got: %s", cidStr)
	default:
		require.True(t, strings.HasPrefix(cidStr, "b"),
			"expected CIDv1 (base32, starts with b), got: %s", cidStr)
	}
}
// verifyHashFunction checks that the CID uses the expected hash function.
// It shells out to `ipfs cid format -f %h`, which prints the multihash name.
func verifyHashFunction(t *testing.T, node *harness.Node, cidStr, expectedHash string) {
	t.Helper()
	got := strings.TrimSpace(node.IPFS("cid", "format", "-f", "%h", cidStr).Stdout.String())
	require.Equal(t, expectedHash, got,
		"expected hash function %s, got %s for CID %s", expectedHash, got, cidStr)
}
// verifyRawLeaves checks whether the CID represents a raw leaf or dag-pb wrapped block.
// For CIDv1: raw leaves have codec 0x55 (raw), wrapped have codec 0x70 (dag-pb).
// For CIDv0: always dag-pb (no raw leaves possible).
func verifyRawLeaves(t *testing.T, node *harness.Node, cidStr string, expectRaw bool) {
	t.Helper()
	// `ipfs cid format -f %c` prints the codec name for the CID.
	codec := strings.TrimSpace(node.IPFS("cid", "format", "-f", "%c", cidStr).Stdout.String())
	if !expectRaw {
		require.Equal(t, "dag-pb", codec,
			"expected dag-pb codec for wrapped leaves, got %s for CID %s", codec, cidStr)
		return
	}
	require.Equal(t, "raw", codec,
		"expected raw codec for raw leaves, got %s for CID %s", codec, cidStr)
}
// getBlockSize returns the serialized size of a block in bytes, as reported
// by `ipfs block stat --enc=json`.
func getBlockSize(t *testing.T, node *harness.Node, cidStr string) int {
	t.Helper()
	out := node.IPFS("block", "stat", "--enc=json", cidStr).Stdout.Bytes()
	var stat struct {
		Size int `json:"Size"`
	}
	require.NoError(t, json.Unmarshal(out, &stat))
	return stat.Size
}
// fileAtMaxLinksSize returns the human-readable file size string ("<n>MiB" or
// "<n>KiB") that produces exactly FileMaxLinks chunks under the profile's
// chunker. Panics on an unrecognized ChunkSize (programmer error in the
// profile table).
//
// Fix: the previous "1MiB" branch hand-built a fixed four-digit number with
// rune arithmetic (plus a no-op strings.Replace), which produced leading
// zeros (e.g. "0174MiB") for any FileMaxLinks < 1000. Using intToStr, as the
// "256KiB" branch already does, is correct for all values and yields the same
// "1024MiB" for the current unixfs-v1-2025 profile.
func fileAtMaxLinksSize(exp cidProfileExpectations) string {
	switch exp.ChunkSize {
	case "1MiB":
		// e.g. 1024 links * 1 MiB = "1024MiB"
		return intToStr(exp.FileMaxLinks) + "MiB"
	case "256KiB":
		// 174 * 256 KiB = 44544 KiB
		totalKiB := exp.FileMaxLinks * 256
		return intToStr(totalKiB) + "KiB"
	default:
		panic("unknown chunk size: " + exp.ChunkSize)
	}
}
// fileOverMaxLinksSize returns the size string for a file exactly one chunk
// larger than FileMaxLinks allows, which forces DAG rebalancing on import.
func fileOverMaxLinksSize(exp cidProfileExpectations) string {
	chunks := exp.FileMaxLinks + 1
	switch exp.ChunkSize {
	case "1MiB":
		return intToStr(chunks) + "MiB"
	case "256KiB":
		// (174 + 1) * 256 KiB = 44800 KiB
		return intToStr(chunks*256) + "KiB"
	default:
		panic("unknown chunk size: " + exp.ChunkSize)
	}
}
// intToStr renders a non-negative integer as its decimal string form without
// pulling in strconv. Digits are appended least-significant first into a
// preallocated buffer and reversed in place at the end.
func intToStr(n int) string {
	if n == 0 {
		return "0"
	}
	buf := make([]byte, 0, 20) // 20 digits covers any 64-bit value
	for n > 0 {
		buf = append(buf, byte('0'+n%10))
		n /= 10
	}
	for i, j := 0, len(buf)-1; i < j; i, j = i+1, j-1 {
		buf[i], buf[j] = buf[j], buf[i]
	}
	return string(buf)
}
// seedForProfile returns the deterministic seed used in add_test.go for file
// tests, keyed by profile name. "default" shares the v0 seed because default
// behavior currently mirrors unixfs-v0-2015.
func seedForProfile(exp cidProfileExpectations) string {
	name := exp.Name
	if name == "unixfs-v0-2015" || name == "default" {
		return "v0-seed"
	}
	if name == "unixfs-v1-2025" {
		return "v1-2025-seed"
	}
	return name + "-seed"
}
// hamtSeedForProfile returns the deterministic seed for HAMT directory tests.
// The same seed is used for both the at-threshold and over-threshold variants
// of a profile so the generated directory contents stay consistent.
func hamtSeedForProfile(exp cidProfileExpectations) string {
	name := exp.Name
	if name == "unixfs-v0-2015" || name == "default" {
		return "hamt-unixfs-v0-2015"
	}
	if name == "unixfs-v1-2025" {
		return "hamt-unixfs-v1-2025"
	}
	return "hamt-" + name
}
// TestDefaultMatchesExpectedProfile verifies that default ipfs add behavior
// matches the expected profile (currently unixfs-v0-2015) by adding the same
// content on a profile-less node and on a node initialized with that profile.
func TestDefaultMatchesExpectedProfile(t *testing.T) {
	t.Parallel()
	// Node initialized without any profile arguments.
	plain := harness.NewT(t).NewNode().Init()
	plain.StartDaemon()
	defer plain.StopDaemon()
	gotDefault := plain.IPFSAddStr("x")
	// Node initialized with the profile we document as the default.
	profiled := harness.NewT(t).NewNode().Init(defaultProfile.ProfileArgs...)
	profiled.StartDaemon()
	defer profiled.StopDaemon()
	gotProfiled := profiled.IPFSAddStr("x")
	require.Equal(t, gotProfiled, gotDefault,
		"default behavior should match %s profile", defaultProfile.Name)
}
// TestProtobufHelpers verifies the protobuf size calculation helpers in
// testutils: varint width, serialized PBLink size, and the file-count
// estimator used to hit the HAMT threshold exactly.
func TestProtobufHelpers(t *testing.T) {
	t.Parallel()
	t.Run("VarintLen", func(t *testing.T) {
		// Varint encoding: 7 bits per byte, MSB indicates continuation
		// Cases cover both edges of every byte-width from 1 to 5.
		cases := []struct {
			value    uint64
			expected int
		}{
			{0, 1},
			{127, 1},         // 0x7F - max 1-byte varint
			{128, 2},         // 0x80 - min 2-byte varint
			{16383, 2},       // 0x3FFF - max 2-byte varint
			{16384, 3},       // 0x4000 - min 3-byte varint
			{2097151, 3},     // 0x1FFFFF - max 3-byte varint
			{2097152, 4},     // 0x200000 - min 4-byte varint
			{268435455, 4},   // 0xFFFFFFF - max 4-byte varint
			{268435456, 5},   // 0x10000000 - min 5-byte varint
			{34359738367, 5}, // 0x7FFFFFFFF - max 5-byte varint
		}
		for _, tc := range cases {
			got := testutils.VarintLen(tc.value)
			require.Equal(t, tc.expected, got, "VarintLen(%d)", tc.value)
		}
	})
	t.Run("LinkSerializedSize", func(t *testing.T) {
		// Test typical cases for directory links; expected values are worked
		// out by hand from the dag-pb wire format in the comments below.
		cases := []struct {
			nameLen  int
			cidLen   int
			tsize    uint64
			expected int
		}{
			// 255-char name, CIDv0 (34 bytes), tsize=0
			// Inner: 1+1+34 + 1+2+255 + 1+1 = 296
			// Outer: 1 + 2 + 296 = 299
			{255, 34, 0, 299},
			// 255-char name, CIDv1 (36 bytes), tsize=0
			// Inner: 1+1+36 + 1+2+255 + 1+1 = 298
			// Outer: 1 + 2 + 298 = 301
			{255, 36, 0, 301},
			// Short name (10 chars), CIDv1, tsize=0
			// Inner: 1+1+36 + 1+1+10 + 1+1 = 52
			// Outer: 1 + 1 + 52 = 54
			{10, 36, 0, 54},
			// 255-char name, CIDv1, large tsize
			// Inner: 1+1+36 + 1+2+255 + 1+5 = 302 (tsize uses 5-byte varint)
			// Outer: 1 + 2 + 302 = 305
			{255, 36, 34359738367, 305},
		}
		for _, tc := range cases {
			got := testutils.LinkSerializedSize(tc.nameLen, tc.cidLen, tc.tsize)
			require.Equal(t, tc.expected, got, "LinkSerializedSize(%d, %d, %d)", tc.nameLen, tc.cidLen, tc.tsize)
		}
	})
	t.Run("EstimateFilesForBlockThreshold", func(t *testing.T) {
		// With nameLen=255/cidLen=36 each link costs 301 bytes (see above),
		// so (262144-4)/301 = 870 files fit under the 256 KiB threshold.
		threshold := 262144
		nameLen := 255
		cidLen := 36
		var tsize uint64 = 0
		numFiles := testutils.EstimateFilesForBlockThreshold(threshold, nameLen, cidLen, tsize)
		require.Equal(t, 870, numFiles, "expected 870 files for threshold 262144")
		// One byte less does not change the count (integer division floor).
		numFilesUnder := testutils.EstimateFilesForBlockThreshold(threshold-1, nameLen, cidLen, tsize)
		require.Equal(t, 870, numFilesUnder, "expected 870 files for threshold 262143")
		// A threshold large enough for one more full link bumps the count.
		numFilesOver := testutils.EstimateFilesForBlockThreshold(262185, nameLen, cidLen, tsize)
		require.Equal(t, 871, numFilesOver, "expected 871 files for threshold 262185")
	})
}

View File

@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"io"
"os"
"reflect"
"strings"
@ -148,9 +149,15 @@ func (n *Node) IPFSDagImport(content io.Reader, cid string, args ...string) erro
return res.Err
}
/*
func (n *Node) IPFSDagExport(cid string, car *os.File) error {
log.Debugf("node %d dag export of %s to %q with args: %v", n.ID, cid, car.Name())
// IPFSDagExport exports a DAG rooted at cid to a CAR file at carPath.
func (n *Node) IPFSDagExport(cid string, carPath string) error {
log.Debugf("node %d dag export of %s to %q", n.ID, cid, carPath)
car, err := os.Create(carPath)
if err != nil {
return err
}
defer car.Close()
res := n.Runner.MustRun(RunRequest{
Path: n.IPFSBin,
Args: []string{"dag", "export", cid},
@ -158,4 +165,3 @@ func (n *Node) IPFSDagExport(cid string, car *os.File) error {
})
return res.Err
}
*/

View File

@ -0,0 +1,39 @@
package testutils
import "math/bits"
// VarintLen returns the number of bytes needed to encode v as a protobuf varint.
// A varint carries 7 payload bits per byte, so the width is ceil(bitlen(v)/7),
// with a minimum of one byte for v == 0 (forced by the v|1 below).
func VarintLen(v uint64) int {
	return (bits.Len64(v|1) + 6) / 7
}
// LinkSerializedSize calculates the serialized size of a single PBLink in a
// dag-pb block, matching estimatedBlockSize() in
// boxo/ipld/unixfs/io/directory.go.
//
// Wire layout of one link inside a PBNode:
//
//	PBNode.Links wrapper tag (1 byte)
//	+ varint length of inner message
//	+ Hash field: tag (1) + varint(cidLen) + cidLen
//	+ Name field: tag (1) + varint(nameLen) + nameLen
//	+ Tsize field: tag (1) + varint(tsize)
func LinkSerializedSize(nameLen, cidLen int, tsize uint64) int {
	// Local varint-width helper: ceil(bitlen/7), minimum one byte for zero.
	vlen := func(x uint64) int { return (bits.Len64(x|1) + 6) / 7 }
	inner := 1 + vlen(uint64(cidLen)) + cidLen // Hash field
	inner += 1 + vlen(uint64(nameLen)) + nameLen // Name field
	inner += 1 + vlen(tsize) // Tsize field
	// Outer wrapper: tag (1 byte) + varint(inner) + inner
	return 1 + vlen(uint64(inner)) + inner
}
// EstimateFilesForBlockThreshold estimates how many files with the given
// name/CID lengths fit under the serialized-block-size threshold, i.e. the
// largest count whose directory block stays at or below the limit.
func EstimateFilesForBlockThreshold(threshold, nameLen, cidLen int, tsize uint64) int {
	// Base overhead for an empty dag-pb directory node (Data field + minimal
	// structure); empirically determined to be 4 bytes.
	const dirOverhead = 4
	perLink := LinkSerializedSize(nameLen, cidLen, tsize)
	return (threshold - dirOverhead) / perLink
}

View File

@ -135,7 +135,7 @@ require (
github.com/huin/goupnp v1.3.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/ipfs/bbloom v0.0.4 // indirect
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00 // indirect
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3 // indirect
github.com/ipfs/go-bitfield v1.1.0 // indirect
github.com/ipfs/go-block-format v0.2.3 // indirect
github.com/ipfs/go-cid v0.6.0 // indirect

View File

@ -294,8 +294,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs=
github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0=
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00 h1:e9p5CizXgzPlnxt1kzDyYNoKusO4cvDjNG33UqyVhwM=
github.com/ipfs/boxo v0.35.3-0.20260117004328-4ff72d072c00/go.mod h1:Abmp1if6bMQG87/0SQPIB9fkxJnZMLCt2nQw3yUZHH0=
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3 h1:X6iiSyBUwhKgQMzM57wSXVUZfivm5nWm5S/Y2SrSjhA=
github.com/ipfs/boxo v0.35.3-0.20260119043727-6707376002a3/go.mod h1:Abmp1if6bMQG87/0SQPIB9fkxJnZMLCt2nQw3yUZHH0=
github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA=
github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU=
github.com/ipfs/go-block-format v0.2.3 h1:mpCuDaNXJ4wrBJLrtEaGFGXkferrw5eqVvzaHhtFKQk=