From 120800fe8b499b075ffc2c567b4d4aeae69cd7e7 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 27 Jan 2026 22:38:24 +0100 Subject: [PATCH] test(cli): improve HAMT threshold tests with exact +1 byte verification - add UnixFSDataType() helper to directly check UnixFS type via protobuf - refactor threshold tests to use exact +1 byte calculations instead of +1 file - verify directory type directly (ft.TDirectory vs ft.THAMTShard) instead of inferring from link count - clean up helper function signatures by removing unused cidLength parameter --- test/cli/add_test.go | 108 +++++++++++++++------------------- test/cli/cid_profiles_test.go | 24 ++++++-- test/cli/harness/pbinspect.go | 40 ++++++++++++- 3 files changed, 105 insertions(+), 67 deletions(-) diff --git a/test/cli/add_test.go b/test/cli/add_test.go index 03abb821d..68cfda7e2 100644 --- a/test/cli/add_test.go +++ b/test/cli/add_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + ft "github.com/ipfs/boxo/ipld/unixfs" "github.com/ipfs/kubo/config" "github.com/ipfs/kubo/test/cli/harness" "github.com/ipfs/kubo/test/cli/testutils" @@ -251,21 +252,20 @@ func TestAdd(t *testing.T) { // Create directory exactly at the 256KiB threshold using links estimation. // Links estimation: size = numFiles * (nameLen + cidLen) // 4096 * (30 + 34) = 4096 * 64 = 262144 = threshold exactly - // With > comparison: stays as basic directory - // With >= comparison: converts to HAMT + // Threshold uses > not >=, so directory at exact threshold stays basic. const numFiles, nameLen = 4096, 30 - err = createDirectoryForHAMTLinksEstimation(randDir, cidV0Length, numFiles, nameLen, nameLen, seed) + err = createDirectoryForHAMTLinksEstimation(randDir, numFiles, nameLen, nameLen, seed) require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Should remain a basic directory (threshold uses > not >=) - root, err := node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.Equal(t, numFiles, len(root.Links), "expected basic directory at exact threshold") + // Verify it's a basic directory by checking UnixFS type + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.TDirectory, fsType, "expected basic directory (type=1) at exact threshold") }) - t.Run("over UnixFSHAMTDirectorySizeThreshold=256KiB (links estimation)", func(t *testing.T) { + t.Run("1 byte over UnixFSHAMTDirectorySizeThreshold=256KiB (links estimation)", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init(profile) node.StartDaemon() @@ -274,19 +274,19 @@ func TestAdd(t *testing.T) { randDir, err := os.MkdirTemp(node.Dir, seed) require.NoError(t, err) - // Create directory just over the 256KiB threshold using links estimation. - // Links estimation: size = numFiles * (nameLen + cidLen) - // 4097 * (30 + 34) = 4097 * 64 = 262208 > 262144, exceeds threshold - const numFiles, nameLen = 4097, 30 - err = createDirectoryForHAMTLinksEstimation(randDir, cidV0Length, numFiles, nameLen, nameLen, seed) + // Create directory exactly 1 byte over the 256KiB threshold using links estimation. + // Links estimation: size = sum(nameLen + cidLen) for each file + // 4095 * (30 + 34) + 1 * (31 + 34) = 262080 + 65 = 262145 = threshold + 1 + const numFiles, nameLen, lastNameLen = 4096, 30, 31 + err = createDirectoryForHAMTLinksEstimation(randDir, numFiles, nameLen, lastNameLen, seed) require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Should be HAMT sharded (root links <= fanout of 256) - root, err := node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.LessOrEqual(t, len(root.Links), 256, "expected HAMT directory when over threshold") + // Verify it's a HAMT directory by checking UnixFS type + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory (type=5) when 1 byte over threshold") }) }) @@ -349,24 +349,23 @@ func TestAdd(t *testing.T) { require.NoError(t, err) // Create directory exactly at the 256KiB threshold using block estimation. - // Block estimation: size = baseOverhead + numFiles * LinkSerializedSize - // LinkSerializedSize(11, 36, 0) = 55 bytes per link - // 4766 * 55 + 14 = 262130 + 14 = 262144 = threshold exactly - // With > comparison: stays as basic directory - // With >= comparison: converts to HAMT - const numFiles, nameLen = 4766, 11 - err = createDirectoryForHAMTBlockEstimation(randDir, cidV1Length, numFiles, nameLen, nameLen, seed) + // Block estimation: size = dataFieldSize + sum(LinkSerializedSize) + // - dataFieldSerializedSize() = 4 bytes (empty dir, no mode/mtime) + // - LinkSerializedSize(nameLen=11) = 55 bytes, LinkSerializedSize(nameLen=21) = 65 bytes + // Total: 4765 * 55 + 65 + 4 = 262144 = threshold exactly + const numFiles, nameLen, lastNameLen = 4766, 11, 21 + err = createDirectoryForHAMTBlockEstimation(randDir, numFiles, nameLen, lastNameLen, seed) require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Should remain a basic directory (threshold uses > not >=) - root, err := node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.Equal(t, numFiles, len(root.Links), "expected basic directory at exact threshold") + // Verify it's a basic directory by checking UnixFS type + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.TDirectory, fsType, "expected basic directory (type=1) at exact threshold") }) - t.Run("over UnixFSHAMTDirectorySizeThreshold=256KiB (block estimation)", func(t *testing.T) { + t.Run("1 byte over UnixFSHAMTDirectorySizeThreshold=256KiB (block estimation)", func(t *testing.T) { t.Parallel() node := harness.NewT(t).NewNode().Init(profile) node.StartDaemon() @@ -375,19 +374,18 @@ func TestAdd(t *testing.T) { randDir, err := os.MkdirTemp(node.Dir, seed) require.NoError(t, err) - // Create directory just over the 256KiB threshold using block estimation. - // Block estimation: size = baseOverhead + numFiles * LinkSerializedSize - // 4767 * 55 + 14 = 262185 + 14 = 262199 > 262144, exceeds threshold - const numFiles, nameLen = 4767, 11 - err = createDirectoryForHAMTBlockEstimation(randDir, cidV1Length, numFiles, nameLen, nameLen, seed) + // Create directory exactly 1 byte over the 256KiB threshold. + // Same as above but lastNameLen=22 adds 1 byte: 4765 * 55 + 66 + 4 = 262145 + const numFiles, nameLen, lastNameLen = 4766, 11, 22 + err = createDirectoryForHAMTBlockEstimation(randDir, numFiles, nameLen, lastNameLen, seed) require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() - // Should be HAMT sharded (root links <= fanout of 256) - root, err := node.InspectPBNode(cidStr) - assert.NoError(t, err) - require.LessOrEqual(t, len(root.Links), 256, "expected HAMT directory when over threshold") + // Verify it's a HAMT directory by checking UnixFS type + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory (type=5) when 1 byte over threshold") }) }) @@ -398,8 +396,8 @@ func TestAdd(t *testing.T) { setupTestDir := func(t *testing.T, node *harness.Node) string { testDir, err := os.MkdirTemp(node.Dir, "hidden-test") require.NoError(t, err) - require.NoError(t, os.WriteFile(filepath.Join(testDir, "visible.txt"), []byte("visible"), 0644)) - require.NoError(t, os.WriteFile(filepath.Join(testDir, ".hidden"), []byte("hidden"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(testDir, "visible.txt"), []byte("visible"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(testDir, ".hidden"), []byte("hidden"), 0o644)) return testDir } @@ -447,8 +445,8 @@ func TestAdd(t *testing.T) { setupTestDir := func(t *testing.T, node *harness.Node) string { testDir, err := os.MkdirTemp(node.Dir, "empty-dirs-test") require.NoError(t, err) - require.NoError(t, os.Mkdir(filepath.Join(testDir, "empty-subdir"), 0755)) - require.NoError(t, os.WriteFile(filepath.Join(testDir, "file.txt"), []byte("content"), 0644)) + require.NoError(t, os.Mkdir(filepath.Join(testDir, "empty-subdir"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(testDir, "file.txt"), []byte("content"), 0o644)) return testDir } @@ -502,14 +500,14 @@ func TestAdd(t *testing.T) { // Top-level file and symlink targetFile := filepath.Join(testDir, "target.txt") - require.NoError(t, os.WriteFile(targetFile, []byte("target content"), 0644)) + require.NoError(t, os.WriteFile(targetFile, []byte("target content"), 0o644)) require.NoError(t, os.Symlink("target.txt", filepath.Join(testDir, "link.txt"))) // Nested file and symlink in sub-sub directory subsubdir := filepath.Join(testDir, "subdir", "subsubdir") - require.NoError(t, os.MkdirAll(subsubdir, 0755)) + require.NoError(t, os.MkdirAll(subsubdir, 0o755)) nestedTarget := filepath.Join(subsubdir, "nested-target.txt") - require.NoError(t, os.WriteFile(nestedTarget, []byte("nested content"), 0644)) + require.NoError(t, os.WriteFile(nestedTarget, []byte("nested content"), 0o644)) require.NoError(t, os.Symlink("nested-target.txt", filepath.Join(subsubdir, "nested-link.txt"))) return testDir @@ -806,32 +804,22 @@ func TestAddFastProvide(t *testing.T) { } // createDirectoryForHAMTLinksEstimation creates a directory with the specified number -// of files using the links-based size estimation formula (size = numFiles * (nameLen + cidLen)). +// of files for testing links-based size estimation (size = sum of nameLen + cidLen). // Used by legacy profiles (unixfs-v0-2015). // -// Threshold behavior: boxo uses > comparison, so directory at exact threshold stays basic. -// Use DirBasicFiles for basic directory test, DirHAMTFiles for HAMT directory test. -// // The lastNameLen parameter allows the last file to have a different name length, // enabling exact +1 byte threshold tests. -// -// See boxo/ipld/unixfs/io/directory.go sizeBelowThreshold() for the links estimation. -func createDirectoryForHAMTLinksEstimation(dirPath string, cidLength, numFiles, nameLen, lastNameLen int, seed string) error { +func createDirectoryForHAMTLinksEstimation(dirPath string, numFiles, nameLen, lastNameLen int, seed string) error { return createDeterministicFiles(dirPath, numFiles, nameLen, lastNameLen, seed) } // createDirectoryForHAMTBlockEstimation creates a directory with the specified number -// of files using the block-based size estimation formula (LinkSerializedSize with protobuf overhead). +// of files for testing block-based size estimation (LinkSerializedSize with protobuf overhead). // Used by modern profiles (unixfs-v1-2025). // -// Threshold behavior: boxo uses > comparison, so directory at exact threshold stays basic. -// Use DirBasicFiles for basic directory test, DirHAMTFiles for HAMT directory test. -// // The lastNameLen parameter allows the last file to have a different name length, // enabling exact +1 byte threshold tests. -// -// See boxo/ipld/unixfs/io/directory.go estimatedBlockSize() for the block estimation. -func createDirectoryForHAMTBlockEstimation(dirPath string, cidLength, numFiles, nameLen, lastNameLen int, seed string) error { +func createDirectoryForHAMTBlockEstimation(dirPath string, numFiles, nameLen, lastNameLen int, seed string) error { return createDeterministicFiles(dirPath, numFiles, nameLen, lastNameLen, seed) } @@ -870,7 +858,7 @@ func createDeterministicFiles(dirPath string, numFiles, nameLen, lastNameLen int filePath := filepath.Join(dirPath, filename) // Create file with 1-byte content for non-zero tsize - if err := os.WriteFile(filePath, []byte("x"), 0644); err != nil { + if err := os.WriteFile(filePath, []byte("x"), 0o644); err != nil { return err } } diff --git a/test/cli/cid_profiles_test.go b/test/cli/cid_profiles_test.go index 10e565283..76021c79d 100644 --- a/test/cli/cid_profiles_test.go +++ b/test/cli/cid_profiles_test.go @@ -7,6 +7,7 @@ import ( "strings" "testing" + ft "github.com/ipfs/boxo/ipld/unixfs" "github.com/ipfs/kubo/test/cli/harness" "github.com/ipfs/kubo/test/cli/testutils" "github.com/stretchr/testify/assert" @@ -133,7 +134,7 @@ func TestCIDProfiles(t *testing.T) { carOutputDir := os.Getenv("CID_PROFILES_CAR_OUTPUT") exportCARs := carOutputDir != "" if exportCARs { - if err := os.MkdirAll(carOutputDir, 0755); err != nil { + if err := os.MkdirAll(carOutputDir, 0o755); err != nil { t.Fatalf("failed to create CAR output directory: %v", err) } t.Logf("CAR export enabled, writing to: %s", carOutputDir) @@ -276,14 +277,19 @@ func runProfileTests(t *testing.T, exp cidProfileExpectations, carOutputDir stri basicLastNameLen = exp.DirBasicNameLen } if exp.HAMTSizeEstimation == "block" { - err = createDirectoryForHAMTBlockEstimation(randDir, cidLen, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed) + err = createDirectoryForHAMTBlockEstimation(randDir, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed) } else { - err = createDirectoryForHAMTLinksEstimation(randDir, cidLen, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed) + err = createDirectoryForHAMTLinksEstimation(randDir, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed) } require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + // Verify it's a basic directory by checking UnixFS type + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.TDirectory, fsType, "expected basic directory (type=1) at exact threshold") + root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.Equal(t, exp.DirBasicFiles, len(root.Links), @@ -339,18 +345,24 @@ func runProfileTests(t *testing.T, exp cidProfileExpectations, carOutputDir stri lastNameLen = exp.DirHAMTNameLen } if exp.HAMTSizeEstimation == "block" { - err = createDirectoryForHAMTBlockEstimation(randDir, cidLen, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed) + err = createDirectoryForHAMTBlockEstimation(randDir, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed) } else { - err = createDirectoryForHAMTLinksEstimation(randDir, cidLen, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed) + err = createDirectoryForHAMTLinksEstimation(randDir, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed) } require.NoError(t, err) cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed() + // Verify it's a HAMT directory by checking UnixFS type + fsType, err := node.UnixFSDataType(cidStr) + require.NoError(t, err) + require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory (type=5) when over threshold") + + // HAMT root has at most fanout links (actual count depends on hash distribution) root, err := node.InspectPBNode(cidStr) assert.NoError(t, err) require.LessOrEqual(t, len(root.Links), exp.HAMTFanout, - "expected HAMT directory with <= %d links", exp.HAMTFanout) + "expected HAMT directory root to have <= %d links", exp.HAMTFanout) // Verify hash function verifyHashFunction(t, node, cidStr, exp.HashFunc) diff --git a/test/cli/harness/pbinspect.go b/test/cli/harness/pbinspect.go index 6abddb61f..6210e7bed 100644 --- a/test/cli/harness/pbinspect.go +++ b/test/cli/harness/pbinspect.go @@ -3,8 +3,47 @@ package harness import ( "bytes" "encoding/json" + + mdag "github.com/ipfs/boxo/ipld/merkledag" + ft "github.com/ipfs/boxo/ipld/unixfs" + pb "github.com/ipfs/boxo/ipld/unixfs/pb" ) +// UnixFSDataType returns the UnixFS DataType for the given CID by fetching the +// raw block and parsing the protobuf. This directly checks the Type field in +// the UnixFS Data message (https://specs.ipfs.tech/unixfs/#data). +// +// Common types: +// - ft.TDirectory (1) = basic flat directory +// - ft.THAMTShard (5) = HAMT sharded directory +func (n *Node) UnixFSDataType(cid string) (pb.Data_DataType, error) { + log.Debugf("node %d block get %s", n.ID, cid) + + var blockData bytes.Buffer + res := n.Runner.MustRun(RunRequest{ + Path: n.IPFSBin, + Args: []string{"block", "get", cid}, + CmdOpts: []CmdOpt{RunWithStdout(&blockData)}, + }) + if res.Err != nil { + return 0, res.Err + } + + // Parse dag-pb block + protoNode, err := mdag.DecodeProtobuf(blockData.Bytes()) + if err != nil { + return 0, err + } + + // Parse UnixFS data + fsNode, err := ft.FSNodeFromBytes(protoNode.Data()) + if err != nil { + return 0, err + } + + return fsNode.Type(), nil +} + // InspectPBNode uses dag-json output of 'ipfs dag get' to inspect // "Logical Format" of DAG-PB as defined in // https://web.archive.org/web/20250403194752/https://ipld.io/specs/codecs/dag-pb/spec/#logical-format @@ -28,7 +67,6 @@ func (n *Node) InspectPBNode(cid string) (PBNode, error) { return root, err } return root, nil - } // Define structs to match the JSON for