mirror of
https://github.com/ipfs/kubo.git
synced 2026-02-26 12:57:44 +08:00
test(cli): improve HAMT threshold tests with exact +1 byte verification
- add UnixFSDataType() helper to directly check UnixFS type via protobuf - refactor threshold tests to use exact +1 byte calculations instead of +1 file - verify directory type directly (ft.TDirectory vs ft.THAMTShard) instead of inferring from link count - clean up helper function signatures by removing unused cidLength parameter
This commit is contained in:
parent
9b04a587f7
commit
120800fe8b
@ -8,6 +8,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
ft "github.com/ipfs/boxo/ipld/unixfs"
|
||||
"github.com/ipfs/kubo/config"
|
||||
"github.com/ipfs/kubo/test/cli/harness"
|
||||
"github.com/ipfs/kubo/test/cli/testutils"
|
||||
@ -251,21 +252,20 @@ func TestAdd(t *testing.T) {
|
||||
// Create directory exactly at the 256KiB threshold using links estimation.
|
||||
// Links estimation: size = numFiles * (nameLen + cidLen)
|
||||
// 4096 * (30 + 34) = 4096 * 64 = 262144 = threshold exactly
|
||||
// With > comparison: stays as basic directory
|
||||
// With >= comparison: converts to HAMT
|
||||
// Threshold uses > not >=, so directory at exact threshold stays basic.
|
||||
const numFiles, nameLen = 4096, 30
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, cidV0Length, numFiles, nameLen, nameLen, seed)
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, numFiles, nameLen, nameLen, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
|
||||
|
||||
// Should remain a basic directory (threshold uses > not >=)
|
||||
root, err := node.InspectPBNode(cidStr)
|
||||
assert.NoError(t, err)
|
||||
require.Equal(t, numFiles, len(root.Links), "expected basic directory at exact threshold")
|
||||
// Verify it's a basic directory by checking UnixFS type
|
||||
fsType, err := node.UnixFSDataType(cidStr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, ft.TDirectory, fsType, "expected basic directory (type=1) at exact threshold")
|
||||
})
|
||||
|
||||
t.Run("over UnixFSHAMTDirectorySizeThreshold=256KiB (links estimation)", func(t *testing.T) {
|
||||
t.Run("1 byte over UnixFSHAMTDirectorySizeThreshold=256KiB (links estimation)", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
node := harness.NewT(t).NewNode().Init(profile)
|
||||
node.StartDaemon()
|
||||
@ -274,19 +274,19 @@ func TestAdd(t *testing.T) {
|
||||
randDir, err := os.MkdirTemp(node.Dir, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create directory just over the 256KiB threshold using links estimation.
|
||||
// Links estimation: size = numFiles * (nameLen + cidLen)
|
||||
// 4097 * (30 + 34) = 4097 * 64 = 262208 > 262144, exceeds threshold
|
||||
const numFiles, nameLen = 4097, 30
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, cidV0Length, numFiles, nameLen, nameLen, seed)
|
||||
// Create directory exactly 1 byte over the 256KiB threshold using links estimation.
|
||||
// Links estimation: size = sum(nameLen + cidLen) for each file
|
||||
// 4095 * (30 + 34) + 1 * (31 + 34) = 262080 + 65 = 262145 = threshold + 1
|
||||
const numFiles, nameLen, lastNameLen = 4096, 30, 31
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, numFiles, nameLen, lastNameLen, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
|
||||
|
||||
// Should be HAMT sharded (root links <= fanout of 256)
|
||||
root, err := node.InspectPBNode(cidStr)
|
||||
assert.NoError(t, err)
|
||||
require.LessOrEqual(t, len(root.Links), 256, "expected HAMT directory when over threshold")
|
||||
// Verify it's a HAMT directory by checking UnixFS type
|
||||
fsType, err := node.UnixFSDataType(cidStr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory (type=5) when 1 byte over threshold")
|
||||
})
|
||||
})
|
||||
|
||||
@ -349,24 +349,23 @@ func TestAdd(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create directory exactly at the 256KiB threshold using block estimation.
|
||||
// Block estimation: size = baseOverhead + numFiles * LinkSerializedSize
|
||||
// LinkSerializedSize(11, 36, 0) = 55 bytes per link
|
||||
// 4766 * 55 + 14 = 262130 + 14 = 262144 = threshold exactly
|
||||
// With > comparison: stays as basic directory
|
||||
// With >= comparison: converts to HAMT
|
||||
const numFiles, nameLen = 4766, 11
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, cidV1Length, numFiles, nameLen, nameLen, seed)
|
||||
// Block estimation: size = dataFieldSize + sum(LinkSerializedSize)
|
||||
// - dataFieldSerializedSize() = 4 bytes (empty dir, no mode/mtime)
|
||||
// - LinkSerializedSize(nameLen=11) = 55 bytes, LinkSerializedSize(nameLen=21) = 65 bytes
|
||||
// Total: 4765 * 55 + 65 + 4 = 262144 = threshold exactly
|
||||
const numFiles, nameLen, lastNameLen = 4766, 11, 21
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, numFiles, nameLen, lastNameLen, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
|
||||
|
||||
// Should remain a basic directory (threshold uses > not >=)
|
||||
root, err := node.InspectPBNode(cidStr)
|
||||
assert.NoError(t, err)
|
||||
require.Equal(t, numFiles, len(root.Links), "expected basic directory at exact threshold")
|
||||
// Verify it's a basic directory by checking UnixFS type
|
||||
fsType, err := node.UnixFSDataType(cidStr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, ft.TDirectory, fsType, "expected basic directory (type=1) at exact threshold")
|
||||
})
|
||||
|
||||
t.Run("over UnixFSHAMTDirectorySizeThreshold=256KiB (block estimation)", func(t *testing.T) {
|
||||
t.Run("1 byte over UnixFSHAMTDirectorySizeThreshold=256KiB (block estimation)", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
node := harness.NewT(t).NewNode().Init(profile)
|
||||
node.StartDaemon()
|
||||
@ -375,19 +374,18 @@ func TestAdd(t *testing.T) {
|
||||
randDir, err := os.MkdirTemp(node.Dir, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create directory just over the 256KiB threshold using block estimation.
|
||||
// Block estimation: size = baseOverhead + numFiles * LinkSerializedSize
|
||||
// 4767 * 55 + 14 = 262185 + 14 = 262199 > 262144, exceeds threshold
|
||||
const numFiles, nameLen = 4767, 11
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, cidV1Length, numFiles, nameLen, nameLen, seed)
|
||||
// Create directory exactly 1 byte over the 256KiB threshold.
|
||||
// Same as above but lastNameLen=22 adds 1 byte: 4765 * 55 + 66 + 4 = 262145
|
||||
const numFiles, nameLen, lastNameLen = 4766, 11, 22
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, numFiles, nameLen, lastNameLen, seed)
|
||||
require.NoError(t, err)
|
||||
|
||||
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
|
||||
|
||||
// Should be HAMT sharded (root links <= fanout of 256)
|
||||
root, err := node.InspectPBNode(cidStr)
|
||||
assert.NoError(t, err)
|
||||
require.LessOrEqual(t, len(root.Links), 256, "expected HAMT directory when over threshold")
|
||||
// Verify it's a HAMT directory by checking UnixFS type
|
||||
fsType, err := node.UnixFSDataType(cidStr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory (type=5) when 1 byte over threshold")
|
||||
})
|
||||
})
|
||||
|
||||
@ -398,8 +396,8 @@ func TestAdd(t *testing.T) {
|
||||
setupTestDir := func(t *testing.T, node *harness.Node) string {
|
||||
testDir, err := os.MkdirTemp(node.Dir, "hidden-test")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, os.WriteFile(filepath.Join(testDir, "visible.txt"), []byte("visible"), 0644))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(testDir, ".hidden"), []byte("hidden"), 0644))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(testDir, "visible.txt"), []byte("visible"), 0o644))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(testDir, ".hidden"), []byte("hidden"), 0o644))
|
||||
return testDir
|
||||
}
|
||||
|
||||
@ -447,8 +445,8 @@ func TestAdd(t *testing.T) {
|
||||
setupTestDir := func(t *testing.T, node *harness.Node) string {
|
||||
testDir, err := os.MkdirTemp(node.Dir, "empty-dirs-test")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, os.Mkdir(filepath.Join(testDir, "empty-subdir"), 0755))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(testDir, "file.txt"), []byte("content"), 0644))
|
||||
require.NoError(t, os.Mkdir(filepath.Join(testDir, "empty-subdir"), 0o755))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(testDir, "file.txt"), []byte("content"), 0o644))
|
||||
return testDir
|
||||
}
|
||||
|
||||
@ -502,14 +500,14 @@ func TestAdd(t *testing.T) {
|
||||
|
||||
// Top-level file and symlink
|
||||
targetFile := filepath.Join(testDir, "target.txt")
|
||||
require.NoError(t, os.WriteFile(targetFile, []byte("target content"), 0644))
|
||||
require.NoError(t, os.WriteFile(targetFile, []byte("target content"), 0o644))
|
||||
require.NoError(t, os.Symlink("target.txt", filepath.Join(testDir, "link.txt")))
|
||||
|
||||
// Nested file and symlink in sub-sub directory
|
||||
subsubdir := filepath.Join(testDir, "subdir", "subsubdir")
|
||||
require.NoError(t, os.MkdirAll(subsubdir, 0755))
|
||||
require.NoError(t, os.MkdirAll(subsubdir, 0o755))
|
||||
nestedTarget := filepath.Join(subsubdir, "nested-target.txt")
|
||||
require.NoError(t, os.WriteFile(nestedTarget, []byte("nested content"), 0644))
|
||||
require.NoError(t, os.WriteFile(nestedTarget, []byte("nested content"), 0o644))
|
||||
require.NoError(t, os.Symlink("nested-target.txt", filepath.Join(subsubdir, "nested-link.txt")))
|
||||
|
||||
return testDir
|
||||
@ -806,32 +804,22 @@ func TestAddFastProvide(t *testing.T) {
|
||||
}
|
||||
|
||||
// createDirectoryForHAMTLinksEstimation creates a directory with the specified number
|
||||
// of files using the links-based size estimation formula (size = numFiles * (nameLen + cidLen)).
|
||||
// of files for testing links-based size estimation (size = sum of nameLen + cidLen).
|
||||
// Used by legacy profiles (unixfs-v0-2015).
|
||||
//
|
||||
// Threshold behavior: boxo uses > comparison, so directory at exact threshold stays basic.
|
||||
// Use DirBasicFiles for basic directory test, DirHAMTFiles for HAMT directory test.
|
||||
//
|
||||
// The lastNameLen parameter allows the last file to have a different name length,
|
||||
// enabling exact +1 byte threshold tests.
|
||||
//
|
||||
// See boxo/ipld/unixfs/io/directory.go sizeBelowThreshold() for the links estimation.
|
||||
func createDirectoryForHAMTLinksEstimation(dirPath string, cidLength, numFiles, nameLen, lastNameLen int, seed string) error {
|
||||
func createDirectoryForHAMTLinksEstimation(dirPath string, numFiles, nameLen, lastNameLen int, seed string) error {
|
||||
return createDeterministicFiles(dirPath, numFiles, nameLen, lastNameLen, seed)
|
||||
}
|
||||
|
||||
// createDirectoryForHAMTBlockEstimation creates a directory with the specified number
|
||||
// of files using the block-based size estimation formula (LinkSerializedSize with protobuf overhead).
|
||||
// of files for testing block-based size estimation (LinkSerializedSize with protobuf overhead).
|
||||
// Used by modern profiles (unixfs-v1-2025).
|
||||
//
|
||||
// Threshold behavior: boxo uses > comparison, so directory at exact threshold stays basic.
|
||||
// Use DirBasicFiles for basic directory test, DirHAMTFiles for HAMT directory test.
|
||||
//
|
||||
// The lastNameLen parameter allows the last file to have a different name length,
|
||||
// enabling exact +1 byte threshold tests.
|
||||
//
|
||||
// See boxo/ipld/unixfs/io/directory.go estimatedBlockSize() for the block estimation.
|
||||
func createDirectoryForHAMTBlockEstimation(dirPath string, cidLength, numFiles, nameLen, lastNameLen int, seed string) error {
|
||||
func createDirectoryForHAMTBlockEstimation(dirPath string, numFiles, nameLen, lastNameLen int, seed string) error {
|
||||
return createDeterministicFiles(dirPath, numFiles, nameLen, lastNameLen, seed)
|
||||
}
|
||||
|
||||
@ -870,7 +858,7 @@ func createDeterministicFiles(dirPath string, numFiles, nameLen, lastNameLen int
|
||||
filePath := filepath.Join(dirPath, filename)
|
||||
|
||||
// Create file with 1-byte content for non-zero tsize
|
||||
if err := os.WriteFile(filePath, []byte("x"), 0644); err != nil {
|
||||
if err := os.WriteFile(filePath, []byte("x"), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
ft "github.com/ipfs/boxo/ipld/unixfs"
|
||||
"github.com/ipfs/kubo/test/cli/harness"
|
||||
"github.com/ipfs/kubo/test/cli/testutils"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@ -133,7 +134,7 @@ func TestCIDProfiles(t *testing.T) {
|
||||
carOutputDir := os.Getenv("CID_PROFILES_CAR_OUTPUT")
|
||||
exportCARs := carOutputDir != ""
|
||||
if exportCARs {
|
||||
if err := os.MkdirAll(carOutputDir, 0755); err != nil {
|
||||
if err := os.MkdirAll(carOutputDir, 0o755); err != nil {
|
||||
t.Fatalf("failed to create CAR output directory: %v", err)
|
||||
}
|
||||
t.Logf("CAR export enabled, writing to: %s", carOutputDir)
|
||||
@ -276,14 +277,19 @@ func runProfileTests(t *testing.T, exp cidProfileExpectations, carOutputDir stri
|
||||
basicLastNameLen = exp.DirBasicNameLen
|
||||
}
|
||||
if exp.HAMTSizeEstimation == "block" {
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, cidLen, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed)
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed)
|
||||
} else {
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, cidLen, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed)
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, exp.DirBasicFiles, exp.DirBasicNameLen, basicLastNameLen, seed)
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
|
||||
|
||||
// Verify it's a basic directory by checking UnixFS type
|
||||
fsType, err := node.UnixFSDataType(cidStr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, ft.TDirectory, fsType, "expected basic directory (type=1) at exact threshold")
|
||||
|
||||
root, err := node.InspectPBNode(cidStr)
|
||||
assert.NoError(t, err)
|
||||
require.Equal(t, exp.DirBasicFiles, len(root.Links),
|
||||
@ -339,18 +345,24 @@ func runProfileTests(t *testing.T, exp cidProfileExpectations, carOutputDir stri
|
||||
lastNameLen = exp.DirHAMTNameLen
|
||||
}
|
||||
if exp.HAMTSizeEstimation == "block" {
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, cidLen, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed)
|
||||
err = createDirectoryForHAMTBlockEstimation(randDir, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed)
|
||||
} else {
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, cidLen, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed)
|
||||
err = createDirectoryForHAMTLinksEstimation(randDir, exp.DirHAMTFiles, exp.DirHAMTNameLen, lastNameLen, seed)
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
|
||||
|
||||
// Verify it's a HAMT directory by checking UnixFS type
|
||||
fsType, err := node.UnixFSDataType(cidStr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, ft.THAMTShard, fsType, "expected HAMT directory (type=5) when over threshold")
|
||||
|
||||
// HAMT root has at most fanout links (actual count depends on hash distribution)
|
||||
root, err := node.InspectPBNode(cidStr)
|
||||
assert.NoError(t, err)
|
||||
require.LessOrEqual(t, len(root.Links), exp.HAMTFanout,
|
||||
"expected HAMT directory with <= %d links", exp.HAMTFanout)
|
||||
"expected HAMT directory root to have <= %d links", exp.HAMTFanout)
|
||||
|
||||
// Verify hash function
|
||||
verifyHashFunction(t, node, cidStr, exp.HashFunc)
|
||||
|
||||
@ -3,8 +3,47 @@ package harness
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
|
||||
mdag "github.com/ipfs/boxo/ipld/merkledag"
|
||||
ft "github.com/ipfs/boxo/ipld/unixfs"
|
||||
pb "github.com/ipfs/boxo/ipld/unixfs/pb"
|
||||
)
|
||||
|
||||
// UnixFSDataType returns the UnixFS DataType for the given CID by fetching the
|
||||
// raw block and parsing the protobuf. This directly checks the Type field in
|
||||
// the UnixFS Data message (https://specs.ipfs.tech/unixfs/#data).
|
||||
//
|
||||
// Common types:
|
||||
// - ft.TDirectory (1) = basic flat directory
|
||||
// - ft.THAMTShard (5) = HAMT sharded directory
|
||||
func (n *Node) UnixFSDataType(cid string) (pb.Data_DataType, error) {
|
||||
log.Debugf("node %d block get %s", n.ID, cid)
|
||||
|
||||
var blockData bytes.Buffer
|
||||
res := n.Runner.MustRun(RunRequest{
|
||||
Path: n.IPFSBin,
|
||||
Args: []string{"block", "get", cid},
|
||||
CmdOpts: []CmdOpt{RunWithStdout(&blockData)},
|
||||
})
|
||||
if res.Err != nil {
|
||||
return 0, res.Err
|
||||
}
|
||||
|
||||
// Parse dag-pb block
|
||||
protoNode, err := mdag.DecodeProtobuf(blockData.Bytes())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Parse UnixFS data
|
||||
fsNode, err := ft.FSNodeFromBytes(protoNode.Data())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return fsNode.Type(), nil
|
||||
}
|
||||
|
||||
// InspectPBNode uses dag-json output of 'ipfs dag get' to inspect
|
||||
// "Logical Format" of DAG-PB as defined in
|
||||
// https://web.archive.org/web/20250403194752/https://ipld.io/specs/codecs/dag-pb/spec/#logical-format
|
||||
@ -28,7 +67,6 @@ func (n *Node) InspectPBNode(cid string) (PBNode, error) {
|
||||
return root, err
|
||||
}
|
||||
return root, nil
|
||||
|
||||
}
|
||||
|
||||
// Define structs to match the JSON for
|
||||
|
||||
Loading…
Reference in New Issue
Block a user