kubo/config/import.go
Marcin Rataj 506cc6e70f feat(files): wire Import.UnixFSChunker and UnixFSDirectoryMaxLinks to MFS
`ipfs files` commands now respect these Import.* config options:
- UnixFSChunker: configures chunk size for `files write`
- UnixFSDirectoryMaxLinks: triggers HAMT sharding in `files mkdir`
- UnixFSHAMTDirectorySizeEstimation: controls size estimation mode

previously, MFS used hardcoded defaults ignoring user config.

changes:
- config/import.go: add UnixFSSplitterFunc() returning chunk.SplitterGen
- core/node/core.go: pass chunker, maxLinks, sizeEstimationMode to
  mfs.NewRoot() via new boxo RootOption API
- core/commands/files.go: pass maxLinks and sizeEstimationMode to
  mfs.Mkdir() and ensureContainingDirectoryExists(); document that
  UnixFSFileMaxLinks doesn't apply to files write (trickle DAG limitation)
- test/cli/files_test.go: add tests for UnixFSDirectoryMaxLinks and
  UnixFSChunker, including CID parity test with `ipfs add --trickle`

related: boxo@54e044f1b265
2026-02-02 02:00:02 +01:00

263 lines
9.0 KiB
Go

package config
import (
"fmt"
"io"
"strconv"
"strings"
chunk "github.com/ipfs/boxo/chunker"
"github.com/ipfs/boxo/ipld/unixfs/importer/helpers"
uio "github.com/ipfs/boxo/ipld/unixfs/io"
"github.com/ipfs/boxo/verifcid"
mh "github.com/multiformats/go-multihash"
)
// Default values and accepted string values for the Import configuration
// section. The Default* constants are the fallbacks passed to WithDefault
// when the corresponding Import field is not set.
const (
DefaultCidVersion = 0
DefaultUnixFSRawLeaves = false
DefaultUnixFSChunker = "size-262144"
DefaultHashFunction = "sha2-256"
DefaultFastProvideRoot = true
DefaultFastProvideWait = false
DefaultUnixFSHAMTDirectorySizeThreshold = 262144 // 256KiB - https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L26
// DefaultBatchMaxNodes controls the maximum number of nodes in a
// write-batch. The total size of the batch is limited by
// BatchMaxNodes and BatchMaxSize.
DefaultBatchMaxNodes = 128
// DefaultBatchMaxSize controls the maximum size of a single
// write-batch. The total size of the batch is limited by
// BatchMaxNodes and BatchMaxSize.
DefaultBatchMaxSize = 100 << 20 // 100MiB
// HAMTSizeEstimation values for Import.UnixFSHAMTDirectorySizeEstimation
HAMTSizeEstimationLinks = "links" // legacy: estimate using link names + CID byte lengths (default)
HAMTSizeEstimationBlock = "block" // full serialized dag-pb block size
HAMTSizeEstimationDisabled = "disabled" // disable HAMT sharding entirely
// DAGLayout values for Import.UnixFSDAGLayout
DAGLayoutBalanced = "balanced" // balanced DAG layout (default)
DAGLayoutTrickle = "trickle" // trickle DAG layout
DefaultUnixFSHAMTDirectorySizeEstimation = HAMTSizeEstimationLinks // legacy behavior
DefaultUnixFSDAGLayout = DAGLayoutBalanced // balanced DAG layout
DefaultUnixFSIncludeEmptyDirs = true // include empty directories
)
// int64-typed defaults for the link and fanout limits of the Import section.
// ValidateImportConfig passes them to WithDefault for the matching fields.
var (
// DefaultUnixFSFileMaxLinks is taken from boxo's helpers.DefaultLinksPerBlock.
DefaultUnixFSFileMaxLinks = int64(helpers.DefaultLinksPerBlock)
// DefaultUnixFSDirectoryMaxLinks is 0, which ValidateImportConfig accepts.
// NOTE(review): 0 presumably means "no link limit" — confirm against boxo.
DefaultUnixFSDirectoryMaxLinks = int64(0)
// DefaultUnixFSHAMTDirectoryMaxFanout is taken from boxo's uio.DefaultShardWidth.
DefaultUnixFSHAMTDirectoryMaxFanout = int64(uio.DefaultShardWidth)
)
// Import configures the default options for ingesting data. This affects commands
// that ingest data, such as 'ipfs add', 'ipfs dag put', 'ipfs block put', 'ipfs files write'.
//
// Every field is optional. The per-field constraints noted below are the ones
// ValidateImportConfig enforces, and only when the field is explicitly set.
type Import struct {
// CidVersion must be 0 or 1.
CidVersion OptionalInteger
UnixFSRawLeaves Flag
// UnixFSChunker must be "size-<bytes>", "rabin-<min>-<avg>-<max>", or "buzhash".
UnixFSChunker OptionalString
// HashFunction must be a known multihash name allowed by verifcid.
HashFunction OptionalString
// UnixFSFileMaxLinks must be positive.
UnixFSFileMaxLinks OptionalInteger
// UnixFSDirectoryMaxLinks must be non-negative.
UnixFSDirectoryMaxLinks OptionalInteger
// UnixFSHAMTDirectoryMaxFanout must be a power of 2 between 8 and 1024.
UnixFSHAMTDirectoryMaxFanout OptionalInteger
UnixFSHAMTDirectorySizeThreshold OptionalBytes
UnixFSHAMTDirectorySizeEstimation OptionalString // "links", "block", or "disabled"
UnixFSDAGLayout OptionalString // "balanced" or "trickle"
// BatchMaxNodes must be positive.
BatchMaxNodes OptionalInteger
// BatchMaxSize must be positive.
BatchMaxSize OptionalInteger
FastProvideRoot Flag
FastProvideWait Flag
}
// ValidateImportConfig checks every explicitly-set field of the Import section
// and returns an error describing the first invalid one, or nil when all set
// fields are acceptable. Fields left at their default are not checked.
//
// HAMT constraints follow the UnixFS spec:
// https://specs.ipfs.tech/unixfs/#hamt-structure-and-parameters
func ValidateImportConfig(cfg *Import) error {
	// CidVersion: only 0 and 1 are accepted.
	if !cfg.CidVersion.IsDefault() {
		if v := cfg.CidVersion.WithDefault(DefaultCidVersion); v != 0 && v != 1 {
			return fmt.Errorf("Import.CidVersion must be 0 or 1, got %d", v)
		}
	}

	// UnixFSFileMaxLinks: strictly positive.
	if !cfg.UnixFSFileMaxLinks.IsDefault() {
		if n := cfg.UnixFSFileMaxLinks.WithDefault(DefaultUnixFSFileMaxLinks); n <= 0 {
			return fmt.Errorf("Import.UnixFSFileMaxLinks must be positive, got %d", n)
		}
	}

	// UnixFSDirectoryMaxLinks: zero is allowed, negatives are not.
	if !cfg.UnixFSDirectoryMaxLinks.IsDefault() {
		if n := cfg.UnixFSDirectoryMaxLinks.WithDefault(DefaultUnixFSDirectoryMaxLinks); n < 0 {
			return fmt.Errorf("Import.UnixFSDirectoryMaxLinks must be non-negative, got %d", n)
		}
	}

	// UnixFSHAMTDirectoryMaxFanout: a power of two in [8, 1024], i.e. one of
	// 8, 16, 32, 64, 128, 256, 512, 1024. Any power of two >= 8 is also a
	// multiple of 8, so no separate divisibility check is needed.
	if !cfg.UnixFSHAMTDirectoryMaxFanout.IsDefault() {
		width := cfg.UnixFSHAMTDirectoryMaxFanout.WithDefault(DefaultUnixFSHAMTDirectoryMaxFanout)
		if !(width >= 8 && width <= 1024 && isPowerOfTwo(width)) {
			return fmt.Errorf("Import.UnixFSHAMTDirectoryMaxFanout must be a positive power of 2, multiple of 8, and not exceed 1024 (got %d)", width)
		}
	}

	// BatchMaxNodes: strictly positive.
	if !cfg.BatchMaxNodes.IsDefault() {
		if n := cfg.BatchMaxNodes.WithDefault(DefaultBatchMaxNodes); n <= 0 {
			return fmt.Errorf("Import.BatchMaxNodes must be positive, got %d", n)
		}
	}

	// BatchMaxSize: strictly positive.
	if !cfg.BatchMaxSize.IsDefault() {
		if n := cfg.BatchMaxSize.WithDefault(DefaultBatchMaxSize); n <= 0 {
			return fmt.Errorf("Import.BatchMaxSize must be positive, got %d", n)
		}
	}

	// UnixFSChunker: must match one of the formats isValidChunker accepts.
	if !cfg.UnixFSChunker.IsDefault() {
		if spec := cfg.UnixFSChunker.WithDefault(DefaultUnixFSChunker); !isValidChunker(spec) {
			return fmt.Errorf("Import.UnixFSChunker invalid format: %q (expected \"size-<bytes>\", \"rabin-<min>-<avg>-<max>\", or \"buzhash\")", spec)
		}
	}

	// HashFunction: must be a known multihash name (case-insensitive lookup)
	// and must be on the verifcid default allowlist.
	if !cfg.HashFunction.IsDefault() {
		name := cfg.HashFunction.WithDefault(DefaultHashFunction)
		code, known := mh.Names[strings.ToLower(name)]
		switch {
		case !known:
			return fmt.Errorf("Import.HashFunction unrecognized: %q", name)
		case !verifcid.DefaultAllowlist.IsAllowed(code):
			return fmt.Errorf("Import.HashFunction %q is not allowed for use in IPFS", name)
		}
	}

	// UnixFSHAMTDirectorySizeEstimation: one of the three named modes.
	if !cfg.UnixFSHAMTDirectorySizeEstimation.IsDefault() {
		mode := cfg.UnixFSHAMTDirectorySizeEstimation.WithDefault(DefaultUnixFSHAMTDirectorySizeEstimation)
		if mode != HAMTSizeEstimationLinks && mode != HAMTSizeEstimationBlock && mode != HAMTSizeEstimationDisabled {
			return fmt.Errorf("Import.UnixFSHAMTDirectorySizeEstimation must be %q, %q, or %q, got %q",
				HAMTSizeEstimationLinks, HAMTSizeEstimationBlock, HAMTSizeEstimationDisabled, mode)
		}
	}

	// UnixFSDAGLayout: either balanced or trickle.
	if !cfg.UnixFSDAGLayout.IsDefault() {
		shape := cfg.UnixFSDAGLayout.WithDefault(DefaultUnixFSDAGLayout)
		if shape != DAGLayoutBalanced && shape != DAGLayoutTrickle {
			return fmt.Errorf("Import.UnixFSDAGLayout must be %q or %q, got %q",
				DAGLayoutBalanced, DAGLayoutTrickle, shape)
		}
	}

	return nil
}
// isPowerOfTwo reports whether n is a positive integer with exactly one bit
// set (1, 2, 4, 8, ...). Zero and negative values are never powers of two.
func isPowerOfTwo(n int64) bool {
	if n <= 0 {
		return false
	}
	// Subtracting one flips the lowest set bit and everything below it, so
	// the AND is zero only when that lowest bit was the sole bit set.
	return n&(n-1) == 0
}
// isValidChunker reports whether chunker is one of the accepted formats:
//
//   - "buzhash"
//   - "size-<bytes>", where <bytes> is a positive integer
//   - "rabin-<min>-<avg>-<max>", three integers with min <= avg <= max
//
// Anything else is rejected.
func isValidChunker(chunker string) bool {
	switch {
	case chunker == "buzhash":
		return true

	case strings.HasPrefix(chunker, "size-"):
		// An empty remainder fails to parse, and a leading '-' can only
		// yield a value <= 0, so the positivity check rejects both.
		n, err := strconv.Atoi(strings.TrimPrefix(chunker, "size-"))
		return err == nil && n > 0

	case strings.HasPrefix(chunker, "rabin-"):
		// Exactly four dash-separated fields: "rabin" plus three numbers.
		fields := strings.Split(chunker, "-")
		if len(fields) != 4 {
			return false
		}
		var bounds [3]int
		for idx, field := range fields[1:] {
			n, err := strconv.Atoi(field)
			if err != nil {
				return false
			}
			bounds[idx] = n
		}
		// Ordering requirement: min <= avg <= max.
		lo, mid, hi := bounds[0], bounds[1], bounds[2]
		return lo <= mid && mid <= hi
	}
	return false
}
// HAMTSizeEstimationMode translates Import.UnixFSHAMTDirectorySizeEstimation
// into the matching boxo uio.SizeEstimationMode. An unset or unrecognized
// value maps to uio.SizeEstimationLinks.
func (i *Import) HAMTSizeEstimationMode() uio.SizeEstimationMode {
	mode := i.UnixFSHAMTDirectorySizeEstimation.WithDefault(DefaultUnixFSHAMTDirectorySizeEstimation)
	if mode == HAMTSizeEstimationBlock {
		return uio.SizeEstimationBlock
	}
	if mode == HAMTSizeEstimationDisabled {
		return uio.SizeEstimationDisabled
	}
	// HAMTSizeEstimationLinks and any unrecognized value share the links mode.
	return uio.SizeEstimationLinks
}
// UnixFSSplitterFunc builds a chunk.SplitterGen from Import.UnixFSChunker
// (falling back to DefaultUnixFSChunker when unset).
//
// For a valid "size-<bytes>" value the size is parsed here, once, and a
// fixed-size generator is returned. For every other value the returned
// generator hands the chunker string to chunk.FromString on each call and
// uses chunk.DefaultSplitter if that fails.
func (i *Import) UnixFSSplitterFunc() chunk.SplitterGen {
	spec := i.UnixFSChunker.WithDefault(DefaultUnixFSChunker)

	// Fast path: fixed-size chunking, the most common configuration.
	if strings.HasPrefix(spec, "size-") {
		n, err := strconv.ParseInt(strings.TrimPrefix(spec, "size-"), 10, 64)
		if err == nil && n > 0 {
			return chunk.SizeSplitterGen(n)
		}
	}

	// Other chunker types (rabin, buzhash) or an unparsable size: defer
	// parsing to each invocation, since these are rare cases.
	return func(r io.Reader) chunk.Splitter {
		if splitter, err := chunk.FromString(r, spec); err == nil {
			return splitter
		}
		return chunk.DefaultSplitter(r)
	}
}