kubo/test/cli/block_size_test.go
Marcin Rataj c6702eaf88
Some checks failed
CodeQL / codeql (push) Has been cancelled
Docker Check / lint (push) Has been cancelled
Docker Check / build (push) Has been cancelled
Gateway Conformance / gateway-conformance (push) Has been cancelled
Gateway Conformance / gateway-conformance-libp2p-experiment (push) Has been cancelled
Go Build / go-build (push) Has been cancelled
Go Check / go-check (push) Has been cancelled
Go Lint / go-lint (push) Has been cancelled
Go Test / unit-tests (push) Has been cancelled
Go Test / cli-tests (push) Has been cancelled
Go Test / example-tests (push) Has been cancelled
Interop / interop-prep (push) Has been cancelled
Sharness / sharness-test (push) Has been cancelled
Spell Check / spellcheck (push) Has been cancelled
Interop / helia-interop (push) Has been cancelled
Interop / ipfs-webui (push) Has been cancelled
fix: allow dag import of 1MiB chunks wrapped in dag-pb (#11185)
IPIP-499's unixfs-v1-2025 profile uses 1MiB chunks. with
--raw-leaves=false, protobuf wrapping pushes blocks slightly over 1MiB.
the previous 1MiB SoftBlockLimit rejected these blocks on dag import.

raise SoftBlockLimit to 2MiB to match the bitswap spec, which requires
implementations to support blocks up to 2MiB.

- raise SoftBlockLimit to 2MiB per the bitswap spec
- update error messages and help text
- bump boxo to main with ipfs/boxo#1101 (raised ChunkSizeLimit/BlockSizeLimit,
  256-byte overhead budget)
- update sharness tests for 2MiB boundary
- add test/cli boundary tests for block put, dag put, dag import,
  ipfs add (raw and wrapped leaves), and bitswap exchange including
  regression tests for the libp2p message size hard limit
2026-02-06 23:55:40 +01:00

404 lines
15 KiB
Go

package cli
import (
"bytes"
"crypto/rand"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/ipfs/kubo/test/cli/harness"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Size boundaries exercised by the block size tests.
const (
	// twoMiB is the bitswap spec block size limit (2097152 bytes).
	twoMiB = 1 << 21

	// twoMiBPlus is the first size past the limit (2097153 bytes).
	twoMiBPlus = 1 + twoMiB

	// maxChunkSize is the max chunker value (2096896 bytes): the block
	// limit minus a 256-byte overhead budget for protobuf framing.
	maxChunkSize = twoMiB - 256

	// overMaxChunk is the first chunker value past the maximum (2096897).
	overMaxChunk = 1 + maxChunkSize

	// libp2pMsgMax mirrors go-libp2p v0.47.0 network.MessageSizeMax
	// (4194304 bytes, 4MiB).
	//
	// A bitswap message carrying a single block has a protobuf envelope
	// whose size depends on the CID used to represent the block. For
	// CIDv1 with raw codec and SHA2-256 multihash (4-byte CID prefix),
	// the envelope is 18 bytes:
	//
	//   - 2 bytes for the empty Wantlist submessage
	//   - 6 bytes for the CID prefix field
	//   - 5 bytes for field tags and the payload length varint
	//   - 5 bytes for the data length varint and block submessage
	//     length varint
	//
	// The msgio varint reader rejects messages strictly larger than
	// MessageSizeMax, so the maximum block that fits is
	// 4194304 - 18 = 4194286 bytes.
	//
	// The hard limit varies slightly depending on the CID: a longer
	// multihash (e.g. SHA-512) increases the CID prefix and reduces the
	// maximum block payload by the same amount.
	libp2pMsgMax = 1 << 22

	// bsBlockEnvelope is the protobuf overhead for CIDv1 + raw + SHA2-256.
	bsBlockEnvelope = 18

	// maxTransferBlock is the largest block transferable via bitswap
	// (4194286 bytes).
	maxTransferBlock = libp2pMsgMax - bsBlockEnvelope

	// overMaxTransfer is the first size that no longer fits (4194287).
	overMaxTransfer = 1 + maxTransferBlock
)
// blockSize runs `ipfs block stat --enc=json <cid>` on node and returns the
// Size field decoded from the command's stdout. The calling test is failed
// immediately when stdout is not valid JSON for the expected shape.
func blockSize(t *testing.T, node *harness.Node, cid string) int {
	t.Helper()

	// Only Size is returned; Key is declared so it is decoded (and
	// type-checked) along with it.
	type blockStat struct {
		Key  string
		Size int
	}

	raw := node.IPFS("block", "stat", "--enc=json", cid).Stdout.Bytes()

	var parsed blockStat
	err := json.Unmarshal(raw, &parsed)
	require.NoError(t, err)

	return parsed.Size
}
// allBlockCIDs lists every block of the DAG under root: root itself first,
// followed by each non-empty line printed by `ipfs refs -r --unique <root>`.
func allBlockCIDs(t *testing.T, node *harness.Node, root string) []string {
	t.Helper()

	out := node.IPFS("refs", "-r", "--unique", root).Stdout.String()
	refs := strings.Split(strings.TrimSpace(out), "\n")

	all := make([]string, 1, len(refs)+1)
	all[0] = root
	for i := range refs {
		// a DAG without children yields a single empty entry after trimming
		if refs[i] == "" {
			continue
		}
		all = append(all, refs[i])
	}
	return all
}
// assertAllBlocksWithinLimit walks the DAG rooted at root and records a
// (non-fatal) test failure for each block larger than twoMiB bytes.
func assertAllBlocksWithinLimit(t *testing.T, node *harness.Node, root string) {
	t.Helper()

	blocks := allBlockCIDs(t, node, root)
	for i := range blocks {
		c := blocks[i]
		size := blockSize(t, node, c)
		assert.LessOrEqualf(t, size, twoMiB, "block %s is %d bytes, exceeds 2MiB limit", c, size)
	}
}
// TestBlockSizeBoundary checks block size limits at their exact boundaries:
//
//   - `block put`, `dag put` and `dag import` accept a 2MiB block, reject
//     2MiB+1 without --allow-big-block and accept it with the flag;
//   - `ipfs add` keeps every produced block within 2MiB for the largest
//     allowed chunk size and rejects chunk sizes above it;
//   - bitswap transfers blocks up to the libp2p message size budget and
//     not a byte more.
//
// Every subtest boots its own node(s) and runs in parallel.
func TestBlockSizeBoundary(t *testing.T) {
	t.Parallel()

	// overLimitMsg is the error expected from every write path that is
	// handed a block over 2MiB without --allow-big-block.
	const overLimitMsg = "produced block is over 2MiB: big blocks can't be exchanged with other peers. consider using UnixFS for automatic chunking of bigger files, or pass --allow-big-block to override"

	// peerProfile is the init profile used by both peers in the bitswap
	// exchange subtests.
	const peerProfile = "--profile=unixfs-v1-2025"

	// transferTimeout bounds how long the libp2p hard limit subtests wait
	// for `block get`; successful transfers complete in ~1s.
	const transferTimeout = 5 * time.Second

	// startOffline boots a fresh node with an offline daemon and stops the
	// daemon when the calling (sub)test finishes.
	startOffline := func(t *testing.T) *harness.Node {
		t.Helper()
		node := harness.NewT(t).NewNode().Init().StartDaemon("--offline")
		t.Cleanup(func() { node.StopDaemon() })
		return node
	}

	// startPeers boots two online nodes sharing one harness, both initialized
	// with peerProfile. The nodes are NOT connected yet, so callers can add
	// content to the provider before calling Connect.
	startPeers := func(t *testing.T) (provider, requester *harness.Node) {
		t.Helper()
		h := harness.NewT(t)
		provider = h.NewNode().Init(peerProfile).StartDaemon()
		t.Cleanup(func() { provider.StopDaemon() })
		requester = h.NewNode().Init(peerProfile).StartDaemon()
		t.Cleanup(func() { requester.StopDaemon() })
		return provider, requester
	}

	// put pipes data into the given ipfs command and returns the
	// whitespace-trimmed stdout (the CID of the stored block).
	put := func(node *harness.Node, data []byte, args ...string) string {
		return strings.TrimSpace(node.PipeToIPFS(bytes.NewReader(data), args...).Stdout.String())
	}

	// lastLine returns the final line of trimmed command output; for
	// `ipfs add -q` that line is the root CID.
	lastLine := func(out string) string {
		lines := strings.Split(strings.TrimSpace(out), "\n")
		return lines[len(lines)-1]
	}

	// addFile pipes data into `ipfs add -q <extra...>`, requires the
	// command to succeed and returns the root CID.
	addFile := func(t *testing.T, node *harness.Node, data []byte, extra ...string) string {
		t.Helper()
		args := append([]string{"add", "-q"}, extra...)
		res := node.RunPipeToIPFS(bytes.NewReader(data), args...)
		require.Equal(t, 0, res.ExitCode(), "stderr: %s", res.Stderr.String())
		return lastLine(res.Stdout.String())
	}

	// chunker renders the --chunker flag for a fixed-size chunker.
	chunker := func(size int) string {
		return fmt.Sprintf("--chunker=size-%d", size)
	}

	// randomBytes returns n bytes read from crypto/rand.
	randomBytes := func(t *testing.T, n int) []byte {
		t.Helper()
		data := make([]byte, n)
		_, err := rand.Read(data)
		require.NoError(t, err)
		return data
	}

	// importCAR pipes the CAR file at carPath into `ipfs dag import
	// <extra...>` and returns the exit code plus stderr followed by stdout.
	importCAR := func(t *testing.T, node *harness.Node, carPath string, extra ...string) (int, string) {
		t.Helper()
		carFile, err := os.Open(carPath)
		require.NoError(t, err)
		defer carFile.Close()
		args := append([]string{"dag", "import"}, extra...)
		res := node.RunPipeToIPFS(carFile, args...)
		return res.ExitCode(), res.Stderr.String() + res.Stdout.String()
	}

	// fetchBlock runs `ipfs block get <cid>` on requester in a goroutine
	// and waits up to limit for it to return. It reports the command's
	// stdout and true when the command returned in time, or nil and false
	// on timeout. Nothing cancels the command after a timeout; the channel
	// is buffered so the goroutine can still deliver its result and exit
	// once the command returns.
	fetchBlock := func(requester *harness.Node, cid string, limit time.Duration) ([]byte, bool) {
		timeout := time.After(limit)
		dataChan := make(chan []byte, 1)
		go func() {
			dataChan <- requester.RunIPFS("block", "get", cid).Stdout.Bytes()
		}()
		select {
		case got := <-dataChan:
			return got, true
		case <-timeout:
			return nil, false
		}
	}

	t.Run("block put", func(t *testing.T) {
		t.Parallel()

		t.Run("exactly 2MiB succeeds", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			cid := put(node, make([]byte, twoMiB), "block", "put")

			got := node.IPFS("block", "get", cid)
			assert.Len(t, got.Stdout.Bytes(), twoMiB)
		})

		t.Run("2MiB+1 fails without --allow-big-block", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			data := make([]byte, twoMiBPlus)
			res := node.RunPipeToIPFS(bytes.NewReader(data), "block", "put")
			assert.NotEqual(t, 0, res.ExitCode())
			assert.Contains(t, res.Stderr.String(), overLimitMsg)
		})

		t.Run("2MiB+1 succeeds with --allow-big-block", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			cid := put(node, make([]byte, twoMiBPlus), "block", "put", "--allow-big-block")

			got := node.IPFS("block", "get", cid)
			assert.Len(t, got.Stdout.Bytes(), twoMiBPlus)
		})
	})

	t.Run("dag put", func(t *testing.T) {
		t.Parallel()

		t.Run("exactly 2MiB succeeds", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			cid := put(node, make([]byte, twoMiB), "dag", "put", "--input-codec=raw", "--store-codec=raw")

			got := node.IPFS("block", "get", cid)
			assert.Len(t, got.Stdout.Bytes(), twoMiB)
		})

		t.Run("2MiB+1 fails without --allow-big-block", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			data := make([]byte, twoMiBPlus)
			res := node.RunPipeToIPFS(bytes.NewReader(data), "dag", "put", "--input-codec=raw", "--store-codec=raw")
			assert.NotEqual(t, 0, res.ExitCode())
			assert.Contains(t, res.Stderr.String(), overLimitMsg)
		})

		t.Run("2MiB+1 succeeds with --allow-big-block", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			cid := put(node, make([]byte, twoMiBPlus), "dag", "put", "--input-codec=raw", "--store-codec=raw", "--allow-big-block")

			got := node.IPFS("block", "get", cid)
			assert.Len(t, got.Stdout.Bytes(), twoMiBPlus)
		})
	})

	t.Run("dag import and export", func(t *testing.T) {
		t.Parallel()

		t.Run("2MiB+1 block round-trips with --allow-big-block", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			// put an oversized raw block with override
			cid := put(node, make([]byte, twoMiBPlus), "dag", "put", "--input-codec=raw", "--store-codec=raw", "--allow-big-block")

			// export to CAR
			carPath := filepath.Join(node.Dir, "oversized.car")
			require.NoError(t, node.IPFSDagExport(cid, carPath))

			// re-import without --allow-big-block should fail
			code, out := importCAR(t, node, carPath)
			assert.NotEqual(t, 0, code)
			assert.Contains(t, out, overLimitMsg)

			// re-import with --allow-big-block should succeed
			code, out = importCAR(t, node, carPath, "--allow-big-block")
			assert.Equal(t, 0, code, "output: %s", out)
		})
	})

	t.Run("ipfs add non-raw-leaves", func(t *testing.T) {
		t.Parallel()

		// The chunker enforces ChunkSizeLimit (maxChunkSize = 2MiB - 256
		// as of boxo 2026Q1) regardless of leaf type. It does not know at
		// parse time whether raw or wrapped leaves will be used, so the
		// 256-byte overhead budget is applied uniformly.
		//
		// With --raw-leaves=false each chunk is wrapped in protobuf,
		// adding ~14 bytes overhead that pushes blocks past the chunk size.
		// The overhead budget ensures the wrapped block stays within 2MiB.
		//
		// With --raw-leaves=true there is no protobuf wrapper, so the
		// block is exactly the chunk size (maxChunkSize). The 256-byte
		// budget is unused in this case but the chunker still enforces it.
		// A full 2MiB chunk (--chunker=size-2097152) is rejected even
		// though the resulting raw block would fit within BlockSizeLimit.

		t.Run("1MiB chunk with protobuf wrapping succeeds under 2MiB limit", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			root := addFile(t, node, make([]byte, twoMiB), "--chunker=size-1048576", "--raw-leaves=false")
			assertAllBlocksWithinLimit(t, node, root)
		})

		t.Run("max chunk with protobuf wrapping stays within block limit", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			// maxChunkSize leaves room for protobuf framing overhead
			root := addFile(t, node, make([]byte, maxChunkSize*2), chunker(maxChunkSize), "--raw-leaves=false")
			assertAllBlocksWithinLimit(t, node, root)
		})

		t.Run("chunk size over limit is rejected by chunker", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			data := make([]byte, twoMiB+twoMiB)
			res := node.RunPipeToIPFS(bytes.NewReader(data), "add", "-q", chunker(overMaxChunk), "--raw-leaves=false")
			assert.NotEqual(t, 0, res.ExitCode())
			assert.Contains(t, res.Stderr.String(),
				fmt.Sprintf("chunker parameters may not exceed the maximum chunk size of %d", maxChunkSize))
		})

		t.Run("max chunk with raw leaves succeeds", func(t *testing.T) {
			t.Parallel()
			node := startOffline(t)

			// raw leaves have no protobuf wrapper, so max chunk size fits easily
			root := addFile(t, node, make([]byte, maxChunkSize*2), chunker(maxChunkSize), "--raw-leaves=true")
			assertAllBlocksWithinLimit(t, node, root)
		})
	})

	t.Run("bitswap exchange", func(t *testing.T) {
		t.Parallel()

		t.Run("2MiB raw block transfers between peers", func(t *testing.T) {
			t.Parallel()
			provider, requester := startPeers(t)

			data := randomBytes(t, twoMiB)
			cid := put(provider, data, "block", "put")

			requester.Connect(provider)

			res := requester.IPFS("block", "get", cid)
			assert.Equal(t, data, res.Stdout.Bytes(), "retrieved block should match original")
		})

		t.Run("unixfs-v1-2025: 2MiB file transfers between peers", func(t *testing.T) {
			t.Parallel()
			provider, requester := startPeers(t)

			// unixfs-v1-2025 profile uses CIDv1, raw leaves, SHA2-256,
			// and 1MiB chunks. A 2MiB file produces two 1MiB raw leaf
			// blocks plus a root node, all within the 2MiB spec limit.
			data := randomBytes(t, twoMiB)
			root := addFile(t, provider, data)

			requester.Connect(provider)

			got := requester.IPFS("cat", root)
			assert.Equal(t, data, got.Stdout.Bytes(), "retrieved file should match original")
		})

		// The following two tests guard the physical hard limit of the
		// libp2p transport layer (network.MessageSizeMax = 4MiB). This is
		// the actual ceiling for bitswap block transfer, independent of the
		// 2MiB soft limit from the bitswap spec. Knowing the exact hard
		// limit is important for backward-compatible protocol and standards
		// evolution: any future increase to the bitswap spec block size
		// must stay within the libp2p message framing budget, or the
		// transport layer must be updated first.

		t.Run("bitswap-over-libp2p: largest block that fits in message transfers", func(t *testing.T) {
			t.Parallel()
			provider, requester := startPeers(t)

			data := randomBytes(t, maxTransferBlock)
			cid := put(provider, data, "block", "put", "--allow-big-block")

			requester.Connect(provider)

			got, ok := fetchBlock(requester, cid, transferTimeout)
			if !ok {
				t.Fatal("block get timed out: expected transfer to succeed at maxTransferBlock")
			}
			assert.Equal(t, data, got, "retrieved block should match original")
		})

		t.Run("bitswap-over-libp2p: one byte over message limit does not transfer", func(t *testing.T) {
			t.Parallel()
			provider, requester := startPeers(t)

			data := randomBytes(t, overMaxTransfer)
			cid := put(provider, data, "block", "put", "--allow-big-block")

			requester.Connect(provider)

			got, ok := fetchBlock(requester, cid, transferTimeout)
			if ok {
				t.Fatalf("expected timeout, but block was retrieved (%d bytes)", len(got))
			}
			t.Log("block get timed out as expected: block exceeds libp2p message size limit")
		})
	})
}