mirror of
https://github.com/ipfs/kubo.git
synced 2026-03-10 02:40:11 +08:00
alternative to #5243 that updates go-cid and all packages that depend on it License: MIT Signed-off-by: Steven Allen <steven@stebalien.com>
256 lines
11 KiB
Go
256 lines
11 KiB
Go
// Package balanced provides methods to build balanced DAGs, which are generalistic
|
||
// DAGs in which all leaves (nodes representing chunks of data) are at the same
|
||
// distance from the root. Nodes can have only a maximum number of children; to be
|
||
// able to store more leaf data nodes balanced DAGs are extended by increasing its
|
||
// depth (and having more intermediary nodes).
|
||
//
|
||
// Internal nodes are always represented by UnixFS nodes (of type `File`) encoded
|
||
// inside DAG nodes (see the `go-ipfs/unixfs` package for details of UnixFS). In
|
||
// contrast, leaf nodes with data have multiple possible representations: UnixFS
|
||
// nodes as above, raw nodes with just the file data (no format) and Filestore
|
||
// nodes (that directly link to the file on disk using a format stored on a raw
|
||
// node, see the `go-ipfs/filestore` package for details of Filestore.)
|
||
//
|
||
// In the case the entire file fits into just one node it will be formatted as a
|
||
// (single) leaf node (without parent) with the possible representations already
|
||
// mentioned. This is the only scenario where the root can be of a type different
|
||
// that the UnixFS node.
|
||
//
|
||
// +-------------+
|
||
// | Root 4 |
|
||
// +-------------+
|
||
// |
|
||
// +--------------------------+----------------------------+
|
||
// | |
|
||
// +-------------+ +-------------+
|
||
// | Node 2 | | Node 5 |
|
||
// +-------------+ +-------------+
|
||
// | |
|
||
// +-------------+-------------+ +-------------+
|
||
// | | |
|
||
// +-------------+ +-------------+ +-------------+
|
||
// | Node 1 | | Node 3 | | Node 6 |
|
||
// +-------------+ +-------------+ +-------------+
|
||
// | | |
|
||
// +------+------+ +------+------+ +------+
|
||
// | | | | |
|
||
// +=========+ +=========+ +=========+ +=========+ +=========+
|
||
// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 | | Chunk 5 |
|
||
// +=========+ +=========+ +=========+ +=========+ +=========+
|
||
//
|
||
package balanced
|
||
|
||
import (
|
||
"errors"
|
||
|
||
h "github.com/ipfs/go-ipfs/importer/helpers"
|
||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||
|
||
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
|
||
)
|
||
|
||
// Layout builds a balanced DAG layout. In a balanced DAG of depth 1, leaf nodes
|
||
// with data are added to a single `root` until the maximum number of links is
|
||
// reached. Then, to continue adding more data leaf nodes, a `newRoot` is created
|
||
// pointing to the old `root` (which will now become and intermediary node),
|
||
// increasing the depth of the DAG to 2. This will increase the maximum number of
|
||
// data leaf nodes the DAG can have (`Maxlinks() ^ depth`). The `fillNodeRec`
|
||
// function will add more intermediary child nodes to `newRoot` (which already has
|
||
// `root` as child) that in turn will have leaf nodes with data added to them.
|
||
// After that process is completed (the maximum number of links is reached),
|
||
// `fillNodeRec` will return and the loop will be repeated: the `newRoot` created
|
||
// will become the old `root` and a new root will be created again to increase the
|
||
// depth of the DAG. The process is repeated until there is no more data to add
|
||
// (i.e. the DagBuilderHelper’s Done() function returns true).
|
||
//
|
||
// The nodes are filled recursively, so the DAG is built from the bottom up. Leaf
|
||
// nodes are created first using the chunked file data and its size. The size is
|
||
// then bubbled up to the parent (internal) node, which aggregates all the sizes of
|
||
// its children and bubbles that combined size up to its parent, and so on up to
|
||
// the root. This way, a balanced DAG acts like a B-tree when seeking to a byte
|
||
// offset in the file the graph represents: each internal node uses the file size
|
||
// of its children as an index when seeking.
|
||
//
|
||
// `Layout` creates a root and hands it off to be filled:
|
||
//
|
||
// +-------------+
|
||
// | Root 1 |
|
||
// +-------------+
|
||
// |
|
||
// ( fillNodeRec fills in the )
|
||
// ( chunks on the root. )
|
||
// |
|
||
// +------+------+
|
||
// | |
|
||
// + - - - - + + - - - - +
|
||
// | Chunk 1 | | Chunk 2 |
|
||
// + - - - - + + - - - - +
|
||
//
|
||
// ↓
|
||
// When the root is full but there's more data...
|
||
// ↓
|
||
//
|
||
// +-------------+
|
||
// | Root 1 |
|
||
// +-------------+
|
||
// |
|
||
// +------+------+
|
||
// | |
|
||
// +=========+ +=========+ + - - - - +
|
||
// | Chunk 1 | | Chunk 2 | | Chunk 3 |
|
||
// +=========+ +=========+ + - - - - +
|
||
//
|
||
// ↓
|
||
// ...Layout's job is to create a new root.
|
||
// ↓
|
||
//
|
||
// +-------------+
|
||
// | Root 2 |
|
||
// +-------------+
|
||
// |
|
||
// +-------------+ - - - - - - - - +
|
||
// | |
|
||
// +-------------+ ( fillNodeRec creates the )
|
||
// | Node 1 | ( branch that connects )
|
||
// +-------------+ ( "Root 2" to "Chunk 3." )
|
||
// | |
|
||
// +------+------+ + - - - - -+
|
||
// | | |
|
||
// +=========+ +=========+ + - - - - +
|
||
// | Chunk 1 | | Chunk 2 | | Chunk 3 |
|
||
// +=========+ +=========+ + - - - - +
|
||
//
|
||
func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
|
||
if db.Done() {
|
||
// No data, return just an empty node.
|
||
root, err := db.NewLeafNode(nil)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
// This works without Filestore support (`ProcessFileStore`).
|
||
// TODO: Why? Is there a test case missing?
|
||
|
||
return db.AddNodeAndClose(root)
|
||
}
|
||
|
||
// The first `root` will be a single leaf node with data
|
||
// (corner case), after that subsequent `root` nodes will
|
||
// always be internal nodes (with a depth > 0) that can
|
||
// be handled by the loop.
|
||
root, fileSize, err := db.NewLeafDataNode()
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// Each time a DAG of a certain `depth` is filled (because it
|
||
// has reached its maximum capacity of `db.Maxlinks()` per node)
|
||
// extend it by making it a sub-DAG of a bigger DAG with `depth+1`.
|
||
for depth := 1; !db.Done(); depth++ {
|
||
|
||
// Add the old `root` as a child of the `newRoot`.
|
||
newRoot := db.NewFSNodeOverDag(ft.TFile)
|
||
newRoot.AddChild(root, fileSize, db)
|
||
|
||
// Fill the `newRoot` (that has the old `root` already as child)
|
||
// and make it the current `root` for the next iteration (when
|
||
// it will become "old").
|
||
root, fileSize, err = fillNodeRec(db, newRoot, depth)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
|
||
return db.AddNodeAndClose(root)
|
||
}
|
||
|
||
// fillNodeRec will "fill" the given internal (non-leaf) `node` with data by
|
||
// adding child nodes to it, either leaf data nodes (if `depth` is 1) or more
|
||
// internal nodes with higher depth (and calling itself recursively on them
|
||
// until *they* are filled with data). The data to fill the node with is
|
||
// provided by DagBuilderHelper.
|
||
//
|
||
// `node` represents a (sub-)DAG root that is being filled. If called recursively,
|
||
// it is `nil`, a new node is created. If it has been called from `Layout` (see
|
||
// diagram below) it points to the new root (that increases the depth of the DAG),
|
||
// it already has a child (the old root). New children will be added to this new
|
||
// root, and those children will in turn be filled (calling `fillNodeRec`
|
||
// recursively).
|
||
//
|
||
// +-------------+
|
||
// | `node` |
|
||
// | (new root) |
|
||
// +-------------+
|
||
// |
|
||
// +-------------+ - - - - - - + - - - - - - - - - - - +
|
||
// | | |
|
||
// +--------------+ + - - - - - + + - - - - - +
|
||
// | (old root) | | new child | | |
|
||
// +--------------+ + - - - - - + + - - - - - +
|
||
// | | |
|
||
// +------+------+ + - - + - - - +
|
||
// | | | |
|
||
// +=========+ +=========+ + - - - - + + - - - - +
|
||
// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 |
|
||
// +=========+ +=========+ + - - - - + + - - - - +
|
||
//
|
||
// The `node` to be filled uses the `FSNodeOverDag` abstraction that allows adding
|
||
// child nodes without packing/unpacking the UnixFS layer node (having an internal
|
||
// `ft.FSNode` cache).
|
||
//
|
||
// It returns the `ipld.Node` representation of the passed `node` filled with
|
||
// children and the `nodeFileSize` with the total size of the file chunk (leaf)
|
||
// nodes stored under this node (parent nodes store this to enable efficient
|
||
// seeking through the DAG when reading data later).
|
||
//
|
||
// warning: **children** pinned indirectly, but input node IS NOT pinned.
|
||
func fillNodeRec(db *h.DagBuilderHelper, node *h.FSNodeOverDag, depth int) (filledNode ipld.Node, nodeFileSize uint64, err error) {
|
||
if depth < 1 {
|
||
return nil, 0, errors.New("attempt to fillNode at depth < 1")
|
||
}
|
||
|
||
if node == nil {
|
||
node = db.NewFSNodeOverDag(ft.TFile)
|
||
}
|
||
|
||
// Child node created on every iteration to add to parent `node`.
|
||
// It can be a leaf node or another internal node.
|
||
var childNode ipld.Node
|
||
// File size from the child node needed to update the `FSNode`
|
||
// in `node` when adding the child.
|
||
var childFileSize uint64
|
||
|
||
// While we have room and there is data available to be added.
|
||
for node.NumChildren() < db.Maxlinks() && !db.Done() {
|
||
|
||
if depth == 1 {
|
||
// Base case: add leaf node with data.
|
||
childNode, childFileSize, err = db.NewLeafDataNode()
|
||
if err != nil {
|
||
return nil, 0, err
|
||
}
|
||
} else {
|
||
// Recursion case: create an internal node to in turn keep
|
||
// descending in the DAG and adding child nodes to it.
|
||
childNode, childFileSize, err = fillNodeRec(db, nil, depth-1)
|
||
if err != nil {
|
||
return nil, 0, err
|
||
}
|
||
}
|
||
|
||
err = node.AddChild(childNode, childFileSize, db)
|
||
if err != nil {
|
||
return nil, 0, err
|
||
}
|
||
}
|
||
|
||
nodeFileSize = node.FileSize()
|
||
|
||
// Get the final `dag.ProtoNode` with the `FSNode` data encoded inside.
|
||
filledNode, err = node.Commit()
|
||
if err != nil {
|
||
return nil, 0, err
|
||
}
|
||
|
||
return filledNode, nodeFileSize, nil
|
||
}
|