mirror of
https://github.com/ipfs/kubo.git
synced 2026-03-11 11:19:05 +08:00
Merge pull request #4661 from ipfs/extract/chunk
Extract: importers/chunk module as go-ipfs-chunker
This commit is contained in:
commit
990e4df32e
4
Godeps/Godeps.json
generated
4
Godeps/Godeps.json
generated
@ -56,10 +56,6 @@
|
||||
{
|
||||
"ImportPath": "github.com/texttheater/golang-levenshtein/levenshtein",
|
||||
"Rev": "dfd657628c58d3eeaa26391097853b2473c8b94e"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/whyrusleeping/chunker",
|
||||
"Rev": "537e901819164627ca4bb5ce4e3faa8ce7956564"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
10
Godeps/_workspace/src/github.com/whyrusleeping/chunker/.travis.yml
generated
vendored
10
Godeps/_workspace/src/github.com/whyrusleeping/chunker/.travis.yml
generated
vendored
@ -1,10 +0,0 @@
|
||||
language: go
|
||||
sudo: false
|
||||
|
||||
go:
|
||||
- 1.3.3
|
||||
- 1.4.2
|
||||
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
23
Godeps/_workspace/src/github.com/whyrusleeping/chunker/LICENSE
generated
vendored
23
Godeps/_workspace/src/github.com/whyrusleeping/chunker/LICENSE
generated
vendored
@ -1,23 +0,0 @@
|
||||
Copyright (c) 2014, Alexander Neumann <alexander@bumpern.de>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
7
Godeps/_workspace/src/github.com/whyrusleeping/chunker/README.md
generated
vendored
7
Godeps/_workspace/src/github.com/whyrusleeping/chunker/README.md
generated
vendored
@ -1,7 +0,0 @@
|
||||
[Build Status](https://travis-ci.org/restic/chunker)
|
||||
|
||||
Content Defined Chunking (CDC) based on a rolling Rabin Checksum.
|
||||
|
||||
Part of https://github.com/restic/restic.
|
||||
|
||||
Better README will follow soon.
|
||||
370
Godeps/_workspace/src/github.com/whyrusleeping/chunker/chunker.go
generated
vendored
370
Godeps/_workspace/src/github.com/whyrusleeping/chunker/chunker.go
generated
vendored
@ -1,370 +0,0 @@
|
||||
package chunker
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"hash"
|
||||
"io"
|
||||
"math"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Size units and fixed parameters of the chunker.
const (
	// KiB and MiB are byte-count multipliers.
	KiB = 1 << 10
	MiB = KiB << 10

	// windowSize is the size of the sliding window, in bytes.
	windowSize = 16

	// chunkerBufSize is the size of one read buffer.
	chunkerBufSize = KiB * 512
)
|
||||
|
||||
var bufPool = sync.Pool{
|
||||
New: func() interface{} { return make([]byte, chunkerBufSize) },
|
||||
}
|
||||
|
||||
type tables struct {
|
||||
out [256]Pol
|
||||
mod [256]Pol
|
||||
}
|
||||
|
||||
// cache precomputed tables, these are read-only anyway
|
||||
var cache struct {
|
||||
entries map[Pol]*tables
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func init() {
|
||||
cache.entries = make(map[Pol]*tables)
|
||||
}
|
||||
|
||||
// Chunk is one content-dependent chunk of bytes whose end was cut when the
// Rabin Fingerprint had the value stored in Cut.
type Chunk struct {
	// Start is the offset of the chunk's first byte in the input stream.
	Start uint64
	// Length is the number of bytes in the chunk.
	Length uint64
	// Cut is the fingerprint value at the position where the chunk ended.
	Cut uint64
	// Digest is the hash of the chunk's bytes, or nil when the Chunker was
	// created without a hash.
	Digest []byte
	// Data is a private copy of the chunk's bytes.
	Data []byte
}
|
||||
|
||||
func (c Chunk) Reader(r io.ReaderAt) io.Reader {
|
||||
return io.NewSectionReader(r, int64(c.Start), int64(c.Length))
|
||||
}
|
||||
|
||||
// Chunker splits content with Rabin Fingerprints.
|
||||
type Chunker struct {
|
||||
pol Pol
|
||||
polShift uint64
|
||||
tables *tables
|
||||
|
||||
rd io.Reader
|
||||
closed bool
|
||||
|
||||
chunkbuf []byte
|
||||
|
||||
window [windowSize]byte
|
||||
wpos int
|
||||
|
||||
buf []byte
|
||||
bpos uint64
|
||||
bmax uint64
|
||||
|
||||
start uint64
|
||||
count uint64
|
||||
pos uint64
|
||||
|
||||
pre uint64 // wait for this many bytes before start calculating an new chunk
|
||||
|
||||
digest uint64
|
||||
h hash.Hash
|
||||
|
||||
sizeMask uint64
|
||||
|
||||
// minimal and maximal size of the outputted blocks
|
||||
MinSize uint64
|
||||
MaxSize uint64
|
||||
}
|
||||
|
||||
// New returns a new Chunker based on polynomial p that reads from rd
|
||||
// with bufsize and pass all data to hash along the way.
|
||||
func New(rd io.Reader, pol Pol, h hash.Hash, avSize, min, max uint64) *Chunker {
|
||||
|
||||
sizepow := uint(math.Log2(float64(avSize)))
|
||||
|
||||
c := &Chunker{
|
||||
buf: bufPool.Get().([]byte),
|
||||
h: h,
|
||||
pol: pol,
|
||||
rd: rd,
|
||||
chunkbuf: make([]byte, 0, max),
|
||||
sizeMask: (1 << sizepow) - 1,
|
||||
|
||||
MinSize: min,
|
||||
MaxSize: max,
|
||||
}
|
||||
|
||||
c.reset()
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Chunker) reset() {
|
||||
c.polShift = uint64(c.pol.Deg() - 8)
|
||||
c.fillTables()
|
||||
|
||||
for i := 0; i < windowSize; i++ {
|
||||
c.window[i] = 0
|
||||
}
|
||||
|
||||
c.closed = false
|
||||
c.digest = 0
|
||||
c.wpos = 0
|
||||
c.count = 0
|
||||
c.slide(1)
|
||||
c.start = c.pos
|
||||
|
||||
if c.h != nil {
|
||||
c.h.Reset()
|
||||
}
|
||||
|
||||
// do not start a new chunk unless at least MinSize bytes have been read
|
||||
c.pre = c.MinSize - windowSize
|
||||
}
|
||||
|
||||
// Calculate out_table and mod_table for optimization. Must be called only
|
||||
// once. This implementation uses a cache in the global variable cache.
|
||||
func (c *Chunker) fillTables() {
|
||||
// if polynomial hasn't been specified, do not compute anything for now
|
||||
if c.pol == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// test if the tables are cached for this polynomial
|
||||
cache.Lock()
|
||||
defer cache.Unlock()
|
||||
if t, ok := cache.entries[c.pol]; ok {
|
||||
c.tables = t
|
||||
return
|
||||
}
|
||||
|
||||
// else create a new entry
|
||||
c.tables = &tables{}
|
||||
cache.entries[c.pol] = c.tables
|
||||
|
||||
// calculate table for sliding out bytes. The byte to slide out is used as
|
||||
// the index for the table, the value contains the following:
|
||||
// out_table[b] = Hash(b || 0 || ... || 0)
|
||||
// \ windowsize-1 zero bytes /
|
||||
// To slide out byte b_0 for window size w with known hash
|
||||
// H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]:
|
||||
// H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0)
|
||||
// = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0)
|
||||
// = H( 0 || b_1 || ... || b_w)
|
||||
//
|
||||
// Afterwards a new byte can be shifted in.
|
||||
for b := 0; b < 256; b++ {
|
||||
var h Pol
|
||||
|
||||
h = appendByte(h, byte(b), c.pol)
|
||||
for i := 0; i < windowSize-1; i++ {
|
||||
h = appendByte(h, 0, c.pol)
|
||||
}
|
||||
c.tables.out[b] = h
|
||||
}
|
||||
|
||||
// calculate table for reduction mod Polynomial
|
||||
k := c.pol.Deg()
|
||||
for b := 0; b < 256; b++ {
|
||||
// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
|
||||
//
|
||||
// The 8 bits above deg(Polynomial) determine what happens next and so
|
||||
// these bits are used as a lookup to this table. The value is split in
|
||||
// two parts: Part A contains the result of the modulus operation, part
|
||||
// B is used to cancel out the 8 top bits so that one XOR operation is
|
||||
// enough to reduce modulo Polynomial
|
||||
c.tables.mod[b] = Pol(uint64(b)<<uint64(k)).Mod(c.pol) | (Pol(b) << uint64(k))
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Chunker) nextBytes() []byte {
|
||||
data := dupBytes(c.chunkbuf[:c.count])
|
||||
n := copy(c.chunkbuf, c.chunkbuf[c.count:])
|
||||
c.chunkbuf = c.chunkbuf[:n]
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
// Next returns the position and length of the next chunk of data. If an error
|
||||
// occurs while reading, the error is returned with a nil chunk. The state of
|
||||
// the current chunk is undefined. When the last chunk has been returned, all
|
||||
// subsequent calls yield a nil chunk and an io.EOF error.
|
||||
func (c *Chunker) Next() (*Chunk, error) {
|
||||
if c.tables == nil {
|
||||
return nil, errors.New("polynomial is not set")
|
||||
}
|
||||
|
||||
for {
|
||||
if c.bpos >= c.bmax {
|
||||
n, err := io.ReadFull(c.rd, c.buf[:])
|
||||
c.chunkbuf = append(c.chunkbuf, c.buf[:n]...)
|
||||
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
err = nil
|
||||
}
|
||||
|
||||
// io.ReadFull only returns io.EOF when no bytes could be read. If
|
||||
// this is the case and we're in this branch, there are no more
|
||||
// bytes to buffer, so this was the last chunk. If a different
|
||||
// error has occurred, return that error and abandon the current
|
||||
// chunk.
|
||||
if err == io.EOF && !c.closed {
|
||||
c.closed = true
|
||||
|
||||
// return the buffer to the pool
|
||||
bufPool.Put(c.buf)
|
||||
|
||||
data := c.nextBytes()
|
||||
|
||||
// return current chunk, if any bytes have been processed
|
||||
if c.count > 0 {
|
||||
return &Chunk{
|
||||
Start: c.start,
|
||||
Length: c.count,
|
||||
Cut: c.digest,
|
||||
Digest: c.hashDigest(),
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c.bpos = 0
|
||||
c.bmax = uint64(n)
|
||||
}
|
||||
|
||||
// check if bytes have to be dismissed before starting a new chunk
|
||||
if c.pre > 0 {
|
||||
n := c.bmax - c.bpos
|
||||
if c.pre > uint64(n) {
|
||||
c.pre -= uint64(n)
|
||||
c.updateHash(c.buf[c.bpos:c.bmax])
|
||||
|
||||
c.count += uint64(n)
|
||||
c.pos += uint64(n)
|
||||
c.bpos = c.bmax
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
c.updateHash(c.buf[c.bpos : c.bpos+c.pre])
|
||||
|
||||
c.bpos += c.pre
|
||||
c.count += c.pre
|
||||
c.pos += c.pre
|
||||
c.pre = 0
|
||||
}
|
||||
|
||||
add := c.count
|
||||
for _, b := range c.buf[c.bpos:c.bmax] {
|
||||
// inline c.slide(b) and append(b) to increase performance
|
||||
out := c.window[c.wpos]
|
||||
c.window[c.wpos] = b
|
||||
c.digest ^= uint64(c.tables.out[out])
|
||||
c.wpos = (c.wpos + 1) % windowSize
|
||||
|
||||
// c.append(b)
|
||||
index := c.digest >> c.polShift
|
||||
c.digest <<= 8
|
||||
c.digest |= uint64(b)
|
||||
|
||||
c.digest ^= uint64(c.tables.mod[index])
|
||||
// end inline
|
||||
|
||||
add++
|
||||
if add < c.MinSize {
|
||||
continue
|
||||
}
|
||||
|
||||
if (c.digest&c.sizeMask) == 0 || add >= c.MaxSize {
|
||||
i := add - c.count - 1
|
||||
c.updateHash(c.buf[c.bpos : c.bpos+uint64(i)+1])
|
||||
c.count = add
|
||||
c.pos += uint64(i) + 1
|
||||
c.bpos += uint64(i) + 1
|
||||
|
||||
data := c.nextBytes()
|
||||
|
||||
chunk := &Chunk{
|
||||
Start: c.start,
|
||||
Length: c.count,
|
||||
Cut: c.digest,
|
||||
Digest: c.hashDigest(),
|
||||
Data: data,
|
||||
}
|
||||
|
||||
c.reset()
|
||||
|
||||
return chunk, nil
|
||||
}
|
||||
}
|
||||
|
||||
steps := c.bmax - c.bpos
|
||||
if steps > 0 {
|
||||
c.updateHash(c.buf[c.bpos : c.bpos+steps])
|
||||
}
|
||||
c.count += steps
|
||||
c.pos += steps
|
||||
c.bpos = c.bmax
|
||||
}
|
||||
}
|
||||
|
||||
// dupBytes returns a freshly allocated copy of b. The result is never nil and
// shares no memory with b.
func dupBytes(b []byte) []byte {
	return append(make([]byte, 0, len(b)), b...)
}
|
||||
|
||||
func (c *Chunker) updateHash(data []byte) {
|
||||
if c.h != nil {
|
||||
// the hashes from crypto/sha* do not return an error
|
||||
_, err := c.h.Write(data)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Chunker) hashDigest() []byte {
|
||||
if c.h == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return c.h.Sum(nil)
|
||||
}
|
||||
|
||||
func (c *Chunker) append(b byte) {
|
||||
index := c.digest >> c.polShift
|
||||
c.digest <<= 8
|
||||
c.digest |= uint64(b)
|
||||
|
||||
c.digest ^= uint64(c.tables.mod[index])
|
||||
}
|
||||
|
||||
func (c *Chunker) slide(b byte) {
|
||||
out := c.window[c.wpos]
|
||||
c.window[c.wpos] = b
|
||||
c.digest ^= uint64(c.tables.out[out])
|
||||
c.wpos = (c.wpos + 1) % windowSize
|
||||
|
||||
c.append(b)
|
||||
}
|
||||
|
||||
func appendByte(hash Pol, b byte, pol Pol) Pol {
|
||||
hash <<= 8
|
||||
hash |= Pol(b)
|
||||
|
||||
return hash.Mod(pol)
|
||||
}
|
||||
298
Godeps/_workspace/src/github.com/whyrusleeping/chunker/chunker_test.go
generated
vendored
298
Godeps/_workspace/src/github.com/whyrusleeping/chunker/chunker_test.go
generated
vendored
@ -1,298 +0,0 @@
|
||||
package chunker_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"hash"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
. "github.com/restic/restic/test"
|
||||
)
|
||||
|
||||
// parseDigest decodes the hex string s into bytes. It panics when s is not
// valid hex, so it is only suitable for the constant test tables.
func parseDigest(s string) []byte {
	raw, err := hex.DecodeString(s)
	if err == nil {
		return raw
	}

	panic(err)
}
|
||||
|
||||
// chunk is the expected result for one chunk in the test tables below. The
// tables use positional literals, so the field order must not change.
type chunk struct {
	// Length is the expected chunk length in bytes.
	Length uint
	// CutFP is the expected fingerprint at the cut point.
	CutFP uint64
	// Digest is the expected hash of the chunk's bytes.
	Digest []byte
}
|
||||
|
||||
// polynomial used for all the tests below
|
||||
const testPol = chunker.Pol(0x3DA3358B4DC173) // highest set bit is bit 53, i.e. a polynomial of degree 53
|
||||
|
||||
// created for 32MB of random data out of math/rand's Uint32() seeded by
|
||||
// constant 23
|
||||
//
|
||||
// chunking configuration:
|
||||
// window size 64, avg chunksize 1<<20, min chunksize 1<<19, max chunksize 1<<23
|
||||
// polynom 0x3DA3358B4DC173
|
||||
var chunks1 = []chunk{
|
||||
chunk{2163460, 0x000b98d4cdf00000, parseDigest("4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d")},
|
||||
chunk{643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")},
|
||||
chunk{1528956, 0x0015a25c2ef00000, parseDigest("a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba")},
|
||||
chunk{1955808, 0x00102a8242e00000, parseDigest("c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824")},
|
||||
chunk{2222372, 0x00045da878000000, parseDigest("6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56")},
|
||||
chunk{2538687, 0x00198a8179900000, parseDigest("8687937412f654b5cfe4a82b08f28393a0c040f77c6f95e26742c2fc4254bfde")},
|
||||
chunk{609606, 0x001d4e8d17100000, parseDigest("5da820742ff5feb3369112938d3095785487456f65a8efc4b96dac4be7ebb259")},
|
||||
chunk{1205738, 0x000a7204dd600000, parseDigest("cc70d8fad5472beb031b1aca356bcab86c7368f40faa24fe5f8922c6c268c299")},
|
||||
chunk{959742, 0x00183e71e1400000, parseDigest("4065bdd778f95676c92b38ac265d361f81bff17d76e5d9452cf985a2ea5a4e39")},
|
||||
chunk{4036109, 0x001fec043c700000, parseDigest("b9cf166e75200eb4993fc9b6e22300a6790c75e6b0fc8f3f29b68a752d42f275")},
|
||||
chunk{1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")},
|
||||
chunk{1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")},
|
||||
chunk{811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")},
|
||||
chunk{1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")},
|
||||
chunk{1318021, 0x001cceb980000000, parseDigest("842eb53543db55bacac5e25cb91e43cc2e310fe5f9acc1aee86bdf5e91389374")},
|
||||
chunk{948640, 0x0011f7a470a00000, parseDigest("b8e36bf7019bb96ac3fb7867659d2167d9d3b3148c09fe0de45850b8fe577185")},
|
||||
chunk{645464, 0x00030ce2d9400000, parseDigest("5584bd27982191c3329f01ed846bfd266e96548dfa87018f745c33cfc240211d")},
|
||||
chunk{533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")},
|
||||
chunk{1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")},
|
||||
chunk{800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")},
|
||||
chunk{2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")},
|
||||
chunk{2651975, 0x000ae6c868000000, parseDigest("181347d2bbec32bef77ad5e9001e6af80f6abcf3576549384d334ee00c1988d8")},
|
||||
chunk{237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")},
|
||||
}
|
||||
|
||||
// test if nullbytes are correctly split, even if length is a multiple of MinSize.
|
||||
var chunks2 = []chunk{
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
}
|
||||
|
||||
func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk {
|
||||
chunks := []*chunker.Chunk{}
|
||||
|
||||
pos := uint(0)
|
||||
for i, chunk := range testChunks {
|
||||
c, err := chnker.Next()
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Error returned with chunk %d: %v", i, err)
|
||||
}
|
||||
|
||||
if c == nil {
|
||||
t.Fatalf("Nil chunk returned")
|
||||
}
|
||||
|
||||
if c != nil {
|
||||
if c.Start != pos {
|
||||
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
|
||||
i, pos, c.Start)
|
||||
}
|
||||
|
||||
if c.Length != chunk.Length {
|
||||
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
|
||||
i, chunk.Length, c.Length)
|
||||
}
|
||||
|
||||
if c.Cut != chunk.CutFP {
|
||||
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
||||
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
||||
}
|
||||
|
||||
if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
|
||||
t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
|
||||
i, len(chunks)-1, chunk.Digest, c.Digest)
|
||||
}
|
||||
|
||||
pos += c.Length
|
||||
chunks = append(chunks, c)
|
||||
}
|
||||
}
|
||||
|
||||
c, err := chnker.Next()
|
||||
|
||||
if c != nil {
|
||||
t.Fatal("additional non-nil chunk returned")
|
||||
}
|
||||
|
||||
if err != io.EOF {
|
||||
t.Fatal("wrong error returned after last chunk")
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
// getRandom returns count pseudo-random bytes generated by math/rand seeded
// with seed. Bytes are produced four at a time from Uint32(), least
// significant byte first, so the output is deterministic per seed; a count
// that is not a multiple of four simply uses part of the last word.
func getRandom(seed, count int) []byte {
	buf := make([]byte, count)

	rnd := rand.New(rand.NewSource(int64(seed)))
	for i := 0; i < count; i += 4 {
		r := rnd.Uint32()
		for j := 0; j < 4 && i+j < count; j++ {
			buf[i+j] = byte(r >> (8 * uint(j)))
		}
	}

	return buf
}
|
||||
|
||||
func TestChunker(t *testing.T) {
|
||||
// setup data source
|
||||
buf := getRandom(23, 32*1024*1024)
|
||||
ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
|
||||
chunks := testWithData(t, ch, chunks1)
|
||||
|
||||
// test reader
|
||||
for i, c := range chunks {
|
||||
rd := c.Reader(bytes.NewReader(buf))
|
||||
|
||||
h := sha256.New()
|
||||
n, err := io.Copy(h, rd)
|
||||
if err != nil {
|
||||
t.Fatalf("io.Copy(): %v", err)
|
||||
}
|
||||
|
||||
if uint(n) != chunks1[i].Length {
|
||||
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
|
||||
chunks1[i].Length, n)
|
||||
}
|
||||
|
||||
d := h.Sum(nil)
|
||||
if !bytes.Equal(d, chunks1[i].Digest) {
|
||||
t.Fatalf("wrong hash returned: expected %02x, got %02x",
|
||||
chunks1[i].Digest, d)
|
||||
}
|
||||
}
|
||||
|
||||
// setup nullbyte data source
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
||||
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
|
||||
|
||||
testWithData(t, ch, chunks2)
|
||||
}
|
||||
|
||||
func TestChunkerWithRandomPolynomial(t *testing.T) {
|
||||
// setup data source
|
||||
buf := getRandom(23, 32*1024*1024)
|
||||
|
||||
// generate a new random polynomial
|
||||
start := time.Now()
|
||||
p, err := chunker.RandomPolynomial()
|
||||
OK(t, err)
|
||||
t.Logf("generating random polynomial took %v", time.Since(start))
|
||||
|
||||
start = time.Now()
|
||||
ch := chunker.New(bytes.NewReader(buf), p, sha256.New())
|
||||
t.Logf("creating chunker took %v", time.Since(start))
|
||||
|
||||
// make sure that first chunk is different
|
||||
c, err := ch.Next()
|
||||
|
||||
Assert(t, c.Cut != chunks1[0].CutFP,
|
||||
"Cut point is the same")
|
||||
Assert(t, c.Length != chunks1[0].Length,
|
||||
"Length is the same")
|
||||
Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest),
|
||||
"Digest is the same")
|
||||
}
|
||||
|
||||
func TestChunkerWithoutHash(t *testing.T) {
|
||||
// setup data source
|
||||
buf := getRandom(23, 32*1024*1024)
|
||||
|
||||
ch := chunker.New(bytes.NewReader(buf), testPol, nil)
|
||||
chunks := testWithData(t, ch, chunks1)
|
||||
|
||||
// test reader
|
||||
for i, c := range chunks {
|
||||
rd := c.Reader(bytes.NewReader(buf))
|
||||
|
||||
buf2, err := ioutil.ReadAll(rd)
|
||||
if err != nil {
|
||||
t.Fatalf("io.Copy(): %v", err)
|
||||
}
|
||||
|
||||
if uint(len(buf2)) != chunks1[i].Length {
|
||||
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
|
||||
chunks1[i].Length, uint(len(buf2)))
|
||||
}
|
||||
|
||||
if uint(len(buf2)) != chunks1[i].Length {
|
||||
t.Fatalf("wrong number of bytes returned: expected %02x, got %02x",
|
||||
chunks[i].Length, len(buf2))
|
||||
}
|
||||
|
||||
if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
|
||||
t.Fatalf("invalid data for chunk returned: expected %02x, got %02x",
|
||||
buf[c.Start:c.Start+c.Length], buf2)
|
||||
}
|
||||
}
|
||||
|
||||
// setup nullbyte data source
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
||||
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
|
||||
|
||||
testWithData(t, ch, chunks2)
|
||||
}
|
||||
|
||||
func benchmarkChunker(b *testing.B, hash hash.Hash) {
|
||||
size := 10 * 1024 * 1024
|
||||
rd := bytes.NewReader(getRandom(23, size))
|
||||
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(size))
|
||||
|
||||
var chunks int
|
||||
for i := 0; i < b.N; i++ {
|
||||
chunks = 0
|
||||
|
||||
rd.Seek(0, 0)
|
||||
ch := chunker.New(rd, testPol, hash)
|
||||
|
||||
for {
|
||||
_, err := ch.Next()
|
||||
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
b.Fatalf("Unexpected error occurred: %v", err)
|
||||
}
|
||||
|
||||
chunks++
|
||||
}
|
||||
}
|
||||
|
||||
b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks)
|
||||
}
|
||||
|
||||
func BenchmarkChunkerWithSHA256(b *testing.B) {
|
||||
benchmarkChunker(b, sha256.New())
|
||||
}
|
||||
|
||||
func BenchmarkChunkerWithMD5(b *testing.B) {
|
||||
benchmarkChunker(b, md5.New())
|
||||
}
|
||||
|
||||
func BenchmarkChunker(b *testing.B) {
|
||||
benchmarkChunker(b, nil)
|
||||
}
|
||||
|
||||
func BenchmarkNewChunker(b *testing.B) {
|
||||
p, err := chunker.RandomPolynomial()
|
||||
OK(b, err)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
chunker.New(bytes.NewBuffer(nil), p, nil)
|
||||
}
|
||||
}
|
||||
82
Godeps/_workspace/src/github.com/whyrusleeping/chunker/doc.go
generated
vendored
82
Godeps/_workspace/src/github.com/whyrusleeping/chunker/doc.go
generated
vendored
@ -1,82 +0,0 @@
|
||||
// Copyright 2014 Alexander Neumann. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package chunker implements Content Defined Chunking (CDC) based on a rolling
|
||||
Rabin Checksum.
|
||||
|
||||
Choosing a Random Irreducible Polynomial
|
||||
|
||||
The function RandomPolynomial() returns a new random polynomial of degree 53
|
||||
for use with the chunker. The degree 53 is chosen because it is the largest
|
||||
prime below 64-8 = 56, so that the top 8 bits of an uint64 can be used for
|
||||
optimising calculations in the chunker.
|
||||
|
||||
A random polynomial is chosen by selecting 64 random bits, masking away bits
|
||||
64..54 and setting bit 53 to one (otherwise the polynomial is not of the
|
||||
desired degree) and bit 0 to one (otherwise the polynomial is trivially
|
||||
reducible), so that 51 bits are chosen at random.
|
||||
|
||||
This process is repeated until Irreducible() returns true, then this
|
||||
polynomial is returned. If this doesn't happen after 1 million tries, the
|
||||
function returns an error. The probability for selecting an irreducible
|
||||
polynomial at random is about 7.5% ( (2^53-2)/53 / 2^51), so the probability
|
||||
that no irreducible polynomial has been found after 100 tries is lower than
|
||||
0.04%.
|
||||
|
||||
Verifying Irreducible Polynomials
|
||||
|
||||
During development the results have been verified using the computational
|
||||
discrete algebra system GAP, which can be obtained from the website at
|
||||
http://www.gap-system.org/.
|
||||
|
||||
For filtering a given list of polynomials in hexadecimal coefficient notation,
|
||||
the following script can be used:
|
||||
|
||||
# create x over F_2 = GF(2)
|
||||
x := Indeterminate(GF(2), "x");
|
||||
|
||||
# test if polynomial is irreducible, i.e. the number of factors is one
|
||||
IrredPoly := function (poly)
|
||||
return (Length(Factors(poly)) = 1);
|
||||
end;;
|
||||
|
||||
# create a polynomial in x from the hexadecimal representation of the
|
||||
# coefficients
|
||||
Hex2Poly := function (s)
|
||||
return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
|
||||
end;;
|
||||
|
||||
# list of candidates, in hex
|
||||
candidates := [ "3DA3358B4DC173" ];
|
||||
|
||||
# create real polynomials
|
||||
L := List(candidates, Hex2Poly);
|
||||
|
||||
# filter and display the list of irreducible polynomials contained in L
|
||||
Display(Filtered(L, x -> (IrredPoly(x))));
|
||||
|
||||
All irreducible polynomials from the list are written to the output.
|
||||
|
||||
Background Literature
|
||||
|
||||
An introduction to Rabin Fingerprints/Checksums can be found in the following articles:
|
||||
|
||||
Michael O. Rabin (1981): "Fingerprinting by Random Polynomials"
|
||||
http://www.xmailserver.org/rabin.pdf
|
||||
|
||||
Ross N. Williams (1993): "A Painless Guide to CRC Error Detection Algorithms"
|
||||
http://www.zlib.net/crc_v3.txt
|
||||
|
||||
Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method"
|
||||
http://www.xmailserver.org/rabin_apps.pdf
|
||||
|
||||
Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields"
|
||||
http://www.math.clemson.edu/~sgao/papers/GP97a.pdf
|
||||
|
||||
Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget"
|
||||
http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
|
||||
|
||||
*/
|
||||
package chunker
|
||||
278
Godeps/_workspace/src/github.com/whyrusleeping/chunker/polynomials.go
generated
vendored
278
Godeps/_workspace/src/github.com/whyrusleeping/chunker/polynomials.go
generated
vendored
@ -1,278 +0,0 @@
|
||||
package chunker
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Pol is a polynomial from F_2[X].
|
||||
type Pol uint64 // bit i holds the coefficient of x^i, so the degree is the index of the highest set bit
|
||||
|
||||
// Add returns x+y.
|
||||
func (x Pol) Add(y Pol) Pol {
|
||||
r := Pol(uint64(x) ^ uint64(y))
|
||||
return r
|
||||
}
|
||||
|
||||
// mulOverflows returns true if the multiplication would overflow uint64.
|
||||
// Code by Rob Pike, see
|
||||
// https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ
|
||||
func mulOverflows(a, b Pol) bool {
|
||||
if a <= 1 || b <= 1 {
|
||||
return false
|
||||
}
|
||||
c := a.mul(b)
|
||||
d := c.Div(b)
|
||||
if d != a {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (x Pol) mul(y Pol) Pol {
|
||||
if x == 0 || y == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var res Pol
|
||||
for i := 0; i <= y.Deg(); i++ {
|
||||
if (y & (1 << uint(i))) > 0 {
|
||||
res = res.Add(x << uint(i))
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// Mul returns x*y. When an overflow occurs, Mul panics.
|
||||
func (x Pol) Mul(y Pol) Pol {
|
||||
if mulOverflows(x, y) {
|
||||
panic("multiplication would overflow uint64")
|
||||
}
|
||||
|
||||
return x.mul(y)
|
||||
}
|
||||
|
||||
// Deg returns the degree of the polynomial x. If x is zero, -1 is returned.
|
||||
func (x Pol) Deg() int {
|
||||
// the degree of 0 is -1
|
||||
if x == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
var mask Pol = (1 << 63)
|
||||
for i := 63; i >= 0; i-- {
|
||||
// test if bit i is set
|
||||
if x&mask > 0 {
|
||||
// this is the degree of x
|
||||
return i
|
||||
}
|
||||
mask >>= 1
|
||||
}
|
||||
|
||||
// fall-through, return -1
|
||||
return -1
|
||||
}
|
||||
|
||||
// String returns the coefficients in hex.
|
||||
func (x Pol) String() string {
|
||||
return "0x" + strconv.FormatUint(uint64(x), 16)
|
||||
}
|
||||
|
||||
// Expand returns the string representation of the polynomial x.
|
||||
func (x Pol) Expand() string {
|
||||
if x == 0 {
|
||||
return "0"
|
||||
}
|
||||
|
||||
s := ""
|
||||
for i := x.Deg(); i > 1; i-- {
|
||||
if x&(1<<uint(i)) > 0 {
|
||||
s += fmt.Sprintf("+x^%d", i)
|
||||
}
|
||||
}
|
||||
|
||||
if x&2 > 0 {
|
||||
s += "+x"
|
||||
}
|
||||
|
||||
if x&1 > 0 {
|
||||
s += "+1"
|
||||
}
|
||||
|
||||
return s[1:]
|
||||
}
|
||||
|
||||
// DivMod returns x / d = q, and remainder r,
|
||||
// see https://en.wikipedia.org/wiki/Division_algorithm
|
||||
func (x Pol) DivMod(d Pol) (Pol, Pol) {
|
||||
if x == 0 {
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
if d == 0 {
|
||||
panic("division by zero")
|
||||
}
|
||||
|
||||
D := d.Deg()
|
||||
diff := x.Deg() - D
|
||||
if diff < 0 {
|
||||
return 0, x
|
||||
}
|
||||
|
||||
var q Pol
|
||||
for diff >= 0 {
|
||||
m := d << uint(diff)
|
||||
q |= (1 << uint(diff))
|
||||
x = x.Add(m)
|
||||
|
||||
diff = x.Deg() - D
|
||||
}
|
||||
|
||||
return q, x
|
||||
}
|
||||
|
||||
// Div returns the integer division result x / d.
|
||||
func (x Pol) Div(d Pol) Pol {
|
||||
q, _ := x.DivMod(d)
|
||||
return q
|
||||
}
|
||||
|
||||
// Mod returns the remainder of x / d
|
||||
func (x Pol) Mod(d Pol) Pol {
|
||||
_, r := x.DivMod(d)
|
||||
return r
|
||||
}
|
||||
|
||||
// I really dislike having a function that does not terminate, so specify a
|
||||
// really large upper bound for finding a new irreducible polynomial, and
|
||||
// return an error when no irreducible polynomial has been found within
|
||||
// randPolMaxTries.
|
||||
const randPolMaxTries = 1e6 // one million attempts
|
||||
|
||||
// RandomPolynomial returns a new random irreducible polynomial of degree 53
|
||||
// (largest prime number below 64-8). There are (2^53-2/53) irreducible
|
||||
// polynomials of degree 53 in F_2[X], c.f. Michael O. Rabin (1981):
|
||||
// "Fingerprinting by Random Polynomials", page 4. If no polynomial could be
|
||||
// found in one million tries, an error is returned.
|
||||
func RandomPolynomial() (Pol, error) {
|
||||
for i := 0; i < randPolMaxTries; i++ {
|
||||
var f Pol
|
||||
|
||||
// choose polynomial at random
|
||||
err := binary.Read(rand.Reader, binary.LittleEndian, &f)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// mask away bits above bit 53
|
||||
f &= Pol((1 << 54) - 1)
|
||||
|
||||
// set highest and lowest bit so that the degree is 53 and the
|
||||
// polynomial is not trivially reducible
|
||||
f |= (1 << 53) | 1
|
||||
|
||||
// test if f is irreducible
|
||||
if f.Irreducible() {
|
||||
return f, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If this is reached, we haven't found an irreducible polynomial in
|
||||
// randPolMaxTries. This error is very unlikely to occur.
|
||||
return 0, errors.New("unable to find new random irreducible polynomial")
|
||||
}
|
||||
|
||||
// GCD computes the Greatest Common Divisor x and f.
|
||||
func (x Pol) GCD(f Pol) Pol {
|
||||
if f == 0 {
|
||||
return x
|
||||
}
|
||||
|
||||
if x == 0 {
|
||||
return f
|
||||
}
|
||||
|
||||
if x.Deg() < f.Deg() {
|
||||
x, f = f, x
|
||||
}
|
||||
|
||||
return f.GCD(x.Mod(f))
|
||||
}
|
||||
|
||||
// Irreducible returns true iff x is irreducible over F_2. This function
|
||||
// uses Ben Or's reducibility test.
|
||||
//
|
||||
// For details see "Tests and Constructions of Irreducible Polynomials over
|
||||
// Finite Fields".
|
||||
func (x Pol) Irreducible() bool {
|
||||
for i := 1; i <= x.Deg()/2; i++ {
|
||||
if x.GCD(qp(uint(i), x)) != 1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// MulMod computes x*f mod g
|
||||
func (x Pol) MulMod(f, g Pol) Pol {
|
||||
if x == 0 || f == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var res Pol
|
||||
for i := 0; i <= f.Deg(); i++ {
|
||||
if (f & (1 << uint(i))) > 0 {
|
||||
a := x
|
||||
for j := 0; j < i; j++ {
|
||||
a = a.Mul(2).Mod(g)
|
||||
}
|
||||
res = res.Add(a).Mod(g)
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the
|
||||
// reducibility test.
|
||||
func qp(p uint, g Pol) Pol {
|
||||
num := (1 << p)
|
||||
i := 1
|
||||
|
||||
// start with x
|
||||
res := Pol(2)
|
||||
|
||||
for i < num {
|
||||
// repeatedly square res
|
||||
res = res.MulMod(res, g)
|
||||
i *= 2
|
||||
}
|
||||
|
||||
// add x
|
||||
return res.Add(2).Mod(g)
|
||||
}
|
||||
|
||||
func (p Pol) MarshalJSON() ([]byte, error) {
|
||||
buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16)
|
||||
buf = append(buf, '"')
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func (p *Pol) UnmarshalJSON(data []byte) error {
|
||||
if len(data) < 2 {
|
||||
return errors.New("invalid string for polynomial")
|
||||
}
|
||||
n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*p = Pol(n)
|
||||
|
||||
return nil
|
||||
}
|
||||
385
Godeps/_workspace/src/github.com/whyrusleeping/chunker/polynomials_test.go
generated
vendored
385
Godeps/_workspace/src/github.com/whyrusleeping/chunker/polynomials_test.go
generated
vendored
@ -1,385 +0,0 @@
|
||||
package chunker_test
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
. "github.com/restic/restic/test"
|
||||
)
|
||||
|
||||
var polAddTests = []struct {
|
||||
x, y chunker.Pol
|
||||
sum chunker.Pol
|
||||
}{
|
||||
{23, 16, 23 ^ 16},
|
||||
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
|
||||
{0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0},
|
||||
}
|
||||
|
||||
func TestPolAdd(t *testing.T) {
|
||||
for _, test := range polAddTests {
|
||||
Equals(t, test.sum, test.x.Add(test.y))
|
||||
Equals(t, test.sum, test.y.Add(test.x))
|
||||
}
|
||||
}
|
||||
|
||||
func parseBin(s string) chunker.Pol {
|
||||
i, err := strconv.ParseUint(s, 2, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return chunker.Pol(i)
|
||||
}
|
||||
|
||||
var polMulTests = []struct {
|
||||
x, y chunker.Pol
|
||||
res chunker.Pol
|
||||
}{
|
||||
{1, 2, 2},
|
||||
{
|
||||
parseBin("1101"),
|
||||
parseBin("10"),
|
||||
parseBin("11010"),
|
||||
},
|
||||
{
|
||||
parseBin("1101"),
|
||||
parseBin("11"),
|
||||
parseBin("10111"),
|
||||
},
|
||||
{
|
||||
0x40000000,
|
||||
0x40000000,
|
||||
0x1000000000000000,
|
||||
},
|
||||
{
|
||||
parseBin("1010"),
|
||||
parseBin("100100"),
|
||||
parseBin("101101000"),
|
||||
},
|
||||
{
|
||||
parseBin("100"),
|
||||
parseBin("11"),
|
||||
parseBin("1100"),
|
||||
},
|
||||
{
|
||||
parseBin("11"),
|
||||
parseBin("110101"),
|
||||
parseBin("1011111"),
|
||||
},
|
||||
{
|
||||
parseBin("10011"),
|
||||
parseBin("110101"),
|
||||
parseBin("1100001111"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestPolMul(t *testing.T) {
|
||||
for i, test := range polMulTests {
|
||||
m := test.x.Mul(test.y)
|
||||
Assert(t, test.res == m,
|
||||
"TestPolMul failed for test %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
m = test.y.Mul(test.x)
|
||||
Assert(t, test.res == test.y.Mul(test.x),
|
||||
"TestPolMul failed for %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPolMulOverflow(t *testing.T) {
|
||||
defer func() {
|
||||
// try to recover overflow error
|
||||
err := recover()
|
||||
|
||||
if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
|
||||
return
|
||||
} else {
|
||||
t.Logf("invalid error raised: %v", err)
|
||||
// re-raise error if not overflow
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
|
||||
x := chunker.Pol(1 << 63)
|
||||
x.Mul(2)
|
||||
t.Fatal("overflow test did not panic")
|
||||
}
|
||||
|
||||
var polDivTests = []struct {
|
||||
x, y chunker.Pol
|
||||
res chunker.Pol
|
||||
}{
|
||||
{10, 50, 0},
|
||||
{0, 1, 0},
|
||||
{
|
||||
parseBin("101101000"), // 0x168
|
||||
parseBin("1010"), // 0xa
|
||||
parseBin("100100"), // 0x24
|
||||
},
|
||||
{2, 2, 1},
|
||||
{
|
||||
0x8000000000000000,
|
||||
0x8000000000000000,
|
||||
1,
|
||||
},
|
||||
{
|
||||
parseBin("1100"),
|
||||
parseBin("100"),
|
||||
parseBin("11"),
|
||||
},
|
||||
{
|
||||
parseBin("1100001111"),
|
||||
parseBin("10011"),
|
||||
parseBin("110101"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestPolDiv(t *testing.T) {
|
||||
for i, test := range polDivTests {
|
||||
m := test.x.Div(test.y)
|
||||
Assert(t, test.res == m,
|
||||
"TestPolDiv failed for test %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
}
|
||||
}
|
||||
|
||||
var polModTests = []struct {
|
||||
x, y chunker.Pol
|
||||
res chunker.Pol
|
||||
}{
|
||||
{10, 50, 10},
|
||||
{0, 1, 0},
|
||||
{
|
||||
parseBin("101101001"),
|
||||
parseBin("1010"),
|
||||
parseBin("1"),
|
||||
},
|
||||
{2, 2, 0},
|
||||
{
|
||||
0x8000000000000000,
|
||||
0x8000000000000000,
|
||||
0,
|
||||
},
|
||||
{
|
||||
parseBin("1100"),
|
||||
parseBin("100"),
|
||||
parseBin("0"),
|
||||
},
|
||||
{
|
||||
parseBin("1100001111"),
|
||||
parseBin("10011"),
|
||||
parseBin("0"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestPolModt(t *testing.T) {
|
||||
for _, test := range polModTests {
|
||||
Equals(t, test.res, test.x.Mod(test.y))
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolDivMod(t *testing.B) {
|
||||
f := chunker.Pol(0x2482734cacca49)
|
||||
g := chunker.Pol(0x3af4b284899)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
g.DivMod(f)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolDiv(t *testing.B) {
|
||||
f := chunker.Pol(0x2482734cacca49)
|
||||
g := chunker.Pol(0x3af4b284899)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
g.Div(f)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolMod(t *testing.B) {
|
||||
f := chunker.Pol(0x2482734cacca49)
|
||||
g := chunker.Pol(0x3af4b284899)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
g.Mod(f)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolDeg(t *testing.B) {
|
||||
f := chunker.Pol(0x3af4b284899)
|
||||
d := f.Deg()
|
||||
if d != 41 {
|
||||
t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d",
|
||||
d, 41)
|
||||
}
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
f.Deg()
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandomPolynomial(t *testing.T) {
|
||||
_, err := chunker.RandomPolynomial()
|
||||
OK(t, err)
|
||||
}
|
||||
|
||||
func BenchmarkRandomPolynomial(t *testing.B) {
|
||||
for i := 0; i < t.N; i++ {
|
||||
_, err := chunker.RandomPolynomial()
|
||||
OK(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpandPolynomial(t *testing.T) {
|
||||
pol := chunker.Pol(0x3DA3358B4DC173)
|
||||
s := pol.Expand()
|
||||
Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s)
|
||||
}
|
||||
|
||||
var polIrredTests = []struct {
|
||||
f chunker.Pol
|
||||
irred bool
|
||||
}{
|
||||
{0x38f1e565e288df, false},
|
||||
{0x3DA3358B4DC173, true},
|
||||
{0x30a8295b9d5c91, false},
|
||||
{0x255f4350b962cb, false},
|
||||
{0x267f776110a235, false},
|
||||
{0x2f4dae10d41227, false},
|
||||
{0x2482734cacca49, true},
|
||||
{0x312daf4b284899, false},
|
||||
{0x29dfb6553d01d1, false},
|
||||
{0x3548245eb26257, false},
|
||||
{0x3199e7ef4211b3, false},
|
||||
{0x362f39017dae8b, false},
|
||||
{0x200d57aa6fdacb, false},
|
||||
{0x35e0a4efa1d275, false},
|
||||
{0x2ced55b026577f, false},
|
||||
{0x260b012010893d, false},
|
||||
{0x2df29cbcd59e9d, false},
|
||||
{0x3f2ac7488bd429, false},
|
||||
{0x3e5cb1711669fb, false},
|
||||
{0x226d8de57a9959, false},
|
||||
{0x3c8de80aaf5835, false},
|
||||
{0x2026a59efb219b, false},
|
||||
{0x39dfa4d13fb231, false},
|
||||
{0x3143d0464b3299, false},
|
||||
}
|
||||
|
||||
func TestPolIrreducible(t *testing.T) {
|
||||
for _, test := range polIrredTests {
|
||||
Assert(t, test.f.Irreducible() == test.irred,
|
||||
"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
|
||||
test.f, test.f.Irreducible(), test.irred)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolIrreducible(b *testing.B) {
|
||||
// find first irreducible polynomial
|
||||
var pol chunker.Pol
|
||||
for _, test := range polIrredTests {
|
||||
if test.irred {
|
||||
pol = test.f
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
Assert(b, pol.Irreducible(),
|
||||
"Irreducibility test for Polynomial %v failed", pol)
|
||||
}
|
||||
}
|
||||
|
||||
var polGCDTests = []struct {
|
||||
f1 chunker.Pol
|
||||
f2 chunker.Pol
|
||||
gcd chunker.Pol
|
||||
}{
|
||||
{10, 50, 2},
|
||||
{0, 1, 1},
|
||||
{
|
||||
parseBin("101101001"),
|
||||
parseBin("1010"),
|
||||
parseBin("1"),
|
||||
},
|
||||
{2, 2, 2},
|
||||
{
|
||||
parseBin("1010"),
|
||||
parseBin("11"),
|
||||
parseBin("11"),
|
||||
},
|
||||
{
|
||||
0x8000000000000000,
|
||||
0x8000000000000000,
|
||||
0x8000000000000000,
|
||||
},
|
||||
{
|
||||
parseBin("1100"),
|
||||
parseBin("101"),
|
||||
parseBin("11"),
|
||||
},
|
||||
{
|
||||
parseBin("1100001111"),
|
||||
parseBin("10011"),
|
||||
parseBin("10011"),
|
||||
},
|
||||
{
|
||||
0x3DA3358B4DC173,
|
||||
0x3DA3358B4DC173,
|
||||
0x3DA3358B4DC173,
|
||||
},
|
||||
{
|
||||
0x3DA3358B4DC173,
|
||||
0x230d2259defd,
|
||||
1,
|
||||
},
|
||||
{
|
||||
0x230d2259defd,
|
||||
0x51b492b3eff2,
|
||||
parseBin("10011"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestPolGCD(t *testing.T) {
|
||||
for i, test := range polGCDTests {
|
||||
gcd := test.f1.GCD(test.f2)
|
||||
Assert(t, test.gcd == gcd,
|
||||
"GCD test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, gcd, test.gcd)
|
||||
gcd = test.f2.GCD(test.f1)
|
||||
Assert(t, test.gcd == gcd,
|
||||
"GCD test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, gcd, test.gcd)
|
||||
}
|
||||
}
|
||||
|
||||
var polMulModTests = []struct {
|
||||
f1 chunker.Pol
|
||||
f2 chunker.Pol
|
||||
g chunker.Pol
|
||||
mod chunker.Pol
|
||||
}{
|
||||
{
|
||||
0x1230,
|
||||
0x230,
|
||||
0x55,
|
||||
0x22,
|
||||
},
|
||||
{
|
||||
0x0eae8c07dbbb3026,
|
||||
0xd5d6db9de04771de,
|
||||
0xdd2bda3b77c9,
|
||||
0x425ae8595b7a,
|
||||
},
|
||||
}
|
||||
|
||||
func TestPolMulMod(t *testing.T) {
|
||||
for i, test := range polMulModTests {
|
||||
mod := test.f1.MulMod(test.f2, test.g)
|
||||
Assert(t, mod == test.mod,
|
||||
"MulMod test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, mod, test.mod)
|
||||
}
|
||||
}
|
||||
@ -16,7 +16,6 @@ import (
|
||||
coreapi "github.com/ipfs/go-ipfs/core/coreapi"
|
||||
coreiface "github.com/ipfs/go-ipfs/core/coreapi/interface"
|
||||
"github.com/ipfs/go-ipfs/importer"
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
dagutils "github.com/ipfs/go-ipfs/merkledag/utils"
|
||||
path "github.com/ipfs/go-ipfs/path"
|
||||
@ -25,6 +24,7 @@ import (
|
||||
|
||||
humanize "gx/ipfs/QmPSBJL4momYnE7DcUyk2DVhD6rH488ZmHBGLbxNdhU44K/go-humanize"
|
||||
routing "gx/ipfs/QmTiWLZ6Fo5j4KcTVutZJ5KWRRJrbxzmxA4td8NfEdrPh7/go-libp2p-routing"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
multibase "gx/ipfs/QmexBtiTTEwwn42Yi6ouKt6VqzpA6wjJgiW1oh9VfaRrup/go-multibase"
|
||||
@ -58,7 +58,7 @@ func (i *gatewayHandler) newDagFromReader(r io.Reader) (ipld.Node, error) {
|
||||
// return ufs.AddFromReader(i.node, r.Body)
|
||||
return importer.BuildDagFromReader(
|
||||
i.node.DAG,
|
||||
chunk.DefaultSplitter(r))
|
||||
chunker.DefaultSplitter(r))
|
||||
}
|
||||
|
||||
// TODO(btc): break this apart into separate handlers using a more expressive muxer
|
||||
|
||||
@ -15,7 +15,6 @@ import (
|
||||
core "github.com/ipfs/go-ipfs/core"
|
||||
"github.com/ipfs/go-ipfs/exchange/offline"
|
||||
balanced "github.com/ipfs/go-ipfs/importer/balanced"
|
||||
"github.com/ipfs/go-ipfs/importer/chunk"
|
||||
ihelper "github.com/ipfs/go-ipfs/importer/helpers"
|
||||
trickle "github.com/ipfs/go-ipfs/importer/trickle"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
@ -27,6 +26,7 @@ import (
|
||||
ds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore"
|
||||
syncds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore/sync"
|
||||
logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
files "gx/ipfs/QmceUdzxkimdYsgtX733uNgzf1DLHyBKN6ehGSp85ayppM/go-ipfs-cmdkit/files"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
@ -134,7 +134,7 @@ func (adder *Adder) SetMfsRoot(r *mfs.Root) {
|
||||
|
||||
// Constructs a node from reader's data, and adds it. Doesn't pin.
|
||||
func (adder *Adder) add(reader io.Reader) (ipld.Node, error) {
|
||||
chnk, err := chunk.FromString(reader, adder.Chunker)
|
||||
chnk, err := chunker.FromString(reader, adder.Chunker)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -11,7 +11,6 @@ import (
|
||||
core "github.com/ipfs/go-ipfs/core"
|
||||
offline "github.com/ipfs/go-ipfs/exchange/offline"
|
||||
importer "github.com/ipfs/go-ipfs/importer"
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
merkledag "github.com/ipfs/go-ipfs/merkledag"
|
||||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
@ -19,6 +18,7 @@ import (
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
ds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore"
|
||||
dssync "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore/sync"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
@ -37,7 +37,7 @@ func TestMetadata(t *testing.T) {
|
||||
data := make([]byte, 1000)
|
||||
u.NewTimeSeededRand().Read(data)
|
||||
r := bytes.NewReader(data)
|
||||
nd, err := importer.BuildDagFromReader(ds, chunk.DefaultSplitter(r))
|
||||
nd, err := importer.BuildDagFromReader(ds, chunker.DefaultSplitter(r))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@ -17,12 +17,12 @@ import (
|
||||
coreunix "github.com/ipfs/go-ipfs/core/coreunix"
|
||||
coremock "github.com/ipfs/go-ipfs/core/mock"
|
||||
importer "github.com/ipfs/go-ipfs/importer"
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
ci "gx/ipfs/QmVvkK7s5imCiq3JVbL3pGfnhcCnf3LrFJPF4GE2sAoGZf/go-testutil/ci"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
ci "gx/ipfs/QmVvkK7s5imCiq3JVbL3pGfnhcCnf3LrFJPF4GE2sAoGZf/go-testutil/ci"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
fstest "gx/ipfs/QmaFNtBAXX4nVMQWbUqNysXyhevUj1k4B1y5uS45LC7Vw9/fuse/fs/fstestutil"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
@ -37,7 +37,7 @@ func randObj(t *testing.T, nd *core.IpfsNode, size int64) (ipld.Node, []byte) {
|
||||
buf := make([]byte, size)
|
||||
u.NewTimeSeededRand().Read(buf)
|
||||
read := bytes.NewReader(buf)
|
||||
obj, err := importer.BuildTrickleDagFromReader(nd.DAG, chunk.DefaultSplitter(read))
|
||||
obj, err := importer.BuildTrickleDagFromReader(nd.DAG, chunker.DefaultSplitter(read))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@ -9,19 +9,19 @@ import (
|
||||
mrand "math/rand"
|
||||
"testing"
|
||||
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
h "github.com/ipfs/go-ipfs/importer/helpers"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
mdtest "github.com/ipfs/go-ipfs/merkledag/test"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
|
||||
// TODO: extract these tests and more as a generic layout test suite
|
||||
|
||||
func buildTestDag(ds ipld.DAGService, spl chunk.Splitter) (*dag.ProtoNode, error) {
|
||||
func buildTestDag(ds ipld.DAGService, spl chunker.Splitter) (*dag.ProtoNode, error) {
|
||||
dbp := h.DagBuilderParams{
|
||||
Dagserv: ds,
|
||||
Maxlinks: h.DefaultLinksPerBlock,
|
||||
@ -40,7 +40,7 @@ func getTestDag(t *testing.T, ds ipld.DAGService, size int64, blksize int64) (*d
|
||||
u.NewTimeSeededRand().Read(data)
|
||||
r := bytes.NewReader(data)
|
||||
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(r, blksize))
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(r, blksize))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -74,7 +74,7 @@ func testFileConsistency(t *testing.T, nbytes int64, blksize int64) {
|
||||
}
|
||||
|
||||
func TestBuilderConsistency(t *testing.T) {
|
||||
testFileConsistency(t, 100000, chunk.DefaultBlockSize)
|
||||
testFileConsistency(t, 100000, chunker.DefaultBlockSize)
|
||||
}
|
||||
|
||||
func TestNoChunking(t *testing.T) {
|
||||
|
||||
@ -1,79 +0,0 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FromString returns a Splitter depending on the given string:
|
||||
// it supports "default" (""), "size-{size}", "rabin", "rabin-{blocksize}" and
|
||||
// "rabin-{min}-{avg}-{max}".
|
||||
func FromString(r io.Reader, chunker string) (Splitter, error) {
|
||||
switch {
|
||||
case chunker == "" || chunker == "default":
|
||||
return DefaultSplitter(r), nil
|
||||
|
||||
case strings.HasPrefix(chunker, "size-"):
|
||||
sizeStr := strings.Split(chunker, "-")[1]
|
||||
size, err := strconv.Atoi(sizeStr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewSizeSplitter(r, int64(size)), nil
|
||||
|
||||
case strings.HasPrefix(chunker, "rabin"):
|
||||
return parseRabinString(r, chunker)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
|
||||
parts := strings.Split(chunker, "-")
|
||||
switch len(parts) {
|
||||
case 1:
|
||||
return NewRabin(r, uint64(DefaultBlockSize)), nil
|
||||
case 2:
|
||||
size, err := strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewRabin(r, uint64(size)), nil
|
||||
case 4:
|
||||
sub := strings.Split(parts[1], ":")
|
||||
if len(sub) > 1 && sub[0] != "min" {
|
||||
return nil, errors.New("first label must be min")
|
||||
}
|
||||
min, err := strconv.Atoi(sub[len(sub)-1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sub = strings.Split(parts[2], ":")
|
||||
if len(sub) > 1 && sub[0] != "avg" {
|
||||
log.Error("sub == ", sub)
|
||||
return nil, errors.New("second label must be avg")
|
||||
}
|
||||
avg, err := strconv.Atoi(sub[len(sub)-1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sub = strings.Split(parts[3], ":")
|
||||
if len(sub) > 1 && sub[0] != "max" {
|
||||
return nil, errors.New("final label must be max")
|
||||
}
|
||||
max, err := strconv.Atoi(sub[len(sub)-1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil
|
||||
default:
|
||||
return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
|
||||
}
|
||||
}
|
||||
@ -1,54 +0,0 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"hash/fnv"
|
||||
"io"
|
||||
|
||||
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/whyrusleeping/chunker"
|
||||
)
|
||||
|
||||
// IpfsRabinPoly is the irreducible polynomial of degree 53 used by for Rabin.
|
||||
var IpfsRabinPoly = chunker.Pol(17437180132763653)
|
||||
|
||||
// Rabin implements the Splitter interface and splits content with Rabin
|
||||
// fingerprints.
|
||||
type Rabin struct {
|
||||
r *chunker.Chunker
|
||||
reader io.Reader
|
||||
}
|
||||
|
||||
// NewRabin creates a new Rabin splitter with the given
|
||||
// average block size.
|
||||
func NewRabin(r io.Reader, avgBlkSize uint64) *Rabin {
|
||||
min := avgBlkSize / 3
|
||||
max := avgBlkSize + (avgBlkSize / 2)
|
||||
|
||||
return NewRabinMinMax(r, min, avgBlkSize, max)
|
||||
}
|
||||
|
||||
// NewRabinMinMax returns a new Rabin splitter which uses
|
||||
// the given min, average and max block sizes.
|
||||
func NewRabinMinMax(r io.Reader, min, avg, max uint64) *Rabin {
|
||||
h := fnv.New32a()
|
||||
ch := chunker.New(r, IpfsRabinPoly, h, avg, min, max)
|
||||
|
||||
return &Rabin{
|
||||
r: ch,
|
||||
reader: r,
|
||||
}
|
||||
}
|
||||
|
||||
// NextBytes reads the next bytes from the reader and returns a slice.
|
||||
func (r *Rabin) NextBytes() ([]byte, error) {
|
||||
ch, err := r.r.Next()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ch.Data, nil
|
||||
}
|
||||
|
||||
// Reader returns the io.Reader associated to this Splitter.
|
||||
func (r *Rabin) Reader() io.Reader {
|
||||
return r.reader
|
||||
}
|
||||
@ -1,81 +0,0 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
util "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
blocks "gx/ipfs/Qmej7nf81hi2x2tvjRBF3mcp74sQyuDH4VMYDGd1YtXjb2/go-block-format"
|
||||
)
|
||||
|
||||
func TestRabinChunking(t *testing.T) {
|
||||
data := make([]byte, 1024*1024*16)
|
||||
util.NewTimeSeededRand().Read(data)
|
||||
|
||||
r := NewRabin(bytes.NewReader(data), 1024*256)
|
||||
|
||||
var chunks [][]byte
|
||||
|
||||
for {
|
||||
chunk, err := r.NextBytes()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
chunks = append(chunks, chunk)
|
||||
}
|
||||
|
||||
fmt.Printf("average block size: %d\n", len(data)/len(chunks))
|
||||
|
||||
unchunked := bytes.Join(chunks, nil)
|
||||
if !bytes.Equal(unchunked, data) {
|
||||
fmt.Printf("%d %d\n", len(unchunked), len(data))
|
||||
t.Fatal("data was chunked incorrectly")
|
||||
}
|
||||
}
|
||||
|
||||
func chunkData(t *testing.T, data []byte) map[string]blocks.Block {
|
||||
r := NewRabin(bytes.NewReader(data), 1024*256)
|
||||
|
||||
blkmap := make(map[string]blocks.Block)
|
||||
|
||||
for {
|
||||
blk, err := r.NextBytes()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
b := blocks.NewBlock(blk)
|
||||
blkmap[b.Cid().KeyString()] = b
|
||||
}
|
||||
|
||||
return blkmap
|
||||
}
|
||||
|
||||
func TestRabinChunkReuse(t *testing.T) {
|
||||
data := make([]byte, 1024*1024*16)
|
||||
util.NewTimeSeededRand().Read(data)
|
||||
|
||||
ch1 := chunkData(t, data[1000:])
|
||||
ch2 := chunkData(t, data)
|
||||
|
||||
var extra int
|
||||
for k := range ch2 {
|
||||
_, ok := ch1[k]
|
||||
if !ok {
|
||||
extra++
|
||||
}
|
||||
}
|
||||
|
||||
if extra > 2 {
|
||||
t.Log("too many spare chunks made")
|
||||
}
|
||||
}
|
||||
@ -1,105 +0,0 @@
|
||||
// Package chunk implements streaming block splitters.
|
||||
// Splitters read data from a reader and provide byte slices (chunks).
|
||||
// The size and contents of these slices depend on the splitting method
|
||||
// used.
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log"
|
||||
mpool "gx/ipfs/QmWBug6eBS7AxRdCDVuSY5CnSit7cS2XnPFYJWqWDumhCG/go-msgio/mpool"
|
||||
)
|
||||
|
||||
var log = logging.Logger("chunk")
|
||||
|
||||
// DefaultBlockSize is the chunk size that splitters produce (or aim to).
|
||||
var DefaultBlockSize int64 = 1024 * 256
|
||||
|
||||
// A Splitter turns the bytes of a Reader into "chunks" (byte slices) that
// can be used to build DAG nodes.
type Splitter interface {
	// Reader returns the reader the chunks are taken from.
	Reader() io.Reader
	// NextBytes returns the next chunk, or an error if none can be produced.
	NextBytes() ([]byte, error)
}
|
||||
|
||||
// SplitterGen is a splitter generator, given a reader.
|
||||
type SplitterGen func(r io.Reader) Splitter
|
||||
|
||||
// DefaultSplitter returns a SizeSplitter with the DefaultBlockSize.
|
||||
func DefaultSplitter(r io.Reader) Splitter {
|
||||
return NewSizeSplitter(r, DefaultBlockSize)
|
||||
}
|
||||
|
||||
// SizeSplitterGen returns a SplitterGen function which will create
|
||||
// a splitter with the given size when called.
|
||||
func SizeSplitterGen(size int64) SplitterGen {
|
||||
return func(r io.Reader) Splitter {
|
||||
return NewSizeSplitter(r, size)
|
||||
}
|
||||
}
|
||||
|
||||
// Chan returns a channel that receives each of the chunks produced
|
||||
// by a splitter, along with another one for errors.
|
||||
func Chan(s Splitter) (<-chan []byte, <-chan error) {
|
||||
out := make(chan []byte)
|
||||
errs := make(chan error, 1)
|
||||
go func() {
|
||||
defer close(out)
|
||||
defer close(errs)
|
||||
|
||||
// all-chunks loop (keep creating chunks)
|
||||
for {
|
||||
b, err := s.NextBytes()
|
||||
if err != nil {
|
||||
errs <- err
|
||||
return
|
||||
}
|
||||
|
||||
out <- b
|
||||
}
|
||||
}()
|
||||
return out, errs
|
||||
}
|
||||
|
||||
// sizeSplitterv2 is a Splitter that cuts its input into chunks of a fixed
// size.
type sizeSplitterv2 struct {
	// r is the source of the data.
	r io.Reader
	// size is the length of every chunk except possibly the last one.
	size uint32
	// err, once set, is returned by every further call to NextBytes.
	err error
}
|
||||
|
||||
// NewSizeSplitter returns a new size-based Splitter with the given block size.
|
||||
func NewSizeSplitter(r io.Reader, size int64) Splitter {
|
||||
return &sizeSplitterv2{
|
||||
r: r,
|
||||
size: uint32(size),
|
||||
}
|
||||
}
|
||||
|
||||
// NextBytes produces a new chunk.
|
||||
func (ss *sizeSplitterv2) NextBytes() ([]byte, error) {
|
||||
if ss.err != nil {
|
||||
return nil, ss.err
|
||||
}
|
||||
|
||||
full := mpool.ByteSlicePool.Get(ss.size).([]byte)[:ss.size]
|
||||
n, err := io.ReadFull(ss.r, full)
|
||||
switch err {
|
||||
case io.ErrUnexpectedEOF:
|
||||
ss.err = io.EOF
|
||||
small := make([]byte, n)
|
||||
copy(small, full)
|
||||
mpool.ByteSlicePool.Put(ss.size, full)
|
||||
return small, nil
|
||||
case nil:
|
||||
return full, nil
|
||||
default:
|
||||
mpool.ByteSlicePool.Put(ss.size, full)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Reader returns the io.Reader associated to this Splitter.
|
||||
func (ss *sizeSplitterv2) Reader() io.Reader {
|
||||
return ss.r
|
||||
}
|
||||
@ -1,120 +0,0 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
)
|
||||
|
||||
func randBuf(t *testing.T, size int) []byte {
|
||||
buf := make([]byte, size)
|
||||
if _, err := u.NewTimeSeededRand().Read(buf); err != nil {
|
||||
t.Fatal("failed to read enough randomness")
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
// copyBuf returns a newly allocated slice with the same contents as buf, so
// that changes to one do not affect the other.
func copyBuf(buf []byte) []byte {
	return append(make([]byte, 0, len(buf)), buf...)
}
|
||||
|
||||
func TestSizeSplitterOverAllocate(t *testing.T) {
|
||||
max := 1000
|
||||
r := bytes.NewReader(randBuf(t, max))
|
||||
chunksize := int64(1024 * 256)
|
||||
splitter := NewSizeSplitter(r, chunksize)
|
||||
chunk, err := splitter.NextBytes()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if cap(chunk) > len(chunk) {
|
||||
t.Fatal("chunk capacity too large")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSizeSplitterIsDeterministic(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.SkipNow()
|
||||
}
|
||||
|
||||
test := func() {
|
||||
bufR := randBuf(t, 10000000) // crank this up to satisfy yourself.
|
||||
bufA := copyBuf(bufR)
|
||||
bufB := copyBuf(bufR)
|
||||
|
||||
chunksA, _ := Chan(DefaultSplitter(bytes.NewReader(bufA)))
|
||||
chunksB, _ := Chan(DefaultSplitter(bytes.NewReader(bufB)))
|
||||
|
||||
for n := 0; ; n++ {
|
||||
a, moreA := <-chunksA
|
||||
b, moreB := <-chunksB
|
||||
|
||||
if !moreA {
|
||||
if moreB {
|
||||
t.Fatal("A ended, B didnt.")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if !bytes.Equal(a, b) {
|
||||
t.Fatalf("chunk %d not equal", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for run := 0; run < 1; run++ { // crank this up to satisfy yourself.
|
||||
test()
|
||||
}
|
||||
}
|
||||
|
||||
func TestSizeSplitterFillsChunks(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.SkipNow()
|
||||
}
|
||||
|
||||
max := 10000000
|
||||
b := randBuf(t, max)
|
||||
r := &clipReader{r: bytes.NewReader(b), size: 4000}
|
||||
chunksize := int64(1024 * 256)
|
||||
c, _ := Chan(NewSizeSplitter(r, chunksize))
|
||||
|
||||
sofar := 0
|
||||
whole := make([]byte, max)
|
||||
for chunk := range c {
|
||||
|
||||
bc := b[sofar : sofar+len(chunk)]
|
||||
if !bytes.Equal(bc, chunk) {
|
||||
t.Fatalf("chunk not correct: (sofar: %d) %d != %d, %v != %v", sofar, len(bc), len(chunk), bc[:100], chunk[:100])
|
||||
}
|
||||
|
||||
copy(whole[sofar:], chunk)
|
||||
|
||||
sofar += len(chunk)
|
||||
if sofar != max && len(chunk) < int(chunksize) {
|
||||
t.Fatal("sizesplitter split at a smaller size")
|
||||
}
|
||||
}
|
||||
|
||||
if !bytes.Equal(b, whole) {
|
||||
t.Fatal("splitter did not split right")
|
||||
}
|
||||
}
|
||||
|
||||
// clipReader wraps a reader and limits how many bytes a single Read call may
// request from it.
type clipReader struct {
	// size is the maximum number of bytes requested per Read.
	size int
	// r is the wrapped reader.
	r io.Reader
}

// Read forwards to the wrapped reader, passing on at most s.size bytes of
// buf so that the caller receives the data in smaller pieces.
func (s *clipReader) Read(buf []byte) (int, error) {
	limit := len(buf)
	if s.size < limit {
		limit = s.size
	}

	return s.r.Read(buf[:limit])
}
|
||||
@ -5,10 +5,10 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/ipfs/go-ipfs/importer/chunk"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||||
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
files "gx/ipfs/QmceUdzxkimdYsgtX733uNgzf1DLHyBKN6ehGSp85ayppM/go-ipfs-cmdkit/files"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
@ -18,7 +18,7 @@ import (
|
||||
// efficiently create unixfs dag trees
|
||||
type DagBuilderHelper struct {
|
||||
dserv ipld.DAGService
|
||||
spl chunk.Splitter
|
||||
spl chunker.Splitter
|
||||
recvdErr error
|
||||
rawLeaves bool
|
||||
nextData []byte // the next item to return.
|
||||
@ -30,7 +30,7 @@ type DagBuilderHelper struct {
|
||||
}
|
||||
|
||||
// DagBuilderParams wraps configuration options to create a DagBuilderHelper
|
||||
// from a chunk.Splitter.
|
||||
// from a chunker.Splitter.
|
||||
type DagBuilderParams struct {
|
||||
// Maximum number of links per intermediate node
|
||||
Maxlinks int
|
||||
@ -51,8 +51,8 @@ type DagBuilderParams struct {
|
||||
}
|
||||
|
||||
// New generates a new DagBuilderHelper from the given params and a given
|
||||
// chunk.Splitter as data source.
|
||||
func (dbp *DagBuilderParams) New(spl chunk.Splitter) *DagBuilderHelper {
|
||||
// chunker.Splitter as data source.
|
||||
func (dbp *DagBuilderParams) New(spl chunker.Splitter) *DagBuilderHelper {
|
||||
db := &DagBuilderHelper{
|
||||
dserv: dbp.Dagserv,
|
||||
spl: spl,
|
||||
|
||||
@ -6,11 +6,11 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
"gx/ipfs/QmceUdzxkimdYsgtX733uNgzf1DLHyBKN6ehGSp85ayppM/go-ipfs-cmdkit/files"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
|
||||
bal "github.com/ipfs/go-ipfs/importer/balanced"
|
||||
"github.com/ipfs/go-ipfs/importer/chunk"
|
||||
h "github.com/ipfs/go-ipfs/importer/helpers"
|
||||
trickle "github.com/ipfs/go-ipfs/importer/trickle"
|
||||
)
|
||||
@ -33,12 +33,12 @@ func BuildDagFromFile(fpath string, ds ipld.DAGService) (ipld.Node, error) {
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return BuildDagFromReader(ds, chunk.DefaultSplitter(f))
|
||||
return BuildDagFromReader(ds, chunker.DefaultSplitter(f))
|
||||
}
|
||||
|
||||
// BuildDagFromReader creates a DAG given a DAGService and a Splitter
|
||||
// implementation (Splitters are io.Readers), using a Balanced layout.
|
||||
func BuildDagFromReader(ds ipld.DAGService, spl chunk.Splitter) (ipld.Node, error) {
|
||||
func BuildDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) {
|
||||
dbp := h.DagBuilderParams{
|
||||
Dagserv: ds,
|
||||
Maxlinks: h.DefaultLinksPerBlock,
|
||||
@ -49,7 +49,7 @@ func BuildDagFromReader(ds ipld.DAGService, spl chunk.Splitter) (ipld.Node, erro
|
||||
|
||||
// BuildTrickleDagFromReader creates a DAG given a DAGService and a Splitter
|
||||
// implementation (Splitters are io.Readers), using a Trickle Layout.
|
||||
func BuildTrickleDagFromReader(ds ipld.DAGService, spl chunk.Splitter) (ipld.Node, error) {
|
||||
func BuildTrickleDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) {
|
||||
dbp := h.DagBuilderParams{
|
||||
Dagserv: ds,
|
||||
Maxlinks: h.DefaultLinksPerBlock,
|
||||
|
||||
@ -7,18 +7,18 @@ import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
mdtest "github.com/ipfs/go-ipfs/merkledag/test"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
|
||||
func getBalancedDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) {
|
||||
ds := mdtest.Mock()
|
||||
r := io.LimitReader(u.NewTimeSeededRand(), size)
|
||||
nd, err := BuildDagFromReader(ds, chunk.NewSizeSplitter(r, blksize))
|
||||
nd, err := BuildDagFromReader(ds, chunker.NewSizeSplitter(r, blksize))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -28,7 +28,7 @@ func getBalancedDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DA
|
||||
func getTrickleDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) {
|
||||
ds := mdtest.Mock()
|
||||
r := io.LimitReader(u.NewTimeSeededRand(), size)
|
||||
nd, err := BuildTrickleDagFromReader(ds, chunk.NewSizeSplitter(r, blksize))
|
||||
nd, err := BuildTrickleDagFromReader(ds, chunker.NewSizeSplitter(r, blksize))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -41,7 +41,7 @@ func TestBalancedDag(t *testing.T) {
|
||||
u.NewTimeSeededRand().Read(buf)
|
||||
r := bytes.NewReader(buf)
|
||||
|
||||
nd, err := BuildDagFromReader(ds, chunk.DefaultSplitter(r))
|
||||
nd, err := BuildDagFromReader(ds, chunker.DefaultSplitter(r))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -84,7 +84,7 @@ func BenchmarkTrickleReadSmallBlock(b *testing.B) {
|
||||
func BenchmarkBalancedReadFull(b *testing.B) {
|
||||
b.StopTimer()
|
||||
nbytes := int64(10000000)
|
||||
nd, ds := getBalancedDag(b, nbytes, chunk.DefaultBlockSize)
|
||||
nd, ds := getBalancedDag(b, nbytes, chunker.DefaultBlockSize)
|
||||
|
||||
b.SetBytes(nbytes)
|
||||
b.StartTimer()
|
||||
@ -94,7 +94,7 @@ func BenchmarkBalancedReadFull(b *testing.B) {
|
||||
func BenchmarkTrickleReadFull(b *testing.B) {
|
||||
b.StopTimer()
|
||||
nbytes := int64(10000000)
|
||||
nd, ds := getTrickleDag(b, nbytes, chunk.DefaultBlockSize)
|
||||
nd, ds := getTrickleDag(b, nbytes, chunker.DefaultBlockSize)
|
||||
|
||||
b.SetBytes(nbytes)
|
||||
b.StartTimer()
|
||||
|
||||
@ -9,7 +9,6 @@ import (
|
||||
mrand "math/rand"
|
||||
"testing"
|
||||
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
h "github.com/ipfs/go-ipfs/importer/helpers"
|
||||
merkledag "github.com/ipfs/go-ipfs/merkledag"
|
||||
mdtest "github.com/ipfs/go-ipfs/merkledag/test"
|
||||
@ -17,6 +16,7 @@ import (
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
|
||||
@ -32,7 +32,7 @@ func runBothSubtests(t *testing.T, tfunc func(*testing.T, UseRawLeaves)) {
|
||||
t.Run("leaves=Raw", func(t *testing.T) { tfunc(t, RawLeaves) })
|
||||
}
|
||||
|
||||
func buildTestDag(ds ipld.DAGService, spl chunk.Splitter, rawLeaves UseRawLeaves) (*merkledag.ProtoNode, error) {
|
||||
func buildTestDag(ds ipld.DAGService, spl chunker.Splitter, rawLeaves UseRawLeaves) (*merkledag.ProtoNode, error) {
|
||||
dbp := h.DagBuilderParams{
|
||||
Dagserv: ds,
|
||||
Maxlinks: h.DefaultLinksPerBlock,
|
||||
@ -66,10 +66,10 @@ func testSizeBasedSplit(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
if testing.Short() {
|
||||
t.SkipNow()
|
||||
}
|
||||
bs := chunk.SizeSplitterGen(512)
|
||||
bs := chunker.SizeSplitterGen(512)
|
||||
testFileConsistency(t, bs, 32*512, rawLeaves)
|
||||
|
||||
bs = chunk.SizeSplitterGen(4096)
|
||||
bs = chunker.SizeSplitterGen(4096)
|
||||
testFileConsistency(t, bs, 32*4096, rawLeaves)
|
||||
|
||||
// Uneven offset
|
||||
@ -82,7 +82,7 @@ func dup(b []byte) []byte {
|
||||
return o
|
||||
}
|
||||
|
||||
func testFileConsistency(t *testing.T, bs chunk.SplitterGen, nbytes int, rawLeaves UseRawLeaves) {
|
||||
func testFileConsistency(t *testing.T, bs chunker.SplitterGen, nbytes int, rawLeaves UseRawLeaves) {
|
||||
should := make([]byte, nbytes)
|
||||
u.NewTimeSeededRand().Read(should)
|
||||
|
||||
@ -119,7 +119,7 @@ func testBuilderConsistency(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
io.CopyN(buf, u.NewTimeSeededRand(), int64(nbytes))
|
||||
should := dup(buf.Bytes())
|
||||
dagserv := mdtest.Mock()
|
||||
nd, err := buildTestDag(dagserv, chunk.DefaultSplitter(buf), rawLeaves)
|
||||
nd, err := buildTestDag(dagserv, chunker.DefaultSplitter(buf), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -156,7 +156,7 @@ func TestIndirectBlocks(t *testing.T) {
|
||||
}
|
||||
|
||||
func testIndirectBlocks(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
splitter := chunk.SizeSplitterGen(512)
|
||||
splitter := chunker.SizeSplitterGen(512)
|
||||
nbytes := 1024 * 1024
|
||||
buf := make([]byte, nbytes)
|
||||
u.NewTimeSeededRand().Read(buf)
|
||||
@ -195,7 +195,7 @@ func testSeekingBasic(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 512), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 512), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -236,7 +236,7 @@ func testSeekToBegin(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -284,7 +284,7 @@ func testSeekToAlmostBegin(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -332,7 +332,7 @@ func testSeekEnd(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -362,7 +362,7 @@ func testSeekEndSingleBlockFile(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 5000), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 5000), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -392,7 +392,7 @@ func testSeekingStress(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 1000), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 1000), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -441,7 +441,7 @@ func testSeekingConsistency(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
|
||||
read := bytes.NewReader(should)
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -489,7 +489,7 @@ func testAppend(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
// Reader for half the bytes
|
||||
read := bytes.NewReader(should[:nbytes/2])
|
||||
ds := mdtest.Mock()
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -503,7 +503,7 @@ func testAppend(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
r := bytes.NewReader(should[nbytes/2:])
|
||||
|
||||
ctx := context.Background()
|
||||
nnode, err := Append(ctx, nd, dbp.New(chunk.NewSizeSplitter(r, 500)))
|
||||
nnode, err := Append(ctx, nd, dbp.New(chunker.NewSizeSplitter(r, 500)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -548,7 +548,7 @@ func testMultipleAppends(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
u.NewTimeSeededRand().Read(should)
|
||||
|
||||
read := bytes.NewReader(nil)
|
||||
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
|
||||
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -559,7 +559,7 @@ func testMultipleAppends(t *testing.T, rawLeaves UseRawLeaves) {
|
||||
RawLeaves: bool(rawLeaves),
|
||||
}
|
||||
|
||||
spl := chunk.SizeSplitterGen(500)
|
||||
spl := chunker.SizeSplitterGen(500)
|
||||
|
||||
ctx := context.Background()
|
||||
for i := 0; i < len(should); i++ {
|
||||
@ -609,7 +609,7 @@ func TestAppendSingleBytesToEmpty(t *testing.T) {
|
||||
Maxlinks: 4,
|
||||
}
|
||||
|
||||
spl := chunk.SizeSplitterGen(500)
|
||||
spl := chunker.SizeSplitterGen(500)
|
||||
|
||||
ctx := context.Background()
|
||||
nnode, err := Append(ctx, nd, dbp.New(spl(bytes.NewReader(data[:1]))))
|
||||
|
||||
@ -17,13 +17,13 @@ import (
|
||||
bstest "github.com/ipfs/go-ipfs/blockservice/test"
|
||||
offline "github.com/ipfs/go-ipfs/exchange/offline"
|
||||
imp "github.com/ipfs/go-ipfs/importer"
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
. "github.com/ipfs/go-ipfs/merkledag"
|
||||
mdpb "github.com/ipfs/go-ipfs/merkledag/pb"
|
||||
dstest "github.com/ipfs/go-ipfs/merkledag/test"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
blocks "gx/ipfs/Qmej7nf81hi2x2tvjRBF3mcp74sQyuDH4VMYDGd1YtXjb2/go-block-format"
|
||||
@ -136,7 +136,7 @@ func runBatchFetchTest(t *testing.T, read io.Reader) {
|
||||
dagservs = append(dagservs, NewDAGService(bsi))
|
||||
}
|
||||
|
||||
spl := chunk.NewSizeSplitter(read, 512)
|
||||
spl := chunker.NewSizeSplitter(read, 512)
|
||||
|
||||
root, err := imp.BuildDagFromReader(dagservs[0], spl)
|
||||
if err != nil {
|
||||
@ -228,7 +228,7 @@ func TestFetchGraph(t *testing.T) {
|
||||
}
|
||||
|
||||
read := io.LimitReader(u.NewTimeSeededRand(), 1024*32)
|
||||
root, err := imp.BuildDagFromReader(dservs[0], chunk.NewSizeSplitter(read, 512))
|
||||
root, err := imp.BuildDagFromReader(dservs[0], chunker.NewSizeSplitter(read, 512))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -254,7 +254,7 @@ func TestEnumerateChildren(t *testing.T) {
|
||||
ds := NewDAGService(bsi[0])
|
||||
|
||||
read := io.LimitReader(u.NewTimeSeededRand(), 1024*1024)
|
||||
root, err := imp.BuildDagFromReader(ds, chunk.NewSizeSplitter(read, 512))
|
||||
root, err := imp.BuildDagFromReader(ds, chunker.NewSizeSplitter(read, 512))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@ -5,11 +5,11 @@ import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||||
mod "github.com/ipfs/go-ipfs/unixfs/mod"
|
||||
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
|
||||
@ -82,7 +82,7 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) {
|
||||
return nil, fmt.Errorf("mode not supported")
|
||||
}
|
||||
|
||||
dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dserv, chunk.DefaultSplitter)
|
||||
dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dserv, chunker.DefaultSplitter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -18,7 +18,6 @@ import (
|
||||
bserv "github.com/ipfs/go-ipfs/blockservice"
|
||||
offline "github.com/ipfs/go-ipfs/exchange/offline"
|
||||
importer "github.com/ipfs/go-ipfs/importer"
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
"github.com/ipfs/go-ipfs/path"
|
||||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||||
@ -27,6 +26,7 @@ import (
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
ds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore"
|
||||
dssync "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore/sync"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
@ -48,7 +48,7 @@ func getRandFile(t *testing.T, ds ipld.DAGService, size int64) ipld.Node {
|
||||
}
|
||||
|
||||
func fileNodeFromReader(t *testing.T, ds ipld.DAGService, r io.Reader) ipld.Node {
|
||||
nd, err := importer.BuildDagFromReader(ds, chunk.DefaultSplitter(r))
|
||||
nd, err := importer.BuildDagFromReader(ds, chunker.DefaultSplitter(r))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@ -521,6 +521,12 @@
|
||||
"hash": "Qmb3jLEFAQrqdVgWUajqEyuuDoavkSq1XQXz6tWdFWF995",
|
||||
"name": "go-ipfs-posinfo",
|
||||
"version": "0.0.1"
|
||||
},
|
||||
{
|
||||
"author": "hsanjuan",
|
||||
"hash": "QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq",
|
||||
"name": "go-ipfs-chunker",
|
||||
"version": "0.0.2"
|
||||
}
|
||||
],
|
||||
"gxVersion": "0.10.0",
|
||||
|
||||
@ -9,13 +9,13 @@ import (
|
||||
"strings"
|
||||
|
||||
importer "github.com/ipfs/go-ipfs/importer"
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
dag "github.com/ipfs/go-ipfs/merkledag"
|
||||
dagutil "github.com/ipfs/go-ipfs/merkledag/utils"
|
||||
path "github.com/ipfs/go-ipfs/path"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
|
||||
logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
|
||||
@ -63,7 +63,7 @@ func ImportTar(ctx context.Context, r io.Reader, ds ipld.DAGService) (*dag.Proto
|
||||
header.SetData(headerBytes)
|
||||
|
||||
if h.Size > 0 {
|
||||
spl := chunk.NewRabin(tr, uint64(chunk.DefaultBlockSize))
|
||||
spl := chunker.NewRabin(tr, uint64(chunker.DefaultBlockSize))
|
||||
nd, err := importer.BuildDagFromReader(ds, spl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@ -8,13 +8,13 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
chunk "github.com/ipfs/go-ipfs/importer/chunk"
|
||||
help "github.com/ipfs/go-ipfs/importer/helpers"
|
||||
trickle "github.com/ipfs/go-ipfs/importer/trickle"
|
||||
mdag "github.com/ipfs/go-ipfs/merkledag"
|
||||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||||
uio "github.com/ipfs/go-ipfs/unixfs/io"
|
||||
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
@ -37,7 +37,7 @@ type DagModifier struct {
|
||||
dagserv ipld.DAGService
|
||||
curNode ipld.Node
|
||||
|
||||
splitter chunk.SplitterGen
|
||||
splitter chunker.SplitterGen
|
||||
ctx context.Context
|
||||
readCancel func()
|
||||
|
||||
@ -55,7 +55,7 @@ type DagModifier struct {
|
||||
// created nodes will be inherited from the passed in node. If the Cid
|
||||
// version is not 0 raw leaves will also be enabled. The Prefix and
|
||||
// RawLeaves options can be overridden by changing them after the call.
|
||||
func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunk.SplitterGen) (*DagModifier, error) {
|
||||
func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunker.SplitterGen) (*DagModifier, error) {
|
||||
switch from.(type) {
|
||||
case *mdag.ProtoNode, *mdag.RawNode:
|
||||
// ok
|
||||
@ -126,7 +126,7 @@ func (zr zeroReader) Read(b []byte) (int, error) {
|
||||
// A small blocksize is chosen to aid in deduplication
|
||||
func (dm *DagModifier) expandSparse(size int64) error {
|
||||
r := io.LimitReader(zeroReader{}, size)
|
||||
spl := chunk.NewSizeSplitter(r, 4096)
|
||||
spl := chunker.NewSizeSplitter(r, 4096)
|
||||
nnode, err := dm.appendData(dm.curNode, spl)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -356,7 +356,7 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64, data io.Reader) (*c
|
||||
}
|
||||
|
||||
// appendData appends the blocks from the given chan to the end of this dag
|
||||
func (dm *DagModifier) appendData(nd ipld.Node, spl chunk.Splitter) (ipld.Node, error) {
|
||||
func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node, error) {
|
||||
switch nd := nd.(type) {
|
||||
case *mdag.ProtoNode, *mdag.RawNode:
|
||||
dbp := &help.DagBuilderParams{
|
||||
|
||||
@ -8,7 +8,6 @@ import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
"github.com/ipfs/go-ipfs/importer/chunk"
|
||||
h "github.com/ipfs/go-ipfs/importer/helpers"
|
||||
trickle "github.com/ipfs/go-ipfs/importer/trickle"
|
||||
mdag "github.com/ipfs/go-ipfs/merkledag"
|
||||
@ -16,15 +15,16 @@ import (
|
||||
ft "github.com/ipfs/go-ipfs/unixfs"
|
||||
|
||||
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
|
||||
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
|
||||
mh "gx/ipfs/QmZyZDi491cCNTLfAhwcaDii2Kg4pwKRkhqQzURGDvY6ua/go-multihash"
|
||||
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
|
||||
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
|
||||
)
|
||||
|
||||
// SizeSplitterGen creates a generator.
|
||||
func SizeSplitterGen(size int64) chunk.SplitterGen {
|
||||
return func(r io.Reader) chunk.Splitter {
|
||||
return chunk.NewSizeSplitter(r, size)
|
||||
func SizeSplitterGen(size int64) chunker.SplitterGen {
|
||||
return func(r io.Reader) chunker.Splitter {
|
||||
return chunker.NewSizeSplitter(r, size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user