Merge pull request #4661 from ipfs/extract/chunk

Extract: importers/chunk module as go-ipfs-chunker
This commit is contained in:
Whyrusleeping 2018-02-08 08:43:58 -08:00 committed by GitHub
commit 990e4df32e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 72 additions and 1962 deletions

4
Godeps/Godeps.json generated
View File

@ -56,10 +56,6 @@
{
"ImportPath": "github.com/texttheater/golang-levenshtein/levenshtein",
"Rev": "dfd657628c58d3eeaa26391097853b2473c8b94e"
},
{
"ImportPath": "github.com/whyrusleeping/chunker",
"Rev": "537e901819164627ca4bb5ce4e3faa8ce7956564"
}
]
}

View File

@ -1,10 +0,0 @@
language: go
sudo: false
go:
- 1.3.3
- 1.4.2
os:
- linux
- osx

View File

@ -1,23 +0,0 @@
Copyright (c) 2014, Alexander Neumann <alexander@bumpern.de>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,7 +0,0 @@
[![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker)
Content Defined Chunking (CDC) based on a rolling Rabin Checksum.
Part of https://github.com/restic/restic.
Better README will follow soon.

View File

@ -1,370 +0,0 @@
package chunker
import (
"errors"
"hash"
"io"
"math"
"sync"
)
const (
KiB = 1024
MiB = 1024 * KiB
// WindowSize is the size of the sliding window.
windowSize = 16
chunkerBufSize = 512 * KiB
)
var bufPool = sync.Pool{
New: func() interface{} { return make([]byte, chunkerBufSize) },
}
type tables struct {
out [256]Pol
mod [256]Pol
}
// cache precomputed tables, these are read-only anyway
var cache struct {
entries map[Pol]*tables
sync.Mutex
}
func init() {
cache.entries = make(map[Pol]*tables)
}
// Chunk is one content-dependent chunk of bytes whose end was cut when the
// Rabin Fingerprint had the value stored in Cut.
type Chunk struct {
Start uint64
Length uint64
Cut uint64
Digest []byte
Data []byte
}
func (c Chunk) Reader(r io.ReaderAt) io.Reader {
return io.NewSectionReader(r, int64(c.Start), int64(c.Length))
}
// Chunker splits content with Rabin Fingerprints.
type Chunker struct {
pol Pol
polShift uint64
tables *tables
rd io.Reader
closed bool
chunkbuf []byte
window [windowSize]byte
wpos int
buf []byte
bpos uint64
bmax uint64
start uint64
count uint64
pos uint64
pre uint64 // wait for this many bytes before start calculating an new chunk
digest uint64
h hash.Hash
sizeMask uint64
// minimal and maximal size of the outputted blocks
MinSize uint64
MaxSize uint64
}
// New returns a new Chunker based on polynomial p that reads from rd
// with bufsize and pass all data to hash along the way.
func New(rd io.Reader, pol Pol, h hash.Hash, avSize, min, max uint64) *Chunker {
sizepow := uint(math.Log2(float64(avSize)))
c := &Chunker{
buf: bufPool.Get().([]byte),
h: h,
pol: pol,
rd: rd,
chunkbuf: make([]byte, 0, max),
sizeMask: (1 << sizepow) - 1,
MinSize: min,
MaxSize: max,
}
c.reset()
return c
}
func (c *Chunker) reset() {
c.polShift = uint64(c.pol.Deg() - 8)
c.fillTables()
for i := 0; i < windowSize; i++ {
c.window[i] = 0
}
c.closed = false
c.digest = 0
c.wpos = 0
c.count = 0
c.slide(1)
c.start = c.pos
if c.h != nil {
c.h.Reset()
}
// do not start a new chunk unless at least MinSize bytes have been read
c.pre = c.MinSize - windowSize
}
// Calculate out_table and mod_table for optimization. Must be called only
// once. This implementation uses a cache in the global variable cache.
func (c *Chunker) fillTables() {
// if polynomial hasn't been specified, do not compute anything for now
if c.pol == 0 {
return
}
// test if the tables are cached for this polynomial
cache.Lock()
defer cache.Unlock()
if t, ok := cache.entries[c.pol]; ok {
c.tables = t
return
}
// else create a new entry
c.tables = &tables{}
cache.entries[c.pol] = c.tables
// calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0)
// \ windowsize-1 zero bytes /
// To slide out byte b_0 for window size w with known hash
// H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]:
// H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0)
// = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0)
// = H( 0 || b_1 || ... || b_w)
//
// Afterwards a new byte can be shifted in.
for b := 0; b < 256; b++ {
var h Pol
h = appendByte(h, byte(b), c.pol)
for i := 0; i < windowSize-1; i++ {
h = appendByte(h, 0, c.pol)
}
c.tables.out[b] = h
}
// calculate table for reduction mod Polynomial
k := c.pol.Deg()
for b := 0; b < 256; b++ {
// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
//
// The 8 bits above deg(Polynomial) determine what happens next and so
// these bits are used as a lookup to this table. The value is split in
// two parts: Part A contains the result of the modulus operation, part
// B is used to cancel out the 8 top bits so that one XOR operation is
// enough to reduce modulo Polynomial
c.tables.mod[b] = Pol(uint64(b)<<uint64(k)).Mod(c.pol) | (Pol(b) << uint64(k))
}
}
func (c *Chunker) nextBytes() []byte {
data := dupBytes(c.chunkbuf[:c.count])
n := copy(c.chunkbuf, c.chunkbuf[c.count:])
c.chunkbuf = c.chunkbuf[:n]
return data
}
// Next returns the position and length of the next chunk of data. If an error
// occurs while reading, the error is returned with a nil chunk. The state of
// the current chunk is undefined. When the last chunk has been returned, all
// subsequent calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next() (*Chunk, error) {
if c.tables == nil {
return nil, errors.New("polynomial is not set")
}
for {
if c.bpos >= c.bmax {
n, err := io.ReadFull(c.rd, c.buf[:])
c.chunkbuf = append(c.chunkbuf, c.buf[:n]...)
if err == io.ErrUnexpectedEOF {
err = nil
}
// io.ReadFull only returns io.EOF when no bytes could be read. If
// this is the case and we're in this branch, there are no more
// bytes to buffer, so this was the last chunk. If a different
// error has occurred, return that error and abandon the current
// chunk.
if err == io.EOF && !c.closed {
c.closed = true
// return the buffer to the pool
bufPool.Put(c.buf)
data := c.nextBytes()
// return current chunk, if any bytes have been processed
if c.count > 0 {
return &Chunk{
Start: c.start,
Length: c.count,
Cut: c.digest,
Digest: c.hashDigest(),
Data: data,
}, nil
}
}
if err != nil {
return nil, err
}
c.bpos = 0
c.bmax = uint64(n)
}
// check if bytes have to be dismissed before starting a new chunk
if c.pre > 0 {
n := c.bmax - c.bpos
if c.pre > uint64(n) {
c.pre -= uint64(n)
c.updateHash(c.buf[c.bpos:c.bmax])
c.count += uint64(n)
c.pos += uint64(n)
c.bpos = c.bmax
continue
}
c.updateHash(c.buf[c.bpos : c.bpos+c.pre])
c.bpos += c.pre
c.count += c.pre
c.pos += c.pre
c.pre = 0
}
add := c.count
for _, b := range c.buf[c.bpos:c.bmax] {
// inline c.slide(b) and append(b) to increase performance
out := c.window[c.wpos]
c.window[c.wpos] = b
c.digest ^= uint64(c.tables.out[out])
c.wpos = (c.wpos + 1) % windowSize
// c.append(b)
index := c.digest >> c.polShift
c.digest <<= 8
c.digest |= uint64(b)
c.digest ^= uint64(c.tables.mod[index])
// end inline
add++
if add < c.MinSize {
continue
}
if (c.digest&c.sizeMask) == 0 || add >= c.MaxSize {
i := add - c.count - 1
c.updateHash(c.buf[c.bpos : c.bpos+uint64(i)+1])
c.count = add
c.pos += uint64(i) + 1
c.bpos += uint64(i) + 1
data := c.nextBytes()
chunk := &Chunk{
Start: c.start,
Length: c.count,
Cut: c.digest,
Digest: c.hashDigest(),
Data: data,
}
c.reset()
return chunk, nil
}
}
steps := c.bmax - c.bpos
if steps > 0 {
c.updateHash(c.buf[c.bpos : c.bpos+steps])
}
c.count += steps
c.pos += steps
c.bpos = c.bmax
}
}
func dupBytes(b []byte) []byte {
out := make([]byte, len(b))
copy(out, b)
return out
}
func (c *Chunker) updateHash(data []byte) {
if c.h != nil {
// the hashes from crypto/sha* do not return an error
_, err := c.h.Write(data)
if err != nil {
panic(err)
}
}
}
func (c *Chunker) hashDigest() []byte {
if c.h == nil {
return nil
}
return c.h.Sum(nil)
}
func (c *Chunker) append(b byte) {
index := c.digest >> c.polShift
c.digest <<= 8
c.digest |= uint64(b)
c.digest ^= uint64(c.tables.mod[index])
}
func (c *Chunker) slide(b byte) {
out := c.window[c.wpos]
c.window[c.wpos] = b
c.digest ^= uint64(c.tables.out[out])
c.wpos = (c.wpos + 1) % windowSize
c.append(b)
}
func appendByte(hash Pol, b byte, pol Pol) Pol {
hash <<= 8
hash |= Pol(b)
return hash.Mod(pol)
}

View File

@ -1,298 +0,0 @@
package chunker_test
import (
"bytes"
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"hash"
"io"
"io/ioutil"
"math/rand"
"testing"
"time"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
)
func parseDigest(s string) []byte {
d, err := hex.DecodeString(s)
if err != nil {
panic(err)
}
return d
}
type chunk struct {
Length uint
CutFP uint64
Digest []byte
}
// polynomial used for all the tests below
const testPol = chunker.Pol(0x3DA3358B4DC173)
// created for 32MB of random data out of math/rand's Uint32() seeded by
// constant 23
//
// chunking configuration:
// window size 64, avg chunksize 1<<20, min chunksize 1<<19, max chunksize 1<<23
// polynom 0x3DA3358B4DC173
var chunks1 = []chunk{
chunk{2163460, 0x000b98d4cdf00000, parseDigest("4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d")},
chunk{643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")},
chunk{1528956, 0x0015a25c2ef00000, parseDigest("a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba")},
chunk{1955808, 0x00102a8242e00000, parseDigest("c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824")},
chunk{2222372, 0x00045da878000000, parseDigest("6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56")},
chunk{2538687, 0x00198a8179900000, parseDigest("8687937412f654b5cfe4a82b08f28393a0c040f77c6f95e26742c2fc4254bfde")},
chunk{609606, 0x001d4e8d17100000, parseDigest("5da820742ff5feb3369112938d3095785487456f65a8efc4b96dac4be7ebb259")},
chunk{1205738, 0x000a7204dd600000, parseDigest("cc70d8fad5472beb031b1aca356bcab86c7368f40faa24fe5f8922c6c268c299")},
chunk{959742, 0x00183e71e1400000, parseDigest("4065bdd778f95676c92b38ac265d361f81bff17d76e5d9452cf985a2ea5a4e39")},
chunk{4036109, 0x001fec043c700000, parseDigest("b9cf166e75200eb4993fc9b6e22300a6790c75e6b0fc8f3f29b68a752d42f275")},
chunk{1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")},
chunk{1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")},
chunk{811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")},
chunk{1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")},
chunk{1318021, 0x001cceb980000000, parseDigest("842eb53543db55bacac5e25cb91e43cc2e310fe5f9acc1aee86bdf5e91389374")},
chunk{948640, 0x0011f7a470a00000, parseDigest("b8e36bf7019bb96ac3fb7867659d2167d9d3b3148c09fe0de45850b8fe577185")},
chunk{645464, 0x00030ce2d9400000, parseDigest("5584bd27982191c3329f01ed846bfd266e96548dfa87018f745c33cfc240211d")},
chunk{533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")},
chunk{1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")},
chunk{800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")},
chunk{2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")},
chunk{2651975, 0x000ae6c868000000, parseDigest("181347d2bbec32bef77ad5e9001e6af80f6abcf3576549384d334ee00c1988d8")},
chunk{237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")},
}
// test if nullbytes are correctly split, even if length is a multiple of MinSize.
var chunks2 = []chunk{
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
}
func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk {
chunks := []*chunker.Chunk{}
pos := uint(0)
for i, chunk := range testChunks {
c, err := chnker.Next()
if err != nil {
t.Fatalf("Error returned with chunk %d: %v", i, err)
}
if c == nil {
t.Fatalf("Nil chunk returned")
}
if c != nil {
if c.Start != pos {
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
i, pos, c.Start)
}
if c.Length != chunk.Length {
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
i, chunk.Length, c.Length)
}
if c.Cut != chunk.CutFP {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, len(chunks)-1, chunk.CutFP, c.Cut)
}
if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
i, len(chunks)-1, chunk.Digest, c.Digest)
}
pos += c.Length
chunks = append(chunks, c)
}
}
c, err := chnker.Next()
if c != nil {
t.Fatal("additional non-nil chunk returned")
}
if err != io.EOF {
t.Fatal("wrong error returned after last chunk")
}
return chunks
}
func getRandom(seed, count int) []byte {
buf := make([]byte, count)
rnd := rand.New(rand.NewSource(23))
for i := 0; i < count; i += 4 {
r := rnd.Uint32()
buf[i] = byte(r)
buf[i+1] = byte(r >> 8)
buf[i+2] = byte(r >> 16)
buf[i+3] = byte(r >> 24)
}
return buf
}
func TestChunker(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
chunks := testWithData(t, ch, chunks1)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
h := sha256.New()
n, err := io.Copy(h, rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(n) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, n)
}
d := h.Sum(nil)
if !bytes.Equal(d, chunks1[i].Digest) {
t.Fatalf("wrong hash returned: expected %02x, got %02x",
chunks1[i].Digest, d)
}
}
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
testWithData(t, ch, chunks2)
}
func TestChunkerWithRandomPolynomial(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
// generate a new random polynomial
start := time.Now()
p, err := chunker.RandomPolynomial()
OK(t, err)
t.Logf("generating random polynomial took %v", time.Since(start))
start = time.Now()
ch := chunker.New(bytes.NewReader(buf), p, sha256.New())
t.Logf("creating chunker took %v", time.Since(start))
// make sure that first chunk is different
c, err := ch.Next()
Assert(t, c.Cut != chunks1[0].CutFP,
"Cut point is the same")
Assert(t, c.Length != chunks1[0].Length,
"Length is the same")
Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest),
"Digest is the same")
}
func TestChunkerWithoutHash(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, nil)
chunks := testWithData(t, ch, chunks1)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
buf2, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(len(buf2)) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, uint(len(buf2)))
}
if uint(len(buf2)) != chunks1[i].Length {
t.Fatalf("wrong number of bytes returned: expected %02x, got %02x",
chunks[i].Length, len(buf2))
}
if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
t.Fatalf("invalid data for chunk returned: expected %02x, got %02x",
buf[c.Start:c.Start+c.Length], buf2)
}
}
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
testWithData(t, ch, chunks2)
}
func benchmarkChunker(b *testing.B, hash hash.Hash) {
size := 10 * 1024 * 1024
rd := bytes.NewReader(getRandom(23, size))
b.ResetTimer()
b.SetBytes(int64(size))
var chunks int
for i := 0; i < b.N; i++ {
chunks = 0
rd.Seek(0, 0)
ch := chunker.New(rd, testPol, hash)
for {
_, err := ch.Next()
if err == io.EOF {
break
}
if err != nil {
b.Fatalf("Unexpected error occurred: %v", err)
}
chunks++
}
}
b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks)
}
func BenchmarkChunkerWithSHA256(b *testing.B) {
benchmarkChunker(b, sha256.New())
}
func BenchmarkChunkerWithMD5(b *testing.B) {
benchmarkChunker(b, md5.New())
}
func BenchmarkChunker(b *testing.B) {
benchmarkChunker(b, nil)
}
func BenchmarkNewChunker(b *testing.B) {
p, err := chunker.RandomPolynomial()
OK(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
chunker.New(bytes.NewBuffer(nil), p, nil)
}
}

View File

@ -1,82 +0,0 @@
// Copyright 2014 Alexander Neumann. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package chunker implements Content Defined Chunking (CDC) based on a rolling
Rabin Checksum.
Choosing a Random Irreducible Polynomial
The function RandomPolynomial() returns a new random polynomial of degree 53
for use with the chunker. The degree 53 is chosen because it is the largest
prime below 64-8 = 56, so that the top 8 bits of an uint64 can be used for
optimising calculations in the chunker.
A random polynomial is chosen selecting 64 random bits, masking away bits
64..54 and setting bit 53 to one (otherwise the polynomial is not of the
desired degree) and bit 0 to one (otherwise the polynomial is trivially
reducible), so that 51 bits are chosen at random.
This process is repeated until Irreducible() returns true, then this
polynomials is returned. If this doesn't happen after 1 million tries, the
function returns an error. The probability for selecting an irreducible
polynomial at random is about 7.5% ( (2^53-2)/53 / 2^51), so the probability
that no irreducible polynomial has been found after 100 tries is lower than
0.04%.
Verifying Irreducible Polynomials
During development the results have been verified using the computational
discrete algebra system GAP, which can be obtained from the website at
http://www.gap-system.org/.
For filtering a given list of polynomials in hexadecimal coefficient notation,
the following script can be used:
# create x over F_2 = GF(2)
x := Indeterminate(GF(2), "x");
# test if polynomial is irreducible, i.e. the number of factors is one
IrredPoly := function (poly)
return (Length(Factors(poly)) = 1);
end;;
# create a polynomial in x from the hexadecimal representation of the
# coefficients
Hex2Poly := function (s)
return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
end;;
# list of candidates, in hex
candidates := [ "3DA3358B4DC173" ];
# create real polynomials
L := List(candidates, Hex2Poly);
# filter and display the list of irreducible polynomials contained in L
Display(Filtered(L, x -> (IrredPoly(x))));
All irreducible polynomials from the list are written to the output.
Background Literature
An introduction to Rabin Fingerprints/Checksums can be found in the following articles:
Michael O. Rabin (1981): "Fingerprinting by Random Polynomials"
http://www.xmailserver.org/rabin.pdf
Ross N. Williams (1993): "A Painless Guide to CRC Error Detection Algorithms"
http://www.zlib.net/crc_v3.txt
Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method"
http://www.xmailserver.org/rabin_apps.pdf
Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields"
http://www.math.clemson.edu/~sgao/papers/GP97a.pdf
Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget"
http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
*/
package chunker

View File

@ -1,278 +0,0 @@
package chunker
import (
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"strconv"
)
// Pol is a polynomial from F_2[X].
type Pol uint64
// Add returns x+y.
func (x Pol) Add(y Pol) Pol {
r := Pol(uint64(x) ^ uint64(y))
return r
}
// mulOverflows returns true if the multiplication would overflow uint64.
// Code by Rob Pike, see
// https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ
func mulOverflows(a, b Pol) bool {
if a <= 1 || b <= 1 {
return false
}
c := a.mul(b)
d := c.Div(b)
if d != a {
return true
}
return false
}
func (x Pol) mul(y Pol) Pol {
if x == 0 || y == 0 {
return 0
}
var res Pol
for i := 0; i <= y.Deg(); i++ {
if (y & (1 << uint(i))) > 0 {
res = res.Add(x << uint(i))
}
}
return res
}
// Mul returns x*y. When an overflow occurs, Mul panics.
func (x Pol) Mul(y Pol) Pol {
if mulOverflows(x, y) {
panic("multiplication would overflow uint64")
}
return x.mul(y)
}
// Deg returns the degree of the polynomial x. If x is zero, -1 is returned.
func (x Pol) Deg() int {
// the degree of 0 is -1
if x == 0 {
return -1
}
var mask Pol = (1 << 63)
for i := 63; i >= 0; i-- {
// test if bit i is set
if x&mask > 0 {
// this is the degree of x
return i
}
mask >>= 1
}
// fall-through, return -1
return -1
}
// String returns the coefficients in hex.
func (x Pol) String() string {
return "0x" + strconv.FormatUint(uint64(x), 16)
}
// Expand returns the string representation of the polynomial x.
func (x Pol) Expand() string {
if x == 0 {
return "0"
}
s := ""
for i := x.Deg(); i > 1; i-- {
if x&(1<<uint(i)) > 0 {
s += fmt.Sprintf("+x^%d", i)
}
}
if x&2 > 0 {
s += "+x"
}
if x&1 > 0 {
s += "+1"
}
return s[1:]
}
// DivMod returns x / d = q, and remainder r,
// see https://en.wikipedia.org/wiki/Division_algorithm
func (x Pol) DivMod(d Pol) (Pol, Pol) {
if x == 0 {
return 0, 0
}
if d == 0 {
panic("division by zero")
}
D := d.Deg()
diff := x.Deg() - D
if diff < 0 {
return 0, x
}
var q Pol
for diff >= 0 {
m := d << uint(diff)
q |= (1 << uint(diff))
x = x.Add(m)
diff = x.Deg() - D
}
return q, x
}
// Div returns the integer division result x / d.
func (x Pol) Div(d Pol) Pol {
q, _ := x.DivMod(d)
return q
}
// Mod returns the remainder of x / d
func (x Pol) Mod(d Pol) Pol {
_, r := x.DivMod(d)
return r
}
// I really dislike having a function that does not terminate, so specify a
// really large upper bound for finding a new irreducible polynomial, and
// return an error when no irreducible polynomial has been found within
// randPolMaxTries.
const randPolMaxTries = 1e6
// RandomPolynomial returns a new random irreducible polynomial of degree 53
// (largest prime number below 64-8). There are (2^53-2/53) irreducible
// polynomials of degree 53 in F_2[X], c.f. Michael O. Rabin (1981):
// "Fingerprinting by Random Polynomials", page 4. If no polynomial could be
// found in one million tries, an error is returned.
func RandomPolynomial() (Pol, error) {
for i := 0; i < randPolMaxTries; i++ {
var f Pol
// choose polynomial at random
err := binary.Read(rand.Reader, binary.LittleEndian, &f)
if err != nil {
return 0, err
}
// mask away bits above bit 53
f &= Pol((1 << 54) - 1)
// set highest and lowest bit so that the degree is 53 and the
// polynomial is not trivially reducible
f |= (1 << 53) | 1
// test if f is irreducible
if f.Irreducible() {
return f, nil
}
}
// If this is reached, we haven't found an irreducible polynomial in
// randPolMaxTries. This error is very unlikely to occur.
return 0, errors.New("unable to find new random irreducible polynomial")
}
// GCD computes the Greatest Common Divisor x and f.
func (x Pol) GCD(f Pol) Pol {
if f == 0 {
return x
}
if x == 0 {
return f
}
if x.Deg() < f.Deg() {
x, f = f, x
}
return f.GCD(x.Mod(f))
}
// Irreducible returns true iff x is irreducible over F_2. This function
// uses Ben Or's reducibility test.
//
// For details see "Tests and Constructions of Irreducible Polynomials over
// Finite Fields".
func (x Pol) Irreducible() bool {
for i := 1; i <= x.Deg()/2; i++ {
if x.GCD(qp(uint(i), x)) != 1 {
return false
}
}
return true
}
// MulMod computes x*f mod g
func (x Pol) MulMod(f, g Pol) Pol {
if x == 0 || f == 0 {
return 0
}
var res Pol
for i := 0; i <= f.Deg(); i++ {
if (f & (1 << uint(i))) > 0 {
a := x
for j := 0; j < i; j++ {
a = a.Mul(2).Mod(g)
}
res = res.Add(a).Mod(g)
}
}
return res
}
// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the
// reducibility test.
func qp(p uint, g Pol) Pol {
num := (1 << p)
i := 1
// start with x
res := Pol(2)
for i < num {
// repeatedly square res
res = res.MulMod(res, g)
i *= 2
}
// add x
return res.Add(2).Mod(g)
}
func (p Pol) MarshalJSON() ([]byte, error) {
buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16)
buf = append(buf, '"')
return buf, nil
}
func (p *Pol) UnmarshalJSON(data []byte) error {
if len(data) < 2 {
return errors.New("invalid string for polynomial")
}
n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64)
if err != nil {
return err
}
*p = Pol(n)
return nil
}

View File

@ -1,385 +0,0 @@
package chunker_test
import (
"strconv"
"testing"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
)
var polAddTests = []struct {
x, y chunker.Pol
sum chunker.Pol
}{
{23, 16, 23 ^ 16},
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
{0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0},
}
func TestPolAdd(t *testing.T) {
for _, test := range polAddTests {
Equals(t, test.sum, test.x.Add(test.y))
Equals(t, test.sum, test.y.Add(test.x))
}
}
func parseBin(s string) chunker.Pol {
i, err := strconv.ParseUint(s, 2, 64)
if err != nil {
panic(err)
}
return chunker.Pol(i)
}
var polMulTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{1, 2, 2},
{
parseBin("1101"),
parseBin("10"),
parseBin("11010"),
},
{
parseBin("1101"),
parseBin("11"),
parseBin("10111"),
},
{
0x40000000,
0x40000000,
0x1000000000000000,
},
{
parseBin("1010"),
parseBin("100100"),
parseBin("101101000"),
},
{
parseBin("100"),
parseBin("11"),
parseBin("1100"),
},
{
parseBin("11"),
parseBin("110101"),
parseBin("1011111"),
},
{
parseBin("10011"),
parseBin("110101"),
parseBin("1100001111"),
},
}
func TestPolMul(t *testing.T) {
for i, test := range polMulTests {
m := test.x.Mul(test.y)
Assert(t, test.res == m,
"TestPolMul failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
m = test.y.Mul(test.x)
Assert(t, test.res == test.y.Mul(test.x),
"TestPolMul failed for %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
}
func TestPolMulOverflow(t *testing.T) {
defer func() {
// try to recover overflow error
err := recover()
if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
return
} else {
t.Logf("invalid error raised: %v", err)
// re-raise error if not overflow
panic(err)
}
}()
x := chunker.Pol(1 << 63)
x.Mul(2)
t.Fatal("overflow test did not panic")
}
var polDivTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{10, 50, 0},
{0, 1, 0},
{
parseBin("101101000"), // 0x168
parseBin("1010"), // 0xa
parseBin("100100"), // 0x24
},
{2, 2, 1},
{
0x8000000000000000,
0x8000000000000000,
1,
},
{
parseBin("1100"),
parseBin("100"),
parseBin("11"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("110101"),
},
}
func TestPolDiv(t *testing.T) {
for i, test := range polDivTests {
m := test.x.Div(test.y)
Assert(t, test.res == m,
"TestPolDiv failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
}
var polModTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{10, 50, 10},
{0, 1, 0},
{
parseBin("101101001"),
parseBin("1010"),
parseBin("1"),
},
{2, 2, 0},
{
0x8000000000000000,
0x8000000000000000,
0,
},
{
parseBin("1100"),
parseBin("100"),
parseBin("0"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("0"),
},
}
func TestPolModt(t *testing.T) {
for _, test := range polModTests {
Equals(t, test.res, test.x.Mod(test.y))
}
}
func BenchmarkPolDivMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.DivMod(f)
}
}
func BenchmarkPolDiv(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.Div(f)
}
}
func BenchmarkPolMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.Mod(f)
}
}
func BenchmarkPolDeg(t *testing.B) {
f := chunker.Pol(0x3af4b284899)
d := f.Deg()
if d != 41 {
t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d",
d, 41)
}
for i := 0; i < t.N; i++ {
f.Deg()
}
}
func TestRandomPolynomial(t *testing.T) {
_, err := chunker.RandomPolynomial()
OK(t, err)
}
func BenchmarkRandomPolynomial(t *testing.B) {
for i := 0; i < t.N; i++ {
_, err := chunker.RandomPolynomial()
OK(t, err)
}
}
func TestExpandPolynomial(t *testing.T) {
pol := chunker.Pol(0x3DA3358B4DC173)
s := pol.Expand()
Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s)
}
var polIrredTests = []struct {
f chunker.Pol
irred bool
}{
{0x38f1e565e288df, false},
{0x3DA3358B4DC173, true},
{0x30a8295b9d5c91, false},
{0x255f4350b962cb, false},
{0x267f776110a235, false},
{0x2f4dae10d41227, false},
{0x2482734cacca49, true},
{0x312daf4b284899, false},
{0x29dfb6553d01d1, false},
{0x3548245eb26257, false},
{0x3199e7ef4211b3, false},
{0x362f39017dae8b, false},
{0x200d57aa6fdacb, false},
{0x35e0a4efa1d275, false},
{0x2ced55b026577f, false},
{0x260b012010893d, false},
{0x2df29cbcd59e9d, false},
{0x3f2ac7488bd429, false},
{0x3e5cb1711669fb, false},
{0x226d8de57a9959, false},
{0x3c8de80aaf5835, false},
{0x2026a59efb219b, false},
{0x39dfa4d13fb231, false},
{0x3143d0464b3299, false},
}
func TestPolIrreducible(t *testing.T) {
for _, test := range polIrredTests {
Assert(t, test.f.Irreducible() == test.irred,
"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
test.f, test.f.Irreducible(), test.irred)
}
}
func BenchmarkPolIrreducible(b *testing.B) {
// find first irreducible polynomial
var pol chunker.Pol
for _, test := range polIrredTests {
if test.irred {
pol = test.f
break
}
}
for i := 0; i < b.N; i++ {
Assert(b, pol.Irreducible(),
"Irreducibility test for Polynomial %v failed", pol)
}
}
var polGCDTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
gcd chunker.Pol
}{
{10, 50, 2},
{0, 1, 1},
{
parseBin("101101001"),
parseBin("1010"),
parseBin("1"),
},
{2, 2, 2},
{
parseBin("1010"),
parseBin("11"),
parseBin("11"),
},
{
0x8000000000000000,
0x8000000000000000,
0x8000000000000000,
},
{
parseBin("1100"),
parseBin("101"),
parseBin("11"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("10011"),
},
{
0x3DA3358B4DC173,
0x3DA3358B4DC173,
0x3DA3358B4DC173,
},
{
0x3DA3358B4DC173,
0x230d2259defd,
1,
},
{
0x230d2259defd,
0x51b492b3eff2,
parseBin("10011"),
},
}
func TestPolGCD(t *testing.T) {
for i, test := range polGCDTests {
gcd := test.f1.GCD(test.f2)
Assert(t, test.gcd == gcd,
"GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
gcd = test.f2.GCD(test.f1)
Assert(t, test.gcd == gcd,
"GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
}
}
var polMulModTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
g chunker.Pol
mod chunker.Pol
}{
{
0x1230,
0x230,
0x55,
0x22,
},
{
0x0eae8c07dbbb3026,
0xd5d6db9de04771de,
0xdd2bda3b77c9,
0x425ae8595b7a,
},
}
func TestPolMulMod(t *testing.T) {
for i, test := range polMulModTests {
mod := test.f1.MulMod(test.f2, test.g)
Assert(t, mod == test.mod,
"MulMod test %d (%+v) failed: got %v, wanted %v",
i, test, mod, test.mod)
}
}

View File

@ -16,7 +16,6 @@ import (
coreapi "github.com/ipfs/go-ipfs/core/coreapi"
coreiface "github.com/ipfs/go-ipfs/core/coreapi/interface"
"github.com/ipfs/go-ipfs/importer"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
dag "github.com/ipfs/go-ipfs/merkledag"
dagutils "github.com/ipfs/go-ipfs/merkledag/utils"
path "github.com/ipfs/go-ipfs/path"
@ -25,6 +24,7 @@ import (
humanize "gx/ipfs/QmPSBJL4momYnE7DcUyk2DVhD6rH488ZmHBGLbxNdhU44K/go-humanize"
routing "gx/ipfs/QmTiWLZ6Fo5j4KcTVutZJ5KWRRJrbxzmxA4td8NfEdrPh7/go-libp2p-routing"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
multibase "gx/ipfs/QmexBtiTTEwwn42Yi6ouKt6VqzpA6wjJgiW1oh9VfaRrup/go-multibase"
@ -58,7 +58,7 @@ func (i *gatewayHandler) newDagFromReader(r io.Reader) (ipld.Node, error) {
// return ufs.AddFromReader(i.node, r.Body)
return importer.BuildDagFromReader(
i.node.DAG,
chunk.DefaultSplitter(r))
chunker.DefaultSplitter(r))
}
// TODO(btc): break this apart into separate handlers using a more expressive muxer

View File

@ -15,7 +15,6 @@ import (
core "github.com/ipfs/go-ipfs/core"
"github.com/ipfs/go-ipfs/exchange/offline"
balanced "github.com/ipfs/go-ipfs/importer/balanced"
"github.com/ipfs/go-ipfs/importer/chunk"
ihelper "github.com/ipfs/go-ipfs/importer/helpers"
trickle "github.com/ipfs/go-ipfs/importer/trickle"
dag "github.com/ipfs/go-ipfs/merkledag"
@ -27,6 +26,7 @@ import (
ds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore"
syncds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore/sync"
logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
files "gx/ipfs/QmceUdzxkimdYsgtX733uNgzf1DLHyBKN6ehGSp85ayppM/go-ipfs-cmdkit/files"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
@ -134,7 +134,7 @@ func (adder *Adder) SetMfsRoot(r *mfs.Root) {
// Constructs a node from reader's data, and adds it. Doesn't pin.
func (adder *Adder) add(reader io.Reader) (ipld.Node, error) {
chnk, err := chunk.FromString(reader, adder.Chunker)
chnk, err := chunker.FromString(reader, adder.Chunker)
if err != nil {
return nil, err
}

View File

@ -11,7 +11,6 @@ import (
core "github.com/ipfs/go-ipfs/core"
offline "github.com/ipfs/go-ipfs/exchange/offline"
importer "github.com/ipfs/go-ipfs/importer"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
merkledag "github.com/ipfs/go-ipfs/merkledag"
ft "github.com/ipfs/go-ipfs/unixfs"
uio "github.com/ipfs/go-ipfs/unixfs/io"
@ -19,6 +18,7 @@ import (
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
ds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore"
dssync "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore/sync"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
@ -37,7 +37,7 @@ func TestMetadata(t *testing.T) {
data := make([]byte, 1000)
u.NewTimeSeededRand().Read(data)
r := bytes.NewReader(data)
nd, err := importer.BuildDagFromReader(ds, chunk.DefaultSplitter(r))
nd, err := importer.BuildDagFromReader(ds, chunker.DefaultSplitter(r))
if err != nil {
t.Fatal(err)
}

View File

@ -17,12 +17,12 @@ import (
coreunix "github.com/ipfs/go-ipfs/core/coreunix"
coremock "github.com/ipfs/go-ipfs/core/mock"
importer "github.com/ipfs/go-ipfs/importer"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
dag "github.com/ipfs/go-ipfs/merkledag"
uio "github.com/ipfs/go-ipfs/unixfs/io"
ci "gx/ipfs/QmVvkK7s5imCiq3JVbL3pGfnhcCnf3LrFJPF4GE2sAoGZf/go-testutil/ci"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
ci "gx/ipfs/QmVvkK7s5imCiq3JVbL3pGfnhcCnf3LrFJPF4GE2sAoGZf/go-testutil/ci"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
fstest "gx/ipfs/QmaFNtBAXX4nVMQWbUqNysXyhevUj1k4B1y5uS45LC7Vw9/fuse/fs/fstestutil"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
@ -37,7 +37,7 @@ func randObj(t *testing.T, nd *core.IpfsNode, size int64) (ipld.Node, []byte) {
buf := make([]byte, size)
u.NewTimeSeededRand().Read(buf)
read := bytes.NewReader(buf)
obj, err := importer.BuildTrickleDagFromReader(nd.DAG, chunk.DefaultSplitter(read))
obj, err := importer.BuildTrickleDagFromReader(nd.DAG, chunker.DefaultSplitter(read))
if err != nil {
t.Fatal(err)
}

View File

@ -9,19 +9,19 @@ import (
mrand "math/rand"
"testing"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
h "github.com/ipfs/go-ipfs/importer/helpers"
dag "github.com/ipfs/go-ipfs/merkledag"
mdtest "github.com/ipfs/go-ipfs/merkledag/test"
uio "github.com/ipfs/go-ipfs/unixfs/io"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
// TODO: extract these tests and more as a generic layout test suite
func buildTestDag(ds ipld.DAGService, spl chunk.Splitter) (*dag.ProtoNode, error) {
func buildTestDag(ds ipld.DAGService, spl chunker.Splitter) (*dag.ProtoNode, error) {
dbp := h.DagBuilderParams{
Dagserv: ds,
Maxlinks: h.DefaultLinksPerBlock,
@ -40,7 +40,7 @@ func getTestDag(t *testing.T, ds ipld.DAGService, size int64, blksize int64) (*d
u.NewTimeSeededRand().Read(data)
r := bytes.NewReader(data)
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(r, blksize))
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(r, blksize))
if err != nil {
t.Fatal(err)
}
@ -74,7 +74,7 @@ func testFileConsistency(t *testing.T, nbytes int64, blksize int64) {
}
func TestBuilderConsistency(t *testing.T) {
testFileConsistency(t, 100000, chunk.DefaultBlockSize)
testFileConsistency(t, 100000, chunker.DefaultBlockSize)
}
func TestNoChunking(t *testing.T) {

View File

@ -1,79 +0,0 @@
package chunk
import (
"errors"
"fmt"
"io"
"strconv"
"strings"
)
// FromString returns a Splitter depending on the given string:
// it supports "default" (""), "size-{size}", "rabin", "rabin-{blocksize}" and
// "rabin-{min}-{avg}-{max}".
func FromString(r io.Reader, chunker string) (Splitter, error) {
switch {
case chunker == "" || chunker == "default":
return DefaultSplitter(r), nil
case strings.HasPrefix(chunker, "size-"):
sizeStr := strings.Split(chunker, "-")[1]
size, err := strconv.Atoi(sizeStr)
if err != nil {
return nil, err
}
return NewSizeSplitter(r, int64(size)), nil
case strings.HasPrefix(chunker, "rabin"):
return parseRabinString(r, chunker)
default:
return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
}
}
func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
parts := strings.Split(chunker, "-")
switch len(parts) {
case 1:
return NewRabin(r, uint64(DefaultBlockSize)), nil
case 2:
size, err := strconv.Atoi(parts[1])
if err != nil {
return nil, err
}
return NewRabin(r, uint64(size)), nil
case 4:
sub := strings.Split(parts[1], ":")
if len(sub) > 1 && sub[0] != "min" {
return nil, errors.New("first label must be min")
}
min, err := strconv.Atoi(sub[len(sub)-1])
if err != nil {
return nil, err
}
sub = strings.Split(parts[2], ":")
if len(sub) > 1 && sub[0] != "avg" {
log.Error("sub == ", sub)
return nil, errors.New("second label must be avg")
}
avg, err := strconv.Atoi(sub[len(sub)-1])
if err != nil {
return nil, err
}
sub = strings.Split(parts[3], ":")
if len(sub) > 1 && sub[0] != "max" {
return nil, errors.New("final label must be max")
}
max, err := strconv.Atoi(sub[len(sub)-1])
if err != nil {
return nil, err
}
return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil
default:
return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
}
}

View File

@ -1,54 +0,0 @@
package chunk
import (
"hash/fnv"
"io"
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/whyrusleeping/chunker"
)
// IpfsRabinPoly is the irreducible polynomial of degree 53 used by for Rabin.
var IpfsRabinPoly = chunker.Pol(17437180132763653)
// Rabin implements the Splitter interface and splits content with Rabin
// fingerprints.
type Rabin struct {
r *chunker.Chunker
reader io.Reader
}
// NewRabin creates a new Rabin splitter with the given
// average block size.
func NewRabin(r io.Reader, avgBlkSize uint64) *Rabin {
min := avgBlkSize / 3
max := avgBlkSize + (avgBlkSize / 2)
return NewRabinMinMax(r, min, avgBlkSize, max)
}
// NewRabinMinMax returns a new Rabin splitter which uses
// the given min, average and max block sizes.
func NewRabinMinMax(r io.Reader, min, avg, max uint64) *Rabin {
h := fnv.New32a()
ch := chunker.New(r, IpfsRabinPoly, h, avg, min, max)
return &Rabin{
r: ch,
reader: r,
}
}
// NextBytes reads the next bytes from the reader and returns a slice.
func (r *Rabin) NextBytes() ([]byte, error) {
ch, err := r.r.Next()
if err != nil {
return nil, err
}
return ch.Data, nil
}
// Reader returns the io.Reader associated to this Splitter.
func (r *Rabin) Reader() io.Reader {
return r.reader
}

View File

@ -1,81 +0,0 @@
package chunk
import (
"bytes"
"fmt"
"io"
"testing"
util "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
blocks "gx/ipfs/Qmej7nf81hi2x2tvjRBF3mcp74sQyuDH4VMYDGd1YtXjb2/go-block-format"
)
func TestRabinChunking(t *testing.T) {
data := make([]byte, 1024*1024*16)
util.NewTimeSeededRand().Read(data)
r := NewRabin(bytes.NewReader(data), 1024*256)
var chunks [][]byte
for {
chunk, err := r.NextBytes()
if err != nil {
if err == io.EOF {
break
}
t.Fatal(err)
}
chunks = append(chunks, chunk)
}
fmt.Printf("average block size: %d\n", len(data)/len(chunks))
unchunked := bytes.Join(chunks, nil)
if !bytes.Equal(unchunked, data) {
fmt.Printf("%d %d\n", len(unchunked), len(data))
t.Fatal("data was chunked incorrectly")
}
}
func chunkData(t *testing.T, data []byte) map[string]blocks.Block {
r := NewRabin(bytes.NewReader(data), 1024*256)
blkmap := make(map[string]blocks.Block)
for {
blk, err := r.NextBytes()
if err != nil {
if err == io.EOF {
break
}
t.Fatal(err)
}
b := blocks.NewBlock(blk)
blkmap[b.Cid().KeyString()] = b
}
return blkmap
}
func TestRabinChunkReuse(t *testing.T) {
data := make([]byte, 1024*1024*16)
util.NewTimeSeededRand().Read(data)
ch1 := chunkData(t, data[1000:])
ch2 := chunkData(t, data)
var extra int
for k := range ch2 {
_, ok := ch1[k]
if !ok {
extra++
}
}
if extra > 2 {
t.Log("too many spare chunks made")
}
}

View File

@ -1,105 +0,0 @@
// Package chunk implements streaming block splitters.
// Splitters read data from a reader and provide byte slices (chunks)
// The size and contents of these slices depend on the splitting method
// used.
package chunk
import (
"io"
logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log"
mpool "gx/ipfs/QmWBug6eBS7AxRdCDVuSY5CnSit7cS2XnPFYJWqWDumhCG/go-msgio/mpool"
)
var log = logging.Logger("chunk")
// DefaultBlockSize is the chunk size that splitters produce (or aim to).
var DefaultBlockSize int64 = 1024 * 256
// A Splitter reads bytes from a Reader and creates "chunks" (byte slices)
// that can be used to build DAG nodes.
type Splitter interface {
Reader() io.Reader
NextBytes() ([]byte, error)
}
// SplitterGen is a splitter generator, given a reader.
type SplitterGen func(r io.Reader) Splitter
// DefaultSplitter returns a SizeSplitter with the DefaultBlockSize.
func DefaultSplitter(r io.Reader) Splitter {
return NewSizeSplitter(r, DefaultBlockSize)
}
// SizeSplitterGen returns a SplitterGen function which will create
// a splitter with the given size when called.
func SizeSplitterGen(size int64) SplitterGen {
return func(r io.Reader) Splitter {
return NewSizeSplitter(r, size)
}
}
// Chan returns a channel that receives each of the chunks produced
// by a splitter, along with another one for errors.
func Chan(s Splitter) (<-chan []byte, <-chan error) {
out := make(chan []byte)
errs := make(chan error, 1)
go func() {
defer close(out)
defer close(errs)
// all-chunks loop (keep creating chunks)
for {
b, err := s.NextBytes()
if err != nil {
errs <- err
return
}
out <- b
}
}()
return out, errs
}
type sizeSplitterv2 struct {
r io.Reader
size uint32
err error
}
// NewSizeSplitter returns a new size-based Splitter with the given block size.
func NewSizeSplitter(r io.Reader, size int64) Splitter {
return &sizeSplitterv2{
r: r,
size: uint32(size),
}
}
// NextBytes produces a new chunk.
func (ss *sizeSplitterv2) NextBytes() ([]byte, error) {
if ss.err != nil {
return nil, ss.err
}
full := mpool.ByteSlicePool.Get(ss.size).([]byte)[:ss.size]
n, err := io.ReadFull(ss.r, full)
switch err {
case io.ErrUnexpectedEOF:
ss.err = io.EOF
small := make([]byte, n)
copy(small, full)
mpool.ByteSlicePool.Put(ss.size, full)
return small, nil
case nil:
return full, nil
default:
mpool.ByteSlicePool.Put(ss.size, full)
return nil, err
}
}
// Reader returns the io.Reader associated to this Splitter.
func (ss *sizeSplitterv2) Reader() io.Reader {
return ss.r
}

View File

@ -1,120 +0,0 @@
package chunk
import (
"bytes"
"io"
"testing"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
)
func randBuf(t *testing.T, size int) []byte {
buf := make([]byte, size)
if _, err := u.NewTimeSeededRand().Read(buf); err != nil {
t.Fatal("failed to read enough randomness")
}
return buf
}
func copyBuf(buf []byte) []byte {
cpy := make([]byte, len(buf))
copy(cpy, buf)
return cpy
}
func TestSizeSplitterOverAllocate(t *testing.T) {
max := 1000
r := bytes.NewReader(randBuf(t, max))
chunksize := int64(1024 * 256)
splitter := NewSizeSplitter(r, chunksize)
chunk, err := splitter.NextBytes()
if err != nil {
t.Fatal(err)
}
if cap(chunk) > len(chunk) {
t.Fatal("chunk capacity too large")
}
}
func TestSizeSplitterIsDeterministic(t *testing.T) {
if testing.Short() {
t.SkipNow()
}
test := func() {
bufR := randBuf(t, 10000000) // crank this up to satisfy yourself.
bufA := copyBuf(bufR)
bufB := copyBuf(bufR)
chunksA, _ := Chan(DefaultSplitter(bytes.NewReader(bufA)))
chunksB, _ := Chan(DefaultSplitter(bytes.NewReader(bufB)))
for n := 0; ; n++ {
a, moreA := <-chunksA
b, moreB := <-chunksB
if !moreA {
if moreB {
t.Fatal("A ended, B didnt.")
}
return
}
if !bytes.Equal(a, b) {
t.Fatalf("chunk %d not equal", n)
}
}
}
for run := 0; run < 1; run++ { // crank this up to satisfy yourself.
test()
}
}
func TestSizeSplitterFillsChunks(t *testing.T) {
if testing.Short() {
t.SkipNow()
}
max := 10000000
b := randBuf(t, max)
r := &clipReader{r: bytes.NewReader(b), size: 4000}
chunksize := int64(1024 * 256)
c, _ := Chan(NewSizeSplitter(r, chunksize))
sofar := 0
whole := make([]byte, max)
for chunk := range c {
bc := b[sofar : sofar+len(chunk)]
if !bytes.Equal(bc, chunk) {
t.Fatalf("chunk not correct: (sofar: %d) %d != %d, %v != %v", sofar, len(bc), len(chunk), bc[:100], chunk[:100])
}
copy(whole[sofar:], chunk)
sofar += len(chunk)
if sofar != max && len(chunk) < int(chunksize) {
t.Fatal("sizesplitter split at a smaller size")
}
}
if !bytes.Equal(b, whole) {
t.Fatal("splitter did not split right")
}
}
type clipReader struct {
size int
r io.Reader
}
func (s *clipReader) Read(buf []byte) (int, error) {
// clip the incoming buffer to produce smaller chunks
if len(buf) > s.size {
buf = buf[:s.size]
}
return s.r.Read(buf)
}

View File

@ -5,10 +5,10 @@ import (
"io"
"os"
"github.com/ipfs/go-ipfs/importer/chunk"
dag "github.com/ipfs/go-ipfs/merkledag"
ft "github.com/ipfs/go-ipfs/unixfs"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
files "gx/ipfs/QmceUdzxkimdYsgtX733uNgzf1DLHyBKN6ehGSp85ayppM/go-ipfs-cmdkit/files"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
@ -18,7 +18,7 @@ import (
// efficiently create unixfs dag trees
type DagBuilderHelper struct {
dserv ipld.DAGService
spl chunk.Splitter
spl chunker.Splitter
recvdErr error
rawLeaves bool
nextData []byte // the next item to return.
@ -30,7 +30,7 @@ type DagBuilderHelper struct {
}
// DagBuilderParams wraps configuration options to create a DagBuilderHelper
// from a chunk.Splitter.
// from a chunker.Splitter.
type DagBuilderParams struct {
// Maximum number of links per intermediate node
Maxlinks int
@ -51,8 +51,8 @@ type DagBuilderParams struct {
}
// New generates a new DagBuilderHelper from the given params and a given
// chunk.Splitter as data source.
func (dbp *DagBuilderParams) New(spl chunk.Splitter) *DagBuilderHelper {
// chunker.Splitter as data source.
func (dbp *DagBuilderParams) New(spl chunker.Splitter) *DagBuilderHelper {
db := &DagBuilderHelper{
dserv: dbp.Dagserv,
spl: spl,

View File

@ -6,11 +6,11 @@ import (
"fmt"
"os"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
"gx/ipfs/QmceUdzxkimdYsgtX733uNgzf1DLHyBKN6ehGSp85ayppM/go-ipfs-cmdkit/files"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
bal "github.com/ipfs/go-ipfs/importer/balanced"
"github.com/ipfs/go-ipfs/importer/chunk"
h "github.com/ipfs/go-ipfs/importer/helpers"
trickle "github.com/ipfs/go-ipfs/importer/trickle"
)
@ -33,12 +33,12 @@ func BuildDagFromFile(fpath string, ds ipld.DAGService) (ipld.Node, error) {
}
defer f.Close()
return BuildDagFromReader(ds, chunk.DefaultSplitter(f))
return BuildDagFromReader(ds, chunker.DefaultSplitter(f))
}
// BuildDagFromReader creates a DAG given a DAGService and a Splitter
// implementation (Splitters are io.Readers), using a Balanced layout.
func BuildDagFromReader(ds ipld.DAGService, spl chunk.Splitter) (ipld.Node, error) {
func BuildDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) {
dbp := h.DagBuilderParams{
Dagserv: ds,
Maxlinks: h.DefaultLinksPerBlock,
@ -49,7 +49,7 @@ func BuildDagFromReader(ds ipld.DAGService, spl chunk.Splitter) (ipld.Node, erro
// BuildTrickleDagFromReader creates a DAG given a DAGService and a Splitter
// implementation (Splitters are io.Readers), using a Trickle Layout.
func BuildTrickleDagFromReader(ds ipld.DAGService, spl chunk.Splitter) (ipld.Node, error) {
func BuildTrickleDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) {
dbp := h.DagBuilderParams{
Dagserv: ds,
Maxlinks: h.DefaultLinksPerBlock,

View File

@ -7,18 +7,18 @@ import (
"io/ioutil"
"testing"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
mdtest "github.com/ipfs/go-ipfs/merkledag/test"
uio "github.com/ipfs/go-ipfs/unixfs/io"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
func getBalancedDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) {
ds := mdtest.Mock()
r := io.LimitReader(u.NewTimeSeededRand(), size)
nd, err := BuildDagFromReader(ds, chunk.NewSizeSplitter(r, blksize))
nd, err := BuildDagFromReader(ds, chunker.NewSizeSplitter(r, blksize))
if err != nil {
t.Fatal(err)
}
@ -28,7 +28,7 @@ func getBalancedDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DA
func getTrickleDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) {
ds := mdtest.Mock()
r := io.LimitReader(u.NewTimeSeededRand(), size)
nd, err := BuildTrickleDagFromReader(ds, chunk.NewSizeSplitter(r, blksize))
nd, err := BuildTrickleDagFromReader(ds, chunker.NewSizeSplitter(r, blksize))
if err != nil {
t.Fatal(err)
}
@ -41,7 +41,7 @@ func TestBalancedDag(t *testing.T) {
u.NewTimeSeededRand().Read(buf)
r := bytes.NewReader(buf)
nd, err := BuildDagFromReader(ds, chunk.DefaultSplitter(r))
nd, err := BuildDagFromReader(ds, chunker.DefaultSplitter(r))
if err != nil {
t.Fatal(err)
}
@ -84,7 +84,7 @@ func BenchmarkTrickleReadSmallBlock(b *testing.B) {
func BenchmarkBalancedReadFull(b *testing.B) {
b.StopTimer()
nbytes := int64(10000000)
nd, ds := getBalancedDag(b, nbytes, chunk.DefaultBlockSize)
nd, ds := getBalancedDag(b, nbytes, chunker.DefaultBlockSize)
b.SetBytes(nbytes)
b.StartTimer()
@ -94,7 +94,7 @@ func BenchmarkBalancedReadFull(b *testing.B) {
func BenchmarkTrickleReadFull(b *testing.B) {
b.StopTimer()
nbytes := int64(10000000)
nd, ds := getTrickleDag(b, nbytes, chunk.DefaultBlockSize)
nd, ds := getTrickleDag(b, nbytes, chunker.DefaultBlockSize)
b.SetBytes(nbytes)
b.StartTimer()

View File

@ -9,7 +9,6 @@ import (
mrand "math/rand"
"testing"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
h "github.com/ipfs/go-ipfs/importer/helpers"
merkledag "github.com/ipfs/go-ipfs/merkledag"
mdtest "github.com/ipfs/go-ipfs/merkledag/test"
@ -17,6 +16,7 @@ import (
uio "github.com/ipfs/go-ipfs/unixfs/io"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
@ -32,7 +32,7 @@ func runBothSubtests(t *testing.T, tfunc func(*testing.T, UseRawLeaves)) {
t.Run("leaves=Raw", func(t *testing.T) { tfunc(t, RawLeaves) })
}
func buildTestDag(ds ipld.DAGService, spl chunk.Splitter, rawLeaves UseRawLeaves) (*merkledag.ProtoNode, error) {
func buildTestDag(ds ipld.DAGService, spl chunker.Splitter, rawLeaves UseRawLeaves) (*merkledag.ProtoNode, error) {
dbp := h.DagBuilderParams{
Dagserv: ds,
Maxlinks: h.DefaultLinksPerBlock,
@ -66,10 +66,10 @@ func testSizeBasedSplit(t *testing.T, rawLeaves UseRawLeaves) {
if testing.Short() {
t.SkipNow()
}
bs := chunk.SizeSplitterGen(512)
bs := chunker.SizeSplitterGen(512)
testFileConsistency(t, bs, 32*512, rawLeaves)
bs = chunk.SizeSplitterGen(4096)
bs = chunker.SizeSplitterGen(4096)
testFileConsistency(t, bs, 32*4096, rawLeaves)
// Uneven offset
@ -82,7 +82,7 @@ func dup(b []byte) []byte {
return o
}
func testFileConsistency(t *testing.T, bs chunk.SplitterGen, nbytes int, rawLeaves UseRawLeaves) {
func testFileConsistency(t *testing.T, bs chunker.SplitterGen, nbytes int, rawLeaves UseRawLeaves) {
should := make([]byte, nbytes)
u.NewTimeSeededRand().Read(should)
@ -119,7 +119,7 @@ func testBuilderConsistency(t *testing.T, rawLeaves UseRawLeaves) {
io.CopyN(buf, u.NewTimeSeededRand(), int64(nbytes))
should := dup(buf.Bytes())
dagserv := mdtest.Mock()
nd, err := buildTestDag(dagserv, chunk.DefaultSplitter(buf), rawLeaves)
nd, err := buildTestDag(dagserv, chunker.DefaultSplitter(buf), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -156,7 +156,7 @@ func TestIndirectBlocks(t *testing.T) {
}
func testIndirectBlocks(t *testing.T, rawLeaves UseRawLeaves) {
splitter := chunk.SizeSplitterGen(512)
splitter := chunker.SizeSplitterGen(512)
nbytes := 1024 * 1024
buf := make([]byte, nbytes)
u.NewTimeSeededRand().Read(buf)
@ -195,7 +195,7 @@ func testSeekingBasic(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 512), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 512), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -236,7 +236,7 @@ func testSeekToBegin(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -284,7 +284,7 @@ func testSeekToAlmostBegin(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -332,7 +332,7 @@ func testSeekEnd(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -362,7 +362,7 @@ func testSeekEndSingleBlockFile(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 5000), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 5000), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -392,7 +392,7 @@ func testSeekingStress(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 1000), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 1000), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -441,7 +441,7 @@ func testSeekingConsistency(t *testing.T, rawLeaves UseRawLeaves) {
read := bytes.NewReader(should)
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -489,7 +489,7 @@ func testAppend(t *testing.T, rawLeaves UseRawLeaves) {
// Reader for half the bytes
read := bytes.NewReader(should[:nbytes/2])
ds := mdtest.Mock()
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -503,7 +503,7 @@ func testAppend(t *testing.T, rawLeaves UseRawLeaves) {
r := bytes.NewReader(should[nbytes/2:])
ctx := context.Background()
nnode, err := Append(ctx, nd, dbp.New(chunk.NewSizeSplitter(r, 500)))
nnode, err := Append(ctx, nd, dbp.New(chunker.NewSizeSplitter(r, 500)))
if err != nil {
t.Fatal(err)
}
@ -548,7 +548,7 @@ func testMultipleAppends(t *testing.T, rawLeaves UseRawLeaves) {
u.NewTimeSeededRand().Read(should)
read := bytes.NewReader(nil)
nd, err := buildTestDag(ds, chunk.NewSizeSplitter(read, 500), rawLeaves)
nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves)
if err != nil {
t.Fatal(err)
}
@ -559,7 +559,7 @@ func testMultipleAppends(t *testing.T, rawLeaves UseRawLeaves) {
RawLeaves: bool(rawLeaves),
}
spl := chunk.SizeSplitterGen(500)
spl := chunker.SizeSplitterGen(500)
ctx := context.Background()
for i := 0; i < len(should); i++ {
@ -609,7 +609,7 @@ func TestAppendSingleBytesToEmpty(t *testing.T) {
Maxlinks: 4,
}
spl := chunk.SizeSplitterGen(500)
spl := chunker.SizeSplitterGen(500)
ctx := context.Background()
nnode, err := Append(ctx, nd, dbp.New(spl(bytes.NewReader(data[:1]))))

View File

@ -17,13 +17,13 @@ import (
bstest "github.com/ipfs/go-ipfs/blockservice/test"
offline "github.com/ipfs/go-ipfs/exchange/offline"
imp "github.com/ipfs/go-ipfs/importer"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
. "github.com/ipfs/go-ipfs/merkledag"
mdpb "github.com/ipfs/go-ipfs/merkledag/pb"
dstest "github.com/ipfs/go-ipfs/merkledag/test"
uio "github.com/ipfs/go-ipfs/unixfs/io"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
blocks "gx/ipfs/Qmej7nf81hi2x2tvjRBF3mcp74sQyuDH4VMYDGd1YtXjb2/go-block-format"
@ -136,7 +136,7 @@ func runBatchFetchTest(t *testing.T, read io.Reader) {
dagservs = append(dagservs, NewDAGService(bsi))
}
spl := chunk.NewSizeSplitter(read, 512)
spl := chunker.NewSizeSplitter(read, 512)
root, err := imp.BuildDagFromReader(dagservs[0], spl)
if err != nil {
@ -228,7 +228,7 @@ func TestFetchGraph(t *testing.T) {
}
read := io.LimitReader(u.NewTimeSeededRand(), 1024*32)
root, err := imp.BuildDagFromReader(dservs[0], chunk.NewSizeSplitter(read, 512))
root, err := imp.BuildDagFromReader(dservs[0], chunker.NewSizeSplitter(read, 512))
if err != nil {
t.Fatal(err)
}
@ -254,7 +254,7 @@ func TestEnumerateChildren(t *testing.T) {
ds := NewDAGService(bsi[0])
read := io.LimitReader(u.NewTimeSeededRand(), 1024*1024)
root, err := imp.BuildDagFromReader(ds, chunk.NewSizeSplitter(read, 512))
root, err := imp.BuildDagFromReader(ds, chunker.NewSizeSplitter(read, 512))
if err != nil {
t.Fatal(err)
}

View File

@ -5,11 +5,11 @@ import (
"fmt"
"sync"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
dag "github.com/ipfs/go-ipfs/merkledag"
ft "github.com/ipfs/go-ipfs/unixfs"
mod "github.com/ipfs/go-ipfs/unixfs/mod"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
@ -82,7 +82,7 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) {
return nil, fmt.Errorf("mode not supported")
}
dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dserv, chunk.DefaultSplitter)
dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dserv, chunker.DefaultSplitter)
if err != nil {
return nil, err
}

View File

@ -18,7 +18,6 @@ import (
bserv "github.com/ipfs/go-ipfs/blockservice"
offline "github.com/ipfs/go-ipfs/exchange/offline"
importer "github.com/ipfs/go-ipfs/importer"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
dag "github.com/ipfs/go-ipfs/merkledag"
"github.com/ipfs/go-ipfs/path"
ft "github.com/ipfs/go-ipfs/unixfs"
@ -27,6 +26,7 @@ import (
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
ds "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore"
dssync "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore/sync"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
@ -48,7 +48,7 @@ func getRandFile(t *testing.T, ds ipld.DAGService, size int64) ipld.Node {
}
func fileNodeFromReader(t *testing.T, ds ipld.DAGService, r io.Reader) ipld.Node {
nd, err := importer.BuildDagFromReader(ds, chunk.DefaultSplitter(r))
nd, err := importer.BuildDagFromReader(ds, chunker.DefaultSplitter(r))
if err != nil {
t.Fatal(err)
}

View File

@ -521,6 +521,12 @@
"hash": "Qmb3jLEFAQrqdVgWUajqEyuuDoavkSq1XQXz6tWdFWF995",
"name": "go-ipfs-posinfo",
"version": "0.0.1"
},
{
"author": "hsanjuan",
"hash": "QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq",
"name": "go-ipfs-chunker",
"version": "0.0.2"
}
],
"gxVersion": "0.10.0",

View File

@ -9,13 +9,13 @@ import (
"strings"
importer "github.com/ipfs/go-ipfs/importer"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
dag "github.com/ipfs/go-ipfs/merkledag"
dagutil "github.com/ipfs/go-ipfs/merkledag/utils"
path "github.com/ipfs/go-ipfs/path"
uio "github.com/ipfs/go-ipfs/unixfs/io"
logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
@ -63,7 +63,7 @@ func ImportTar(ctx context.Context, r io.Reader, ds ipld.DAGService) (*dag.Proto
header.SetData(headerBytes)
if h.Size > 0 {
spl := chunk.NewRabin(tr, uint64(chunk.DefaultBlockSize))
spl := chunker.NewRabin(tr, uint64(chunker.DefaultBlockSize))
nd, err := importer.BuildDagFromReader(ds, spl)
if err != nil {
return nil, err

View File

@ -8,13 +8,13 @@ import (
"errors"
"io"
chunk "github.com/ipfs/go-ipfs/importer/chunk"
help "github.com/ipfs/go-ipfs/importer/helpers"
trickle "github.com/ipfs/go-ipfs/importer/trickle"
mdag "github.com/ipfs/go-ipfs/merkledag"
ft "github.com/ipfs/go-ipfs/unixfs"
uio "github.com/ipfs/go-ipfs/unixfs/io"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
@ -37,7 +37,7 @@ type DagModifier struct {
dagserv ipld.DAGService
curNode ipld.Node
splitter chunk.SplitterGen
splitter chunker.SplitterGen
ctx context.Context
readCancel func()
@ -55,7 +55,7 @@ type DagModifier struct {
// created nodes will be inherted from the passed in node. If the Cid
// version if not 0 raw leaves will also be enabled. The Prefix and
// RawLeaves options can be overridden by changing them after the call.
func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunk.SplitterGen) (*DagModifier, error) {
func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunker.SplitterGen) (*DagModifier, error) {
switch from.(type) {
case *mdag.ProtoNode, *mdag.RawNode:
// ok
@ -126,7 +126,7 @@ func (zr zeroReader) Read(b []byte) (int, error) {
// A small blocksize is chosen to aid in deduplication
func (dm *DagModifier) expandSparse(size int64) error {
r := io.LimitReader(zeroReader{}, size)
spl := chunk.NewSizeSplitter(r, 4096)
spl := chunker.NewSizeSplitter(r, 4096)
nnode, err := dm.appendData(dm.curNode, spl)
if err != nil {
return err
@ -356,7 +356,7 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64, data io.Reader) (*c
}
// appendData appends the blocks from the given chan to the end of this dag
func (dm *DagModifier) appendData(nd ipld.Node, spl chunk.Splitter) (ipld.Node, error) {
func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node, error) {
switch nd := nd.(type) {
case *mdag.ProtoNode, *mdag.RawNode:
dbp := &help.DagBuilderParams{

View File

@ -8,7 +8,6 @@ import (
"io/ioutil"
"testing"
"github.com/ipfs/go-ipfs/importer/chunk"
h "github.com/ipfs/go-ipfs/importer/helpers"
trickle "github.com/ipfs/go-ipfs/importer/trickle"
mdag "github.com/ipfs/go-ipfs/merkledag"
@ -16,15 +15,16 @@ import (
ft "github.com/ipfs/go-ipfs/unixfs"
u "gx/ipfs/QmNiJuT8Ja3hMVpBHXv3Q6dwmperaQ6JjLtpMQgMCD7xvx/go-ipfs-util"
chunker "gx/ipfs/QmWo8jYc19ppG7YoTsrr2kEtLRbARTJho5oNXFTR6B7Peq/go-ipfs-chunker"
mh "gx/ipfs/QmZyZDi491cCNTLfAhwcaDii2Kg4pwKRkhqQzURGDvY6ua/go-multihash"
cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid"
ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format"
)
// SizeSplitterGen creates a generator.
func SizeSplitterGen(size int64) chunk.SplitterGen {
return func(r io.Reader) chunk.Splitter {
return chunk.NewSizeSplitter(r, size)
func SizeSplitterGen(size int64) chunker.SplitterGen {
return func(r io.Reader) chunker.Splitter {
return chunker.NewSizeSplitter(r, size)
}
}