From fdcd015eedcdadbdb9d2b3ea8f03d01afe6dccc2 Mon Sep 17 00:00:00 2001 From: Jeromy Date: Mon, 15 Sep 2014 05:35:31 +0000 Subject: [PATCH] move first data block into top level dag node --- importer/importer.go | 3 ++- importer/importer_test.go | 16 ++++++++++++++++ importer/rabin.go | 11 ++++++++--- merkledag/dagreader.go | 1 + merkledag/merkledag.go | 3 ++- 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/importer/importer.go b/importer/importer.go index 607a989a6..197eaef19 100644 --- a/importer/importer.go +++ b/importer/importer.go @@ -25,7 +25,8 @@ func NewDagFromReader(r io.Reader) (*dag.Node, error) { func NewDagFromReaderWithSplitter(r io.Reader, spl BlockSplitter) (*dag.Node, error) { blkChan := spl.Split(r) - root := &dag.Node{Data: dag.FilePBData()} + first := <-blkChan + root := &dag.Node{Data: dag.FilePBData(first)} for blk := range blkChan { child := &dag.Node{Data: dag.WrapData(blk)} diff --git a/importer/importer_test.go b/importer/importer_test.go index cefe4a9b4..9fb1afa08 100644 --- a/importer/importer_test.go +++ b/importer/importer_test.go @@ -82,3 +82,19 @@ func arrComp(a, b []byte) error { func TestMaybeRabinConsistency(t *testing.T) { testFileConsistency(t, NewMaybeRabin(4096), 256*4096) } + +func TestRabinBlockSize(t *testing.T) { + buf := new(bytes.Buffer) + nbytes := 1024 * 1024 + io.CopyN(buf, rand.Reader, int64(nbytes)) + rab := NewMaybeRabin(4096) + blkch := rab.Split(buf) + + var blocks [][]byte + for b := range blkch { + blocks = append(blocks, b) + } + + fmt.Printf("Avg block size: %d\n", nbytes/len(blocks)) + +} diff --git a/importer/rabin.go b/importer/rabin.go index 4671239ac..3eab5bc9c 100644 --- a/importer/rabin.go +++ b/importer/rabin.go @@ -9,8 +9,10 @@ import ( ) type MaybeRabin struct { - mask int - windowSize int + mask int + windowSize int + MinBlockSize int + MaxBlockSize int } func NewMaybeRabin(avgBlkSize int) *MaybeRabin { @@ -18,6 +20,8 @@ func NewMaybeRabin(avgBlkSize int) *MaybeRabin { rb := new(MaybeRabin) rb.mask = (1 << blkbits) - 1 rb.windowSize = 16 // probably a good number... + rb.MinBlockSize = avgBlkSize / 2 + rb.MaxBlockSize = (avgBlkSize / 2) * 3 return rb } @@ -70,7 +74,8 @@ func (mr *MaybeRabin) Split(r io.Reader) chan []byte { outval := push(i, b) blkbuf.WriteByte(b) rollingHash = (rollingHash*a + int(b) - an*outval) % MOD - if rollingHash&mr.mask == mr.mask { + if (rollingHash&mr.mask == mr.mask && blkbuf.Len() > mr.MinBlockSize) || + blkbuf.Len() >= mr.MaxBlockSize { out <- dup(blkbuf.Bytes()) blkbuf.Reset() } diff --git a/merkledag/dagreader.go b/merkledag/dagreader.go index 967ec63a4..5cf4e238e 100644 --- a/merkledag/dagreader.go +++ b/merkledag/dagreader.go @@ -34,6 +34,7 @@ func NewDagReader(n *Node, serv *DAGService) (io.Reader, error) { node: n, thisData: pb.GetData(), serv: serv, + buf: bytes.NewBuffer(pb.GetData()), }, nil case PBData_Raw: return bytes.NewBuffer(pb.GetData()), nil diff --git a/merkledag/merkledag.go b/merkledag/merkledag.go index accebb708..79530df6d 100644 --- a/merkledag/merkledag.go +++ b/merkledag/merkledag.go @@ -157,10 +157,11 @@ func (n *DAGService) Get(k u.Key) (*Node, error) { return Decoded(b.Data) } -func FilePBData() []byte { +func FilePBData(data []byte) []byte { pbfile := new(PBData) typ := PBData_File pbfile.Type = &typ + pbfile.Data = data data, err := proto.Marshal(pbfile) if err != nil {