From aafbe65a1315286cda90c2af59b464a50a9f9379 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 19 Dec 2017 14:23:31 -0800 Subject: [PATCH 1/4] Don't waste 256KiB buffers on small chunks. License: MIT Signed-off-by: Steven Allen --- importer/chunk/splitting.go | 26 ++++++++++++++++---------- package.json | 6 ++++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/importer/chunk/splitting.go b/importer/chunk/splitting.go index 6fd55e22d..ddd71e969 100644 --- a/importer/chunk/splitting.go +++ b/importer/chunk/splitting.go @@ -5,6 +5,7 @@ import ( "io" logging "gx/ipfs/QmSpJByNKFX1sCsHBEp3R73FL4NF6FnQTEGyNAXHm2GS52/go-log" + mpool "gx/ipfs/QmWBug6eBS7AxRdCDVuSY5CnSit7cS2XnPFYJWqWDumhCG/go-msgio/mpool" ) var log = logging.Logger("chunk") @@ -51,14 +52,14 @@ func Chan(s Splitter) (<-chan []byte, <-chan error) { type sizeSplitterv2 struct { r io.Reader - size int64 + size uint32 err error } func NewSizeSplitter(r io.Reader, size int64) Splitter { return &sizeSplitterv2{ r: r, - size: size, + size: uint32(size), } } @@ -66,17 +67,22 @@ func (ss *sizeSplitterv2) NextBytes() ([]byte, error) { if ss.err != nil { return nil, ss.err } - buf := make([]byte, ss.size) - n, err := io.ReadFull(ss.r, buf) - if err == io.ErrUnexpectedEOF { + + full := mpool.ByteSlicePool.Get(ss.size).([]byte)[:ss.size] + n, err := io.ReadFull(ss.r, full) + switch err { + case io.ErrUnexpectedEOF: ss.err = io.EOF - err = nil - } - if err != nil { + small := make([]byte, n) + copy(small, full) + mpool.ByteSlicePool.Put(ss.size, full) + return small, nil + case nil: + return full, nil + default: + mpool.ByteSlicePool.Put(ss.size, full) return nil, err } - - return buf[:n], nil } func (ss *sizeSplitterv2) Reader() io.Reader { diff --git a/package.json b/package.json index bf9b3142d..b4ecb2591 100644 --- a/package.json +++ b/package.json @@ -509,6 +509,12 @@ "hash": "QmYmhgAcvmDGXct1qBvc1kz9BxQSit1XBrTeiGZp2FvRyn", "name": "go-libp2p-blankhost", "version": "0.2.3" + }, + { + "author": "jbenet", + "hash": "QmWBug6eBS7AxRdCDVuSY5CnSit7cS2XnPFYJWqWDumhCG", + "name": "go-msgio", + "version": "0.0.3" } ], "gxVersion": "0.10.0", From 101e1c3cb1eb2419ece106325fa19a0291f9bcb7 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 19 Dec 2017 14:58:56 -0800 Subject: [PATCH 2/4] take adder by pointer, not by value... License: MIT Signed-off-by: Steven Allen --- core/coreunix/add.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/coreunix/add.go b/core/coreunix/add.go index 1acdba22e..88f5ab92f 100644 --- a/core/coreunix/add.go +++ b/core/coreunix/add.go @@ -131,7 +131,7 @@ func (adder *Adder) SetMfsRoot(r *mfs.Root) { } // Constructs a node from reader's data, and adds it. Doesn't pin. -func (adder Adder) add(reader io.Reader) (node.Node, error) { +func (adder *Adder) add(reader io.Reader) (node.Node, error) { chnk, err := chunk.FromString(reader, adder.Chunker) if err != nil { return nil, err From 414b0ff1ba5d4a7d992e34cc0289d713c4c2d4d7 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 19 Dec 2017 15:49:33 -0800 Subject: [PATCH 3/4] use DefaultSplitter function where appropriate License: MIT Signed-off-by: Steven Allen --- importer/chunk/parse.go | 2 +- importer/importer.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/importer/chunk/parse.go b/importer/chunk/parse.go index 55e96cc04..f4cc56290 100644 --- a/importer/chunk/parse.go +++ b/importer/chunk/parse.go @@ -11,7 +11,7 @@ import ( func FromString(r io.Reader, chunker string) (Splitter, error) { switch { case chunker == "" || chunker == "default": - return NewSizeSplitter(r, DefaultBlockSize), nil + return DefaultSplitter(r), nil case strings.HasPrefix(chunker, "size-"): sizeStr := strings.Split(chunker, "-")[1] diff --git a/importer/importer.go b/importer/importer.go index f0508aa1b..a5832e860 100644 --- a/importer/importer.go +++ b/importer/importer.go @@ -34,7 +34,7 @@ func BuildDagFromFile(fpath string, ds dag.DAGService) (node.Node, error) { } defer f.Close() - return BuildDagFromReader(ds, chunk.NewSizeSplitter(f, chunk.DefaultBlockSize)) + return BuildDagFromReader(ds, chunk.DefaultSplitter(f)) } func BuildDagFromReader(ds dag.DAGService, spl chunk.Splitter) (node.Node, error) { From c29f6289072eaae353a04616ef49645d9d54c146 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 19 Dec 2017 19:29:49 -0800 Subject: [PATCH 4/4] add test for overallocation in chunker License: MIT Signed-off-by: Steven Allen --- importer/chunk/splitting_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/importer/chunk/splitting_test.go b/importer/chunk/splitting_test.go index ff433f048..68df42803 100644 --- a/importer/chunk/splitting_test.go +++ b/importer/chunk/splitting_test.go @@ -22,6 +22,20 @@ func copyBuf(buf []byte) []byte { return cpy } +func TestSizeSplitterOverAllocate(t *testing.T) { + max := 1000 + r := bytes.NewReader(randBuf(t, max)) + chunksize := int64(1024 * 256) + splitter := NewSizeSplitter(r, chunksize) + chunk, err := splitter.NextBytes() + if err != nil { + t.Fatal(err) + } + if cap(chunk) > len(chunk) { + t.Fatal("chunk capacity too large") + } +} + func TestSizeSplitterIsDeterministic(t *testing.T) { if testing.Short() { t.SkipNow()