diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 9bbf03676..37c088e99 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -43,11 +43,6 @@ "Comment": "go.r60-152", "Rev": "36be16571e14f67e114bb0af619e5de2c1591679" }, - { - "ImportPath": "code.google.com/p/snappy-go/snappy", - "Comment": "null-15", - "Rev": "12e4b4183793ac4b061921e7980845e750679fd0" - }, { "ImportPath": "github.com/braintree/manners", "Comment": "0.3.1-2-g5280e25", @@ -99,7 +94,7 @@ }, { "ImportPath": "github.com/jbenet/go-datastore", - "Rev": "b31aad9b9b22e46d99a270ed5aebb259fab64dcc" + "Rev": "6a1c83bda2a71a9bdc936749fdb507df958ed949" }, { "ImportPath": "github.com/jbenet/go-is-domain", @@ -150,7 +145,11 @@ }, { "ImportPath": "github.com/syndtr/goleveldb/leveldb", - "Rev": "99056d50e56252fbe0021d5c893defca5a76baf8" + "Rev": "871eee0a7546bb7d1b2795142e29c4534abc49b3" + }, + { + "ImportPath": "github.com/syndtr/gosnappy/snappy", + "Rev": "ce8acff4829e0c2458a67ead32390ac0a381c862" }, { "ImportPath": "gopkg.in/natefinch/lumberjack.v2", diff --git a/Godeps/_workspace/src/github.com/jbenet/go-datastore/Godeps/Godeps.json b/Godeps/_workspace/src/github.com/jbenet/go-datastore/Godeps/Godeps.json index 427255afe..91aa65d50 100644 --- a/Godeps/_workspace/src/github.com/jbenet/go-datastore/Godeps/Godeps.json +++ b/Godeps/_workspace/src/github.com/jbenet/go-datastore/Godeps/Godeps.json @@ -1,6 +1,6 @@ { "ImportPath": "github.com/jbenet/go-datastore", - "GoVersion": "go1.3.3", + "GoVersion": "go1.3", "Packages": [ "./..." 
], @@ -10,11 +10,6 @@ "Comment": "null-12", "Rev": "7dda39b2e7d5e265014674c5af696ba4186679e9" }, - { - "ImportPath": "code.google.com/p/snappy-go/snappy", - "Comment": "null-15", - "Rev": "12e4b4183793ac4b061921e7980845e750679fd0" - }, { "ImportPath": "github.com/codahale/blake2", "Rev": "3fa823583afba430e8fc7cdbcc670dbf90bfacc4" @@ -33,7 +28,11 @@ }, { "ImportPath": "github.com/syndtr/goleveldb/leveldb", - "Rev": "cd2b8f743192883ab9fbc5f070ebda1dc90f3732" + "Rev": "871eee0a7546bb7d1b2795142e29c4534abc49b3" + }, + { + "ImportPath": "github.com/syndtr/gosnappy/snappy", + "Rev": "ce8acff4829e0c2458a67ead32390ac0a381c862" }, { "ImportPath": "gopkg.in/check.v1", diff --git a/Godeps/_workspace/src/github.com/jbenet/go-datastore/leveldb/datastore.go b/Godeps/_workspace/src/github.com/jbenet/go-datastore/leveldb/datastore.go index 9e25018b3..f540e5c92 100644 --- a/Godeps/_workspace/src/github.com/jbenet/go-datastore/leveldb/datastore.go +++ b/Godeps/_workspace/src/github.com/jbenet/go-datastore/leveldb/datastore.go @@ -58,7 +58,7 @@ func (d *datastore) Get(key ds.Key) (value interface{}, err error) { } func (d *datastore) Has(key ds.Key) (exists bool, err error) { - return ds.GetBackedHas(d, key) + return d.DB.Has(key.Bytes(), nil) } func (d *datastore) Delete(key ds.Key) (err error) { diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go index dc7ced697..f4151d1f1 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go @@ -8,65 +8,84 @@ package leveldb import ( "encoding/binary" - "errors" + "fmt" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb" ) -var ( - errBatchTooShort = errors.New("leveldb: batch is too short") - errBatchBadRecord = 
errors.New("leveldb: bad record in batch") +type ErrBatchCorrupted struct { + Reason string +} + +func (e *ErrBatchCorrupted) Error() string { + return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason) +} + +func newErrBatchCorrupted(reason string) error { + return errors.NewErrCorrupted(nil, &ErrBatchCorrupted{reason}) +} + +const ( + batchHdrLen = 8 + 4 + batchGrowRec = 3000 ) -const kBatchHdrLen = 8 + 4 - -type batchReplay interface { - put(key, value []byte, seq uint64) - delete(key []byte, seq uint64) +type BatchReplay interface { + Put(key, value []byte) + Delete(key []byte) } // Batch is a write batch. type Batch struct { - buf []byte + data []byte rLen, bLen int seq uint64 sync bool } func (b *Batch) grow(n int) { - off := len(b.buf) + off := len(b.data) if off == 0 { - // include headers - off = kBatchHdrLen - n += off + off = batchHdrLen + if b.data != nil { + b.data = b.data[:off] + } } - if cap(b.buf)-off >= n { - return + if cap(b.data)-off < n { + if b.data == nil { + b.data = make([]byte, off, off+n) + } else { + odata := b.data + div := 1 + if b.rLen > batchGrowRec { + div = b.rLen / batchGrowRec + } + b.data = make([]byte, off, off+n+(off-batchHdrLen)/div) + copy(b.data, odata) + } } - buf := make([]byte, 2*cap(b.buf)+n) - copy(buf, b.buf) - b.buf = buf[:off] } -func (b *Batch) appendRec(t vType, key, value []byte) { +func (b *Batch) appendRec(kt kType, key, value []byte) { n := 1 + binary.MaxVarintLen32 + len(key) - if t == tVal { + if kt == ktVal { n += binary.MaxVarintLen32 + len(value) } b.grow(n) - off := len(b.buf) - buf := b.buf[:off+n] - buf[off] = byte(t) + off := len(b.data) + data := b.data[:off+n] + data[off] = byte(kt) off += 1 - off += binary.PutUvarint(buf[off:], uint64(len(key))) - copy(buf[off:], key) + off += binary.PutUvarint(data[off:], uint64(len(key))) + copy(data[off:], key) off += len(key) - if t == tVal { - off += binary.PutUvarint(buf[off:], uint64(len(value))) - copy(buf[off:], value) + if kt == ktVal { + off += 
binary.PutUvarint(data[off:], uint64(len(value))) + copy(data[off:], value) off += len(value) } - b.buf = buf[:off] + b.data = data[:off] b.rLen++ // Include 8-byte ikey header b.bLen += len(key) + len(value) + 8 @@ -75,18 +94,51 @@ func (b *Batch) appendRec(t vType, key, value []byte) { // Put appends 'put operation' of the given key/value pair to the batch. // It is safe to modify the contents of the argument after Put returns. func (b *Batch) Put(key, value []byte) { - b.appendRec(tVal, key, value) + b.appendRec(ktVal, key, value) } // Delete appends 'delete operation' of the given key to the batch. // It is safe to modify the contents of the argument after Delete returns. func (b *Batch) Delete(key []byte) { - b.appendRec(tDel, key, nil) + b.appendRec(ktDel, key, nil) +} + +// Dump dumps batch contents. The returned slice can be loaded into the +// batch using Load method. +// The returned slice is not its own copy, so the contents should not be +// modified. +func (b *Batch) Dump() []byte { + return b.encode() +} + +// Load loads given slice into the batch. Previous contents of the batch +// will be discarded. +// The given slice will not be copied and will be used as batch buffer, so +// it is not safe to modify the contents of the slice. +func (b *Batch) Load(data []byte) error { + return b.decode(0, data) +} + +// Replay replays batch contents. +func (b *Batch) Replay(r BatchReplay) error { + return b.decodeRec(func(i int, kt kType, key, value []byte) { + switch kt { + case ktVal: + r.Put(key, value) + case ktDel: + r.Delete(key) + } + }) +} + +// Len returns number of records in the batch. +func (b *Batch) Len() int { + return b.rLen } // Reset resets the batch. 
func (b *Batch) Reset() { - b.buf = nil + b.data = b.data[:0] b.seq = 0 b.rLen = 0 b.bLen = 0 @@ -97,24 +149,10 @@ func (b *Batch) init(sync bool) { b.sync = sync } -func (b *Batch) put(key, value []byte, seq uint64) { - if b.rLen == 0 { - b.seq = seq - } - b.Put(key, value) -} - -func (b *Batch) delete(key []byte, seq uint64) { - if b.rLen == 0 { - b.seq = seq - } - b.Delete(key) -} - func (b *Batch) append(p *Batch) { if p.rLen > 0 { - b.grow(len(p.buf) - kBatchHdrLen) - b.buf = append(b.buf, p.buf[kBatchHdrLen:]...) + b.grow(len(p.data) - batchHdrLen) + b.data = append(b.data, p.data[batchHdrLen:]...) b.rLen += p.rLen } if p.sync { @@ -122,95 +160,93 @@ func (b *Batch) append(p *Batch) { } } -func (b *Batch) len() int { - return b.rLen -} - +// size returns sums of key/value pair length plus 8-bytes ikey. func (b *Batch) size() int { return b.bLen } func (b *Batch) encode() []byte { b.grow(0) - binary.LittleEndian.PutUint64(b.buf, b.seq) - binary.LittleEndian.PutUint32(b.buf[8:], uint32(b.rLen)) + binary.LittleEndian.PutUint64(b.data, b.seq) + binary.LittleEndian.PutUint32(b.data[8:], uint32(b.rLen)) - return b.buf + return b.data } -func (b *Batch) decode(buf []byte) error { - if len(buf) < kBatchHdrLen { - return errBatchTooShort +func (b *Batch) decode(prevSeq uint64, data []byte) error { + if len(data) < batchHdrLen { + return newErrBatchCorrupted("too short") } - b.seq = binary.LittleEndian.Uint64(buf) - b.rLen = int(binary.LittleEndian.Uint32(buf[8:])) + b.seq = binary.LittleEndian.Uint64(data) + if b.seq < prevSeq { + return newErrBatchCorrupted("invalid sequence number") + } + b.rLen = int(binary.LittleEndian.Uint32(data[8:])) + if b.rLen < 0 { + return newErrBatchCorrupted("invalid records length") + } // No need to be precise at this point, it won't be used anyway - b.bLen = len(buf) - kBatchHdrLen - b.buf = buf + b.bLen = len(data) - batchHdrLen + b.data = data return nil } -func (b *Batch) decodeRec(f func(i int, t vType, key, value []byte)) error { 
- off := kBatchHdrLen +func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error) { + off := batchHdrLen for i := 0; i < b.rLen; i++ { - if off >= len(b.buf) { - return errors.New("leveldb: invalid batch record length") + if off >= len(b.data) { + return newErrBatchCorrupted("invalid records length") } - t := vType(b.buf[off]) - if t > tVal { - return errors.New("leveldb: invalid batch record type in batch") + kt := kType(b.data[off]) + if kt > ktVal { + return newErrBatchCorrupted("bad record: invalid type") } off += 1 - x, n := binary.Uvarint(b.buf[off:]) + x, n := binary.Uvarint(b.data[off:]) off += n - if n <= 0 || off+int(x) > len(b.buf) { - return errBatchBadRecord + if n <= 0 || off+int(x) > len(b.data) { + return newErrBatchCorrupted("bad record: invalid key length") } - key := b.buf[off : off+int(x)] + key := b.data[off : off+int(x)] off += int(x) - var value []byte - if t == tVal { - x, n := binary.Uvarint(b.buf[off:]) + if kt == ktVal { + x, n := binary.Uvarint(b.data[off:]) off += n - if n <= 0 || off+int(x) > len(b.buf) { - return errBatchBadRecord + if n <= 0 || off+int(x) > len(b.data) { + return newErrBatchCorrupted("bad record: invalid value length") } - value = b.buf[off : off+int(x)] + value = b.data[off : off+int(x)] off += int(x) } - f(i, t, key, value) + f(i, kt, key, value) } return nil } -func (b *Batch) replay(to batchReplay) error { - return b.decodeRec(func(i int, t vType, key, value []byte) { - switch t { - case tVal: - to.put(key, value, b.seq+uint64(i)) - case tDel: - to.delete(key, b.seq+uint64(i)) - } - }) -} - func (b *Batch) memReplay(to *memdb.DB) error { - return b.decodeRec(func(i int, t vType, key, value []byte) { - ikey := newIKey(key, b.seq+uint64(i), t) + return b.decodeRec(func(i int, kt kType, key, value []byte) { + ikey := newIkey(key, b.seq+uint64(i), kt) to.Put(ikey, value) }) } +func (b *Batch) memDecodeAndReplay(prevSeq uint64, data []byte, to *memdb.DB) error { + if err := b.decode(prevSeq, 
data); err != nil { + return err + } + return b.memReplay(to) +} + func (b *Batch) revertMemReplay(to *memdb.DB) error { - return b.decodeRec(func(i int, t vType, key, value []byte) { - ikey := newIKey(key, b.seq+uint64(i), t) + return b.decodeRec(func(i int, kt kType, key, value []byte) { + ikey := newIkey(key, b.seq+uint64(i), kt) to.Delete(ikey) }) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go index a59a7b6a3..6caa5afc0 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go @@ -15,7 +15,7 @@ import ( ) type tbRec struct { - t vType + kt kType key, value []byte } @@ -23,39 +23,39 @@ type testBatch struct { rec []*tbRec } -func (p *testBatch) put(key, value []byte, seq uint64) { - p.rec = append(p.rec, &tbRec{tVal, key, value}) +func (p *testBatch) Put(key, value []byte) { + p.rec = append(p.rec, &tbRec{ktVal, key, value}) } -func (p *testBatch) delete(key []byte, seq uint64) { - p.rec = append(p.rec, &tbRec{tDel, key, nil}) +func (p *testBatch) Delete(key []byte) { + p.rec = append(p.rec, &tbRec{ktDel, key, nil}) } func compareBatch(t *testing.T, b1, b2 *Batch) { if b1.seq != b2.seq { t.Errorf("invalid seq number want %d, got %d", b1.seq, b2.seq) } - if b1.len() != b2.len() { - t.Fatalf("invalid record length want %d, got %d", b1.len(), b2.len()) + if b1.Len() != b2.Len() { + t.Fatalf("invalid record length want %d, got %d", b1.Len(), b2.Len()) } p1, p2 := new(testBatch), new(testBatch) - err := b1.replay(p1) + err := b1.Replay(p1) if err != nil { t.Fatal("error when replaying batch 1: ", err) } - err = b2.replay(p2) + err = b2.Replay(p2) if err != nil { t.Fatal("error when replaying batch 2: ", err) } for i := range p1.rec { r1, r2 := p1.rec[i], p2.rec[i] - if r1.t != r2.t { - t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.t, 
r2.t) + if r1.kt != r2.kt { + t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.kt, r2.kt) } if !bytes.Equal(r1.key, r2.key) { t.Errorf("invalid key on record '%d' want %s, got %s", i, string(r1.key), string(r2.key)) } - if r1.t == tVal { + if r1.kt == ktVal { if !bytes.Equal(r1.value, r2.value) { t.Errorf("invalid value on record '%d' want %s, got %s", i, string(r1.value), string(r2.value)) } @@ -75,7 +75,7 @@ func TestBatch_EncodeDecode(t *testing.T) { b1.Delete([]byte("k")) buf := b1.encode() b2 := new(Batch) - err := b2.decode(buf) + err := b2.decode(0, buf) if err != nil { t.Error("error when decoding batch: ", err) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go index 49f82f0fb..baced7717 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go @@ -128,7 +128,8 @@ const ( type nodeState int const ( - nodeEffective nodeState = iota + nodeZero nodeState = iota + nodeEffective nodeEvicted nodeDeleted ) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go index 6735e02ef..865bc5733 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go @@ -7,10 +7,8 @@ package cache import ( - "fmt" "math/rand" "runtime" - "strings" "sync" "sync/atomic" "testing" @@ -225,16 +223,16 @@ func TestLRUCache_Purge(t *testing.T) { } type testingCacheObjectCounter struct { - created uint32 - released uint32 + created uint + released uint } func (c *testingCacheObjectCounter) createOne() { - atomic.AddUint32(&c.created, 1) + c.created++ } func (c *testingCacheObjectCounter) releaseOne() { - atomic.AddUint32(&c.released, 1) + 
c.released++ } type testingCacheObject struct { @@ -243,17 +241,75 @@ type testingCacheObject struct { ns, key uint64 - releaseCalled uint32 + releaseCalled bool } func (x *testingCacheObject) Release() { - if atomic.CompareAndSwapUint32(&x.releaseCalled, 0, 1) { + if !x.releaseCalled { + x.releaseCalled = true x.cnt.releaseOne() } else { - x.t.Errorf("duplicate setfin NS#%d KEY#%s", x.ns, x.key) + x.t.Errorf("duplicate setfin NS#%d KEY#%d", x.ns, x.key) } } +func TestLRUCache_ConcurrentSetGet(t *testing.T) { + runtime.GOMAXPROCS(runtime.NumCPU()) + + seed := time.Now().UnixNano() + t.Logf("seed=%d", seed) + + const ( + N = 2000000 + M = 4000 + C = 3 + ) + + var set, get uint32 + + wg := &sync.WaitGroup{} + c := NewLRUCache(M / 4) + for ni := uint64(0); ni < C; ni++ { + r0 := rand.New(rand.NewSource(seed + int64(ni))) + r1 := rand.New(rand.NewSource(seed + int64(ni) + 1)) + ns := c.GetNamespace(ni) + + wg.Add(2) + go func(ns Namespace, r *rand.Rand) { + for i := 0; i < N; i++ { + x := uint64(r.Int63n(M)) + o := ns.Get(x, func() (int, interface{}) { + atomic.AddUint32(&set, 1) + return 1, x + }) + if v := o.Value().(uint64); v != x { + t.Errorf("#%d invalid value, got=%d", x, v) + } + o.Release() + } + wg.Done() + }(ns, r0) + go func(ns Namespace, r *rand.Rand) { + for i := 0; i < N; i++ { + x := uint64(r.Int63n(M)) + o := ns.Get(x, nil) + if o != nil { + atomic.AddUint32(&get, 1) + if v := o.Value().(uint64); v != x { + t.Errorf("#%d invalid value, got=%d", x, v) + } + o.Release() + } + } + wg.Done() + }(ns, r1) + } + + wg.Wait() + + t.Logf("set=%d get=%d", set, get) +} + func TestLRUCache_Finalizer(t *testing.T) { const ( capacity = 100 @@ -262,10 +318,6 @@ func TestLRUCache_Finalizer(t *testing.T) { keymax = 8000 ) - runtime.GOMAXPROCS(runtime.NumCPU()) - defer runtime.GOMAXPROCS(1) - - wg := &sync.WaitGroup{} cnt := &testingCacheObjectCounter{} c := NewLRUCache(capacity) @@ -273,38 +325,40 @@ func TestLRUCache_Finalizer(t *testing.T) { type instance struct { 
seed int64 rnd *rand.Rand - ns uint64 - effective int32 + nsid uint64 + ns Namespace + effective int handles []Handle handlesMap map[uint64]int delete bool purge bool zap bool - wantDel int32 - delfinCalledAll int32 - delfinCalledEff int32 - purgefinCalled int32 + wantDel int + delfinCalled int + delfinCalledAll int + delfinCalledEff int + purgefinCalled int } - instanceGet := func(p *instance, ns Namespace, key uint64) { - h := ns.Get(key, func() (charge int, value interface{}) { + instanceGet := func(p *instance, key uint64) { + h := p.ns.Get(key, func() (charge int, value interface{}) { to := &testingCacheObject{ t: t, cnt: cnt, - ns: p.ns, + ns: p.nsid, key: key, } - atomic.AddInt32(&p.effective, 1) + p.effective++ cnt.createOne() return 1, releaserFunc{func() { to.Release() - atomic.AddInt32(&p.effective, -1) + p.effective-- }, to} }) p.handles = append(p.handles, h) p.handlesMap[key] = p.handlesMap[key] + 1 } - instanceRelease := func(p *instance, ns Namespace, i int) { + instanceRelease := func(p *instance, i int) { h := p.handles[i] key := h.Value().(releaserFunc).value.(*testingCacheObject).key if n := p.handlesMap[key]; n == 0 { @@ -319,55 +373,71 @@ func TestLRUCache_Finalizer(t *testing.T) { p.handles[len(p.handles) : len(p.handles)+1][0] = nil } - seeds := make([]int64, goroutines) - instances := make([]instance, goroutines) + seed := time.Now().UnixNano() + t.Logf("seed=%d", seed) + + instances := make([]*instance, goroutines) for i := range instances { - p := &instances[i] + p := &instance{} p.handlesMap = make(map[uint64]int) - if seeds[i] == 0 { - seeds[i] = time.Now().UnixNano() - } - p.seed = seeds[i] + p.seed = seed + int64(i) p.rnd = rand.New(rand.NewSource(p.seed)) - p.ns = uint64(i) + p.nsid = uint64(i) + p.ns = c.GetNamespace(p.nsid) p.delete = i%6 == 0 p.purge = i%8 == 0 p.zap = i%12 == 0 || i%3 == 0 + instances[i] = p } - seedsStr := make([]string, len(seeds)) - for i, seed := range seeds { - seedsStr[i] = fmt.Sprint(seed) - } - 
t.Logf("seeds := []int64{%s}", strings.Join(seedsStr, ", ")) - - // Get and release. - for i := range instances { - p := &instances[i] - - wg.Add(1) - go func(p *instance) { - defer wg.Done() - - ns := c.GetNamespace(p.ns) - for i := 0; i < iterations; i++ { - if len(p.handles) == 0 || p.rnd.Int()%2 == 0 { - instanceGet(p, ns, uint64(p.rnd.Intn(keymax))) - } else { - instanceRelease(p, ns, p.rnd.Intn(len(p.handles))) + runr := rand.New(rand.NewSource(seed - 1)) + run := func(rnd *rand.Rand, x []*instance, init func(p *instance) bool, fn func(p *instance, i int) bool) { + var ( + rx []*instance + rn []int + ) + if init == nil { + rx = append([]*instance{}, x...) + rn = make([]int, len(x)) + } else { + for _, p := range x { + if init(p) { + rx = append(rx, p) + rn = append(rn, 0) } } - }(p) + } + for len(rx) > 0 { + i := rand.Intn(len(rx)) + if fn(rx[i], rn[i]) { + rn[i]++ + } else { + rx = append(rx[:i], rx[i+1:]...) + rn = append(rn[:i], rn[i+1:]...) + } + } } - wg.Wait() + + // Get and release. + run(runr, instances, nil, func(p *instance, i int) bool { + if i < iterations { + if len(p.handles) == 0 || p.rnd.Int()%2 == 0 { + instanceGet(p, uint64(p.rnd.Intn(keymax))) + } else { + instanceRelease(p, p.rnd.Intn(len(p.handles))) + } + return true + } else { + return false + } + }) if used, cap := c.Used(), c.Capacity(); used > cap { t.Errorf("Used > capacity, used=%d cap=%d", used, cap) } // Check effective objects. - for i := range instances { - p := &instances[i] + for i, p := range instances { if int(p.effective) < len(p.handlesMap) { t.Errorf("#%d effective objects < acquired handle, eo=%d ah=%d", i, p.effective, len(p.handlesMap)) } @@ -377,103 +447,93 @@ func TestLRUCache_Finalizer(t *testing.T) { t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size()) } - // Delete and purge. - for i := range instances { - p := &instances[i] + // First delete. 
+ run(runr, instances, func(p *instance) bool { p.wantDel = p.effective - - wg.Add(1) - go func(p *instance) { - defer wg.Done() - - ns := c.GetNamespace(p.ns) - - if p.delete { - for key := uint64(0); key < keymax; key++ { - _, wantExist := p.handlesMap[key] - gotExist := ns.Delete(key, func(exist, pending bool) { - atomic.AddInt32(&p.delfinCalledAll, 1) - if exist { - atomic.AddInt32(&p.delfinCalledEff, 1) - } - }) - if !gotExist && wantExist { - t.Errorf("delete on NS#%d KEY#%d not found", p.ns, key) - } - } - - var delfinCalled int - for key := uint64(0); key < keymax; key++ { - func(key uint64) { - gotExist := ns.Delete(key, func(exist, pending bool) { - if exist && !pending { - t.Errorf("delete fin on NS#%d KEY#%d exist and not pending for deletion", p.ns, key) - } - delfinCalled++ - }) - if gotExist { - t.Errorf("delete on NS#%d KEY#%d found", p.ns, key) - } - }(key) - } - if delfinCalled != keymax { - t.Errorf("(2) #%d not all delete fin called, diff=%d", p.ns, keymax-delfinCalled) + return p.delete + }, func(p *instance, i int) bool { + key := uint64(i) + if key < keymax { + _, wantExist := p.handlesMap[key] + gotExist := p.ns.Delete(key, func(exist, pending bool) { + p.delfinCalledAll++ + if exist { + p.delfinCalledEff++ } + }) + if !gotExist && wantExist { + t.Errorf("delete on NS#%d KEY#%d not found", p.nsid, key) } + return true + } else { + return false + } + }) - if p.purge { - ns.Purge(func(ns, key uint64) { - atomic.AddInt32(&p.purgefinCalled, 1) - }) + // Second delete. 
+ run(runr, instances, func(p *instance) bool { + p.delfinCalled = 0 + return p.delete + }, func(p *instance, i int) bool { + key := uint64(i) + if key < keymax { + gotExist := p.ns.Delete(key, func(exist, pending bool) { + if exist && !pending { + t.Errorf("delete fin on NS#%d KEY#%d exist and not pending for deletion", p.nsid, key) + } + p.delfinCalled++ + }) + if gotExist { + t.Errorf("delete on NS#%d KEY#%d found", p.nsid, key) } - }(p) - } - wg.Wait() + return true + } else { + if p.delfinCalled != keymax { + t.Errorf("(2) NS#%d not all delete fin called, diff=%d", p.nsid, keymax-p.delfinCalled) + } + return false + } + }) + + // Purge. + run(runr, instances, func(p *instance) bool { + return p.purge + }, func(p *instance, i int) bool { + p.ns.Purge(func(ns, key uint64) { + p.purgefinCalled++ + }) + return false + }) if want := int(cnt.created - cnt.released); c.Size() != want { t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size()) } // Release. - for i := range instances { - p := &instances[i] - - if !p.zap { - wg.Add(1) - go func(p *instance) { - defer wg.Done() - - ns := c.GetNamespace(p.ns) - for i := len(p.handles) - 1; i >= 0; i-- { - instanceRelease(p, ns, i) - } - }(p) + run(runr, instances, func(p *instance) bool { + return !p.zap + }, func(p *instance, i int) bool { + if len(p.handles) > 0 { + instanceRelease(p, len(p.handles)-1) + return true + } else { + return false } - } - wg.Wait() + }) if want := int(cnt.created - cnt.released); c.Size() != want { t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size()) } // Zap. 
- for i := range instances { - p := &instances[i] - - if p.zap { - wg.Add(1) - go func(p *instance) { - defer wg.Done() - - ns := c.GetNamespace(p.ns) - ns.Zap() - - p.handles = nil - p.handlesMap = nil - }(p) - } - } - wg.Wait() + run(runr, instances, func(p *instance) bool { + return p.zap + }, func(p *instance, i int) bool { + p.ns.Zap() + p.handles = nil + p.handlesMap = nil + return false + }) if want := int(cnt.created - cnt.released); c.Size() != want { t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size()) @@ -485,23 +545,21 @@ func TestLRUCache_Finalizer(t *testing.T) { c.Purge(nil) - for i := range instances { - p := &instances[i] - + for _, p := range instances { if p.delete { if p.delfinCalledAll != keymax { - t.Errorf("#%d not all delete fin called, purge=%v zap=%v diff=%d", p.ns, p.purge, p.zap, keymax-p.delfinCalledAll) + t.Errorf("#%d not all delete fin called, purge=%v zap=%v diff=%d", p.nsid, p.purge, p.zap, keymax-p.delfinCalledAll) } if p.delfinCalledEff != p.wantDel { - t.Errorf("#%d not all effective delete fin called, diff=%d", p.ns, p.wantDel-p.delfinCalledEff) + t.Errorf("#%d not all effective delete fin called, diff=%d", p.nsid, p.wantDel-p.delfinCalledEff) } if p.purge && p.purgefinCalled > 0 { - t.Errorf("#%d some purge fin called, delete=%v zap=%v n=%d", p.ns, p.delete, p.zap, p.purgefinCalled) + t.Errorf("#%d some purge fin called, delete=%v zap=%v n=%d", p.nsid, p.delete, p.zap, p.purgefinCalled) } } else { if p.purge { if p.purgefinCalled != p.wantDel { - t.Errorf("#%d not all purge fin called, delete=%v zap=%v diff=%d", p.ns, p.delete, p.zap, p.wantDel-p.purgefinCalled) + t.Errorf("#%d not all purge fin called, delete=%v zap=%v diff=%d", p.nsid, p.delete, p.zap, p.wantDel-p.purgefinCalled) } } } @@ -512,6 +570,56 @@ func TestLRUCache_Finalizer(t *testing.T) { } } +func BenchmarkLRUCache_Set(b *testing.B) { + c := NewLRUCache(0) + ns := c.GetNamespace(0) + b.ResetTimer() + for i := uint64(0); i < uint64(b.N); i++ { + set(ns, 
i, "", 1, nil) + } +} + +func BenchmarkLRUCache_Get(b *testing.B) { + c := NewLRUCache(0) + ns := c.GetNamespace(0) + b.ResetTimer() + for i := uint64(0); i < uint64(b.N); i++ { + set(ns, i, "", 1, nil) + } + b.ResetTimer() + for i := uint64(0); i < uint64(b.N); i++ { + ns.Get(i, nil) + } +} + +func BenchmarkLRUCache_Get2(b *testing.B) { + c := NewLRUCache(0) + ns := c.GetNamespace(0) + b.ResetTimer() + for i := uint64(0); i < uint64(b.N); i++ { + set(ns, i, "", 1, nil) + } + b.ResetTimer() + for i := uint64(0); i < uint64(b.N); i++ { + ns.Get(i, func() (charge int, value interface{}) { + return 0, nil + }) + } +} + +func BenchmarkLRUCache_Release(b *testing.B) { + c := NewLRUCache(0) + ns := c.GetNamespace(0) + handles := make([]Handle, b.N) + for i := uint64(0); i < uint64(b.N); i++ { + handles[i] = set(ns, i, "", 1, nil) + } + b.ResetTimer() + for _, h := range handles { + h.Release() + } +} + func BenchmarkLRUCache_SetRelease(b *testing.B) { capacity := b.N / 100 if capacity <= 0 { @@ -521,7 +629,7 @@ func BenchmarkLRUCache_SetRelease(b *testing.B) { ns := c.GetNamespace(0) b.ResetTimer() for i := uint64(0); i < uint64(b.N); i++ { - set(ns, i, nil, 1, nil).Release() + set(ns, i, "", 1, nil).Release() } } @@ -538,10 +646,10 @@ func BenchmarkLRUCache_SetReleaseTwice(b *testing.B) { nb := b.N - na for i := uint64(0); i < uint64(na); i++ { - set(ns, i, nil, 1, nil).Release() + set(ns, i, "", 1, nil).Release() } for i := uint64(0); i < uint64(nb); i++ { - set(ns, i, nil, 1, nil).Release() + set(ns, i, "", 1, nil).Release() } } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go index d99477b01..853676cc4 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go @@ -13,6 +13,13 @@ import ( 
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) +// The LLRB implementation were taken from https://github.com/petar/GoLLRB. +// Which contains the following header: +// +// Copyright 2010 Petar Maymounkov. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + // lruCache represent a LRU cache state. type lruCache struct { mu sync.Mutex @@ -74,11 +81,7 @@ func (c *lruCache) GetNamespace(id uint64) Namespace { return ns } - ns := &lruNs{ - lru: c, - id: id, - table: make(map[uint64]*lruNode), - } + ns := &lruNs{lru: c, id: id} c.table[id] = ns return ns } @@ -121,6 +124,9 @@ func (c *lruCache) Zap() { func (c *lruCache) evict() { top := &c.recent for n := c.recent.rPrev; c.used > c.capacity && n != top; { + if n.state != nodeEffective { + panic("evicting non effective node") + } n.state = nodeEvicted n.rRemove() n.derefNB() @@ -130,130 +136,271 @@ func (c *lruCache) evict() { } type lruNs struct { - lru *lruCache - id uint64 - table map[uint64]*lruNode - state nsState + lru *lruCache + id uint64 + rbRoot *lruNode + state nsState +} + +func (ns *lruNs) rbGetOrCreateNode(h *lruNode, key uint64) (hn, n *lruNode) { + if h == nil { + n = &lruNode{ns: ns, key: key} + return n, n + } + + if key < h.key { + hn, n = ns.rbGetOrCreateNode(h.rbLeft, key) + if hn != nil { + h.rbLeft = hn + } else { + return nil, n + } + } else if key > h.key { + hn, n = ns.rbGetOrCreateNode(h.rbRight, key) + if hn != nil { + h.rbRight = hn + } else { + return nil, n + } + } else { + return nil, h + } + + if rbIsRed(h.rbRight) && !rbIsRed(h.rbLeft) { + h = rbRotLeft(h) + } + if rbIsRed(h.rbLeft) && rbIsRed(h.rbLeft.rbLeft) { + h = rbRotRight(h) + } + if rbIsRed(h.rbLeft) && rbIsRed(h.rbRight) { + rbFlip(h) + } + return h, n +} + +func (ns *lruNs) getOrCreateNode(key uint64) *lruNode { + hn, n := ns.rbGetOrCreateNode(ns.rbRoot, key) + if hn != nil { + ns.rbRoot = hn + 
ns.rbRoot.rbBlack = true + } + return n +} + +func (ns *lruNs) rbGetNode(key uint64) *lruNode { + h := ns.rbRoot + for h != nil { + switch { + case key < h.key: + h = h.rbLeft + case key > h.key: + h = h.rbRight + default: + return h + } + } + return nil +} + +func (ns *lruNs) getNode(key uint64) *lruNode { + return ns.rbGetNode(key) +} + +func (ns *lruNs) rbDeleteNode(h *lruNode, key uint64) *lruNode { + if h == nil { + return nil + } + + if key < h.key { + if h.rbLeft == nil { // key not present. Nothing to delete + return h + } + if !rbIsRed(h.rbLeft) && !rbIsRed(h.rbLeft.rbLeft) { + h = rbMoveLeft(h) + } + h.rbLeft = ns.rbDeleteNode(h.rbLeft, key) + } else { + if rbIsRed(h.rbLeft) { + h = rbRotRight(h) + } + // If @key equals @h.key and no right children at @h + if h.key == key && h.rbRight == nil { + return nil + } + if h.rbRight != nil && !rbIsRed(h.rbRight) && !rbIsRed(h.rbRight.rbLeft) { + h = rbMoveRight(h) + } + // If @key equals @h.key, and (from above) 'h.Right != nil' + if h.key == key { + var x *lruNode + h.rbRight, x = rbDeleteMin(h.rbRight) + if x == nil { + panic("logic") + } + x.rbLeft, h.rbLeft = h.rbLeft, nil + x.rbRight, h.rbRight = h.rbRight, nil + x.rbBlack = h.rbBlack + h = x + } else { // Else, @key is bigger than @h.key + h.rbRight = ns.rbDeleteNode(h.rbRight, key) + } + } + + return rbFixup(h) +} + +func (ns *lruNs) deleteNode(key uint64) { + ns.rbRoot = ns.rbDeleteNode(ns.rbRoot, key) + if ns.rbRoot != nil { + ns.rbRoot.rbBlack = true + } +} + +func (ns *lruNs) rbIterateNodes(h *lruNode, pivot uint64, iter func(n *lruNode) bool) bool { + if h == nil { + return true + } + if h.key >= pivot { + if !ns.rbIterateNodes(h.rbLeft, pivot, iter) { + return false + } + if !iter(h) { + return false + } + } + return ns.rbIterateNodes(h.rbRight, pivot, iter) +} + +func (ns *lruNs) iterateNodes(iter func(n *lruNode) bool) { + ns.rbIterateNodes(ns.rbRoot, 0, iter) } func (ns *lruNs) Get(key uint64, setf SetFunc) Handle { ns.lru.mu.Lock() + defer 
ns.lru.mu.Unlock() if ns.state != nsEffective { - ns.lru.mu.Unlock() return nil } - node, ok := ns.table[key] - if ok { - switch node.state { - case nodeEvicted: - // Insert to recent list. - node.state = nodeEffective - node.ref++ - ns.lru.used += node.charge - ns.lru.evict() - fallthrough - case nodeEffective: - // Bump to front. - node.rRemove() - node.rInsert(&ns.lru.recent) - } - node.ref++ - } else { - if setf == nil { - ns.lru.mu.Unlock() + var n *lruNode + if setf == nil { + n = ns.getNode(key) + if n == nil { return nil } - + } else { + n = ns.getOrCreateNode(key) + } + switch n.state { + case nodeZero: charge, value := setf() if value == nil { - ns.lru.mu.Unlock() + ns.deleteNode(key) return nil } - - node = &lruNode{ - ns: ns, - key: key, - value: value, - charge: charge, - ref: 1, + if charge < 0 { + charge = 0 } - ns.table[key] = node + + n.value = value + n.charge = charge + n.state = nodeEvicted ns.lru.size += charge ns.lru.alive++ - if charge > 0 { - node.ref++ - node.rInsert(&ns.lru.recent) - ns.lru.used += charge - ns.lru.evict() - } - } - ns.lru.mu.Unlock() - return &lruHandle{node: node} + fallthrough + case nodeEvicted: + if n.charge == 0 { + break + } + + // Insert to recent list. + n.state = nodeEffective + n.ref++ + ns.lru.used += n.charge + ns.lru.evict() + + fallthrough + case nodeEffective: + // Bump to front. + n.rRemove() + n.rInsert(&ns.lru.recent) + case nodeDeleted: + // Do nothing. 
+ default: + panic("invalid state") + } + n.ref++ + + return &lruHandle{node: n} } func (ns *lruNs) Delete(key uint64, fin DelFin) bool { ns.lru.mu.Lock() + defer ns.lru.mu.Unlock() if ns.state != nsEffective { if fin != nil { fin(false, false) } - ns.lru.mu.Unlock() return false } - node, exist := ns.table[key] - if !exist { + n := ns.getNode(key) + if n == nil { if fin != nil { fin(false, false) } - ns.lru.mu.Unlock() return false + } - switch node.state { + switch n.state { + case nodeEffective: + ns.lru.used -= n.charge + n.state = nodeDeleted + n.delfin = fin + n.rRemove() + n.derefNB() + case nodeEvicted: + n.state = nodeDeleted + n.delfin = fin case nodeDeleted: if fin != nil { fin(true, true) } - ns.lru.mu.Unlock() return false - case nodeEffective: - ns.lru.used -= node.charge - node.state = nodeDeleted - node.delfin = fin - node.rRemove() - node.derefNB() default: - node.state = nodeDeleted - node.delfin = fin + panic("invalid state") } - ns.lru.mu.Unlock() return true } func (ns *lruNs) purgeNB(fin PurgeFin) { - if ns.state != nsEffective { - return - } - - for _, node := range ns.table { - switch node.state { - case nodeDeleted: - case nodeEffective: - ns.lru.used -= node.charge - node.state = nodeDeleted - node.purgefin = fin - node.rRemove() - node.derefNB() - default: - node.state = nodeDeleted - node.purgefin = fin + if ns.state == nsEffective { + var nodes []*lruNode + ns.iterateNodes(func(n *lruNode) bool { + nodes = append(nodes, n) + return true + }) + for _, n := range nodes { + switch n.state { + case nodeEffective: + ns.lru.used -= n.charge + n.state = nodeDeleted + n.purgefin = fin + n.rRemove() + n.derefNB() + case nodeEvicted: + n.state = nodeDeleted + n.purgefin = fin + case nodeDeleted: + default: + panic("invalid state") + } } } } @@ -265,22 +412,22 @@ func (ns *lruNs) Purge(fin PurgeFin) { } func (ns *lruNs) zapNB() { - if ns.state != nsEffective { - return - } + if ns.state == nsEffective { + ns.state = nsZapped - ns.state = nsZapped 
+ ns.iterateNodes(func(n *lruNode) bool { + if n.state == nodeEffective { + ns.lru.used -= n.charge + n.rRemove() + } + ns.lru.size -= n.charge + n.state = nodeDeleted + n.fin() - for _, node := range ns.table { - if node.state == nodeEffective { - ns.lru.used -= node.charge - node.rRemove() - } - ns.lru.size -= node.charge - node.state = nodeDeleted - node.fin() + return true + }) + ns.rbRoot = nil } - ns.table = nil } func (ns *lruNs) Zap() { @@ -293,7 +440,9 @@ func (ns *lruNs) Zap() { type lruNode struct { ns *lruNs - rNext, rPrev *lruNode + rNext, rPrev *lruNode + rbLeft, rbRight *lruNode + rbBlack bool key uint64 value interface{} @@ -330,8 +479,10 @@ func (n *lruNode) fin() { r.Release() } if n.purgefin != nil { + if n.delfin != nil { + panic("conflicting delete and purge fin") + } n.purgefin(n.ns.id, n.key) - n.delfin = nil n.purgefin = nil } else if n.delfin != nil { n.delfin(true, false) @@ -344,7 +495,7 @@ func (n *lruNode) derefNB() { if n.ref == 0 { if n.ns.state == nsEffective { // Remove elemement. 
- delete(n.ns.table, n.key) + n.ns.deleteNode(n.key) n.ns.lru.size -= n.charge n.ns.lru.alive-- n.fin() @@ -380,3 +531,92 @@ func (h *lruHandle) Release() { h.node.deref() h.node = nil } + +func rbIsRed(h *lruNode) bool { + if h == nil { + return false + } + return !h.rbBlack +} + +func rbRotLeft(h *lruNode) *lruNode { + x := h.rbRight + if x.rbBlack { + panic("rotating a black link") + } + h.rbRight = x.rbLeft + x.rbLeft = h + x.rbBlack = h.rbBlack + h.rbBlack = false + return x +} + +func rbRotRight(h *lruNode) *lruNode { + x := h.rbLeft + if x.rbBlack { + panic("rotating a black link") + } + h.rbLeft = x.rbRight + x.rbRight = h + x.rbBlack = h.rbBlack + h.rbBlack = false + return x +} + +func rbFlip(h *lruNode) { + h.rbBlack = !h.rbBlack + h.rbLeft.rbBlack = !h.rbLeft.rbBlack + h.rbRight.rbBlack = !h.rbRight.rbBlack +} + +func rbMoveLeft(h *lruNode) *lruNode { + rbFlip(h) + if rbIsRed(h.rbRight.rbLeft) { + h.rbRight = rbRotRight(h.rbRight) + h = rbRotLeft(h) + rbFlip(h) + } + return h +} + +func rbMoveRight(h *lruNode) *lruNode { + rbFlip(h) + if rbIsRed(h.rbLeft.rbLeft) { + h = rbRotRight(h) + rbFlip(h) + } + return h +} + +func rbFixup(h *lruNode) *lruNode { + if rbIsRed(h.rbRight) { + h = rbRotLeft(h) + } + + if rbIsRed(h.rbLeft) && rbIsRed(h.rbLeft.rbLeft) { + h = rbRotRight(h) + } + + if rbIsRed(h.rbLeft) && rbIsRed(h.rbRight) { + rbFlip(h) + } + + return h +} + +func rbDeleteMin(h *lruNode) (hn, n *lruNode) { + if h == nil { + return nil, nil + } + if h.rbLeft == nil { + return nil, h + } + + if !rbIsRed(h.rbLeft) && !rbIsRed(h.rbLeft.rbLeft) { + h = rbMoveLeft(h) + } + + h.rbLeft, n = rbDeleteMin(h.rbLeft) + + return rbFixup(h), n +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go deleted file mode 100644 index 511058897..000000000 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 
2012, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -package leveldb - -const ( - kNumLevels = 7 - - // Level-0 compaction is started when we hit this many files. - kL0_CompactionTrigger float64 = 4 - - // Soft limit on number of level-0 files. We slow down writes at this point. - kL0_SlowdownWritesTrigger = 8 - - // Maximum number of level-0 files. We stop writes at this point. - kL0_StopWritesTrigger = 12 - - // Maximum level to which a new compacted memdb is pushed if it - // does not create overlap. We try to push to level 2 to avoid the - // relatively expensive level 0=>1 compactions and to avoid some - // expensive manifest file operations. We do not push all the way to - // the largest level since that can generate a lot of wasted disk - // space if the same key space is being repeatedly overwritten. - kMaxMemCompactLevel = 2 - - // Maximum size of a table. - kMaxTableSize = 2 * 1048576 - - // Maximum bytes of overlaps in grandparent (i.e., level+2) before we - // stop building a single file in a level->level+1 compaction. - kMaxGrandParentOverlapBytes = 10 * kMaxTableSize - - // Maximum number of bytes in all compacted files. We avoid expanding - // the lower level file set of a compaction if it would make the - // total compaction cover more than this many bytes. 
- kExpCompactionMaxBytes = 25 * kMaxTableSize -) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go index 1f45a5a6e..336549896 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go @@ -14,6 +14,7 @@ import ( "testing" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" ) @@ -96,21 +97,22 @@ func (h *dbCorruptHarness) deleteRand(n, max int, rnd *rand.Rand) { } } -func (h *dbCorruptHarness) corrupt(ft storage.FileType, offset, n int) { +func (h *dbCorruptHarness) corrupt(ft storage.FileType, fi, offset, n int) { p := &h.dbHarness t := p.t - var file storage.File ff, _ := p.stor.GetFiles(ft) - for _, f := range ff { - if file == nil || f.Num() > file.Num() { - file = f - } + sff := files(ff) + sff.sort() + if fi < 0 { + fi = len(sff) - 1 } - if file == nil { - t.Fatalf("no such file with type %q", ft) + if fi >= len(sff) { + t.Fatalf("no such file with type %q with index %d", ft, fi) } + file := sff[fi] + r, err := file.Open() if err != nil { t.Fatal("cannot open file: ", err) @@ -225,8 +227,8 @@ func TestCorruptDB_Journal(t *testing.T) { h.build(100) h.check(100, 100) h.closeDB() - h.corrupt(storage.TypeJournal, 19, 1) - h.corrupt(storage.TypeJournal, 32*1024+1000, 1) + h.corrupt(storage.TypeJournal, -1, 19, 1) + h.corrupt(storage.TypeJournal, -1, 32*1024+1000, 1) h.openDB() h.check(36, 36) @@ -242,7 +244,7 @@ func TestCorruptDB_Table(t *testing.T) { h.compactRangeAt(0, "", "") h.compactRangeAt(1, "", "") h.closeDB() - 
h.corrupt(storage.TypeTable, 100, 1) + h.corrupt(storage.TypeTable, -1, 100, 1) h.openDB() h.check(99, 99) @@ -256,7 +258,7 @@ func TestCorruptDB_TableIndex(t *testing.T) { h.build(10000) h.compactMem() h.closeDB() - h.corrupt(storage.TypeTable, -2000, 500) + h.corrupt(storage.TypeTable, -1, -2000, 500) h.openDB() h.check(5000, 9999) @@ -355,7 +357,7 @@ func TestCorruptDB_CorruptedManifest(t *testing.T) { h.compactMem() h.compactRange("", "") h.closeDB() - h.corrupt(storage.TypeManifest, 0, 1000) + h.corrupt(storage.TypeManifest, -1, 0, 1000) h.openAssert(false) h.recover() @@ -370,7 +372,7 @@ func TestCorruptDB_CompactionInputError(t *testing.T) { h.build(10) h.compactMem() h.closeDB() - h.corrupt(storage.TypeTable, 100, 1) + h.corrupt(storage.TypeTable, -1, 100, 1) h.openDB() h.check(9, 9) @@ -387,7 +389,7 @@ func TestCorruptDB_UnrelatedKeys(t *testing.T) { h.build(10) h.compactMem() h.closeDB() - h.corrupt(storage.TypeTable, 100, 1) + h.corrupt(storage.TypeTable, -1, 100, 1) h.openDB() h.put(string(tkey(1000)), string(tval(1000, ctValSize))) @@ -470,3 +472,31 @@ func TestCorruptDB_MissingTableFiles(t *testing.T) { h.close() } + +func TestCorruptDB_RecoverTable(t *testing.T) { + h := newDbCorruptHarnessWopt(t, &opt.Options{ + WriteBuffer: 112 * opt.KiB, + CompactionTableSize: 90 * opt.KiB, + Filter: filter.NewBloomFilter(10), + }) + + h.build(1000) + h.compactMem() + h.compactRangeAt(0, "", "") + h.compactRangeAt(1, "", "") + seq := h.db.seq + h.closeDB() + h.corrupt(storage.TypeTable, 0, 1000, 1) + h.corrupt(storage.TypeTable, 3, 10000, 1) + // Corrupted filter shouldn't affect recovery. 
+ h.corrupt(storage.TypeTable, 3, 113888, 10) + h.corrupt(storage.TypeTable, -1, 20000, 1) + + h.recover() + if h.db.seq != seq { + t.Errorf("invalid seq, want=%d got=%d", seq, h.db.seq) + } + h.check(985, 985) + + h.close() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go index 73a691218..1d5e73d2b 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go @@ -7,7 +7,7 @@ package leveldb import ( - "errors" + "container/list" "fmt" "io" "os" @@ -17,6 +17,7 @@ import ( "sync/atomic" "time" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb" @@ -46,7 +47,7 @@ type DB struct { // Snapshot. snapsMu sync.Mutex - snapsRoot snapshotElement + snapsList *list.List // Stats. aliveSnaps, aliveIters int32 @@ -56,18 +57,19 @@ type DB struct { writeMergedC chan bool writeLockC chan struct{} writeAckC chan error + writeDelay time.Duration + writeDelayN int journalC chan *Batch journalAckC chan error // Compaction. - tcompCmdC chan cCmd - tcompPauseC chan chan<- struct{} - tcompTriggerC chan struct{} - mcompCmdC chan cCmd - mcompTriggerC chan struct{} - compErrC chan error - compErrSetC chan error - compStats [kNumLevels]cStats + tcompCmdC chan cCmd + tcompPauseC chan chan<- struct{} + mcompCmdC chan cCmd + compErrC chan error + compPerErrC chan error + compErrSetC chan error + compStats []cStats // Close. 
closeW sync.WaitGroup @@ -82,9 +84,11 @@ func openDB(s *session) (*DB, error) { db := &DB{ s: s, // Initial sequence - seq: s.stSeq, + seq: s.stSeqNum, // MemDB memPool: make(chan *memdb.DB, 1), + // Snapshot + snapsList: list.New(), // Write writeC: make(chan *Batch), writeMergedC: make(chan bool), @@ -93,17 +97,16 @@ func openDB(s *session) (*DB, error) { journalC: make(chan *Batch), journalAckC: make(chan error), // Compaction - tcompCmdC: make(chan cCmd), - tcompPauseC: make(chan chan<- struct{}), - tcompTriggerC: make(chan struct{}, 1), - mcompCmdC: make(chan cCmd), - mcompTriggerC: make(chan struct{}, 1), - compErrC: make(chan error), - compErrSetC: make(chan error), + tcompCmdC: make(chan cCmd), + tcompPauseC: make(chan chan<- struct{}), + mcompCmdC: make(chan cCmd), + compErrC: make(chan error), + compPerErrC: make(chan error), + compErrSetC: make(chan error), + compStats: make([]cStats, s.o.GetNumLevel()), // Close closeC: make(chan struct{}), } - db.initSnapshot() if err := db.recoverJournal(); err != nil { return nil, err @@ -119,14 +122,14 @@ func openDB(s *session) (*DB, error) { return nil, err } - // Don't include compaction error goroutine into wait group. + // Doesn't need to be included in the wait group. go db.compactionError() + go db.mpoolDrain() db.closeW.Add(3) go db.tCompaction() go db.mCompaction() go db.jWriter() - go db.mpoolDrain() s.logf("db@open done T·%v", time.Since(start)) @@ -253,6 +256,10 @@ func RecoverFile(path string, o *opt.Options) (db *DB, err error) { } func recoverTable(s *session, o *opt.Options) error { + o = dupOptions(o) + // Mask StrictReader, lets StrictRecovery doing its job. + o.Strict &= ^opt.StrictReader + // Get all tables and sort it by file number. 
tableFiles_, err := s.getFiles(storage.TypeTable) if err != nil { @@ -261,10 +268,16 @@ func recoverTable(s *session, o *opt.Options) error { tableFiles := files(tableFiles_) tableFiles.sort() - var mSeq uint64 - var good, corrupted int - rec := new(sessionRecord) - bpool := util.NewBufferPool(o.GetBlockSize() + 5) + var ( + maxSeq uint64 + recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int + + // We will drop corrupted table. + strict = o.GetStrict(opt.StrictRecovery) + + rec = &sessionRecord{numLevel: o.GetNumLevel()} + bpool = util.NewBufferPool(o.GetBlockSize() + 5) + ) buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) { tmp = s.newTemp() writer, err := tmp.Create() @@ -311,7 +324,12 @@ func recoverTable(s *session, o *opt.Options) error { if err != nil { return err } - defer reader.Close() + var closed bool + defer func() { + if !closed { + reader.Close() + } + }() // Get file size. size, err := reader.Seek(0, 2) @@ -319,25 +337,32 @@ func recoverTable(s *session, o *opt.Options) error { return err } - var tSeq uint64 - var tgood, tcorrupted, blockerr int - var imin, imax []byte - tr := table.NewReader(reader, size, nil, bpool, o) + var ( + tSeq uint64 + tgoodKey, tcorruptedKey, tcorruptedBlock int + imin, imax []byte + ) + tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o) + if err != nil { + return err + } iter := tr.NewIterator(nil, nil) iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) { - s.logf("table@recovery found error @%d %q", file.Num(), err) - blockerr++ + if errors.IsCorrupted(err) { + s.logf("table@recovery block corruption @%d %q", file.Num(), err) + tcorruptedBlock++ + } }) // Scan the table. 
for iter.Next() { key := iter.Key() - _, seq, _, ok := parseIkey(key) - if !ok { - tcorrupted++ + _, seq, _, kerr := parseIkey(key) + if kerr != nil { + tcorruptedKey++ continue } - tgood++ + tgoodKey++ if seq > tSeq { tSeq = seq } @@ -352,8 +377,18 @@ func recoverTable(s *session, o *opt.Options) error { } iter.Release() - if tgood > 0 { - if tcorrupted > 0 || blockerr > 0 { + goodKey += tgoodKey + corruptedKey += tcorruptedKey + corruptedBlock += tcorruptedBlock + + if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) { + droppedTable++ + s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) + return nil + } + + if tgoodKey > 0 { + if tcorruptedKey > 0 || tcorruptedBlock > 0 { // Rebuild the table. s.logf("table@recovery rebuilding @%d", file.Num()) iter := tr.NewIterator(nil, nil) @@ -362,25 +397,25 @@ func recoverTable(s *session, o *opt.Options) error { if err != nil { return err } + closed = true reader.Close() if err := file.Replace(tmp); err != nil { return err } size = newSize } - if tSeq > mSeq { - mSeq = tSeq + if tSeq > maxSeq { + maxSeq = tSeq } + recoveredKey += tgoodKey // Add table to level 0. 
rec.addTable(0, file.Num(), uint64(size), imin, imax) - s.logf("table@recovery recovered @%d N·%d C·%d B·%d S·%d Q·%d", file.Num(), tgood, tcorrupted, blockerr, size, tSeq) + s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) } else { - s.logf("table@recovery unrecoverable @%d C·%d B·%d S·%d", file.Num(), tcorrupted, blockerr, size) + droppedTable++ + s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size) } - good += tgood - corrupted += tcorrupted - return nil } @@ -397,11 +432,11 @@ func recoverTable(s *session, o *opt.Options) error { } } - s.logf("table@recovery recovered F·%d N·%d C·%d Q·%d", len(tableFiles), good, corrupted, mSeq) + s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq) } // Set sequence number. - rec.setSeq(mSeq + 1) + rec.setSeqNum(maxSeq) // Create new manifest. if err := s.create(); err != nil { @@ -484,26 +519,30 @@ func (db *DB) recoverJournal() error { if err == io.EOF { break } - return err + return errors.SetFile(err, file) } buf.Reset() if _, err := buf.ReadFrom(r); err != nil { if err == io.ErrUnexpectedEOF { + // This is error returned due to corruption, with strict == false. continue } else { - return err + return errors.SetFile(err, file) } } - if err := batch.decode(buf.Bytes()); err != nil { - return err - } - if err := batch.memReplay(mem); err != nil { - return err + if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil { + if strict || !errors.IsCorrupted(err) { + return errors.SetFile(err, file) + } else { + db.s.logf("journal error: %v (skipped)", err) + // We won't apply sequence number as it might be corrupted. + continue + } } // Save sequence number. - db.seq = batch.seq + uint64(batch.len()) + db.seq = batch.seq + uint64(batch.Len()) // Flush it if large enough. 
if mem.Size() >= writeBuffer { @@ -564,7 +603,7 @@ func (db *DB) recoverJournal() error { } func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) { - ikey := newIKey(key, seq, tSeek) + ikey := newIkey(key, seq, ktSeek) em, fm := db.getMems() for _, m := range [...]*memDB{em, fm} { @@ -575,9 +614,13 @@ func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, er mk, mv, me := m.mdb.Find(ikey) if me == nil { - ukey, _, t, ok := parseIkey(mk) - if ok && db.s.icmp.uCompare(ukey, key) == 0 { - if t == tDel { + ukey, _, kt, kerr := parseIkey(mk) + if kerr != nil { + // Shouldn't have had happen. + panic(kerr) + } + if db.s.icmp.uCompare(ukey, key) == 0 { + if kt == ktDel { return nil, ErrNotFound } return append([]byte{}, mv...), nil @@ -588,17 +631,60 @@ func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, er } v := db.s.version() - value, cSched, err := v.get(ikey, ro) + value, cSched, err := v.get(ikey, ro, false) v.release() if cSched { // Trigger table compaction. - db.compTrigger(db.tcompTriggerC) + db.compSendTrigger(db.tcompCmdC) + } + return +} + +func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) { + ikey := newIkey(key, seq, ktSeek) + + em, fm := db.getMems() + for _, m := range [...]*memDB{em, fm} { + if m == nil { + continue + } + defer m.decref() + + mk, _, me := m.mdb.Find(ikey) + if me == nil { + ukey, _, kt, kerr := parseIkey(mk) + if kerr != nil { + // Shouldn't have had happen. + panic(kerr) + } + if db.s.icmp.uCompare(ukey, key) == 0 { + if kt == ktDel { + return false, nil + } + return true, nil + } + } else if me != ErrNotFound { + return false, me + } + } + + v := db.s.version() + _, cSched, err := v.get(ikey, ro, true) + v.release() + if cSched { + // Trigger table compaction. 
+ db.compSendTrigger(db.tcompCmdC) + } + if err == nil { + ret = true + } else if err == ErrNotFound { + err = nil } return } // Get gets the value for the given key. It returns ErrNotFound if the -// DB does not contain the key. +// DB does not contains the key. // // The returned slice is its own copy, it is safe to modify the contents // of the returned slice. @@ -609,7 +695,23 @@ func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { return } - return db.get(key, db.getSeq(), ro) + se := db.acquireSnapshot() + defer db.releaseSnapshot(se) + return db.get(key, se.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Get returns. +func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) { + err = db.ok() + if err != nil { + return + } + + se := db.acquireSnapshot() + defer db.releaseSnapshot(se) + return db.has(key, se.seq, ro) } // NewIterator returns an iterator for the latest snapshot of the @@ -633,9 +735,11 @@ func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Itera return iterator.NewEmptyIterator(err) } - snap := db.newSnapshot() - defer snap.Release() - return snap.NewIterator(slice, ro) + se := db.acquireSnapshot() + defer db.releaseSnapshot(se) + // Iterator holds 'version' lock, 'version' is immutable so snapshot + // can be released after iterator created. + return db.newIterator(se.seq, slice, ro) } // GetSnapshot returns a latest snapshot of the underlying DB. A snapshot @@ -655,7 +759,7 @@ func (db *DB) GetSnapshot() (*Snapshot, error) { // // Property names: // leveldb.num-files-at-level{n} -// Returns the number of filer at level 'n'. +// Returns the number of files at level 'n'. // leveldb.stats // Returns statistics of the underlying DB. 
// leveldb.sstables @@ -685,12 +789,13 @@ func (db *DB) GetProperty(name string) (value string, err error) { v := db.s.version() defer v.release() + numFilesPrefix := "num-files-at-level" switch { - case strings.HasPrefix(p, "num-files-at-level"): + case strings.HasPrefix(p, numFilesPrefix): var level uint var rest string - n, _ := fmt.Scanf("%d%s", &level, &rest) - if n != 1 || level >= kNumLevels { + n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest) + if n != 1 || int(level) >= db.s.o.GetNumLevel() { err = errors.New("leveldb: GetProperty: invalid property: " + name) } else { value = fmt.Sprint(v.tLen(int(level))) @@ -752,8 +857,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { sizes := make(Sizes, 0, len(ranges)) for _, r := range ranges { - imin := newIKey(r.Start, kMaxSeq, tSeek) - imax := newIKey(r.Limit, kMaxSeq, tSeek) + imin := newIkey(r.Start, kMaxSeq, ktSeek) + imax := newIkey(r.Limit, kMaxSeq, ktSeek) start, err := v.offsetOf(imin) if err != nil { return nil, err @@ -796,18 +901,23 @@ func (db *DB) Close() error { default: } + // Signal all goroutines. close(db.closeC) - // Wait for the close WaitGroup. + // Wait for all gorotines to exit. db.closeW.Wait() - // Close journal. + // Lock writer and closes journal. db.writeLockC <- struct{}{} if db.journal != nil { db.journal.Close() db.journalWriter.Close() } + if db.writeDelayN > 0 { + db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay) + } + // Close session. 
db.s.close() db.logf("db@close done T·%v", time.Since(start)) @@ -827,7 +937,6 @@ func (db *DB) Close() error { db.journalWriter = nil db.journalFile = nil db.frozenJournalFile = nil - db.snapsRoot = snapshotElement{} db.closer = nil return err diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go index ad385e91e..d9630e0a9 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go @@ -7,11 +7,12 @@ package leveldb import ( - "errors" "sync" "time" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" ) var ( @@ -68,7 +69,7 @@ type cMem struct { } func newCMem(s *session) *cMem { - return &cMem{s: s, rec: new(sessionRecord)} + return &cMem{s: s, rec: &sessionRecord{numLevel: s.o.GetNumLevel()}} } func (c *cMem) flush(mem *memdb.DB, level int) error { @@ -84,7 +85,9 @@ func (c *cMem) flush(mem *memdb.DB, level int) error { // Pick level. if level < 0 { - level = s.version_NB().pickLevel(t.imin.ukey(), t.imax.ukey()) + v := s.version() + level = v.pickLevel(t.imin.ukey(), t.imax.ukey()) + v.release() } c.rec.addTableFile(level, t) @@ -95,24 +98,32 @@ func (c *cMem) flush(mem *memdb.DB, level int) error { } func (c *cMem) reset() { - c.rec = new(sessionRecord) + c.rec = &sessionRecord{numLevel: c.s.o.GetNumLevel()} } func (c *cMem) commit(journal, seq uint64) error { c.rec.setJournalNum(journal) - c.rec.setSeq(seq) + c.rec.setSeqNum(seq) // Commit changes. return c.s.commit(c.rec) } func (db *DB) compactionError() { - var err error + var ( + err error + wlocked bool + ) noerr: + // No error. 
for { select { case err = <-db.compErrSetC: - if err != nil { + switch { + case err == nil: + case errors.IsCorrupted(err): + goto hasperr + default: goto haserr } case _, _ = <-db.closeC: @@ -120,17 +131,39 @@ noerr: } } haserr: + // Transient error. for { select { case db.compErrC <- err: case err = <-db.compErrSetC: - if err == nil { + switch { + case err == nil: goto noerr + case errors.IsCorrupted(err): + goto hasperr + default: } case _, _ = <-db.closeC: return } } +hasperr: + // Persistent error. + for { + select { + case db.compErrC <- err: + case db.compPerErrC <- err: + case db.writeLockC <- struct{}{}: + // Hold write lock, so that write won't pass-through. + wlocked = true + case _, _ = <-db.closeC: + if wlocked { + // We should release the lock or Close will hang. + <-db.writeLockC + } + return + } + } } type compactionTransactCounter int @@ -139,12 +172,17 @@ func (cnt *compactionTransactCounter) incr() { *cnt++ } -func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactCounter) error, rollback func() error) { +type compactionTransactInterface interface { + run(cnt *compactionTransactCounter) error + revert() error +} + +func (db *DB) compactionTransact(name string, t compactionTransactInterface) { defer func() { if x := recover(); x != nil { - if x == errCompactionTransactExiting && rollback != nil { - if err := rollback(); err != nil { - db.logf("%s rollback error %q", name, err) + if x == errCompactionTransactExiting { + if err := t.revert(); err != nil { + db.logf("%s revert error %q", name, err) } } panic(x) @@ -156,9 +194,13 @@ func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactC backoffMax = 8 * time.Second backoffMul = 2 * time.Second ) - backoff := backoffMin - backoffT := time.NewTimer(backoff) - lastCnt := compactionTransactCounter(0) + var ( + backoff = backoffMin + backoffT = time.NewTimer(backoff) + lastCnt = compactionTransactCounter(0) + + disableBackoff = 
db.s.o.GetDisableCompactionBackoff() + ) for n := 0; ; n++ { // Check wether the DB is closed. if db.isClosed() { @@ -170,11 +212,19 @@ func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactC // Execute. cnt := compactionTransactCounter(0) - err := exec(&cnt) + err := t.run(&cnt) + if err != nil { + db.logf("%s error I·%d %q", name, cnt, err) + } // Set compaction error status. select { case db.compErrSetC <- err: + case perr := <-db.compPerErrC: + if err != nil { + db.logf("%s exiting (persistent error %q)", name, perr) + db.compactionExitTransact() + } case _, _ = <-db.closeC: db.logf("%s exiting", name) db.compactionExitTransact() @@ -182,31 +232,56 @@ func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactC if err == nil { return } - db.logf("%s error I·%d %q", name, cnt, err) - - // Reset backoff duration if counter is advancing. - if cnt > lastCnt { - backoff = backoffMin - lastCnt = cnt - } - - // Backoff. - backoffT.Reset(backoff) - if backoff < backoffMax { - backoff *= backoffMul - if backoff > backoffMax { - backoff = backoffMax - } - } - select { - case <-backoffT.C: - case _, _ = <-db.closeC: - db.logf("%s exiting", name) + if errors.IsCorrupted(err) { + db.logf("%s exiting (corruption detected)", name) db.compactionExitTransact() } + + if !disableBackoff { + // Reset backoff duration if counter is advancing. + if cnt > lastCnt { + backoff = backoffMin + lastCnt = cnt + } + + // Backoff. 
+ backoffT.Reset(backoff) + if backoff < backoffMax { + backoff *= backoffMul + if backoff > backoffMax { + backoff = backoffMax + } + } + select { + case <-backoffT.C: + case _, _ = <-db.closeC: + db.logf("%s exiting", name) + db.compactionExitTransact() + } + } } } +type compactionTransactFunc struct { + runFunc func(cnt *compactionTransactCounter) error + revertFunc func() error +} + +func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error { + return t.runFunc(cnt) +} + +func (t *compactionTransactFunc) revert() error { + if t.revertFunc != nil { + return t.revertFunc() + } + return nil +} + +func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) { + db.compactionTransact(name, &compactionTransactFunc{run, revert}) +} + func (db *DB) compactionExitTransact() { panic(errCompactionTransactExiting) } @@ -232,20 +307,23 @@ func (db *DB) memCompaction() { } // Pause table compaction. - ch := make(chan struct{}) + resumeC := make(chan struct{}) select { - case db.tcompPauseC <- (chan<- struct{})(ch): + case db.tcompPauseC <- (chan<- struct{})(resumeC): + case <-db.compPerErrC: + close(resumeC) + resumeC = nil case _, _ = <-db.closeC: return } - db.compactionTransact("mem@flush", func(cnt *compactionTransactCounter) (err error) { + db.compactionTransactFunc("mem@flush", func(cnt *compactionTransactCounter) (err error) { stats.startTimer() defer stats.stopTimer() return c.flush(mem.mdb, -1) }, func() error { for _, r := range c.rec.addedTables { - db.logf("mem@flush rollback @%d", r.num) + db.logf("mem@flush revert @%d", r.num) f := db.s.getTableFile(r.num) if err := f.Remove(); err != nil { return err @@ -254,13 +332,13 @@ func (db *DB) memCompaction() { return nil }) - db.compactionTransact("mem@commit", func(cnt *compactionTransactCounter) (err error) { + db.compactionTransactFunc("mem@commit", func(cnt *compactionTransactCounter) (err error) { stats.startTimer() defer 
stats.stopTimer() return c.commit(db.journalFile.Num(), db.frozenSeq) }, nil) - db.logf("mem@flush commited F·%d T·%v", len(c.rec.addedTables), stats.duration) + db.logf("mem@flush committed F·%d T·%v", len(c.rec.addedTables), stats.duration) for _, r := range c.rec.addedTables { stats.write += r.size @@ -271,26 +349,223 @@ func (db *DB) memCompaction() { db.dropFrozenMem() // Resume table compaction. - select { - case <-ch: - case _, _ = <-db.closeC: - return + if resumeC != nil { + select { + case <-resumeC: + close(resumeC) + case _, _ = <-db.closeC: + return + } } // Trigger table compaction. - db.compTrigger(db.mcompTriggerC) + db.compSendTrigger(db.tcompCmdC) +} + +type tableCompactionBuilder struct { + db *DB + s *session + c *compaction + rec *sessionRecord + stat0, stat1 *cStatsStaging + + snapHasLastUkey bool + snapLastUkey []byte + snapLastSeq uint64 + snapIter int + snapKerrCnt int + snapDropCnt int + + kerrCnt int + dropCnt int + + minSeq uint64 + strict bool + tableSize int + + tw *tWriter +} + +func (b *tableCompactionBuilder) appendKV(key, value []byte) error { + // Create new table if not already. + if b.tw == nil { + // Check for pause event. + if b.db != nil { + select { + case ch := <-b.db.tcompPauseC: + b.db.pauseCompaction(ch) + case _, _ = <-b.db.closeC: + b.db.compactionExitTransact() + default: + } + } + + // Create new table. + var err error + b.tw, err = b.s.tops.create() + if err != nil { + return err + } + } + + // Write key/value into table. 
+ return b.tw.append(key, value) +} + +func (b *tableCompactionBuilder) needFlush() bool { + return b.tw.tw.BytesLen() >= b.tableSize +} + +func (b *tableCompactionBuilder) flush() error { + t, err := b.tw.finish() + if err != nil { + return err + } + b.rec.addTableFile(b.c.level+1, t) + b.stat1.write += t.size + b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.level+1, t.file.Num(), b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax) + b.tw = nil + return nil +} + +func (b *tableCompactionBuilder) cleanup() { + if b.tw != nil { + b.tw.drop() + b.tw = nil + } +} + +func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { + snapResumed := b.snapIter > 0 + hasLastUkey := b.snapHasLastUkey // The key might has zero length, so this is necessary. + lastUkey := append([]byte{}, b.snapLastUkey...) + lastSeq := b.snapLastSeq + b.kerrCnt = b.snapKerrCnt + b.dropCnt = b.snapDropCnt + // Restore compaction state. + b.c.restore() + + defer b.cleanup() + + b.stat1.startTimer() + defer b.stat1.stopTimer() + + iter := b.c.newIterator() + defer iter.Release() + for i := 0; iter.Next(); i++ { + // Incr transact counter. + cnt.incr() + + // Skip until last state. + if i < b.snapIter { + continue + } + + resumed := false + if snapResumed { + resumed = true + snapResumed = false + } + + ikey := iter.Key() + ukey, seq, kt, kerr := parseIkey(ikey) + + if kerr == nil { + shouldStop := !resumed && b.c.shouldStopBefore(ikey) + + if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 { + // First occurrence of this user key. + + // Only rotate tables if ukey doesn't hop across. + if b.tw != nil && (shouldStop || b.needFlush()) { + if err := b.flush(); err != nil { + return err + } + + // Creates snapshot of the state. + b.c.save() + b.snapHasLastUkey = hasLastUkey + b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...) 
+ b.snapLastSeq = lastSeq + b.snapIter = i + b.snapKerrCnt = b.kerrCnt + b.snapDropCnt = b.dropCnt + } + + hasLastUkey = true + lastUkey = append(lastUkey[:0], ukey...) + lastSeq = kMaxSeq + } + + switch { + case lastSeq <= b.minSeq: + // Dropped because newer entry for same user key exist + fallthrough // (A) + case kt == ktDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): + // For this user key: + // (1) there is no data in higher levels + // (2) data in lower levels will have larger seq numbers + // (3) data in layers that are being compacted here and have + // smaller seq numbers will be dropped in the next + // few iterations of this loop (by rule (A) above). + // Therefore this deletion marker is obsolete and can be dropped. + lastSeq = seq + b.dropCnt++ + continue + default: + lastSeq = seq + } + } else { + if b.strict { + return kerr + } + + // Don't drop corrupted keys. + hasLastUkey = false + lastUkey = lastUkey[:0] + lastSeq = kMaxSeq + b.kerrCnt++ + } + + if err := b.appendKV(ikey, iter.Value()); err != nil { + return err + } + } + + if err := iter.Error(); err != nil { + return err + } + + // Finish last table. 
+ if b.tw != nil && !b.tw.empty() { + return b.flush() + } + return nil +} + +func (b *tableCompactionBuilder) revert() error { + for _, at := range b.rec.addedTables { + b.s.logf("table@build revert @%d", at.num) + f := b.s.getTableFile(at.num) + if err := f.Remove(); err != nil { + return err + } + } + return nil } func (db *DB) tableCompaction(c *compaction, noTrivial bool) { - rec := new(sessionRecord) - rec.addCompactionPointer(c.level, c.imax) + defer c.release() + + rec := &sessionRecord{numLevel: db.s.o.GetNumLevel()} + rec.addCompPtr(c.level, c.imax) if !noTrivial && c.trivial() { t := c.tables[0][0] db.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1) - rec.deleteTable(c.level, t.file.Num()) + rec.delTable(c.level, t.file.Num()) rec.addTableFile(c.level+1, t) - db.compactionTransact("table@move", func(cnt *compactionTransactCounter) (err error) { + db.compactionTransactFunc("table@move", func(cnt *compactionTransactCounter) (err error) { return db.s.commit(rec) }, nil) return @@ -301,184 +576,34 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) { for _, t := range tables { stats[i].read += t.size // Insert deleted tables into record - rec.deleteTable(c.level+i, t.file.Num()) + rec.delTable(c.level+i, t.file.Num()) } } sourceSize := int(stats[0].read + stats[1].read) minSeq := db.minSeq() db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq) - var snapUkey []byte - var snapHasUkey bool - var snapSeq uint64 - var snapIter int - var snapDropCnt int - var dropCnt int - db.compactionTransact("table@build", func(cnt *compactionTransactCounter) (err error) { - ukey := append([]byte{}, snapUkey...) 
- hasUkey := snapHasUkey - lseq := snapSeq - dropCnt = snapDropCnt - snapSched := snapIter == 0 - - var tw *tWriter - finish := func() error { - t, err := tw.finish() - if err != nil { - return err - } - rec.addTableFile(c.level+1, t) - stats[1].write += t.size - db.logf("table@build created L%d@%d N·%d S·%s %q:%q", c.level+1, t.file.Num(), tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax) - return nil - } - - defer func() { - stats[1].stopTimer() - if tw != nil { - tw.drop() - tw = nil - } - }() - - stats[1].startTimer() - iter := c.newIterator() - defer iter.Release() - for i := 0; iter.Next(); i++ { - // Incr transact counter. - cnt.incr() - - // Skip until last state. - if i < snapIter { - continue - } - - ikey := iKey(iter.Key()) - - if c.shouldStopBefore(ikey) && tw != nil { - err = finish() - if err != nil { - return - } - snapSched = true - tw = nil - } - - // Scheduled for snapshot, snapshot will used to retry compaction - // if error occured. - if snapSched { - snapUkey = append(snapUkey[:0], ukey...) - snapHasUkey = hasUkey - snapSeq = lseq - snapIter = i - snapDropCnt = dropCnt - snapSched = false - } - - if seq, vt, ok := ikey.parseNum(); !ok { - // Don't drop error keys - ukey = ukey[:0] - hasUkey = false - lseq = kMaxSeq - } else { - if !hasUkey || db.s.icmp.uCompare(ikey.ukey(), ukey) != 0 { - // First occurrence of this user key - ukey = append(ukey[:0], ikey.ukey()...) - hasUkey = true - lseq = kMaxSeq - } - - drop := false - if lseq <= minSeq { - // Dropped because newer entry for same user key exist - drop = true // (A) - } else if vt == tDel && seq <= minSeq && c.baseLevelForKey(ukey) { - // For this user key: - // (1) there is no data in higher levels - // (2) data in lower levels will have larger seq numbers - // (3) data in layers that are being compacted here and have - // smaller seq numbers will be dropped in the next - // few iterations of this loop (by rule (A) above). 
- // Therefore this deletion marker is obsolete and can be dropped. - drop = true - } - - lseq = seq - if drop { - dropCnt++ - continue - } - } - - // Create new table if not already - if tw == nil { - // Check for pause event. - select { - case ch := <-db.tcompPauseC: - db.pauseCompaction(ch) - case _, _ = <-db.closeC: - db.compactionExitTransact() - default: - } - - // Create new table. - tw, err = db.s.tops.create() - if err != nil { - return - } - } - - // Write key/value into table - err = tw.append(ikey, iter.Value()) - if err != nil { - return - } - - // Finish table if it is big enough - if tw.tw.BytesLen() >= kMaxTableSize { - err = finish() - if err != nil { - return - } - snapSched = true - tw = nil - } - } - - err = iter.Error() - if err != nil { - return - } - - // Finish last table - if tw != nil && !tw.empty() { - err = finish() - if err != nil { - return - } - tw = nil - } - return - }, func() error { - for _, r := range rec.addedTables { - db.logf("table@build rollback @%d", r.num) - f := db.s.getTableFile(r.num) - if err := f.Remove(); err != nil { - return err - } - } - return nil - }) + b := &tableCompactionBuilder{ + db: db, + s: db.s, + c: c, + rec: rec, + stat1: &stats[1], + minSeq: minSeq, + strict: db.s.o.GetStrict(opt.StrictCompaction), + tableSize: db.s.o.GetCompactionTableSize(c.level + 1), + } + db.compactionTransact("table@build", b) // Commit changes - db.compactionTransact("table@commit", func(cnt *compactionTransactCounter) (err error) { + db.compactionTransactFunc("table@commit", func(cnt *compactionTransactCounter) (err error) { stats[1].startTimer() defer stats[1].stopTimer() return db.s.commit(rec) }, nil) resultSize := int(stats[1].write) - db.logf("table@compaction commited F%s S%s D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), dropCnt, stats[1].duration) + db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), 
sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration) // Save compaction stats for i := range stats { @@ -494,14 +619,14 @@ func (db *DB) tableRangeCompaction(level int, umin, umax []byte) { db.tableCompaction(c, true) } } else { - v := db.s.version_NB() - + v := db.s.version() m := 1 for i, t := range v.tables[1:] { if t.overlaps(db.s.icmp, umin, umax, false) { m = i + 1 } } + v.release() for level := 0; level < m; level++ { if c := db.s.getCompactionRange(level, umin, umax); c != nil { @@ -518,7 +643,9 @@ func (db *DB) tableAutoCompaction() { } func (db *DB) tableNeedCompaction() bool { - return db.s.version_NB().needCompaction() + v := db.s.version() + defer v.release() + return v.needCompaction() } func (db *DB) pauseCompaction(ch chan<- struct{}) { @@ -538,7 +665,12 @@ type cIdle struct { } func (r cIdle) ack(err error) { - r.ackC <- err + if r.ackC != nil { + defer func() { + recover() + }() + r.ackC <- err + } } type cRange struct { @@ -548,29 +680,45 @@ type cRange struct { } func (r cRange) ack(err error) { - defer func() { - recover() - }() if r.ackC != nil { + defer func() { + recover() + }() r.ackC <- err } } -func (db *DB) compSendIdle(compC chan<- cCmd) error { +// This will trigger auto compation and/or wait for all compaction to be done. +func (db *DB) compSendIdle(compC chan<- cCmd) (err error) { ch := make(chan error) defer close(ch) // Send cmd. select { case compC <- cIdle{ch}: - case err := <-db.compErrC: - return err + case err = <-db.compErrC: + return case _, _ = <-db.closeC: return ErrClosed } // Wait cmd. - return <-ch + select { + case err = <-ch: + case err = <-db.compErrC: + case _, _ = <-db.closeC: + return ErrClosed + } + return err } +// This will trigger auto compaction but will not wait for it. +func (db *DB) compSendTrigger(compC chan<- cCmd) { + select { + case compC <- cIdle{}: + default: + } +} + +// Send range compaction request. 
func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) { ch := make(chan error) defer close(ch) @@ -584,19 +732,14 @@ func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err } // Wait cmd. select { - case err = <-db.compErrC: case err = <-ch: + case err = <-db.compErrC: + case _, _ = <-db.closeC: + return ErrClosed } return err } -func (db *DB) compTrigger(compTriggerC chan struct{}) { - select { - case compTriggerC <- struct{}{}: - default: - } -} - func (db *DB) mCompaction() { var x cCmd @@ -615,11 +758,14 @@ func (db *DB) mCompaction() { for { select { case x = <-db.mcompCmdC: - db.memCompaction() - x.ack(nil) - x = nil - case <-db.mcompTriggerC: - db.memCompaction() + switch x.(type) { + case cIdle: + db.memCompaction() + x.ack(nil) + x = nil + default: + panic("leveldb: unknown command") + } case _, _ = <-db.closeC: return } @@ -650,7 +796,6 @@ func (db *DB) tCompaction() { if db.tableNeedCompaction() { select { case x = <-db.tcompCmdC: - case <-db.tcompTriggerC: case ch := <-db.tcompPauseC: db.pauseCompaction(ch) continue @@ -666,7 +811,6 @@ func (db *DB) tCompaction() { ackQ = ackQ[:0] select { case x = <-db.tcompCmdC: - case <-db.tcompTriggerC: case ch := <-db.tcompPauseC: db.pauseCompaction(ch) continue @@ -681,6 +825,8 @@ func (db *DB) tCompaction() { case cRange: db.tableRangeCompaction(cmd.level, cmd.min, cmd.max) x.ack(nil) + default: + panic("leveldb: unknown command") } x = nil } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go index 120e6b0ed..ef34711b0 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go @@ -48,7 +48,7 @@ func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.It i = append(i, fmi) } i = append(i, ti...) 
- strict := db.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator) + strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader) mi := iterator.NewMergedIterator(i, db.s.icmp, strict) mi.SetReleaser(&versionReleaser{v: v}) return mi @@ -59,10 +59,10 @@ func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *d if slice != nil { islice = &util.Range{} if slice.Start != nil { - islice.Start = newIKey(slice.Start, kMaxSeq, tSeek) + islice.Start = newIkey(slice.Start, kMaxSeq, ktSeek) } if slice.Limit != nil { - islice.Limit = newIKey(slice.Limit, kMaxSeq, tSeek) + islice.Limit = newIkey(slice.Limit, kMaxSeq, ktSeek) } } rawIter := db.newRawIterator(islice, ro) @@ -71,7 +71,7 @@ func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *d icmp: db.s.icmp, iter: rawIter, seq: seq, - strict: db.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator), + strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader), key: make([]byte, 0), value: make([]byte, 0), } @@ -162,7 +162,7 @@ func (i *dbIter) Seek(key []byte) bool { return false } - ikey := newIKey(key, i.seq, tSeek) + ikey := newIkey(key, i.seq, ktSeek) if i.iter.Seek(ikey) { i.dir = dirSOI return i.next() @@ -174,15 +174,14 @@ func (i *dbIter) Seek(key []byte) bool { func (i *dbIter) next() bool { for { - ukey, seq, t, ok := parseIkey(i.iter.Key()) - if ok { + if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil { if seq <= i.seq { - switch t { - case tDel: + switch kt { + case ktDel: // Skip deleted key. i.key = append(i.key[:0], ukey...) i.dir = dirForward - case tVal: + case ktVal: if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 { i.key = append(i.key[:0], ukey...) i.value = append(i.value[:0], i.iter.Value()...) 
@@ -192,7 +191,7 @@ func (i *dbIter) next() bool { } } } else if i.strict { - i.setErr(errInvalidIkey) + i.setErr(kerr) break } if !i.iter.Next() { @@ -225,20 +224,19 @@ func (i *dbIter) prev() bool { del := true if i.iter.Valid() { for { - ukey, seq, t, ok := parseIkey(i.iter.Key()) - if ok { + if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil { if seq <= i.seq { if !del && i.icmp.uCompare(ukey, i.key) < 0 { return true } - del = (t == tDel) + del = (kt == ktDel) if !del { i.key = append(i.key[:0], ukey...) i.value = append(i.value[:0], i.iter.Value()...) } } } else if i.strict { - i.setErr(errInvalidIkey) + i.setErr(kerr) return false } if !i.iter.Prev() { @@ -267,13 +265,12 @@ func (i *dbIter) Prev() bool { return i.Last() case dirForward: for i.iter.Prev() { - ukey, _, _, ok := parseIkey(i.iter.Key()) - if ok { + if ukey, _, _, kerr := parseIkey(i.iter.Key()); kerr == nil { if i.icmp.uCompare(ukey, i.key) < 0 { goto cont } } else if i.strict { - i.setErr(errInvalidIkey) + i.setErr(kerr) return false } } @@ -321,9 +318,13 @@ func (i *dbIter) Release() { } func (i *dbIter) SetReleaser(releaser util.Releaser) { - if i.dir != dirReleased { - i.releaser = releaser + if i.dir == dirReleased { + panic(util.ErrReleased) } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser } func (i *dbIter) Error() error { diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go index d7625b1a6..2df76584f 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go @@ -7,6 +7,8 @@ package leveldb import ( + "container/list" + "fmt" "runtime" "sync" "sync/atomic" @@ -19,51 +21,41 @@ import ( type snapshotElement struct { seq uint64 ref int - // Next and previous pointers in the doubly-linked list of elements. 
- next, prev *snapshotElement -} - -// Initialize the snapshot. -func (db *DB) initSnapshot() { - db.snapsRoot.next = &db.snapsRoot - db.snapsRoot.prev = &db.snapsRoot + e *list.Element } // Acquires a snapshot, based on latest sequence. func (db *DB) acquireSnapshot() *snapshotElement { db.snapsMu.Lock() + defer db.snapsMu.Unlock() + seq := db.getSeq() - elem := db.snapsRoot.prev - if elem == &db.snapsRoot || elem.seq != seq { - at := db.snapsRoot.prev - next := at.next - elem = &snapshotElement{ - seq: seq, - prev: at, - next: next, + + if e := db.snapsList.Back(); e != nil { + se := e.Value.(*snapshotElement) + if se.seq == seq { + se.ref++ + return se + } else if seq < se.seq { + panic("leveldb: sequence number is not increasing") } - at.next = elem - next.prev = elem } - elem.ref++ - db.snapsMu.Unlock() - return elem + se := &snapshotElement{seq: seq, ref: 1} + se.e = db.snapsList.PushBack(se) + return se } // Releases given snapshot element. -func (db *DB) releaseSnapshot(elem *snapshotElement) { - if !db.isClosed() { - db.snapsMu.Lock() - elem.ref-- - if elem.ref == 0 { - elem.prev.next = elem.next - elem.next.prev = elem.prev - elem.next = nil - elem.prev = nil - } else if elem.ref < 0 { - panic("leveldb: Snapshot: negative element reference") - } - db.snapsMu.Unlock() +func (db *DB) releaseSnapshot(se *snapshotElement) { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + se.ref-- + if se.ref == 0 { + db.snapsList.Remove(se.e) + se.e = nil + } else if se.ref < 0 { + panic("leveldb: Snapshot: negative element reference") } } @@ -71,10 +63,11 @@ func (db *DB) releaseSnapshot(elem *snapshotElement) { func (db *DB) minSeq() uint64 { db.snapsMu.Lock() defer db.snapsMu.Unlock() - elem := db.snapsRoot.prev - if elem != &db.snapsRoot { - return elem.seq + + if e := db.snapsList.Front(); e != nil { + return e.Value.(*snapshotElement).seq } + return db.getSeq() } @@ -97,8 +90,12 @@ func (db *DB) newSnapshot() *Snapshot { return snap } +func (snap *Snapshot) String() 
string { + return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq) +} + // Get gets the value for the given key. It returns ErrNotFound if -// the DB does not contain the key. +// the DB does not contains the key. // // The caller should not modify the contents of the returned slice, but // it is safe to modify the contents of the argument after Get returns. @@ -116,6 +113,23 @@ func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err er return snap.db.get(key, snap.elem.seq, ro) } +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Get returns. +func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) { + err = snap.db.ok() + if err != nil { + return + } + snap.mu.RLock() + defer snap.mu.RUnlock() + if snap.released { + err = ErrSnapshotReleased + return + } + return snap.db.has(key, snap.elem.seq, ro) +} + // NewIterator returns an iterator for the snapshot of the uderlying DB. // The returned iterator is not goroutine-safe, but it is safe to use // multiple iterators concurrently, with each in a dedicated goroutine. 
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go index 02f69bf1a..9c83497f5 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go @@ -7,6 +7,10 @@ package leveldb import ( + "bytes" + "container/list" + crand "crypto/rand" + "encoding/binary" "fmt" "math/rand" "os" @@ -20,6 +24,7 @@ import ( "unsafe" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" @@ -148,7 +153,10 @@ func (h *dbHarness) maxNextLevelOverlappingBytes(want uint64) { t := h.t db := h.db - var res uint64 + var ( + maxOverlaps uint64 + maxLevel int + ) v := db.s.version() for i, tt := range v.tables[1 : len(v.tables)-1] { level := i + 1 @@ -156,15 +164,18 @@ func (h *dbHarness) maxNextLevelOverlappingBytes(want uint64) { for _, t := range tt { r := next.getOverlaps(nil, db.s.icmp, t.imin.ukey(), t.imax.ukey(), false) sum := r.size() - if sum > res { - res = sum + if sum > maxOverlaps { + maxOverlaps = sum + maxLevel = level } } } v.release() - if res > want { - t.Errorf("next level overlapping bytes is more than %d, got=%d", want, res) + if maxOverlaps > want { + t.Errorf("next level most overlapping bytes is more than %d, got=%d level=%d", want, maxOverlaps, maxLevel) + } else { + t.Logf("next level most overlapping bytes is %d, level=%d want=%d", maxOverlaps, maxLevel, want) } } @@ -237,7 +248,7 @@ func (h *dbHarness) allEntriesFor(key, want string) { db := h.db s := db.s - ikey := 
newIKey([]byte(key), kMaxSeq, tVal) + ikey := newIkey([]byte(key), kMaxSeq, ktVal) iter := db.newRawIterator(nil, nil) if !iter.Seek(ikey) && iter.Error() != nil { t.Error("AllEntries: error during seek, err: ", iter.Error()) @@ -246,19 +257,18 @@ func (h *dbHarness) allEntriesFor(key, want string) { res := "[ " first := true for iter.Valid() { - rkey := iKey(iter.Key()) - if _, t, ok := rkey.parseNum(); ok { - if s.icmp.uCompare(ikey.ukey(), rkey.ukey()) != 0 { + if ukey, _, kt, kerr := parseIkey(iter.Key()); kerr == nil { + if s.icmp.uCompare(ikey.ukey(), ukey) != 0 { break } if !first { res += ", " } first = false - switch t { - case tVal: + switch kt { + case ktVal: res += string(iter.Value()) - case tDel: + case ktDel: res += "DEL" } } else { @@ -323,6 +333,8 @@ func (h *dbHarness) compactMem() { t := h.t db := h.db + t.Log("starting memdb compaction") + db.writeLockC <- struct{}{} defer func() { <-db.writeLockC @@ -338,6 +350,8 @@ func (h *dbHarness) compactMem() { if h.totalTables() == 0 { t.Error("zero tables after mem compaction") } + + t.Log("memdb compaction done") } func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool) { @@ -352,6 +366,8 @@ func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool) _max = []byte(max) } + t.Logf("starting table range compaction: level=%d, min=%q, max=%q", level, min, max) + if err := db.compSendRange(db.tcompCmdC, level, _min, _max); err != nil { if wanterr { t.Log("CompactRangeAt: got error (expected): ", err) @@ -361,6 +377,8 @@ func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool) } else if wanterr { t.Error("CompactRangeAt: expect error") } + + t.Log("table range compaction done") } func (h *dbHarness) compactRangeAt(level int, min, max string) { @@ -371,6 +389,8 @@ func (h *dbHarness) compactRange(min, max string) { t := h.t db := h.db + t.Logf("starting DB range compaction: min=%q, max=%q", min, max) + var r util.Range if min != "" { r.Start = 
[]byte(min) @@ -381,6 +401,8 @@ func (h *dbHarness) compactRange(min, max string) { if err := db.CompactRange(r); err != nil { t.Error("CompactRange: got error: ", err) } + + t.Log("DB range compaction done") } func (h *dbHarness) sizeAssert(start, limit string, low, hi uint64) { @@ -502,13 +524,13 @@ func Test_FieldsAligned(t *testing.T) { p1 := new(DB) testAligned(t, "DB.seq", unsafe.Offsetof(p1.seq)) p2 := new(session) - testAligned(t, "session.stFileNum", unsafe.Offsetof(p2.stFileNum)) + testAligned(t, "session.stNextFileNum", unsafe.Offsetof(p2.stNextFileNum)) testAligned(t, "session.stJournalNum", unsafe.Offsetof(p2.stJournalNum)) testAligned(t, "session.stPrevJournalNum", unsafe.Offsetof(p2.stPrevJournalNum)) - testAligned(t, "session.stSeq", unsafe.Offsetof(p2.stSeq)) + testAligned(t, "session.stSeqNum", unsafe.Offsetof(p2.stSeqNum)) } -func TestDb_Locking(t *testing.T) { +func TestDB_Locking(t *testing.T) { h := newDbHarness(t) defer h.stor.Close() h.openAssert(false) @@ -516,7 +538,7 @@ func TestDb_Locking(t *testing.T) { h.openAssert(true) } -func TestDb_Empty(t *testing.T) { +func TestDB_Empty(t *testing.T) { trun(t, func(h *dbHarness) { h.get("foo", false) @@ -525,7 +547,7 @@ func TestDb_Empty(t *testing.T) { }) } -func TestDb_ReadWrite(t *testing.T) { +func TestDB_ReadWrite(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") h.getVal("foo", "v1") @@ -540,7 +562,7 @@ func TestDb_ReadWrite(t *testing.T) { }) } -func TestDb_PutDeleteGet(t *testing.T) { +func TestDB_PutDeleteGet(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") h.getVal("foo", "v1") @@ -554,7 +576,7 @@ func TestDb_PutDeleteGet(t *testing.T) { }) } -func TestDb_EmptyBatch(t *testing.T) { +func TestDB_EmptyBatch(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -566,7 +588,7 @@ func TestDb_EmptyBatch(t *testing.T) { h.get("foo", false) } -func TestDb_GetFromFrozen(t *testing.T) { +func TestDB_GetFromFrozen(t *testing.T) { h := newDbHarnessWopt(t, 
&opt.Options{WriteBuffer: 100100}) defer h.close() @@ -592,7 +614,7 @@ func TestDb_GetFromFrozen(t *testing.T) { h.get("k2", true) } -func TestDb_GetFromTable(t *testing.T) { +func TestDB_GetFromTable(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") h.compactMem() @@ -600,7 +622,7 @@ func TestDb_GetFromTable(t *testing.T) { }) } -func TestDb_GetSnapshot(t *testing.T) { +func TestDB_GetSnapshot(t *testing.T) { trun(t, func(h *dbHarness) { bar := strings.Repeat("b", 200) h.put("foo", "v1") @@ -634,7 +656,7 @@ func TestDb_GetSnapshot(t *testing.T) { }) } -func TestDb_GetLevel0Ordering(t *testing.T) { +func TestDB_GetLevel0Ordering(t *testing.T) { trun(t, func(h *dbHarness) { for i := 0; i < 4; i++ { h.put("bar", fmt.Sprintf("b%d", i)) @@ -657,7 +679,7 @@ func TestDb_GetLevel0Ordering(t *testing.T) { }) } -func TestDb_GetOrderedByLevels(t *testing.T) { +func TestDB_GetOrderedByLevels(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") h.compactMem() @@ -669,7 +691,7 @@ func TestDb_GetOrderedByLevels(t *testing.T) { }) } -func TestDb_GetPicksCorrectFile(t *testing.T) { +func TestDB_GetPicksCorrectFile(t *testing.T) { trun(t, func(h *dbHarness) { // Arrange to have multiple files in a non-level-0 level. 
h.put("a", "va") @@ -693,7 +715,7 @@ func TestDb_GetPicksCorrectFile(t *testing.T) { }) } -func TestDb_GetEncountersEmptyLevel(t *testing.T) { +func TestDB_GetEncountersEmptyLevel(t *testing.T) { trun(t, func(h *dbHarness) { // Arrange for the following to happen: // * sstable A in level 0 @@ -748,7 +770,7 @@ func TestDb_GetEncountersEmptyLevel(t *testing.T) { }) } -func TestDb_IterMultiWithDelete(t *testing.T) { +func TestDB_IterMultiWithDelete(t *testing.T) { trun(t, func(h *dbHarness) { h.put("a", "va") h.put("b", "vb") @@ -774,7 +796,7 @@ func TestDb_IterMultiWithDelete(t *testing.T) { }) } -func TestDb_IteratorPinsRef(t *testing.T) { +func TestDB_IteratorPinsRef(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -798,7 +820,7 @@ func TestDb_IteratorPinsRef(t *testing.T) { iter.Release() } -func TestDb_Recover(t *testing.T) { +func TestDB_Recover(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") h.put("baz", "v5") @@ -820,7 +842,7 @@ func TestDb_Recover(t *testing.T) { }) } -func TestDb_RecoverWithEmptyJournal(t *testing.T) { +func TestDB_RecoverWithEmptyJournal(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") h.put("foo", "v2") @@ -834,7 +856,7 @@ func TestDb_RecoverWithEmptyJournal(t *testing.T) { }) } -func TestDb_RecoverDuringMemtableCompaction(t *testing.T) { +func TestDB_RecoverDuringMemtableCompaction(t *testing.T) { truno(t, &opt.Options{WriteBuffer: 1000000}, func(h *dbHarness) { h.stor.DelaySync(storage.TypeTable) @@ -850,7 +872,7 @@ func TestDb_RecoverDuringMemtableCompaction(t *testing.T) { }) } -func TestDb_MinorCompactionsHappen(t *testing.T) { +func TestDB_MinorCompactionsHappen(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 10000}) defer h.close() @@ -874,7 +896,7 @@ func TestDb_MinorCompactionsHappen(t *testing.T) { } } -func TestDb_RecoverWithLargeJournal(t *testing.T) { +func TestDB_RecoverWithLargeJournal(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -899,7 +921,7 @@ func 
TestDb_RecoverWithLargeJournal(t *testing.T) { v.release() } -func TestDb_CompactionsGenerateMultipleFiles(t *testing.T) { +func TestDB_CompactionsGenerateMultipleFiles(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{ WriteBuffer: 10000000, Compression: opt.NoCompression, @@ -937,11 +959,11 @@ func TestDb_CompactionsGenerateMultipleFiles(t *testing.T) { } } -func TestDb_RepeatedWritesToSameKey(t *testing.T) { +func TestDB_RepeatedWritesToSameKey(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100000}) defer h.close() - maxTables := kNumLevels + kL0_StopWritesTrigger + maxTables := h.o.GetNumLevel() + h.o.GetWriteL0PauseTrigger() value := strings.Repeat("v", 2*h.o.GetWriteBuffer()) for i := 0; i < 5*maxTables; i++ { @@ -953,13 +975,13 @@ func TestDb_RepeatedWritesToSameKey(t *testing.T) { } } -func TestDb_RepeatedWritesToSameKeyAfterReopen(t *testing.T) { +func TestDB_RepeatedWritesToSameKeyAfterReopen(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100000}) defer h.close() h.reopenDB() - maxTables := kNumLevels + kL0_StopWritesTrigger + maxTables := h.o.GetNumLevel() + h.o.GetWriteL0PauseTrigger() value := strings.Repeat("v", 2*h.o.GetWriteBuffer()) for i := 0; i < 5*maxTables; i++ { @@ -971,11 +993,11 @@ func TestDb_RepeatedWritesToSameKeyAfterReopen(t *testing.T) { } } -func TestDb_SparseMerge(t *testing.T) { +func TestDB_SparseMerge(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression}) defer h.close() - h.putMulti(kNumLevels, "A", "Z") + h.putMulti(h.o.GetNumLevel(), "A", "Z") // Suppose there is: // small amount of data with prefix A @@ -999,6 +1021,7 @@ func TestDb_SparseMerge(t *testing.T) { h.put("C", "vc2") h.compactMem() + h.waitCompaction() h.maxNextLevelOverlappingBytes(20 * 1048576) h.compactRangeAt(0, "", "") h.waitCompaction() @@ -1008,7 +1031,7 @@ func TestDb_SparseMerge(t *testing.T) { h.maxNextLevelOverlappingBytes(20 * 1048576) } -func TestDb_SizeOf(t *testing.T) { +func 
TestDB_SizeOf(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{ Compression: opt.NoCompression, WriteBuffer: 10000000, @@ -1058,7 +1081,7 @@ func TestDb_SizeOf(t *testing.T) { } } -func TestDb_SizeOf_MixOfSmallAndLarge(t *testing.T) { +func TestDB_SizeOf_MixOfSmallAndLarge(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression}) defer h.close() @@ -1096,7 +1119,7 @@ func TestDb_SizeOf_MixOfSmallAndLarge(t *testing.T) { } } -func TestDb_Snapshot(t *testing.T) { +func TestDB_Snapshot(t *testing.T) { trun(t, func(h *dbHarness) { h.put("foo", "v1") s1 := h.getSnapshot() @@ -1125,13 +1148,51 @@ func TestDb_Snapshot(t *testing.T) { }) } -func TestDb_HiddenValuesAreRemoved(t *testing.T) { +func TestDB_SnapshotList(t *testing.T) { + db := &DB{snapsList: list.New()} + e0a := db.acquireSnapshot() + e0b := db.acquireSnapshot() + db.seq = 1 + e1 := db.acquireSnapshot() + db.seq = 2 + e2 := db.acquireSnapshot() + + if db.minSeq() != 0 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } + db.releaseSnapshot(e0a) + if db.minSeq() != 0 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } + db.releaseSnapshot(e2) + if db.minSeq() != 0 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } + db.releaseSnapshot(e0b) + if db.minSeq() != 1 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } + e2 = db.acquireSnapshot() + if db.minSeq() != 1 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } + db.releaseSnapshot(e1) + if db.minSeq() != 2 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } + db.releaseSnapshot(e2) + if db.minSeq() != 2 { + t.Fatalf("invalid sequence number, got=%d", db.minSeq()) + } +} + +func TestDB_HiddenValuesAreRemoved(t *testing.T) { trun(t, func(h *dbHarness) { s := h.db.s h.put("foo", "v1") h.compactMem() - m := kMaxMemCompactLevel + m := h.o.GetMaxMemCompationLevel() v := s.version() num := v.tLen(m) v.release() @@ -1168,14 +1229,14 @@ func 
TestDb_HiddenValuesAreRemoved(t *testing.T) { }) } -func TestDb_DeletionMarkers2(t *testing.T) { +func TestDB_DeletionMarkers2(t *testing.T) { h := newDbHarness(t) defer h.close() s := h.db.s h.put("foo", "v1") h.compactMem() - m := kMaxMemCompactLevel + m := h.o.GetMaxMemCompationLevel() v := s.version() num := v.tLen(m) v.release() @@ -1209,7 +1270,7 @@ func TestDb_DeletionMarkers2(t *testing.T) { h.allEntriesFor("foo", "[ ]") } -func TestDb_CompactionTableOpenError(t *testing.T) { +func TestDB_CompactionTableOpenError(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{CachedOpenFiles: -1}) defer h.close() @@ -1228,14 +1289,14 @@ func TestDb_CompactionTableOpenError(t *testing.T) { t.Errorf("total tables is %d, want %d", n, im) } - h.stor.SetOpenErr(storage.TypeTable) + h.stor.SetEmuErr(storage.TypeTable, tsOpOpen) go h.db.CompactRange(util.Range{}) if err := h.db.compSendIdle(h.db.tcompCmdC); err != nil { t.Log("compaction error: ", err) } h.closeDB0() h.openDB() - h.stor.SetOpenErr(0) + h.stor.SetEmuErr(0, tsOpOpen) for i := 0; i < im; i++ { for j := 0; j < jm; j++ { @@ -1244,9 +1305,9 @@ func TestDb_CompactionTableOpenError(t *testing.T) { } } -func TestDb_OverlapInLevel0(t *testing.T) { +func TestDB_OverlapInLevel0(t *testing.T) { trun(t, func(h *dbHarness) { - if kMaxMemCompactLevel != 2 { + if h.o.GetMaxMemCompationLevel() != 2 { t.Fatal("fix test to reflect the config") } @@ -1287,7 +1348,7 @@ func TestDb_OverlapInLevel0(t *testing.T) { }) } -func TestDb_L0_CompactionBug_Issue44_a(t *testing.T) { +func TestDB_L0_CompactionBug_Issue44_a(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -1307,7 +1368,7 @@ func TestDb_L0_CompactionBug_Issue44_a(t *testing.T) { h.getKeyVal("(a->v)") } -func TestDb_L0_CompactionBug_Issue44_b(t *testing.T) { +func TestDB_L0_CompactionBug_Issue44_b(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -1336,7 +1397,7 @@ func TestDb_L0_CompactionBug_Issue44_b(t *testing.T) { h.getKeyVal("(->)(c->cv)") } -func 
TestDb_SingleEntryMemCompaction(t *testing.T) { +func TestDB_SingleEntryMemCompaction(t *testing.T) { trun(t, func(h *dbHarness) { for i := 0; i < 10; i++ { h.put("big", strings.Repeat("v", opt.DefaultWriteBuffer)) @@ -1353,7 +1414,7 @@ func TestDb_SingleEntryMemCompaction(t *testing.T) { }) } -func TestDb_ManifestWriteError(t *testing.T) { +func TestDB_ManifestWriteError(t *testing.T) { for i := 0; i < 2; i++ { func() { h := newDbHarness(t) @@ -1366,23 +1427,23 @@ func TestDb_ManifestWriteError(t *testing.T) { h.compactMem() h.getVal("foo", "bar") v := h.db.s.version() - if n := v.tLen(kMaxMemCompactLevel); n != 1 { + if n := v.tLen(h.o.GetMaxMemCompationLevel()); n != 1 { t.Errorf("invalid total tables, want=1 got=%d", n) } v.release() if i == 0 { - h.stor.SetWriteErr(storage.TypeManifest) + h.stor.SetEmuErr(storage.TypeManifest, tsOpWrite) } else { - h.stor.SetSyncErr(storage.TypeManifest) + h.stor.SetEmuErr(storage.TypeManifest, tsOpSync) } // Merging compaction (will fail) - h.compactRangeAtErr(kMaxMemCompactLevel, "", "", true) + h.compactRangeAtErr(h.o.GetMaxMemCompationLevel(), "", "", true) h.db.Close() - h.stor.SetWriteErr(0) - h.stor.SetSyncErr(0) + h.stor.SetEmuErr(0, tsOpWrite) + h.stor.SetEmuErr(0, tsOpSync) // Should not lose data h.openDB() @@ -1403,7 +1464,7 @@ func assertErr(t *testing.T, err error, wanterr bool) { } } -func TestDb_ClosedIsClosed(t *testing.T) { +func TestDB_ClosedIsClosed(t *testing.T) { h := newDbHarness(t) db := h.db @@ -1498,7 +1559,7 @@ func (p numberComparer) Compare(a, b []byte) int { func (numberComparer) Separator(dst, a, b []byte) []byte { return nil } func (numberComparer) Successor(dst, b []byte) []byte { return nil } -func TestDb_CustomComparer(t *testing.T) { +func TestDB_CustomComparer(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{ Comparer: numberComparer{}, WriteBuffer: 1000, @@ -1528,11 +1589,11 @@ func TestDb_CustomComparer(t *testing.T) { } } -func TestDb_ManualCompaction(t *testing.T) { +func 
TestDB_ManualCompaction(t *testing.T) { h := newDbHarness(t) defer h.close() - if kMaxMemCompactLevel != 2 { + if h.o.GetMaxMemCompationLevel() != 2 { t.Fatal("fix test to reflect the config") } @@ -1566,7 +1627,7 @@ func TestDb_ManualCompaction(t *testing.T) { h.tablesPerLevel("0,0,1") } -func TestDb_BloomFilter(t *testing.T) { +func TestDB_BloomFilter(t *testing.T) { h := newDbHarnessWopt(t, &opt.Options{ BlockCache: opt.NoCache, Filter: filter.NewBloomFilter(10), @@ -1577,11 +1638,7 @@ func TestDb_BloomFilter(t *testing.T) { return fmt.Sprintf("key%06d", i) } - const ( - n = 10000 - indexOverheat = 19898 - filterOverheat = 19799 - ) + const n = 10000 // Populate multiple layers for i := 0; i < n; i++ { @@ -1605,7 +1662,7 @@ func TestDb_BloomFilter(t *testing.T) { cnt := int(h.stor.ReadCounter()) t.Logf("lookup of %d present keys yield %d sstable I/O reads", n, cnt) - if min, max := n+indexOverheat+filterOverheat, n+indexOverheat+filterOverheat+2*n/100; cnt < min || cnt > max { + if min, max := n, n+2*n/100; cnt < min || cnt > max { t.Errorf("num of sstable I/O reads of present keys not in range of %d - %d, got %d", min, max, cnt) } @@ -1616,14 +1673,14 @@ func TestDb_BloomFilter(t *testing.T) { } cnt = int(h.stor.ReadCounter()) t.Logf("lookup of %d missing keys yield %d sstable I/O reads", n, cnt) - if max := 3*n/100 + indexOverheat + filterOverheat; cnt > max { + if max := 3 * n / 100; cnt > max { t.Errorf("num of sstable I/O reads of missing keys was more than %d, got %d", max, cnt) } h.stor.ReleaseSync(storage.TypeTable) } -func TestDb_Concurrent(t *testing.T) { +func TestDB_Concurrent(t *testing.T) { const n, secs, maxkey = 4, 2, 1000 runtime.GOMAXPROCS(n) @@ -1688,7 +1745,7 @@ func TestDb_Concurrent(t *testing.T) { runtime.GOMAXPROCS(1) } -func TestDb_Concurrent2(t *testing.T) { +func TestDB_Concurrent2(t *testing.T) { const n, n2 = 4, 4000 runtime.GOMAXPROCS(n*2 + 2) @@ -1759,7 +1816,7 @@ func TestDb_Concurrent2(t *testing.T) { runtime.GOMAXPROCS(1) } 
-func TestDb_CreateReopenDbOnFile(t *testing.T) { +func TestDB_CreateReopenDbOnFile(t *testing.T) { dbpath := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestCreateReopenDbOnFile-%d", os.Getuid())) if err := os.RemoveAll(dbpath); err != nil { t.Fatal("cannot remove old db: ", err) @@ -1787,7 +1844,7 @@ func TestDb_CreateReopenDbOnFile(t *testing.T) { } } -func TestDb_CreateReopenDbOnFile2(t *testing.T) { +func TestDB_CreateReopenDbOnFile2(t *testing.T) { dbpath := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestCreateReopenDbOnFile2-%d", os.Getuid())) if err := os.RemoveAll(dbpath); err != nil { t.Fatal("cannot remove old db: ", err) @@ -1808,7 +1865,7 @@ func TestDb_CreateReopenDbOnFile2(t *testing.T) { } } -func TestDb_DeletionMarkersOnMemdb(t *testing.T) { +func TestDB_DeletionMarkersOnMemdb(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -1819,8 +1876,8 @@ func TestDb_DeletionMarkersOnMemdb(t *testing.T) { h.getKeyVal("") } -func TestDb_LeveldbIssue178(t *testing.T) { - nKeys := (kMaxTableSize / 30) * 5 +func TestDB_LeveldbIssue178(t *testing.T) { + nKeys := (opt.DefaultCompactionTableSize / 30) * 5 key1 := func(i int) string { return fmt.Sprintf("my_key_%d", i) } @@ -1862,7 +1919,7 @@ func TestDb_LeveldbIssue178(t *testing.T) { h.assertNumKeys(nKeys) } -func TestDb_LeveldbIssue200(t *testing.T) { +func TestDB_LeveldbIssue200(t *testing.T) { h := newDbHarness(t) defer h.close() @@ -1888,3 +1945,635 @@ func TestDb_LeveldbIssue200(t *testing.T) { iter.Next() assertBytes(t, []byte("5"), iter.Key()) } + +func TestDB_GoleveldbIssue74(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + WriteBuffer: 1 * opt.MiB, + }) + defer h.close() + + const n, dur = 10000, 5 * time.Second + + runtime.GOMAXPROCS(runtime.NumCPU()) + + until := time.Now().Add(dur) + wg := new(sync.WaitGroup) + wg.Add(2) + var done uint32 + go func() { + var i int + defer func() { + t.Logf("WRITER DONE #%d", i) + atomic.StoreUint32(&done, 1) + wg.Done() + }() + + b := 
new(Batch) + for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ { + iv := fmt.Sprintf("VAL%010d", i) + for k := 0; k < n; k++ { + key := fmt.Sprintf("KEY%06d", k) + b.Put([]byte(key), []byte(key+iv)) + b.Put([]byte(fmt.Sprintf("PTR%06d", k)), []byte(key)) + } + h.write(b) + + b.Reset() + snap := h.getSnapshot() + iter := snap.NewIterator(util.BytesPrefix([]byte("PTR")), nil) + var k int + for ; iter.Next(); k++ { + ptrKey := iter.Key() + key := iter.Value() + + if _, err := snap.Get(ptrKey, nil); err != nil { + t.Fatalf("WRITER #%d snapshot.Get %q: %v", i, ptrKey, err) + } + if value, err := snap.Get(key, nil); err != nil { + t.Fatalf("WRITER #%d snapshot.Get %q: %v", i, key, err) + } else if string(value) != string(key)+iv { + t.Fatalf("WRITER #%d snapshot.Get %q got invalid value, want %q got %q", i, key, string(key)+iv, value) + } + + b.Delete(key) + b.Delete(ptrKey) + } + h.write(b) + iter.Release() + snap.Release() + if k != n { + t.Fatalf("#%d %d != %d", i, k, n) + } + } + }() + go func() { + var i int + defer func() { + t.Logf("READER DONE #%d", i) + atomic.StoreUint32(&done, 1) + wg.Done() + }() + for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ { + snap := h.getSnapshot() + iter := snap.NewIterator(util.BytesPrefix([]byte("PTR")), nil) + var prevValue string + var k int + for ; iter.Next(); k++ { + ptrKey := iter.Key() + key := iter.Value() + + if _, err := snap.Get(ptrKey, nil); err != nil { + t.Fatalf("READER #%d snapshot.Get %q: %v", i, ptrKey, err) + } + + if value, err := snap.Get(key, nil); err != nil { + t.Fatalf("READER #%d snapshot.Get %q: %v", i, key, err) + } else if prevValue != "" && string(value) != string(key)+prevValue { + t.Fatalf("READER #%d snapshot.Get %q got invalid value, want %q got %q", i, key, string(key)+prevValue, value) + } else { + prevValue = string(value[len(key):]) + } + } + iter.Release() + snap.Release() + if k > 0 && k != n { + t.Fatalf("#%d %d != %d", i, k, n) + } + } + }() + 
wg.Wait() +} + +func TestDB_GetProperties(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + _, err := h.db.GetProperty("leveldb.num-files-at-level") + if err == nil { + t.Error("GetProperty() failed to detect missing level") + } + + _, err = h.db.GetProperty("leveldb.num-files-at-level0") + if err != nil { + t.Error("got unexpected error", err) + } + + _, err = h.db.GetProperty("leveldb.num-files-at-level0x") + if err == nil { + t.Error("GetProperty() failed to detect invalid level") + } +} + +func TestDB_GoleveldbIssue72and83(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + WriteBuffer: 1 * opt.MiB, + CachedOpenFiles: 3, + }) + defer h.close() + + const n, wn, dur = 10000, 100, 30 * time.Second + + runtime.GOMAXPROCS(runtime.NumCPU()) + + randomData := func(prefix byte, i int) []byte { + data := make([]byte, 1+4+32+64+32) + _, err := crand.Reader.Read(data[1 : len(data)-8]) + if err != nil { + panic(err) + } + data[0] = prefix + binary.LittleEndian.PutUint32(data[len(data)-8:], uint32(i)) + binary.LittleEndian.PutUint32(data[len(data)-4:], util.NewCRC(data[:len(data)-4]).Value()) + return data + } + + keys := make([][]byte, n) + for i := range keys { + keys[i] = randomData(1, 0) + } + + until := time.Now().Add(dur) + wg := new(sync.WaitGroup) + wg.Add(3) + var done uint32 + go func() { + i := 0 + defer func() { + t.Logf("WRITER DONE #%d", i) + wg.Done() + }() + + b := new(Batch) + for ; i < wn && atomic.LoadUint32(&done) == 0; i++ { + b.Reset() + for _, k1 := range keys { + k2 := randomData(2, i) + b.Put(k2, randomData(42, i)) + b.Put(k1, k2) + } + if err := h.db.Write(b, h.wo); err != nil { + atomic.StoreUint32(&done, 1) + t.Fatalf("WRITER #%d db.Write: %v", i, err) + } + } + }() + go func() { + var i int + defer func() { + t.Logf("READER0 DONE #%d", i) + atomic.StoreUint32(&done, 1) + wg.Done() + }() + for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ { + snap := h.getSnapshot() + seq := snap.elem.seq + if seq == 0 { + 
snap.Release() + continue + } + iter := snap.NewIterator(util.BytesPrefix([]byte{1}), nil) + writei := int(seq/(n*2) - 1) + var k int + for ; iter.Next(); k++ { + k1 := iter.Key() + k2 := iter.Value() + k1checksum0 := binary.LittleEndian.Uint32(k1[len(k1)-4:]) + k1checksum1 := util.NewCRC(k1[:len(k1)-4]).Value() + if k1checksum0 != k1checksum1 { + t.Fatalf("READER0 #%d.%d W#%d invalid K1 checksum: %#x != %#x", i, k, k1checksum0, k1checksum0) + } + k2checksum0 := binary.LittleEndian.Uint32(k2[len(k2)-4:]) + k2checksum1 := util.NewCRC(k2[:len(k2)-4]).Value() + if k2checksum0 != k2checksum1 { + t.Fatalf("READER0 #%d.%d W#%d invalid K2 checksum: %#x != %#x", i, k, k2checksum0, k2checksum1) + } + kwritei := int(binary.LittleEndian.Uint32(k2[len(k2)-8:])) + if writei != kwritei { + t.Fatalf("READER0 #%d.%d W#%d invalid write iteration num: %d", i, k, writei, kwritei) + } + if _, err := snap.Get(k2, nil); err != nil { + t.Fatalf("READER0 #%d.%d W#%d snap.Get: %v\nk1: %x\n -> k2: %x", i, k, writei, err, k1, k2) + } + } + if err := iter.Error(); err != nil { + t.Fatalf("READER0 #%d.%d W#%d snap.Iterator: %v", i, k, writei, err) + } + iter.Release() + snap.Release() + if k > 0 && k != n { + t.Fatalf("READER0 #%d W#%d short read, got=%d want=%d", i, writei, k, n) + } + } + }() + go func() { + var i int + defer func() { + t.Logf("READER1 DONE #%d", i) + atomic.StoreUint32(&done, 1) + wg.Done() + }() + for ; time.Now().Before(until) && atomic.LoadUint32(&done) == 0; i++ { + iter := h.db.NewIterator(nil, nil) + seq := iter.(*dbIter).seq + if seq == 0 { + iter.Release() + continue + } + writei := int(seq/(n*2) - 1) + var k int + for ok := iter.Last(); ok; ok = iter.Prev() { + k++ + } + if err := iter.Error(); err != nil { + t.Fatalf("READER1 #%d.%d W#%d db.Iterator: %v", i, k, writei, err) + } + iter.Release() + if m := (writei+1)*n + n; k != m { + t.Fatalf("READER1 #%d W#%d short read, got=%d want=%d", i, writei, k, m) + } + } + }() + + wg.Wait() +} + +func 
TestDB_TransientError(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + WriteBuffer: 128 * opt.KiB, + CachedOpenFiles: 3, + DisableCompactionBackoff: true, + }) + defer h.close() + + const ( + nSnap = 20 + nKey = 10000 + ) + + var ( + snaps [nSnap]*Snapshot + b = &Batch{} + ) + for i := range snaps { + vtail := fmt.Sprintf("VAL%030d", i) + b.Reset() + for k := 0; k < nKey; k++ { + key := fmt.Sprintf("KEY%8d", k) + b.Put([]byte(key), []byte(key+vtail)) + } + h.stor.SetEmuRandErr(storage.TypeTable, tsOpOpen, tsOpRead, tsOpReadAt) + if err := h.db.Write(b, nil); err != nil { + t.Logf("WRITE #%d error: %v", i, err) + h.stor.SetEmuRandErr(0, tsOpOpen, tsOpRead, tsOpReadAt, tsOpWrite) + for { + if err := h.db.Write(b, nil); err == nil { + break + } else if errors.IsCorrupted(err) { + t.Fatalf("WRITE #%d corrupted: %v", i, err) + } + } + } + + snaps[i] = h.db.newSnapshot() + b.Reset() + for k := 0; k < nKey; k++ { + key := fmt.Sprintf("KEY%8d", k) + b.Delete([]byte(key)) + } + h.stor.SetEmuRandErr(storage.TypeTable, tsOpOpen, tsOpRead, tsOpReadAt) + if err := h.db.Write(b, nil); err != nil { + t.Logf("WRITE #%d error: %v", i, err) + h.stor.SetEmuRandErr(0, tsOpOpen, tsOpRead, tsOpReadAt) + for { + if err := h.db.Write(b, nil); err == nil { + break + } else if errors.IsCorrupted(err) { + t.Fatalf("WRITE #%d corrupted: %v", i, err) + } + } + } + } + h.stor.SetEmuRandErr(0, tsOpOpen, tsOpRead, tsOpReadAt) + + runtime.GOMAXPROCS(runtime.NumCPU()) + + rnd := rand.New(rand.NewSource(0xecafdaed)) + wg := &sync.WaitGroup{} + for i, snap := range snaps { + wg.Add(2) + + go func(i int, snap *Snapshot, sk []int) { + defer wg.Done() + + vtail := fmt.Sprintf("VAL%030d", i) + for _, k := range sk { + key := fmt.Sprintf("KEY%8d", k) + xvalue, err := snap.Get([]byte(key), nil) + if err != nil { + t.Fatalf("READER_GET #%d SEQ=%d K%d error: %v", i, snap.elem.seq, k, err) + } + value := key + vtail + if !bytes.Equal([]byte(value), xvalue) { + t.Fatalf("READER_GET #%d SEQ=%d K%d 
invalid value: want %q, got %q", i, snap.elem.seq, k, value, xvalue) + } + } + }(i, snap, rnd.Perm(nKey)) + + go func(i int, snap *Snapshot) { + defer wg.Done() + + vtail := fmt.Sprintf("VAL%030d", i) + iter := snap.NewIterator(nil, nil) + defer iter.Release() + for k := 0; k < nKey; k++ { + if !iter.Next() { + if err := iter.Error(); err != nil { + t.Fatalf("READER_ITER #%d K%d error: %v", i, k, err) + } else { + t.Fatalf("READER_ITER #%d K%d eoi", i, k) + } + } + key := fmt.Sprintf("KEY%8d", k) + xkey := iter.Key() + if !bytes.Equal([]byte(key), xkey) { + t.Fatalf("READER_ITER #%d K%d invalid key: want %q, got %q", i, k, key, xkey) + } + value := key + vtail + xvalue := iter.Value() + if !bytes.Equal([]byte(value), xvalue) { + t.Fatalf("READER_ITER #%d K%d invalid value: want %q, got %q", i, k, value, xvalue) + } + } + }(i, snap) + } + + wg.Wait() +} + +func TestDB_UkeyShouldntHopAcrossTable(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + WriteBuffer: 112 * opt.KiB, + CompactionTableSize: 90 * opt.KiB, + CompactionExpandLimitFactor: 1, + }) + defer h.close() + + const ( + nSnap = 190 + nKey = 140 + ) + + var ( + snaps [nSnap]*Snapshot + b = &Batch{} + ) + for i := range snaps { + vtail := fmt.Sprintf("VAL%030d", i) + b.Reset() + for k := 0; k < nKey; k++ { + key := fmt.Sprintf("KEY%08d", k) + b.Put([]byte(key), []byte(key+vtail)) + } + if err := h.db.Write(b, nil); err != nil { + t.Fatalf("WRITE #%d error: %v", i, err) + } + + snaps[i] = h.db.newSnapshot() + b.Reset() + for k := 0; k < nKey; k++ { + key := fmt.Sprintf("KEY%08d", k) + b.Delete([]byte(key)) + } + if err := h.db.Write(b, nil); err != nil { + t.Fatalf("WRITE #%d error: %v", i, err) + } + } + + h.compactMem() + + h.waitCompaction() + for level, tables := range h.db.s.stVersion.tables { + for _, table := range tables { + t.Logf("L%d@%d %q:%q", level, table.file.Num(), table.imin, table.imax) + } + } + + h.compactRangeAt(0, "", "") + h.waitCompaction() + for level, tables := range 
h.db.s.stVersion.tables { + for _, table := range tables { + t.Logf("L%d@%d %q:%q", level, table.file.Num(), table.imin, table.imax) + } + } + h.compactRangeAt(1, "", "") + h.waitCompaction() + for level, tables := range h.db.s.stVersion.tables { + for _, table := range tables { + t.Logf("L%d@%d %q:%q", level, table.file.Num(), table.imin, table.imax) + } + } + runtime.GOMAXPROCS(runtime.NumCPU()) + + wg := &sync.WaitGroup{} + for i, snap := range snaps { + wg.Add(1) + + go func(i int, snap *Snapshot) { + defer wg.Done() + + vtail := fmt.Sprintf("VAL%030d", i) + for k := 0; k < nKey; k++ { + key := fmt.Sprintf("KEY%08d", k) + xvalue, err := snap.Get([]byte(key), nil) + if err != nil { + t.Fatalf("READER_GET #%d SEQ=%d K%d error: %v", i, snap.elem.seq, k, err) + } + value := key + vtail + if !bytes.Equal([]byte(value), xvalue) { + t.Fatalf("READER_GET #%d SEQ=%d K%d invalid value: want %q, got %q", i, snap.elem.seq, k, value, xvalue) + } + } + }(i, snap) + } + + wg.Wait() +} + +func TestDB_TableCompactionBuilder(t *testing.T) { + stor := newTestStorage(t) + defer stor.Close() + + const nSeq = 99 + + o := &opt.Options{ + WriteBuffer: 112 * opt.KiB, + CompactionTableSize: 43 * opt.KiB, + CompactionExpandLimitFactor: 1, + CompactionGPOverlapsFactor: 1, + BlockCache: opt.NoCache, + } + s, err := newSession(stor, o) + if err != nil { + t.Fatal(err) + } + if err := s.create(); err != nil { + t.Fatal(err) + } + defer s.close() + var ( + seq uint64 + targetSize = 5 * o.CompactionTableSize + value = bytes.Repeat([]byte{'0'}, 100) + ) + for i := 0; i < 2; i++ { + tw, err := s.tops.create() + if err != nil { + t.Fatal(err) + } + for k := 0; tw.tw.BytesLen() < targetSize; k++ { + key := []byte(fmt.Sprintf("%09d", k)) + seq += nSeq - 1 + for x := uint64(0); x < nSeq; x++ { + if err := tw.append(newIkey(key, seq-x, ktVal), value); err != nil { + t.Fatal(err) + } + } + } + tf, err := tw.finish() + if err != nil { + t.Fatal(err) + } + rec := &sessionRecord{numLevel: 
s.o.GetNumLevel()} + rec.addTableFile(i, tf) + if err := s.commit(rec); err != nil { + t.Fatal(err) + } + } + + // Build grandparent. + v := s.version() + c := newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...)) + rec := &sessionRecord{numLevel: s.o.GetNumLevel()} + b := &tableCompactionBuilder{ + s: s, + c: c, + rec: rec, + stat1: new(cStatsStaging), + minSeq: 0, + strict: true, + tableSize: o.CompactionTableSize/3 + 961, + } + if err := b.run(new(compactionTransactCounter)); err != nil { + t.Fatal(err) + } + for _, t := range c.tables[0] { + rec.delTable(c.level, t.file.Num()) + } + if err := s.commit(rec); err != nil { + t.Fatal(err) + } + c.release() + + // Build level-1. + v = s.version() + c = newCompaction(s, v, 0, append(tFiles{}, v.tables[0]...)) + rec = &sessionRecord{numLevel: s.o.GetNumLevel()} + b = &tableCompactionBuilder{ + s: s, + c: c, + rec: rec, + stat1: new(cStatsStaging), + minSeq: 0, + strict: true, + tableSize: o.CompactionTableSize, + } + if err := b.run(new(compactionTransactCounter)); err != nil { + t.Fatal(err) + } + for _, t := range c.tables[0] { + rec.delTable(c.level, t.file.Num()) + } + // Move grandparent to level-3 + for _, t := range v.tables[2] { + rec.delTable(2, t.file.Num()) + rec.addTableFile(3, t) + } + if err := s.commit(rec); err != nil { + t.Fatal(err) + } + c.release() + + v = s.version() + for level, want := range []bool{false, true, false, true, false} { + got := len(v.tables[level]) > 0 + if want != got { + t.Fatalf("invalid level-%d tables len: want %v, got %v", level, want, got) + } + } + for i, f := range v.tables[1][:len(v.tables[1])-1] { + nf := v.tables[1][i+1] + if bytes.Equal(f.imax.ukey(), nf.imin.ukey()) { + t.Fatalf("KEY %q hop across table %d .. %d", f.imax.ukey(), f.file.Num(), nf.file.Num()) + } + } + v.release() + + // Compaction with transient error. 
+ v = s.version() + c = newCompaction(s, v, 1, append(tFiles{}, v.tables[1]...)) + rec = &sessionRecord{numLevel: s.o.GetNumLevel()} + b = &tableCompactionBuilder{ + s: s, + c: c, + rec: rec, + stat1: new(cStatsStaging), + minSeq: 0, + strict: true, + tableSize: o.CompactionTableSize, + } + stor.SetEmuErrOnce(storage.TypeTable, tsOpSync) + stor.SetEmuRandErr(storage.TypeTable, tsOpRead, tsOpReadAt, tsOpWrite) + stor.SetEmuRandErrProb(0xf0) + for { + if err := b.run(new(compactionTransactCounter)); err != nil { + t.Logf("(expected) b.run: %v", err) + } else { + break + } + } + if err := s.commit(rec); err != nil { + t.Fatal(err) + } + c.release() + + stor.SetEmuErrOnce(0, tsOpSync) + stor.SetEmuRandErr(0, tsOpRead, tsOpReadAt, tsOpWrite) + + v = s.version() + if len(v.tables[1]) != len(v.tables[2]) { + t.Fatalf("invalid tables length, want %d, got %d", len(v.tables[1]), len(v.tables[2])) + } + for i, f0 := range v.tables[1] { + f1 := v.tables[2][i] + iter0 := s.tops.newIterator(f0, nil, nil) + iter1 := s.tops.newIterator(f1, nil, nil) + for j := 0; true; j++ { + next0 := iter0.Next() + next1 := iter1.Next() + if next0 != next1 { + t.Fatalf("#%d.%d invalid eoi: want %v, got %v", i, j, next0, next1) + } + key0 := iter0.Key() + key1 := iter1.Key() + if !bytes.Equal(key0, key1) { + t.Fatalf("#%d.%d invalid key: want %q, got %q", i, j, key0, key1) + } + if next0 == false { + break + } + } + iter0.Release() + iter1.Release() + } + v.release() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go index 4da98a77e..fbabc1f60 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go @@ -7,8 +7,7 @@ package leveldb import ( - "errors" - + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" 
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" @@ -38,7 +37,9 @@ func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) } // Check and clean files. func (db *DB) checkAndCleanFiles() error { - v := db.s.version_NB() + v := db.s.version() + defer v.release() + tablesMap := make(map[uint64]bool) for _, tables := range v.tables { for _, t := range tables { @@ -78,12 +79,14 @@ func (db *DB) checkAndCleanFiles() error { } if nTables != len(tablesMap) { + var missing []*storage.FileInfo for num, present := range tablesMap { if !present { + missing = append(missing, &storage.FileInfo{Type: storage.TypeTable, Num: num}) db.logf("db@janitor table missing @%d", num) } } - return ErrCorrupted{Type: MissingFiles, Err: errors.New("leveldb: table files missing")} + return errors.NewErrCorrupted(nil, &errors.ErrMissingFiles{Files: missing}) } db.logf("db@janitor F·%d G·%d", len(files), len(rem)) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go index 85be3867d..a76319124 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go @@ -59,7 +59,7 @@ func (db *DB) rotateMem(n int) (mem *memDB, err error) { } // Schedule memdb compaction. 
- db.compTrigger(db.mcompTriggerC) + db.compSendTrigger(db.mcompCmdC) return } @@ -77,12 +77,12 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) { }() nn = mem.mdb.Free() switch { - case v.tLen(0) >= kL0_SlowdownWritesTrigger && !delayed: + case v.tLen(0) >= db.s.o.GetWriteL0SlowdownTrigger() && !delayed: delayed = true time.Sleep(time.Millisecond) case nn >= n: return false - case v.tLen(0) >= kL0_StopWritesTrigger: + case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger(): delayed = true err = db.compSendIdle(db.tcompCmdC) if err != nil { @@ -109,7 +109,12 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) { for flush() { } if delayed { - db.logf("db@write delayed T·%v", time.Since(start)) + db.writeDelay += time.Since(start) + db.writeDelayN++ + } else if db.writeDelayN > 0 { + db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay) + db.writeDelay = 0 + db.writeDelayN = 0 } return } @@ -120,28 +125,33 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) { // It is safe to modify the contents of the arguments after Write returns. func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) { err = db.ok() - if err != nil || b == nil || b.len() == 0 { + if err != nil || b == nil || b.Len() == 0 { return } b.init(wo.GetSync()) // The write happen synchronously. 
-retry: select { case db.writeC <- b: if <-db.writeMergedC { return <-db.writeAckC } - goto retry case db.writeLockC <- struct{}{}: + case err = <-db.compPerErrC: + return case _, _ = <-db.closeC: return ErrClosed } merged := 0 + danglingMerge := false defer func() { - <-db.writeLockC + if danglingMerge { + db.writeMergedC <- false + } else { + <-db.writeLockC + } for i := 0; i < merged; i++ { db.writeAckC <- err } @@ -170,7 +180,7 @@ drain: db.writeMergedC <- true merged++ } else { - db.writeMergedC <- false + danglingMerge = true break drain } default: @@ -185,35 +195,43 @@ drain: if b.size() >= (128 << 10) { // Push the write batch to the journal writer select { + case db.journalC <- b: + // Write into memdb + if berr := b.memReplay(mem.mdb); berr != nil { + panic(berr) + } + case err = <-db.compPerErrC: + return case _, _ = <-db.closeC: err = ErrClosed return - case db.journalC <- b: - // Write into memdb - b.memReplay(mem.mdb) } // Wait for journal writer select { - case _, _ = <-db.closeC: - err = ErrClosed - return case err = <-db.journalAckC: if err != nil { // Revert memdb if error detected - b.revertMemReplay(mem.mdb) + if berr := b.revertMemReplay(mem.mdb); berr != nil { + panic(berr) + } return } + case _, _ = <-db.closeC: + err = ErrClosed + return } } else { err = db.writeJournal(b) if err != nil { return } - b.memReplay(mem.mdb) + if berr := b.memReplay(mem.mdb); berr != nil { + panic(berr) + } } // Set last seq number. - db.addSeq(uint64(b.len())) + db.addSeq(uint64(b.Len())) if b.size() >= memFree { db.rotateMem(0) @@ -262,8 +280,11 @@ func (db *DB) CompactRange(r util.Range) error { return err } + // Lock writer. 
select { case db.writeLockC <- struct{}{}: + case err := <-db.compPerErrC: + return err case _, _ = <-db.closeC: return ErrClosed } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors.go similarity index 55% rename from Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go rename to Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors.go index 4fa488e5b..1466a6b06 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors.go @@ -7,32 +7,12 @@ package leveldb import ( - "errors" - - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" ) var ( - ErrNotFound = util.ErrNotFound + ErrNotFound = errors.ErrNotFound ErrSnapshotReleased = errors.New("leveldb: snapshot released") ErrIterReleased = errors.New("leveldb: iterator released") ErrClosed = errors.New("leveldb: closed") ) - -type CorruptionType int - -const ( - CorruptedManifest CorruptionType = iota - MissingFiles -) - -// ErrCorrupted is the type that wraps errors that indicate corruption in -// the database. -type ErrCorrupted struct { - Type CorruptionType - Err error -} - -func (e ErrCorrupted) Error() string { - return e.Err.Error() -} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go new file mode 100644 index 000000000..7b46613be --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors/errors.go @@ -0,0 +1,76 @@ +// Copyright (c) 2014, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Package errors provides common error types used throughout leveldb. +package errors + +import ( + "errors" + "fmt" + + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrNotFound = New("leveldb: not found") + ErrReleased = util.ErrReleased + ErrHasReleaser = util.ErrHasReleaser +) + +// New returns an error that formats as the given text. +func New(text string) error { + return errors.New(text) +} + +// ErrCorrupted is the type that wraps errors that indicate corruption in +// the database. +type ErrCorrupted struct { + File *storage.FileInfo + Err error +} + +func (e *ErrCorrupted) Error() string { + if e.File != nil { + return fmt.Sprintf("%v [file=%v]", e.Err, e.File) + } else { + return e.Err.Error() + } +} + +// NewErrCorrupted creates new ErrCorrupted error. +func NewErrCorrupted(f storage.File, err error) error { + return &ErrCorrupted{storage.NewFileInfo(f), err} +} + +// IsCorrupted returns a boolean indicating whether the error is indicating +// a corruption. +func IsCorrupted(err error) bool { + switch err.(type) { + case *ErrCorrupted: + return true + } + return false +} + +// ErrMissingFiles is the type that indicating a corruption due to missing +// files. +type ErrMissingFiles struct { + Files []*storage.FileInfo +} + +func (e *ErrMissingFiles) Error() string { return "file missing" } + +// SetFile sets 'file info' of the given error with the given file. +// Currently only ErrCorrupted is supported, otherwise will do nothing. 
+func SetFile(err error, f storage.File) error { + switch x := err.(type) { + case *ErrCorrupted: + x.File = storage.NewFileInfo(f) + return x + } + return err +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go index 1694997d4..aa7eb9a78 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go @@ -19,11 +19,12 @@ var _ = testutil.Defer(func() { o := &opt.Options{ BlockCache: opt.NoCache, BlockRestartInterval: 5, - BlockSize: 50, + BlockSize: 80, Compression: opt.NoCompression, CachedOpenFiles: -1, Strict: opt.StrictAll, WriteBuffer: 1000, + CompactionTableSize: 2000, } Describe("write test", func() { @@ -40,18 +41,17 @@ var _ = testutil.Defer(func() { }) Describe("read test", func() { - testutil.AllKeyValueTesting(nil, func(kv testutil.KeyValue) testutil.DB { + testutil.AllKeyValueTesting(nil, nil, func(kv testutil.KeyValue) testutil.DB { // Building the DB. 
db := newTestingDB(o, nil, nil) kv.IterateShuffled(nil, func(i int, key, value []byte) { err := db.TestPut(key, value) Expect(err).NotTo(HaveOccurred()) }) - testutil.Defer("teardown", func() { - db.TestClose() - }) return db + }, func(db testutil.DB) { + db.(*testingDB).TestClose() }) }) }) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go index 8391e12b4..42dc09dbb 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go @@ -40,13 +40,19 @@ type basicArrayIterator struct { util.BasicReleaser array BasicArray pos int + err error } func (i *basicArrayIterator) Valid() bool { - return i.pos >= 0 && i.pos < i.array.Len() + return i.pos >= 0 && i.pos < i.array.Len() && !i.Released() } func (i *basicArrayIterator) First() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + if i.array.Len() == 0 { i.pos = -1 return false @@ -56,6 +62,11 @@ func (i *basicArrayIterator) First() bool { } func (i *basicArrayIterator) Last() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + n := i.array.Len() if n == 0 { i.pos = 0 @@ -66,6 +77,11 @@ func (i *basicArrayIterator) Last() bool { } func (i *basicArrayIterator) Seek(key []byte) bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + n := i.array.Len() if n == 0 { i.pos = 0 @@ -79,6 +95,11 @@ func (i *basicArrayIterator) Seek(key []byte) bool { } func (i *basicArrayIterator) Next() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + i.pos++ if n := i.array.Len(); i.pos >= n { i.pos = n @@ -88,6 +109,11 @@ func (i *basicArrayIterator) Next() bool { } func (i *basicArrayIterator) Prev() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + i.pos-- if i.pos < 0 { i.pos = -1 @@ -96,7 
+122,7 @@ func (i *basicArrayIterator) Prev() bool { return true } -func (i *basicArrayIterator) Error() error { return nil } +func (i *basicArrayIterator) Error() error { return i.err } type arrayIterator struct { basicArrayIterator diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go index 3a61c3d9f..86c201383 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go @@ -7,6 +7,7 @@ package iterator import ( + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) @@ -22,13 +23,13 @@ type IteratorIndexer interface { type indexedIterator struct { util.BasicReleaser - index IteratorIndexer - strict bool - strictGet bool + index IteratorIndexer + strict bool - data Iterator - err error - errf func(err error) + data Iterator + err error + errf func(err error) + closed bool } func (i *indexedIterator) setData() { @@ -36,11 +37,6 @@ func (i *indexedIterator) setData() { i.data.Release() } i.data = i.index.Get() - if i.strictGet { - if err := i.data.Error(); err != nil { - i.err = err - } - } } func (i *indexedIterator) clearData() { @@ -50,14 +46,21 @@ func (i *indexedIterator) clearData() { i.data = nil } -func (i *indexedIterator) dataErr() bool { - if i.errf != nil { - if err := i.data.Error(); err != nil { +func (i *indexedIterator) indexErr() { + if err := i.index.Error(); err != nil { + if i.errf != nil { i.errf(err) } + i.err = err } - if i.strict { - if err := i.data.Error(); err != nil { +} + +func (i *indexedIterator) dataErr() bool { + if err := i.data.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + if i.strict || !errors.IsCorrupted(err) { i.err = err return true 
} @@ -72,9 +75,13 @@ func (i *indexedIterator) Valid() bool { func (i *indexedIterator) First() bool { if i.err != nil { return false + } else if i.Released() { + i.err = ErrIterReleased + return false } if !i.index.First() { + i.indexErr() i.clearData() return false } @@ -85,9 +92,13 @@ func (i *indexedIterator) First() bool { func (i *indexedIterator) Last() bool { if i.err != nil { return false + } else if i.Released() { + i.err = ErrIterReleased + return false } if !i.index.Last() { + i.indexErr() i.clearData() return false } @@ -105,9 +116,13 @@ func (i *indexedIterator) Last() bool { func (i *indexedIterator) Seek(key []byte) bool { if i.err != nil { return false + } else if i.Released() { + i.err = ErrIterReleased + return false } if !i.index.Seek(key) { + i.indexErr() i.clearData() return false } @@ -125,6 +140,9 @@ func (i *indexedIterator) Seek(key []byte) bool { func (i *indexedIterator) Next() bool { if i.err != nil { return false + } else if i.Released() { + i.err = ErrIterReleased + return false } switch { @@ -136,6 +154,7 @@ func (i *indexedIterator) Next() bool { fallthrough case i.data == nil: if !i.index.Next() { + i.indexErr() return false } i.setData() @@ -147,6 +166,9 @@ func (i *indexedIterator) Next() bool { func (i *indexedIterator) Prev() bool { if i.err != nil { return false + } else if i.Released() { + i.err = ErrIterReleased + return false } switch { @@ -158,6 +180,7 @@ func (i *indexedIterator) Prev() bool { fallthrough case i.data == nil: if !i.index.Prev() { + i.indexErr() return false } i.setData() @@ -206,16 +229,14 @@ func (i *indexedIterator) SetErrorCallback(f func(err error)) { i.errf = f } -// NewIndexedIterator returns an indexed iterator. An index is iterator -// that returns another iterator, a data iterator. A data iterator is the +// NewIndexedIterator returns an 'indexed iterator'. An index is iterator +// that returns another iterator, a 'data iterator'. 
A 'data iterator' is the // iterator that contains actual key/value pairs. // -// If strict is true then error yield by data iterator will halt the indexed -// iterator, on contrary if strict is false then the indexed iterator will -// ignore those error and move on to the next index. If strictGet is true and -// index.Get() yield an 'error iterator' then the indexed iterator will be halted. -// An 'error iterator' is iterator which its Error() method always return non-nil -// even before any 'seeks method' is called. -func NewIndexedIterator(index IteratorIndexer, strict, strictGet bool) Iterator { - return &indexedIterator{index: index, strict: strict, strictGet: strictGet} +// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true) +// won't be ignored and will halt 'indexed iterator', otherwise the iterator will +// continue to the next 'data iterator'. Corruption on 'index iterator' will not be +// ignored and will halt the iterator. +func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator { + return &indexedIterator{index: index, strict: strict} } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go index b22efedbb..c34351afa 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go @@ -65,7 +65,7 @@ var _ = testutil.Defer(func() { // Test the iterator. 
t := testutil.IteratorTesting{ KeyValue: kv.Clone(), - Iter: NewIndexedIterator(NewArrayIndexer(index), true, true), + Iter: NewIndexedIterator(NewArrayIndexer(index), true), } testutil.DoIteratorTesting(&t) done <- true diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go index cd1df6de2..2901967a4 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go @@ -14,6 +14,10 @@ import ( "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) +var ( + ErrIterReleased = errors.New("leveldb/iterator: iterator released") +) + // IteratorSeeker is the interface that wraps the 'seeks method'. type IteratorSeeker interface { // First moves the iterator to the first key/value pair. If the iterator @@ -100,28 +104,13 @@ type ErrorCallbackSetter interface { } type emptyIterator struct { - releaser util.Releaser - released bool - err error + util.BasicReleaser + err error } func (i *emptyIterator) rErr() { - if i.err == nil && i.released { - i.err = errors.New("leveldb/iterator: iterator released") - } -} - -func (i *emptyIterator) Release() { - if i.releaser != nil { - i.releaser.Release() - i.releaser = nil - } - i.released = true -} - -func (i *emptyIterator) SetReleaser(releaser util.Releaser) { - if !i.released { - i.releaser = releaser + if i.err == nil && i.Released() { + i.err = ErrIterReleased } } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go index ef8cdb14f..2b3bce5dd 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go @@ -3,15 +3,9 @@ 
package iterator_test import ( "testing" - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil" ) func TestIterator(t *testing.T) { - testutil.RunDefer() - - RegisterFailHandler(Fail) - RunSpecs(t, "Iterator Suite") + testutil.RunSuite(t, "Iterator Suite") } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go index 508f6a7aa..b39085522 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go @@ -7,16 +7,11 @@ package iterator import ( - "errors" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) -var ( - ErrIterReleased = errors.New("leveldb/iterator: iterator released") -) - type dir int const ( @@ -48,13 +43,11 @@ func assertKey(key []byte) []byte { } func (i *mergedIterator) iterErr(iter Iterator) bool { - if i.errf != nil { - if err := iter.Error(); err != nil { + if err := iter.Error(); err != nil { + if i.errf != nil { i.errf(err) } - } - if i.strict { - if err := iter.Error(); err != nil { + if i.strict || !errors.IsCorrupted(err) { i.err = err return true } @@ -274,9 +267,13 @@ func (i *mergedIterator) Release() { } func (i *mergedIterator) SetReleaser(releaser util.Releaser) { - if i.dir != dirReleased { - i.releaser = releaser + if i.dir == dirReleased { + panic(util.ErrReleased) } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser } func (i *mergedIterator) Error() error { @@ -294,9 +291,9 @@ func (i *mergedIterator) 
SetErrorCallback(f func(err error)) { // keys: if iters[i] contains a key k then iters[j] will not contain that key k. // None of the iters may be nil. // -// If strict is true then error yield by any iterators will halt the merged -// iterator, on contrary if strict is false then the merged iterator will -// ignore those error and move on to the next iterator. +// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true) +// won't be ignored and will halt 'merged iterator', otherwise the iterator will +// continue to the next 'input iterator'. func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator { return &mergedIterator{ iters: iters, diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go index 6fcf79fb9..76dba9ea9 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go @@ -79,10 +79,10 @@ package journal import ( "encoding/binary" - "errors" "fmt" "io" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) @@ -109,7 +109,7 @@ type ErrCorrupted struct { Reason string } -func (e ErrCorrupted) Error() string { +func (e *ErrCorrupted) Error() string { return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size) } @@ -162,10 +162,10 @@ var errSkip = errors.New("leveldb/journal: skipped") func (r *Reader) corrupt(n int, reason string, skip bool) error { if r.dropper != nil { - r.dropper.Drop(ErrCorrupted{n, reason}) + r.dropper.Drop(&ErrCorrupted{n, reason}) } if r.strict && !skip { - r.err = ErrCorrupted{n, reason} + r.err = errors.NewErrCorrupted(nil, &ErrCorrupted{n, reason}) return r.err } return errSkip diff --git 
a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go index b9acf932d..3246ed946 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go @@ -9,15 +9,30 @@ package leveldb import ( "encoding/binary" "fmt" + + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" ) -type vType int +type ErrIkeyCorrupted struct { + Ikey []byte + Reason string +} -func (t vType) String() string { - switch t { - case tDel: +func (e *ErrIkeyCorrupted) Error() string { + return fmt.Sprintf("leveldb: iKey %q corrupted: %s", e.Ikey, e.Reason) +} + +func newErrIkeyCorrupted(ikey []byte, reason string) error { + return errors.NewErrCorrupted(nil, &ErrIkeyCorrupted{append([]byte{}, ikey...), reason}) +} + +type kType int + +func (kt kType) String() string { + switch kt { + case ktDel: return "d" - case tVal: + case ktVal: return "v" } return "x" @@ -26,16 +41,16 @@ func (t vType) String() string { // Value types encoded as the last component of internal keys. // Don't modify; this value are saved to disk. const ( - tDel vType = iota - tVal + ktDel kType = iota + ktVal ) -// tSeek defines the vType that should be passed when constructing an +// ktSeek defines the kType that should be passed when constructing an // internal key for seeking to a particular sequence number (since we // sort sequence numbers in decreasing order and the value type is // embedded as the low 8 bits in the sequence number in internal keys, // we need to use the highest-numbered ValueType, not the lowest). -const tSeek = tVal +const ktSeek = ktVal const ( // Maximum value possible for sequence number; the 8-bits are @@ -43,7 +58,7 @@ const ( // 64-bit integer. kMaxSeq uint64 = (uint64(1) << 56) - 1 // Maximum value possible for packed sequence number and type. 
- kMaxNum uint64 = (kMaxSeq << 8) | uint64(tSeek) + kMaxNum uint64 = (kMaxSeq << 8) | uint64(ktSeek) ) // Maximum number encoded in bytes. @@ -55,85 +70,73 @@ func init() { type iKey []byte -func newIKey(ukey []byte, seq uint64, t vType) iKey { - if seq > kMaxSeq || t > tVal { - panic("invalid seq number or value type") +func newIkey(ukey []byte, seq uint64, kt kType) iKey { + if seq > kMaxSeq { + panic("leveldb: invalid sequence number") + } else if kt > ktVal { + panic("leveldb: invalid type") } - b := make(iKey, len(ukey)+8) - copy(b, ukey) - binary.LittleEndian.PutUint64(b[len(ukey):], (seq<<8)|uint64(t)) - return b + ik := make(iKey, len(ukey)+8) + copy(ik, ukey) + binary.LittleEndian.PutUint64(ik[len(ukey):], (seq<<8)|uint64(kt)) + return ik } -func parseIkey(p []byte) (ukey []byte, seq uint64, t vType, ok bool) { - if len(p) < 8 { - return +func parseIkey(ik []byte) (ukey []byte, seq uint64, kt kType, err error) { + if len(ik) < 8 { + return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid length") } - num := binary.LittleEndian.Uint64(p[len(p)-8:]) - seq, t = uint64(num>>8), vType(num&0xff) - if t > tVal { - return + num := binary.LittleEndian.Uint64(ik[len(ik)-8:]) + seq, kt = uint64(num>>8), kType(num&0xff) + if kt > ktVal { + return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid type") } - ukey = p[:len(p)-8] - ok = true + ukey = ik[:len(ik)-8] return } -func validIkey(p []byte) bool { - _, _, _, ok := parseIkey(p) - return ok +func validIkey(ik []byte) bool { + _, _, _, err := parseIkey(ik) + return err == nil } -func (p iKey) assert() { - if p == nil { - panic("nil iKey") +func (ik iKey) assert() { + if ik == nil { + panic("leveldb: nil iKey") } - if len(p) < 8 { - panic(fmt.Sprintf("invalid iKey %q, len=%d", []byte(p), len(p))) + if len(ik) < 8 { + panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid length", []byte(ik), len(ik))) } } -func (p iKey) ok() bool { - if len(p) < 8 { - return false - } - _, _, ok := p.parseNum() - return ok +func (ik iKey) ukey() 
[]byte { + ik.assert() + return ik[:len(ik)-8] } -func (p iKey) ukey() []byte { - p.assert() - return p[:len(p)-8] +func (ik iKey) num() uint64 { + ik.assert() + return binary.LittleEndian.Uint64(ik[len(ik)-8:]) } -func (p iKey) num() uint64 { - p.assert() - return binary.LittleEndian.Uint64(p[len(p)-8:]) -} - -func (p iKey) parseNum() (seq uint64, t vType, ok bool) { - if p == nil { - panic("nil iKey") +func (ik iKey) parseNum() (seq uint64, kt kType) { + num := ik.num() + seq, kt = uint64(num>>8), kType(num&0xff) + if kt > ktVal { + panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) } - if len(p) < 8 { - return - } - num := p.num() - seq, t = uint64(num>>8), vType(num&0xff) - if t > tVal { - return 0, 0, false - } - ok = true return } -func (p iKey) String() string { - if len(p) == 0 { +func (ik iKey) String() string { + if ik == nil { return "" } - if seq, t, ok := p.parseNum(); ok { - return fmt.Sprintf("%s,%s%d", shorten(string(p.ukey())), t, seq) + + if ukey, seq, kt, err := parseIkey(ik); err == nil { + return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq) + } else { + return "" } - return "" } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go index 2b055ecfa..4dedd297b 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go @@ -15,8 +15,8 @@ import ( var defaultIComparer = &iComparer{comparer.DefaultComparer} -func ikey(key string, seq uint64, t vType) iKey { - return newIKey([]byte(key), uint64(seq), t) +func ikey(key string, seq uint64, kt kType) iKey { + return newIkey([]byte(key), uint64(seq), kt) } func shortSep(a, b []byte) []byte { @@ -37,27 +37,37 @@ func shortSuccessor(b []byte) []byte { return dst } -func testSingleKey(t *testing.T, key string, seq uint64, vt vType) { - ik := ikey(key, seq, vt) 
+func testSingleKey(t *testing.T, key string, seq uint64, kt kType) { + ik := ikey(key, seq, kt) if !bytes.Equal(ik.ukey(), []byte(key)) { t.Errorf("user key does not equal, got %v, want %v", string(ik.ukey()), key) } - if rseq, rt, ok := ik.parseNum(); ok { + rseq, rt := ik.parseNum() + if rseq != seq { + t.Errorf("seq number does not equal, got %v, want %v", rseq, seq) + } + if rt != kt { + t.Errorf("type does not equal, got %v, want %v", rt, kt) + } + + if rukey, rseq, rt, kerr := parseIkey(ik); kerr == nil { + if !bytes.Equal(rukey, []byte(key)) { + t.Errorf("user key does not equal, got %v, want %v", string(ik.ukey()), key) + } if rseq != seq { t.Errorf("seq number does not equal, got %v, want %v", rseq, seq) } - - if rt != vt { - t.Errorf("type does not equal, got %v, want %v", rt, vt) + if rt != kt { + t.Errorf("type does not equal, got %v, want %v", rt, kt) } } else { - t.Error("cannot parse seq and type") + t.Errorf("key error: %v", kerr) } } -func TestIKey_EncodeDecode(t *testing.T) { +func TestIkey_EncodeDecode(t *testing.T) { keys := []string{"", "k", "hello", "longggggggggggggggggggggg"} seqs := []uint64{ 1, 2, 3, @@ -67,8 +77,8 @@ func TestIKey_EncodeDecode(t *testing.T) { } for _, key := range keys { for _, seq := range seqs { - testSingleKey(t, key, seq, tVal) - testSingleKey(t, "hello", 1, tDel) + testSingleKey(t, key, seq, ktVal) + testSingleKey(t, "hello", 1, ktDel) } } } @@ -79,45 +89,45 @@ func assertBytes(t *testing.T, want, got []byte) { } } -func TestIKeyShortSeparator(t *testing.T) { +func TestIkeyShortSeparator(t *testing.T) { // When user keys are same - assertBytes(t, ikey("foo", 100, tVal), - shortSep(ikey("foo", 100, tVal), - ikey("foo", 99, tVal))) - assertBytes(t, ikey("foo", 100, tVal), - shortSep(ikey("foo", 100, tVal), - ikey("foo", 101, tVal))) - assertBytes(t, ikey("foo", 100, tVal), - shortSep(ikey("foo", 100, tVal), - ikey("foo", 100, tVal))) - assertBytes(t, ikey("foo", 100, tVal), - shortSep(ikey("foo", 100, tVal), - 
ikey("foo", 100, tDel))) + assertBytes(t, ikey("foo", 100, ktVal), + shortSep(ikey("foo", 100, ktVal), + ikey("foo", 99, ktVal))) + assertBytes(t, ikey("foo", 100, ktVal), + shortSep(ikey("foo", 100, ktVal), + ikey("foo", 101, ktVal))) + assertBytes(t, ikey("foo", 100, ktVal), + shortSep(ikey("foo", 100, ktVal), + ikey("foo", 100, ktVal))) + assertBytes(t, ikey("foo", 100, ktVal), + shortSep(ikey("foo", 100, ktVal), + ikey("foo", 100, ktDel))) // When user keys are misordered - assertBytes(t, ikey("foo", 100, tVal), - shortSep(ikey("foo", 100, tVal), - ikey("bar", 99, tVal))) + assertBytes(t, ikey("foo", 100, ktVal), + shortSep(ikey("foo", 100, ktVal), + ikey("bar", 99, ktVal))) // When user keys are different, but correctly ordered - assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek), - shortSep(ikey("foo", 100, tVal), - ikey("hello", 200, tVal))) + assertBytes(t, ikey("g", uint64(kMaxSeq), ktSeek), + shortSep(ikey("foo", 100, ktVal), + ikey("hello", 200, ktVal))) // When start user key is prefix of limit user key - assertBytes(t, ikey("foo", 100, tVal), - shortSep(ikey("foo", 100, tVal), - ikey("foobar", 200, tVal))) + assertBytes(t, ikey("foo", 100, ktVal), + shortSep(ikey("foo", 100, ktVal), + ikey("foobar", 200, ktVal))) // When limit user key is prefix of start user key - assertBytes(t, ikey("foobar", 100, tVal), - shortSep(ikey("foobar", 100, tVal), - ikey("foo", 200, tVal))) + assertBytes(t, ikey("foobar", 100, ktVal), + shortSep(ikey("foobar", 100, ktVal), + ikey("foo", 200, ktVal))) } -func TestIKeyShortestSuccessor(t *testing.T) { - assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek), - shortSuccessor(ikey("foo", 100, tVal))) - assertBytes(t, ikey("\xff\xff", 100, tVal), - shortSuccessor(ikey("\xff\xff", 100, tVal))) +func TestIkeyShortestSuccessor(t *testing.T) { + assertBytes(t, ikey("g", uint64(kMaxSeq), ktSeek), + shortSuccessor(ikey("foo", 100, ktVal))) + assertBytes(t, ikey("\xff\xff", 100, ktVal), + shortSuccessor(ikey("\xff\xff", 100, ktVal))) } diff 
--git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go index 797a42c6c..c39a6e6c6 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go @@ -3,18 +3,9 @@ package leveldb import ( "testing" - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil" ) -func TestLeveldb(t *testing.T) { - testutil.RunDefer() - - RegisterFailHandler(Fail) - RunSpecs(t, "Leveldb Suite") - - RegisterTestingT(t) - testutil.RunDefer("teardown") +func TestLevelDB(t *testing.T) { + testutil.RunSuite(t, "LevelDB Suite") } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go index 7f63810f4..55ebcb488 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go @@ -12,12 +12,14 @@ import ( "sync" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) var ( - ErrNotFound = util.ErrNotFound + ErrNotFound = errors.ErrNotFound + ErrIterReleased = errors.New("leveldb/memdb: iterator released") ) const tMaxHeight = 12 @@ -29,6 +31,7 @@ type dbIter struct { node int forward bool key, value []byte + err error } func (i *dbIter) fill(checkStart, checkLimit bool) bool { @@ -59,6 +62,11 @@ func (i *dbIter) Valid() bool { } func (i *dbIter) First() bool 
{ + if i.Released() { + i.err = ErrIterReleased + return false + } + i.forward = true i.p.mu.RLock() defer i.p.mu.RUnlock() @@ -71,9 +79,11 @@ func (i *dbIter) First() bool { } func (i *dbIter) Last() bool { - if i.p == nil { + if i.Released() { + i.err = ErrIterReleased return false } + i.forward = false i.p.mu.RLock() defer i.p.mu.RUnlock() @@ -86,9 +96,11 @@ func (i *dbIter) Last() bool { } func (i *dbIter) Seek(key []byte) bool { - if i.p == nil { + if i.Released() { + i.err = ErrIterReleased return false } + i.forward = true i.p.mu.RLock() defer i.p.mu.RUnlock() @@ -100,9 +112,11 @@ func (i *dbIter) Seek(key []byte) bool { } func (i *dbIter) Next() bool { - if i.p == nil { + if i.Released() { + i.err = ErrIterReleased return false } + if i.node == 0 { if !i.forward { return i.First() @@ -117,9 +131,11 @@ func (i *dbIter) Next() bool { } func (i *dbIter) Prev() bool { - if i.p == nil { + if i.Released() { + i.err = ErrIterReleased return false } + if i.node == 0 { if i.forward { return i.Last() @@ -141,10 +157,10 @@ func (i *dbIter) Value() []byte { return i.value } -func (i *dbIter) Error() error { return nil } +func (i *dbIter) Error() error { return i.err } func (i *dbIter) Release() { - if i.p != nil { + if !i.Released() { i.p = nil i.node = 0 i.key = nil diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go index 171289ecc..d3abfa613 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go @@ -3,15 +3,9 @@ package memdb import ( "testing" - . "github.com/onsi/ginkgo" - . 
"github.com/onsi/gomega" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil" ) -func TestMemdb(t *testing.T) { - testutil.RunDefer() - - RegisterFailHandler(Fail) - RunSpecs(t, "Memdb Suite") +func TestMemDB(t *testing.T) { + testutil.RunSuite(t, "MemDB Suite") } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go index d9542e9fb..8386293c1 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go @@ -129,7 +129,7 @@ var _ = testutil.Defer(func() { } return db - }) + }, nil, nil) }) }) }) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go index c2c474a8f..7f1c069dc 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go @@ -11,6 +11,7 @@ import ( "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter" + "math" ) const ( @@ -20,12 +21,24 @@ const ( ) const ( - DefaultBlockCacheSize = 8 * MiB - DefaultBlockRestartInterval = 16 - DefaultBlockSize = 4 * KiB - DefaultCompressionType = SnappyCompression - DefaultCachedOpenFiles = 500 - DefaultWriteBuffer = 4 * MiB + DefaultBlockCacheSize = 8 * MiB + DefaultBlockRestartInterval = 16 + DefaultBlockSize = 4 * KiB + DefaultCompactionExpandLimitFactor = 25 + DefaultCompactionGPOverlapsFactor = 10 + DefaultCompactionL0Trigger = 4 + DefaultCompactionSourceLimitFactor = 1 + DefaultCompactionTableSize = 2 * MiB + 
DefaultCompactionTableSizeMultiplier = 1.0 + DefaultCompactionTotalSize = 10 * MiB + DefaultCompactionTotalSizeMultiplier = 10.0 + DefaultCompressionType = SnappyCompression + DefaultCachedOpenFiles = 500 + DefaultMaxMemCompationLevel = 2 + DefaultNumLevel = 7 + DefaultWriteBuffer = 4 * MiB + DefaultWriteL0PauseTrigger = 12 + DefaultWriteL0SlowdownTrigger = 8 ) type noCache struct{} @@ -65,34 +78,47 @@ const ( nCompression ) -// Strict is the DB strict level. +// Strict is the DB 'strict level'. type Strict uint const ( // If present then a corrupted or invalid chunk or block in manifest - // journal will cause an error istead of being dropped. + // journal will cause an error instead of being dropped. + // This will prevent database with corrupted manifest to be opened. StrictManifest Strict = 1 << iota - // If present then a corrupted or invalid chunk or block in journal - // will cause an error istead of being dropped. - StrictJournal - // If present then journal chunk checksum will be verified. StrictJournalChecksum - // If present then an invalid key/value pair will cause an error - // instead of being skipped. - StrictIterator + // If present then a corrupted or invalid chunk or block in journal + // will cause an error instead of being dropped. + // This will prevent database with corrupted journal to be opened. + StrictJournal // If present then 'sorted table' block checksum will be verified. + // This has effect on both 'read operation' and compaction. StrictBlockChecksum + // If present then a corrupted 'sorted table' will fails compaction. + // The database will enter read-only mode. + StrictCompaction + + // If present then a corrupted 'sorted table' will halts 'read operation'. + StrictReader + + // If present then leveldb.Recover will drop corrupted 'sorted table'. + StrictRecovery + + // This only applicable for ReadOptions, if present then this ReadOptions + // 'strict level' will override global ones. 
+ StrictOverride + // StrictAll enables all strict flags. - StrictAll = StrictManifest | StrictJournal | StrictJournalChecksum | StrictIterator | StrictBlockChecksum + StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery // DefaultStrict is the default strict flags. Specify any strict flags // will override default strict flags as whole (i.e. not OR'ed). - DefaultStrict = StrictJournalChecksum | StrictBlockChecksum + DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader // NoStrict disables all strict flags. Override default strict flags. NoStrict = ^StrictAll @@ -110,9 +136,14 @@ type Options struct { // BlockCache provides per-block caching for LevelDB. Specify NoCache to // disable block caching. // - // By default LevelDB will create LRU-cache with capacity of 8MiB. + // By default LevelDB will create LRU-cache with capacity of BlockCacheSize. BlockCache cache.Cache + // BlockCacheSize defines the capacity of the default 'block cache'. + // + // The default value is 8MiB. + BlockCacheSize int + // BlockRestartInterval is the number of keys between restart points for // delta encoding of keys. // @@ -132,6 +163,73 @@ type Options struct { // The default value is 500. CachedOpenFiles int + // CompactionExpandLimitFactor limits compaction size after expanded. + // This will be multiplied by table size limit at compaction target level. + // + // The default value is 25. + CompactionExpandLimitFactor int + + // CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a + // single 'sorted table' generates. + // This will be multiplied by table size limit at grandparent level. + // + // The default value is 10. + CompactionGPOverlapsFactor int + + // CompactionL0Trigger defines number of 'sorted table' at level-0 that will + // trigger compaction. + // + // The default value is 4. 
+ CompactionL0Trigger int + + // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to + // level-0. + // This will be multiplied by table size limit at compaction target level. + // + // The default value is 1. + CompactionSourceLimitFactor int + + // CompactionTableSize limits size of 'sorted table' that compaction generates. + // The limits for each level will be calculated as: + // CompactionTableSize * (CompactionTableSizeMultiplier ^ Level) + // The multiplier for each level can also fine-tuned using CompactionTableSizeMultiplierPerLevel. + // + // The default value is 2MiB. + CompactionTableSize int + + // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize. + // + // The default value is 1. + CompactionTableSizeMultiplier float64 + + // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for + // CompactionTableSize. + // Use zero to skip a level. + // + // The default value is nil. + CompactionTableSizeMultiplierPerLevel []float64 + + // CompactionTotalSize limits total size of 'sorted table' for each level. + // The limits for each level will be calculated as: + // CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level) + // The multiplier for each level can also fine-tuned using + // CompactionTotalSizeMultiplierPerLevel. + // + // The default value is 10MiB. + CompactionTotalSize int + + // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize. + // + // The default value is 10. + CompactionTotalSizeMultiplier float64 + + // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for + // CompactionTotalSize. + // Use zero to skip a level. + // + // The default value is nil. + CompactionTotalSizeMultiplierPerLevel []float64 + // Comparer defines a total ordering over the space of []byte keys: a 'less // than' relationship. The same comparison algorithm must be used for reads // and writes over the lifetime of the DB. 
@@ -144,6 +242,11 @@ type Options struct { // The default value (DefaultCompression) uses snappy compression. Compression Compression + // DisableCompactionBackoff allows disable compaction retry backoff. + // + // The default value is false. + DisableCompactionBackoff bool + // ErrorIfExist defines whether an error should returned if the DB already // exist. // @@ -172,6 +275,19 @@ type Options struct { // The default value is nil. Filter filter.Filter + // MaxMemCompationLevel defines maximum level a newly compacted 'memdb' + // will be pushed into if doesn't creates overlap. This should less than + // NumLevel. Use -1 for level-0. + // + // The default is 2. + MaxMemCompationLevel int + + // NumLevel defines number of database level. The level shouldn't changed + // between opens, or the database will panic. + // + // The default is 7. + NumLevel int + // Strict defines the DB strict level. Strict Strict @@ -183,6 +299,18 @@ type Options struct { // // The default value is 4MiB. WriteBuffer int + + // WriteL0StopTrigger defines number of 'sorted table' at level-0 that will + // pause write. + // + // The default value is 12. + WriteL0PauseTrigger int + + // WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that + // will trigger write slowdown. + // + // The default value is 8. 
+ WriteL0SlowdownTrigger int } func (o *Options) GetAltFilters() []filter.Filter { @@ -199,6 +327,13 @@ func (o *Options) GetBlockCache() cache.Cache { return o.BlockCache } +func (o *Options) GetBlockCacheSize() int { + if o == nil || o.BlockCacheSize <= 0 { + return DefaultBlockCacheSize + } + return o.BlockCacheSize +} + func (o *Options) GetBlockRestartInterval() int { if o == nil || o.BlockRestartInterval <= 0 { return DefaultBlockRestartInterval @@ -222,6 +357,79 @@ func (o *Options) GetCachedOpenFiles() int { return o.CachedOpenFiles } +func (o *Options) GetCompactionExpandLimit(level int) int { + factor := DefaultCompactionExpandLimitFactor + if o != nil && o.CompactionExpandLimitFactor > 0 { + factor = o.CompactionExpandLimitFactor + } + return o.GetCompactionTableSize(level+1) * factor +} + +func (o *Options) GetCompactionGPOverlaps(level int) int { + factor := DefaultCompactionGPOverlapsFactor + if o != nil && o.CompactionGPOverlapsFactor > 0 { + factor = o.CompactionGPOverlapsFactor + } + return o.GetCompactionTableSize(level+2) * factor +} + +func (o *Options) GetCompactionL0Trigger() int { + if o == nil || o.CompactionL0Trigger == 0 { + return DefaultCompactionL0Trigger + } + return o.CompactionL0Trigger +} + +func (o *Options) GetCompactionSourceLimit(level int) int { + factor := DefaultCompactionSourceLimitFactor + if o != nil && o.CompactionSourceLimitFactor > 0 { + factor = o.CompactionSourceLimitFactor + } + return o.GetCompactionTableSize(level+1) * factor +} + +func (o *Options) GetCompactionTableSize(level int) int { + var ( + base = DefaultCompactionTableSize + mult float64 + ) + if o != nil { + if o.CompactionTableSize > 0 { + base = o.CompactionTableSize + } + if len(o.CompactionTableSizeMultiplierPerLevel) > level && o.CompactionTableSizeMultiplierPerLevel[level] > 0 { + mult = o.CompactionTableSizeMultiplierPerLevel[level] + } else if o.CompactionTableSizeMultiplier > 0 { + mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level)) 
+ } + } + if mult == 0 { + mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level)) + } + return int(float64(base) * mult) +} + +func (o *Options) GetCompactionTotalSize(level int) int64 { + var ( + base = DefaultCompactionTotalSize + mult float64 + ) + if o != nil { + if o.CompactionTotalSize > 0 { + base = o.CompactionTotalSize + } + if len(o.CompactionTotalSizeMultiplierPerLevel) > level && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 { + mult = o.CompactionTotalSizeMultiplierPerLevel[level] + } else if o.CompactionTotalSizeMultiplier > 0 { + mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level)) + } + } + if mult == 0 { + mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level)) + } + return int64(float64(base) * mult) +} + func (o *Options) GetComparer() comparer.Comparer { if o == nil || o.Comparer == nil { return comparer.DefaultComparer @@ -236,6 +444,13 @@ func (o *Options) GetCompression() Compression { return o.Compression } +func (o *Options) GetDisableCompactionBackoff() bool { + if o == nil { + return false + } + return o.DisableCompactionBackoff +} + func (o *Options) GetErrorIfExist() bool { if o == nil { return false @@ -257,6 +472,28 @@ func (o *Options) GetFilter() filter.Filter { return o.Filter } +func (o *Options) GetMaxMemCompationLevel() int { + level := DefaultMaxMemCompationLevel + if o != nil { + if o.MaxMemCompationLevel > 0 { + level = o.MaxMemCompationLevel + } else if o.MaxMemCompationLevel == -1 { + level = 0 + } + } + if level >= o.GetNumLevel() { + return o.GetNumLevel() - 1 + } + return level +} + +func (o *Options) GetNumLevel() int { + if o == nil || o.NumLevel <= 0 { + return DefaultNumLevel + } + return o.NumLevel +} + func (o *Options) GetStrict(strict Strict) bool { if o == nil || o.Strict == 0 { return DefaultStrict&strict != 0 @@ -271,6 +508,20 @@ func (o *Options) GetWriteBuffer() int { return o.WriteBuffer } +func (o *Options) GetWriteL0PauseTrigger() int { + if o == nil || 
o.WriteL0PauseTrigger == 0 { + return DefaultWriteL0PauseTrigger + } + return o.WriteL0PauseTrigger +} + +func (o *Options) GetWriteL0SlowdownTrigger() int { + if o == nil || o.WriteL0SlowdownTrigger == 0 { + return DefaultWriteL0SlowdownTrigger + } + return o.WriteL0SlowdownTrigger +} + // ReadOptions holds the optional parameters for 'read operation'. The // 'read operation' includes Get, Find and NewIterator. type ReadOptions struct { @@ -281,8 +532,8 @@ type ReadOptions struct { // The default value is false. DontFillCache bool - // Strict overrides global DB strict level. Only StrictIterator and - // StrictBlockChecksum that does have effects here. + // Strict will be OR'ed with global DB 'strict level' unless StrictOverride + // is present. Currently only StrictReader that has effect here. Strict Strict } @@ -324,3 +575,11 @@ func (wo *WriteOptions) GetSync() bool { } return wo.Sync } + +func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool { + if ro.GetStrict(StrictOverride) { + return ro.GetStrict(strict) + } else { + return o.GetStrict(strict) || ro.GetStrict(strict) + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go index 9d3f05ccc..9c3538541 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go @@ -12,30 +12,89 @@ import ( "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" ) -func (s *session) setOptions(o *opt.Options) { - s.o = &opt.Options{} +func dupOptions(o *opt.Options) *opt.Options { + newo := &opt.Options{} if o != nil { - *s.o = *o + *newo = *o } + if newo.Strict == 0 { + newo.Strict = opt.DefaultStrict + } + return newo +} + +func (s *session) setOptions(o *opt.Options) { + no := dupOptions(o) // Alternative filters. 
if filters := o.GetAltFilters(); len(filters) > 0 { - s.o.AltFilters = make([]filter.Filter, len(filters)) + no.AltFilters = make([]filter.Filter, len(filters)) for i, filter := range filters { - s.o.AltFilters[i] = &iFilter{filter} + no.AltFilters[i] = &iFilter{filter} } } // Block cache. switch o.GetBlockCache() { case nil: - s.o.BlockCache = cache.NewLRUCache(opt.DefaultBlockCacheSize) + no.BlockCache = cache.NewLRUCache(o.GetBlockCacheSize()) case opt.NoCache: - s.o.BlockCache = nil + no.BlockCache = nil } // Comparer. s.icmp = &iComparer{o.GetComparer()} - s.o.Comparer = s.icmp + no.Comparer = s.icmp // Filter. if filter := o.GetFilter(); filter != nil { - s.o.Filter = &iFilter{filter} + no.Filter = &iFilter{filter} + } + + s.o = &cachedOptions{Options: no} + s.o.cache() +} + +type cachedOptions struct { + *opt.Options + + compactionExpandLimit []int + compactionGPOverlaps []int + compactionSourceLimit []int + compactionTableSize []int + compactionTotalSize []int64 +} + +func (co *cachedOptions) cache() { + numLevel := co.Options.GetNumLevel() + + co.compactionExpandLimit = make([]int, numLevel) + co.compactionGPOverlaps = make([]int, numLevel) + co.compactionSourceLimit = make([]int, numLevel) + co.compactionTableSize = make([]int, numLevel) + co.compactionTotalSize = make([]int64, numLevel) + + for level := 0; level < numLevel; level++ { + co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level) + co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level) + co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level) + co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level) + co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level) } } + +func (co *cachedOptions) GetCompactionExpandLimit(level int) int { + return co.compactionExpandLimit[level] +} + +func (co *cachedOptions) GetCompactionGPOverlaps(level int) int { + return co.compactionGPOverlaps[level] +} + +func (co 
*cachedOptions) GetCompactionSourceLimit(level int) int { + return co.compactionSourceLimit[level] +} + +func (co *cachedOptions) GetCompactionTableSize(level int) int { + return co.compactionTableSize[level] +} + +func (co *cachedOptions) GetCompactionTotalSize(level int) int64 { + return co.compactionTotalSize[level] +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go index 94c05844f..e0c908370 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go @@ -7,12 +7,13 @@ package leveldb import ( - "errors" + "fmt" "io" "os" "sync" "sync/atomic" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" @@ -20,18 +21,31 @@ import ( "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) +type ErrManifestCorrupted struct { + Field string + Reason string +} + +func (e *ErrManifestCorrupted) Error() string { + return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason) +} + +func newErrManifestCorrupted(f storage.File, field, reason string) error { + return errors.NewErrCorrupted(f, &ErrManifestCorrupted{field, reason}) +} + // session represent a persistent database session. type session struct { // Need 64-bit alignment. 
- stFileNum uint64 // current unused file number + stNextFileNum uint64 // current unused file number stJournalNum uint64 // current journal file number; need external synchronization stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb - stSeq uint64 // last mem compacted seq; need external synchronization + stSeqNum uint64 // last mem compacted seq; need external synchronization stTempFileNum uint64 stor storage.Storage storLock util.Releaser - o *opt.Options + o *cachedOptions icmp *iComparer tops *tOps @@ -39,9 +53,9 @@ type session struct { manifestWriter storage.Writer manifestFile storage.File - stCptrs [kNumLevels]iKey // compact pointers; need external synchronization - stVersion *version // current version - vmu sync.Mutex + stCompPtrs []iKey // compaction pointers; need external synchronization + stVersion *version // current version + vmu sync.Mutex } // Creates new initialized session instance. @@ -54,13 +68,14 @@ func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) { return } s = &session{ - stor: stor, - storLock: storLock, + stor: stor, + storLock: storLock, + stCompPtrs: make([]iKey, o.GetNumLevel()), } s.setOptions(o) s.tops = newTableOps(s, s.o.GetCachedOpenFiles()) - s.setVersion(&version{s: s}) - s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock D·DeletedEntry L·Level Q·SeqNum T·TimeElapsed") + s.setVersion(newVersion(s)) + s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed") return } @@ -100,26 +115,26 @@ func (s *session) recover() (err error) { // Don't return os.ErrNotExist if the underlying storage contains // other files that belong to LevelDB. So the DB won't get trashed. 
if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 { - err = ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest file missing")} + err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}} } } }() - file, err := s.stor.GetManifest() + m, err := s.stor.GetManifest() if err != nil { return } - reader, err := file.Open() + reader, err := m.Open() if err != nil { return } defer reader.Close() strict := s.o.GetStrict(opt.StrictManifest) - jr := journal.NewReader(reader, dropper{s, file}, strict, true) + jr := journal.NewReader(reader, dropper{s, m}, strict, true) - staging := s.version_NB().newStaging() - rec := &sessionRecord{} + staging := s.stVersion.newStaging() + rec := &sessionRecord{numLevel: s.o.GetNumLevel()} for { var r io.Reader r, err = jr.Next() @@ -128,51 +143,57 @@ func (s *session) recover() (err error) { err = nil break } - return + return errors.SetFile(err, m) } err = rec.decode(r) if err == nil { // save compact pointers - for _, r := range rec.compactionPointers { - s.stCptrs[r.level] = iKey(r.ikey) + for _, r := range rec.compPtrs { + s.stCompPtrs[r.level] = iKey(r.ikey) } // commit record to version staging staging.commit(rec) - } else if strict { - return ErrCorrupted{Type: CorruptedManifest, Err: err} } else { - s.logf("manifest error: %v (skipped)", err) + err = errors.SetFile(err, m) + if strict || !errors.IsCorrupted(err) { + return + } else { + s.logf("manifest error: %v (skipped)", errors.SetFile(err, m)) + } } - rec.resetCompactionPointers() + rec.resetCompPtrs() rec.resetAddedTables() rec.resetDeletedTables() } switch { case !rec.has(recComparer): - return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing comparer name")} + return newErrManifestCorrupted(m, "comparer", "missing") case rec.comparer != s.icmp.uName(): - return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: comparer mismatch, " + "want '" + 
s.icmp.uName() + "', " + "got '" + rec.comparer + "'")} - case !rec.has(recNextNum): - return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing next file number")} + return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer)) + case !rec.has(recNextFileNum): + return newErrManifestCorrupted(m, "next-file-num", "missing") case !rec.has(recJournalNum): - return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing journal file number")} - case !rec.has(recSeq): - return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing seq number")} + return newErrManifestCorrupted(m, "journal-file-num", "missing") + case !rec.has(recSeqNum): + return newErrManifestCorrupted(m, "seq-num", "missing") } - s.manifestFile = file + s.manifestFile = m s.setVersion(staging.finish()) - s.setFileNum(rec.nextNum) + s.setNextFileNum(rec.nextFileNum) s.recordCommited(rec) return nil } // Commit session; need external synchronization. func (s *session) commit(r *sessionRecord) (err error) { + v := s.version() + defer v.release() + // spawn new version based on current version - nv := s.version_NB().spawn(r) + nv := v.spawn(r) if s.manifest == nil { // manifest journal writer not yet created, create one @@ -191,13 +212,13 @@ func (s *session) commit(r *sessionRecord) (err error) { // Pick a compaction based on current state; need external synchronization. 
func (s *session) pickCompaction() *compaction { - v := s.version_NB() + v := s.version() var level int var t0 tFiles if v.cScore >= 1 { level = v.cLevel - cptr := s.stCptrs[level] + cptr := s.stCompPtrs[level] tables := v.tables[level] for _, t := range tables { if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 { @@ -214,27 +235,21 @@ func (s *session) pickCompaction() *compaction { level = ts.level t0 = append(t0, ts.table) } else { + v.release() return nil } } - c := &compaction{s: s, v: v, level: level} - if level == 0 { - imin, imax := t0.getRange(s.icmp) - t0 = v.tables[0].getOverlaps(t0[:0], s.icmp, imin.ukey(), imax.ukey(), true) - } - - c.tables[0] = t0 - c.expand() - return c + return newCompaction(s, v, level, t0) } // Create compaction from given level and range; need external synchronization. func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction { - v := s.version_NB() + v := s.version() t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0) if len(t0) == 0 { + v.release() return nil } @@ -243,7 +258,7 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction { // and we must not pick one file and drop another older file if the // two files overlap. 
if level > 0 { - limit := uint64(kMaxTableSize) + limit := uint64(v.s.o.GetCompactionSourceLimit(level)) total := uint64(0) for i, t := range t0 { total += t.size @@ -255,9 +270,20 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction { } } - c := &compaction{s: s, v: v, level: level} - c.tables[0] = t0 + return newCompaction(s, v, level, t0) +} + +func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction { + c := &compaction{ + s: s, + v: v, + level: level, + tables: [2]tFiles{t0, nil}, + maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)), + tPtrs: make([]int, s.o.GetNumLevel()), + } c.expand() + c.save() return c } @@ -266,25 +292,57 @@ type compaction struct { s *session v *version - level int - tables [2]tFiles + level int + tables [2]tFiles + maxGPOverlaps uint64 - gp tFiles - gpidx int - seenKey bool - overlappedBytes uint64 - imin, imax iKey + gp tFiles + gpi int + seenKey bool + gpOverlappedBytes uint64 + imin, imax iKey + tPtrs []int + released bool - tPtrs [kNumLevels]int + snapGPI int + snapSeenKey bool + snapGPOverlappedBytes uint64 + snapTPtrs []int +} + +func (c *compaction) save() { + c.snapGPI = c.gpi + c.snapSeenKey = c.seenKey + c.snapGPOverlappedBytes = c.gpOverlappedBytes + c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...) +} + +func (c *compaction) restore() { + c.gpi = c.snapGPI + c.seenKey = c.snapSeenKey + c.gpOverlappedBytes = c.snapGPOverlappedBytes + c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...) +} + +func (c *compaction) release() { + if !c.released { + c.released = true + c.v.release() + } } // Expand compacted tables; need external synchronization. 
func (c *compaction) expand() { - level := c.level - vt0, vt1 := c.v.tables[level], c.v.tables[level+1] + limit := uint64(c.s.o.GetCompactionExpandLimit(c.level)) + vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1] t0, t1 := c.tables[0], c.tables[1] imin, imax := t0.getRange(c.s.icmp) + // We expand t0 here just incase ukey hop across tables. + t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0) + if len(t0) != len(c.tables[0]) { + imin, imax = t0.getRange(c.s.icmp) + } t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false) // Get entire range covered by compaction. amin, amax := append(t0, t1...).getRange(c.s.icmp) @@ -292,13 +350,13 @@ func (c *compaction) expand() { // See if we can grow the number of inputs in "level" without // changing the number of "level+1" files we pick up. if len(t1) > 0 { - exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), level == 0) - if len(exp0) > len(t0) && t1.size()+exp0.size() < kExpCompactionMaxBytes { + exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0) + if len(exp0) > len(t0) && t1.size()+exp0.size() < limit { xmin, xmax := exp0.getRange(c.s.icmp) exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false) if len(exp1) == len(t1) { c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", - level, level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), + c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) imin, imax = xmin, xmax t0, t1 = exp0, exp1 @@ -309,8 +367,8 @@ func (c *compaction) expand() { // Compute the set of grandparent files that overlap this compaction // (parent == level+1; grandparent == level+2) - if level+2 < kNumLevels { - c.gp = c.v.tables[level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) + if c.level+2 < 
c.s.o.GetNumLevel() { + c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) } c.tables[0], c.tables[1] = t0, t1 @@ -319,7 +377,7 @@ func (c *compaction) expand() { // Check whether compaction is trivial. func (c *compaction) trivial() bool { - return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= kMaxGrandParentOverlapBytes + return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps } func (c *compaction) baseLevelForKey(ukey []byte) bool { @@ -341,20 +399,20 @@ func (c *compaction) baseLevelForKey(ukey []byte) bool { } func (c *compaction) shouldStopBefore(ikey iKey) bool { - for ; c.gpidx < len(c.gp); c.gpidx++ { - gp := c.gp[c.gpidx] + for ; c.gpi < len(c.gp); c.gpi++ { + gp := c.gp[c.gpi] if c.s.icmp.Compare(ikey, gp.imax) <= 0 { break } if c.seenKey { - c.overlappedBytes += gp.size + c.gpOverlappedBytes += gp.size } } c.seenKey = true - if c.overlappedBytes > kMaxGrandParentOverlapBytes { + if c.gpOverlappedBytes > c.maxGPOverlaps { // Too much overlap for current output; start new output. - c.overlappedBytes = 0 + c.gpOverlappedBytes = 0 return true } return false @@ -373,8 +431,12 @@ func (c *compaction) newIterator() iterator.Iterator { // Options. 
ro := &opt.ReadOptions{ DontFillCache: true, + Strict: opt.StrictOverride, + } + strict := c.s.o.GetStrict(opt.StrictCompaction) + if strict { + ro.Strict |= opt.StrictReader } - strict := c.s.o.GetStrict(opt.StrictIterator) for i, tables := range c.tables { if len(tables) == 0 { @@ -387,10 +449,10 @@ func (c *compaction) newIterator() iterator.Iterator { its = append(its, c.s.tops.newIterator(t, nil, ro)) } } else { - it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict, true) + it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict) its = append(its, it) } } - return iterator.NewMergedIterator(its, c.s.icmp, true) + return iterator.NewMergedIterator(its, c.s.icmp, strict) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go index 272129589..904dc956c 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go @@ -9,11 +9,11 @@ package leveldb import ( "bufio" "encoding/binary" - "errors" "io" -) + "strings" -var errCorruptManifest = errors.New("leveldb: corrupt manifest") + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" +) type byteReader interface { io.Reader @@ -22,13 +22,13 @@ type byteReader interface { // These numbers are written to disk and should not be changed. 
const ( - recComparer = 1 - recJournalNum = 2 - recNextNum = 3 - recSeq = 4 - recCompactionPointer = 5 - recDeletedTable = 6 - recNewTable = 7 + recComparer = 1 + recJournalNum = 2 + recNextFileNum = 3 + recSeqNum = 4 + recCompPtr = 5 + recDelTable = 6 + recAddTable = 7 // 8 was used for large value refs recPrevJournalNum = 9 ) @@ -38,7 +38,7 @@ type cpRecord struct { ikey iKey } -type ntRecord struct { +type atRecord struct { level int num uint64 size uint64 @@ -46,27 +46,26 @@ type ntRecord struct { imax iKey } -func (r ntRecord) makeFile(s *session) *tFile { - return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax) -} - type dtRecord struct { level int num uint64 } type sessionRecord struct { - hasRec int - comparer string - journalNum uint64 - prevJournalNum uint64 - nextNum uint64 - seq uint64 - compactionPointers []cpRecord - addedTables []ntRecord - deletedTables []dtRecord - scratch [binary.MaxVarintLen64]byte - err error + numLevel int + + hasRec int + comparer string + journalNum uint64 + prevJournalNum uint64 + nextFileNum uint64 + seqNum uint64 + compPtrs []cpRecord + addedTables []atRecord + deletedTables []dtRecord + + scratch [binary.MaxVarintLen64]byte + err error } func (p *sessionRecord) has(rec int) bool { @@ -88,29 +87,29 @@ func (p *sessionRecord) setPrevJournalNum(num uint64) { p.prevJournalNum = num } -func (p *sessionRecord) setNextNum(num uint64) { - p.hasRec |= 1 << recNextNum - p.nextNum = num +func (p *sessionRecord) setNextFileNum(num uint64) { + p.hasRec |= 1 << recNextFileNum + p.nextFileNum = num } -func (p *sessionRecord) setSeq(seq uint64) { - p.hasRec |= 1 << recSeq - p.seq = seq +func (p *sessionRecord) setSeqNum(num uint64) { + p.hasRec |= 1 << recSeqNum + p.seqNum = num } -func (p *sessionRecord) addCompactionPointer(level int, ikey iKey) { - p.hasRec |= 1 << recCompactionPointer - p.compactionPointers = append(p.compactionPointers, cpRecord{level, ikey}) +func (p *sessionRecord) addCompPtr(level int, ikey iKey) { + 
p.hasRec |= 1 << recCompPtr + p.compPtrs = append(p.compPtrs, cpRecord{level, ikey}) } -func (p *sessionRecord) resetCompactionPointers() { - p.hasRec &= ^(1 << recCompactionPointer) - p.compactionPointers = p.compactionPointers[:0] +func (p *sessionRecord) resetCompPtrs() { + p.hasRec &= ^(1 << recCompPtr) + p.compPtrs = p.compPtrs[:0] } func (p *sessionRecord) addTable(level int, num, size uint64, imin, imax iKey) { - p.hasRec |= 1 << recNewTable - p.addedTables = append(p.addedTables, ntRecord{level, num, size, imin, imax}) + p.hasRec |= 1 << recAddTable + p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax}) } func (p *sessionRecord) addTableFile(level int, t *tFile) { @@ -118,17 +117,17 @@ func (p *sessionRecord) addTableFile(level int, t *tFile) { } func (p *sessionRecord) resetAddedTables() { - p.hasRec &= ^(1 << recNewTable) + p.hasRec &= ^(1 << recAddTable) p.addedTables = p.addedTables[:0] } -func (p *sessionRecord) deleteTable(level int, num uint64) { - p.hasRec |= 1 << recDeletedTable +func (p *sessionRecord) delTable(level int, num uint64) { + p.hasRec |= 1 << recDelTable p.deletedTables = append(p.deletedTables, dtRecord{level, num}) } func (p *sessionRecord) resetDeletedTables() { - p.hasRec &= ^(1 << recDeletedTable) + p.hasRec &= ^(1 << recDelTable) p.deletedTables = p.deletedTables[:0] } @@ -161,26 +160,26 @@ func (p *sessionRecord) encode(w io.Writer) error { p.putUvarint(w, recJournalNum) p.putUvarint(w, p.journalNum) } - if p.has(recNextNum) { - p.putUvarint(w, recNextNum) - p.putUvarint(w, p.nextNum) + if p.has(recNextFileNum) { + p.putUvarint(w, recNextFileNum) + p.putUvarint(w, p.nextFileNum) } - if p.has(recSeq) { - p.putUvarint(w, recSeq) - p.putUvarint(w, p.seq) + if p.has(recSeqNum) { + p.putUvarint(w, recSeqNum) + p.putUvarint(w, p.seqNum) } - for _, r := range p.compactionPointers { - p.putUvarint(w, recCompactionPointer) + for _, r := range p.compPtrs { + p.putUvarint(w, recCompPtr) p.putUvarint(w, 
uint64(r.level)) p.putBytes(w, r.ikey) } for _, r := range p.deletedTables { - p.putUvarint(w, recDeletedTable) + p.putUvarint(w, recDelTable) p.putUvarint(w, uint64(r.level)) p.putUvarint(w, r.num) } for _, r := range p.addedTables { - p.putUvarint(w, recNewTable) + p.putUvarint(w, recAddTable) p.putUvarint(w, uint64(r.level)) p.putUvarint(w, r.num) p.putUvarint(w, r.size) @@ -190,14 +189,16 @@ func (p *sessionRecord) encode(w io.Writer) error { return p.err } -func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 { +func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 { if p.err != nil { return 0 } x, err := binary.ReadUvarint(r) if err != nil { - if err == io.EOF { - p.err = errCorruptManifest + if err == io.ErrUnexpectedEOF || (mayEOF == false && err == io.EOF) { + p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"}) + } else if strings.HasPrefix(err.Error(), "binary:") { + p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, err.Error()}) } else { p.err = err } @@ -206,35 +207,39 @@ func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 { return x } -func (p *sessionRecord) readBytes(r byteReader) []byte { +func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 { + return p.readUvarintMayEOF(field, r, false) +} + +func (p *sessionRecord) readBytes(field string, r byteReader) []byte { if p.err != nil { return nil } - n := p.readUvarint(r) + n := p.readUvarint(field, r) if p.err != nil { return nil } x := make([]byte, n) _, p.err = io.ReadFull(r, x) if p.err != nil { - if p.err == io.EOF { - p.err = errCorruptManifest + if p.err == io.ErrUnexpectedEOF { + p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"}) } return nil } return x } -func (p *sessionRecord) readLevel(r io.ByteReader) int { +func (p *sessionRecord) readLevel(field string, r io.ByteReader) int { if p.err != nil { return 0 } - x := p.readUvarint(r) + x := 
p.readUvarint(field, r) if p.err != nil { return 0 } - if x >= kNumLevels { - p.err = errCorruptManifest + if x >= uint64(p.numLevel) { + p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"}) return 0 } return int(x) @@ -247,59 +252,59 @@ func (p *sessionRecord) decode(r io.Reader) error { } p.err = nil for p.err == nil { - rec, err := binary.ReadUvarint(br) - if err != nil { - if err == io.EOF { - err = nil + rec := p.readUvarintMayEOF("field-header", br, true) + if p.err != nil { + if p.err == io.EOF { + return nil } - return err + return p.err } switch rec { case recComparer: - x := p.readBytes(br) + x := p.readBytes("comparer", br) if p.err == nil { p.setComparer(string(x)) } case recJournalNum: - x := p.readUvarint(br) + x := p.readUvarint("journal-num", br) if p.err == nil { p.setJournalNum(x) } case recPrevJournalNum: - x := p.readUvarint(br) + x := p.readUvarint("prev-journal-num", br) if p.err == nil { p.setPrevJournalNum(x) } - case recNextNum: - x := p.readUvarint(br) + case recNextFileNum: + x := p.readUvarint("next-file-num", br) if p.err == nil { - p.setNextNum(x) + p.setNextFileNum(x) } - case recSeq: - x := p.readUvarint(br) + case recSeqNum: + x := p.readUvarint("seq-num", br) if p.err == nil { - p.setSeq(x) + p.setSeqNum(x) } - case recCompactionPointer: - level := p.readLevel(br) - ikey := p.readBytes(br) + case recCompPtr: + level := p.readLevel("comp-ptr.level", br) + ikey := p.readBytes("comp-ptr.ikey", br) if p.err == nil { - p.addCompactionPointer(level, iKey(ikey)) + p.addCompPtr(level, iKey(ikey)) } - case recNewTable: - level := p.readLevel(br) - num := p.readUvarint(br) - size := p.readUvarint(br) - imin := p.readBytes(br) - imax := p.readBytes(br) + case recAddTable: + level := p.readLevel("add-table.level", br) + num := p.readUvarint("add-table.num", br) + size := p.readUvarint("add-table.size", br) + imin := p.readBytes("add-table.imin", br) + imax := p.readBytes("add-table.imax", br) if p.err == 
nil { p.addTable(level, num, size, imin, imax) } - case recDeletedTable: - level := p.readLevel(br) - num := p.readUvarint(br) + case recDelTable: + level := p.readLevel("del-table.level", br) + num := p.readUvarint("del-table.num", br) if p.err == nil { - p.deleteTable(level, num) + p.delTable(level, num) } } } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go index 029fabfe6..2977473aa 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go @@ -9,6 +9,8 @@ package leveldb import ( "bytes" "testing" + + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" ) func decodeEncode(v *sessionRecord) (res bool, err error) { @@ -17,7 +19,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) { if err != nil { return } - v2 := new(sessionRecord) + v2 := &sessionRecord{numLevel: opt.DefaultNumLevel} err = v.decode(b) if err != nil { return @@ -32,7 +34,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) { func TestSessionRecord_EncodeDecode(t *testing.T) { big := uint64(1) << 50 - v := new(sessionRecord) + v := &sessionRecord{numLevel: opt.DefaultNumLevel} i := uint64(0) test := func() { res, err := decodeEncode(v) @@ -47,16 +49,16 @@ func TestSessionRecord_EncodeDecode(t *testing.T) { for ; i < 4; i++ { test() v.addTable(3, big+300+i, big+400+i, - newIKey([]byte("foo"), big+500+1, tVal), - newIKey([]byte("zoo"), big+600+1, tDel)) - v.deleteTable(4, big+700+i) - v.addCompactionPointer(int(i), newIKey([]byte("x"), big+900+1, tVal)) + newIkey([]byte("foo"), big+500+1, ktVal), + newIkey([]byte("zoo"), big+600+1, ktDel)) + v.delTable(4, big+700+i) + v.addCompPtr(int(i), newIkey([]byte("x"), big+900+1, ktVal)) } v.setComparer("foo") v.setJournalNum(big + 100) 
v.setPrevJournalNum(big + 99) - v.setNextNum(big + 200) - v.setSeq(big + 1000) + v.setNextFileNum(big + 200) + v.setSeqNum(big + 1000) test() } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go index a34c9eb4a..8584ee5da 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go @@ -22,7 +22,7 @@ type dropper struct { } func (d dropper) Drop(err error) { - if e, ok := err.(journal.ErrCorrupted); ok { + if e, ok := err.(*journal.ErrCorrupted); ok { d.s.logf("journal@drop %s-%d S·%s %q", d.file.Type(), d.file.Num(), shortenb(e.Size), e.Reason) } else { d.s.logf("journal@drop %s-%d %q", d.file.Type(), d.file.Num(), err) @@ -51,9 +51,14 @@ func (s *session) newTemp() storage.File { return s.stor.GetFile(num, storage.TypeTemp) } +func (s *session) tableFileFromRecord(r atRecord) *tFile { + return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax) +} + // Session state. -// Get current version. +// Get current version. This will incr version ref, must call +// version.release (exactly once) after use. func (s *session) version() *version { s.vmu.Lock() defer s.vmu.Unlock() @@ -61,61 +66,56 @@ func (s *session) version() *version { return s.stVersion } -// Get current version; no barrier. -func (s *session) version_NB() *version { - return s.stVersion -} - // Set current version to v. func (s *session) setVersion(v *version) { s.vmu.Lock() - v.ref = 1 + v.ref = 1 // Holds by session. if old := s.stVersion; old != nil { - v.ref++ + v.ref++ // Holds by old version. old.next = v - old.release_NB() + old.releaseNB() } s.stVersion = v s.vmu.Unlock() } // Get current unused file number. 
-func (s *session) fileNum() uint64 { - return atomic.LoadUint64(&s.stFileNum) +func (s *session) nextFileNum() uint64 { + return atomic.LoadUint64(&s.stNextFileNum) } -// Get current unused file number to num. -func (s *session) setFileNum(num uint64) { - atomic.StoreUint64(&s.stFileNum, num) +// Set current unused file number to num. +func (s *session) setNextFileNum(num uint64) { + atomic.StoreUint64(&s.stNextFileNum, num) } // Mark file number as used. func (s *session) markFileNum(num uint64) { - num += 1 + nextFileNum := num + 1 for { - old, x := s.stFileNum, num + old, x := s.stNextFileNum, nextFileNum if old > x { x = old } - if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) { + if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) { break } } } // Allocate a file number. -func (s *session) allocFileNum() (num uint64) { - return atomic.AddUint64(&s.stFileNum, 1) - 1 +func (s *session) allocFileNum() uint64 { + return atomic.AddUint64(&s.stNextFileNum, 1) - 1 } // Reuse given file number. func (s *session) reuseFileNum(num uint64) { for { - old, x := s.stFileNum, num + old, x := s.stNextFileNum, num if old != x+1 { x = old } - if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) { + if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) { break } } @@ -126,20 +126,20 @@ func (s *session) reuseFileNum(num uint64) { // Fill given session record obj with current states; need external // synchronization. 
func (s *session) fillRecord(r *sessionRecord, snapshot bool) { - r.setNextNum(s.fileNum()) + r.setNextFileNum(s.nextFileNum()) if snapshot { if !r.has(recJournalNum) { r.setJournalNum(s.stJournalNum) } - if !r.has(recSeq) { - r.setSeq(s.stSeq) + if !r.has(recSeqNum) { + r.setSeqNum(s.stSeqNum) } - for level, ik := range s.stCptrs { + for level, ik := range s.stCompPtrs { if ik != nil { - r.addCompactionPointer(level, ik) + r.addCompPtr(level, ik) } } @@ -147,7 +147,7 @@ func (s *session) fillRecord(r *sessionRecord, snapshot bool) { } } -// Mark if record has been commited, this will update session state; +// Mark if record has been committed, this will update session state; // need external synchronization. func (s *session) recordCommited(r *sessionRecord) { if r.has(recJournalNum) { @@ -158,12 +158,12 @@ func (s *session) recordCommited(r *sessionRecord) { s.stPrevJournalNum = r.prevJournalNum } - if r.has(recSeq) { - s.stSeq = r.seq + if r.has(recSeqNum) { + s.stSeqNum = r.seqNum } - for _, p := range r.compactionPointers { - s.stCptrs[p.level] = iKey(p.ikey) + for _, p := range r.compPtrs { + s.stCompPtrs[p.level] = iKey(p.ikey) } } @@ -178,10 +178,11 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { jw := journal.NewWriter(writer) if v == nil { - v = s.version_NB() + v = s.version() + defer v.release() } if rec == nil { - rec = new(sessionRecord) + rec = &sessionRecord{numLevel: s.o.GetNumLevel()} } s.fillRecord(rec, true) v.fillRecord(rec) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go index 9b7fa2f12..95a6bff7b 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go @@ -221,7 +221,7 @@ func (fs *fileStorage) GetManifest() (f File, err error) { fs.log(fmt.Sprintf("skipping 
%s: invalid file name", fn)) continue } - if _, e1 := strconv.ParseUint(fn[7:], 10, 0); e1 != nil { + if _, e1 := strconv.ParseUint(fn[8:], 10, 0); e1 != nil { fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", fn, e1)) continue } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go index bd62220c8..f996c66f1 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go @@ -125,3 +125,33 @@ type Storage interface { // Other methods should not be called after the storage has been closed. Close() error } + +// FileInfo wraps basic file info. +type FileInfo struct { + Type FileType + Num uint64 +} + +func (fi FileInfo) String() string { + switch fi.Type { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", fi.Num) + case TypeJournal: + return fmt.Sprintf("%06d.log", fi.Num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", fi.Num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", fi.Num) + default: + return fmt.Sprintf("%#x-%d", fi.Type, fi.Num) + } +} + +// NewFileInfo creates new FileInfo from the given File. It will returns nil +// if File is nil. 
+func NewFileInfo(f File) *FileInfo { + if f == nil { + return nil + } + return &FileInfo{f.Type(), f.Num()} +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go index 77c0cb6d2..1b24d0ec3 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "io/ioutil" + "math/rand" "os" "path/filepath" "sync" @@ -28,11 +29,25 @@ var ( ) var ( - tsFSEnv = os.Getenv("GOLEVELDB_USEFS") - tsKeepFS = tsFSEnv == "2" - tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1" - tsMU = &sync.Mutex{} - tsNum = 0 + tsFSEnv = os.Getenv("GOLEVELDB_USEFS") + tsTempdir = os.Getenv("GOLEVELDB_TEMPDIR") + tsKeepFS = tsFSEnv == "2" + tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1" + tsMU = &sync.Mutex{} + tsNum = 0 +) + +type tsOp uint + +const ( + tsOpOpen tsOp = iota + tsOpCreate + tsOpRead + tsOpReadAt + tsOpWrite + tsOpSync + + tsOpNum ) type tsLock struct { @@ -53,6 +68,9 @@ type tsReader struct { func (tr tsReader) Read(b []byte) (n int, err error) { ts := tr.tf.ts ts.countRead(tr.tf.Type()) + if tr.tf.shouldErrLocked(tsOpRead) { + return 0, errors.New("leveldb.testStorage: emulated read error") + } n, err = tr.Reader.Read(b) if err != nil && err != io.EOF { ts.t.Errorf("E: read error, num=%d type=%v n=%d: %v", tr.tf.Num(), tr.tf.Type(), n, err) @@ -63,6 +81,9 @@ func (tr tsReader) Read(b []byte) (n int, err error) { func (tr tsReader) ReadAt(b []byte, off int64) (n int, err error) { ts := tr.tf.ts ts.countRead(tr.tf.Type()) + if tr.tf.shouldErrLocked(tsOpReadAt) { + return 0, errors.New("leveldb.testStorage: emulated readAt error") + } n, err = tr.Reader.ReadAt(b, off) if err != nil && err != io.EOF { ts.t.Errorf("E: readAt error, num=%d type=%v off=%d n=%d: %v", tr.tf.Num(), tr.tf.Type(), off, n, err) @@ -82,15 +103,12 @@ type 
tsWriter struct { } func (tw tsWriter) Write(b []byte) (n int, err error) { - ts := tw.tf.ts - ts.mu.Lock() - defer ts.mu.Unlock() - if ts.emuWriteErr&tw.tf.Type() != 0 { + if tw.tf.shouldErrLocked(tsOpWrite) { return 0, errors.New("leveldb.testStorage: emulated write error") } n, err = tw.Writer.Write(b) if err != nil { - ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err) + tw.tf.ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err) } return } @@ -98,23 +116,23 @@ func (tw tsWriter) Write(b []byte) (n int, err error) { func (tw tsWriter) Sync() (err error) { ts := tw.tf.ts ts.mu.Lock() - defer ts.mu.Unlock() for ts.emuDelaySync&tw.tf.Type() != 0 { ts.cond.Wait() } - if ts.emuSyncErr&tw.tf.Type() != 0 { + ts.mu.Unlock() + if tw.tf.shouldErrLocked(tsOpSync) { return errors.New("leveldb.testStorage: emulated sync error") } err = tw.Writer.Sync() if err != nil { - ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err) + tw.tf.ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err) } return } func (tw tsWriter) Close() (err error) { err = tw.Writer.Close() - tw.tf.close("reader", err) + tw.tf.close("writer", err) return } @@ -127,6 +145,16 @@ func (tf tsFile) x() uint64 { return tf.Num()< 0 { // Find the earliest possible internal key for min. - i = tf.searchMax(icmp, newIKey(umin, kMaxSeq, tSeek)) + i = tf.searchMax(icmp, newIkey(umin, kMaxSeq, ktSeek)) } if i >= len(tf) { // Beginning of range is after all files, so no overlap. @@ -159,24 +172,25 @@ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) boo } // Returns tables whose its key range overlaps with given key range. -// If overlapped is true then the search will be expanded to tables that -// overlaps with each other. +// Range will be expanded if ukey found hop across tables. +// If overlapped is true then the search will be restarted if umax +// expanded. 
+// The dst content will be overwritten. func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles { - x := len(dst) + dst = dst[:0] for i := 0; i < len(tf); { t := tf[i] if t.overlaps(icmp, umin, umax) { - if overlapped { - // For overlapped files, check if the newly added file has - // expanded the range. If so, restart search. - if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 { - umin = t.imin.ukey() - dst = dst[:x] - i = 0 - continue - } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 { - umax = t.imax.ukey() - dst = dst[:x] + if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 { + umin = t.imin.ukey() + dst = dst[:0] + i = 0 + continue + } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 { + umax = t.imax.ukey() + // Restart search if it is overlapped. + if overlapped { + dst = dst[:0] i = 0 continue } @@ -289,7 +303,7 @@ func (t *tOps) create() (*tWriter, error) { t: t, file: file, w: fw, - tw: table.NewWriter(fw, t.s.o), + tw: table.NewWriter(fw, t.s.o.Options), }, nil } @@ -337,7 +351,13 @@ func (t *tOps) open(f *tFile) (ch cache.Handle, err error) { if bc := t.s.o.GetBlockCache(); bc != nil { bcacheNS = bc.GetNamespace(num) } - return 1, table.NewReader(r, int64(f.size), bcacheNS, t.bpool, t.s.o) + var tr *table.Reader + tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcacheNS, t.bpool, t.s.o.Options) + if err != nil { + r.Close() + return 0, nil + } + return 1, tr }) if ch == nil && err == nil { err = ErrClosed @@ -353,7 +373,17 @@ func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []b return nil, nil, err } defer ch.Release() - return ch.Value().(*table.Reader).Find(key, ro) + return ch.Value().(*table.Reader).Find(key, true, ro) +} + +// Finds key that is greater than or equal to the given key. 
+func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) { + ch, err := t.open(f) + if err != nil { + return nil, err + } + defer ch.Release() + return ch.Value().(*table.Reader).FindKey(key, true, ro) } // Returns approximate offset of the given key. @@ -440,28 +470,34 @@ func (w *tWriter) empty() bool { return w.first == nil } +// Closes the storage.Writer. +func (w *tWriter) close() { + if w.w != nil { + w.w.Close() + w.w = nil + } +} + // Finalizes the table and returns table file. func (w *tWriter) finish() (f *tFile, err error) { + defer w.close() err = w.tw.Close() if err != nil { return } err = w.w.Sync() if err != nil { - w.w.Close() return } - w.w.Close() f = newTableFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last)) return } // Drops the table. func (w *tWriter) drop() { - w.w.Close() + w.close() w.file.Remove() w.t.s.reuseFileNum(w.file.Num()) - w.w = nil w.file = nil w.tw = nil w.first = nil diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go index 790e1443a..a3f9222aa 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go @@ -19,13 +19,18 @@ import ( "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) -func (b *block) TestNewIterator(slice *util.Range) iterator.Iterator { - return b.newIterator(slice, false, nil) +type blockTesting struct { + tr *Reader + b *block +} + +func (t *blockTesting) TestNewIterator(slice *util.Range) iterator.Iterator { + return t.tr.newBlockIter(t.b, nil, slice, false) } var _ = testutil.Defer(func() { Describe("Block", func() { - Build := func(kv *testutil.KeyValue, restartInterval int) *block { + Build := func(kv *testutil.KeyValue, restartInterval int) *blockTesting { // Building the block. 
bw := &blockWriter{ restartInterval: restartInterval, @@ -39,11 +44,13 @@ var _ = testutil.Defer(func() { // Opening the block. data := bw.buf.Bytes() restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) - return &block{ - tr: &Reader{cmp: comparer.DefaultComparer}, - data: data, - restartsLen: restartsLen, - restartsOffset: len(data) - (restartsLen+1)*4, + return &blockTesting{ + tr: &Reader{cmp: comparer.DefaultComparer}, + b: &block{ + data: data, + restartsLen: restartsLen, + restartsOffset: len(data) - (restartsLen+1)*4, + }, } } @@ -59,7 +66,7 @@ var _ = testutil.Defer(func() { // Make block. br := Build(kv, restartInterval) // Do testing. - testutil.KeyValueTesting(nil, br, kv.Clone()) + testutil.KeyValueTesting(nil, kv.Clone(), br, nil, nil) } Describe(Text(), Test) @@ -102,11 +109,11 @@ var _ = testutil.Defer(func() { for restartInterval := 1; restartInterval <= 5; restartInterval++ { Describe(fmt.Sprintf("with restart interval of %d", restartInterval), func() { // Make block. 
- br := Build(kv, restartInterval) + bt := Build(kv, restartInterval) Test := func(r *util.Range) func(done Done) { return func(done Done) { - iter := br.newIterator(r, false, nil) + iter := bt.TestNewIterator(r) Expect(iter.Error()).ShouldNot(HaveOccurred()) t := testutil.IteratorTesting{ @@ -115,6 +122,7 @@ var _ = testutil.Defer(func() { } testutil.DoIteratorTesting(&t) + iter.Release() done <- true } } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go index fc4c3ed26..480a014ff 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go @@ -8,27 +8,41 @@ package table import ( "encoding/binary" - "errors" "fmt" "io" "sort" "strings" + "sync" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) var ( - ErrNotFound = util.ErrNotFound - ErrIterReleased = errors.New("leveldb/table: iterator released") + ErrNotFound = errors.ErrNotFound + ErrReaderReleased = errors.New("leveldb/table: reader 
released") + ErrIterReleased = errors.New("leveldb/table: iterator released") ) +type ErrCorrupted struct { + Pos int64 + Size int64 + Kind string + Reason string +} + +func (e *ErrCorrupted) Error() string { + return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason) +} + func max(x, y int) int { if x > y { return x @@ -37,22 +51,21 @@ func max(x, y int) int { } type block struct { - tr *Reader + bpool *util.BufferPool + bh blockHandle data []byte restartsLen int restartsOffset int - // Whether checksum is verified and valid. - checksum bool } -func (b *block) seek(rstart, rlimit int, key []byte) (index, offset int, err error) { +func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) { index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) offset += 1 // shared always zero, since this is a restart point v1, n1 := binary.Uvarint(b.data[offset:]) // key length _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length m := offset + n1 + n2 - return b.tr.cmp.Compare(b.data[m:m+int(v1)], key) > 0 + return cmp.Compare(b.data[m:m+int(v1)], key) > 0 }) + rstart - 1 if index < rstart { // The smallest key is greater-than key sought. 
@@ -75,7 +88,7 @@ func (b *block) restartOffset(index int) int { func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) { if offset >= b.restartsOffset { if offset != b.restartsOffset { - err = errors.New("leveldb/table: Reader: BlockEntry: invalid block (block entries offset not aligned)") + err = &ErrCorrupted{Reason: "entries offset not aligned"} } return } @@ -85,7 +98,7 @@ func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) m := n0 + n1 + n2 n = m + int(v1) + int(v2) if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset { - err = errors.New("leveldb/table: Reader: invalid block (block entries corrupted)") + err = &ErrCorrupted{Reason: "entries corrupted"} return } key = b.data[offset+m : offset+m+int(v1)] @@ -94,50 +107,9 @@ func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) return } -func (b *block) newIterator(slice *util.Range, inclLimit bool, cache util.Releaser) *blockIter { - bi := &blockIter{ - block: b, - cache: cache, - // Valid key should never be nil. 
- key: make([]byte, 0), - dir: dirSOI, - riStart: 0, - riLimit: b.restartsLen, - offsetStart: 0, - offsetRealStart: 0, - offsetLimit: b.restartsOffset, - } - if slice != nil { - if slice.Start != nil { - if bi.Seek(slice.Start) { - bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset) - bi.offsetStart = b.restartOffset(bi.riStart) - bi.offsetRealStart = bi.prevOffset - } else { - bi.riStart = b.restartsLen - bi.offsetStart = b.restartsOffset - bi.offsetRealStart = b.restartsOffset - } - } - if slice.Limit != nil { - if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) { - bi.offsetLimit = bi.prevOffset - bi.riLimit = bi.restartIndex + 1 - } - } - bi.reset() - if bi.offsetStart > bi.offsetLimit { - bi.sErr(errors.New("leveldb/table: Reader: invalid slice range")) - } - } - return bi -} - func (b *block) Release() { - if b.tr.bpool != nil { - b.tr.bpool.Put(b.data) - } - b.tr = nil + b.bpool.Put(b.data) + b.bpool = nil b.data = nil } @@ -152,10 +124,12 @@ const ( ) type blockIter struct { - block *block - cache, releaser util.Releaser - key, value []byte - offset int + tr *Reader + block *block + blockReleaser util.Releaser + releaser util.Releaser + key, value []byte + offset int // Previous offset, only filled by Next. 
prevOffset int prevNode []int @@ -252,7 +226,7 @@ func (i *blockIter) Seek(key []byte) bool { return false } - ri, offset, err := i.block.seek(i.riStart, i.riLimit, key) + ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key) if err != nil { i.sErr(err) return false @@ -263,7 +237,7 @@ func (i *blockIter) Seek(key []byte) bool { i.dir = dirForward } for i.Next() { - if i.block.tr.cmp.Compare(i.key, key) >= 0 { + if i.tr.cmp.Compare(i.key, key) >= 0 { return true } } @@ -288,7 +262,7 @@ func (i *blockIter) Next() bool { for i.offset < i.offsetRealStart { key, value, nShared, n, err := i.block.entry(i.offset) if err != nil { - i.sErr(err) + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) return false } if n == 0 { @@ -302,13 +276,13 @@ func (i *blockIter) Next() bool { if i.offset >= i.offsetLimit { i.dir = dirEOI if i.offset != i.offsetLimit { - i.sErr(errors.New("leveldb/table: Reader: Next: invalid block (block entries offset not aligned)")) + i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) } return false } key, value, nShared, n, err := i.block.entry(i.offset) if err != nil { - i.sErr(err) + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) return false } if n == 0 { @@ -393,7 +367,7 @@ func (i *blockIter) Prev() bool { for { key, value, nShared, n, err := i.block.entry(offset) if err != nil { - i.sErr(err) + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) return false } if offset >= i.offsetRealStart { @@ -412,7 +386,7 @@ func (i *blockIter) Prev() bool { // Stop if target offset reached. 
if offset >= i.offset { if offset != i.offset { - i.sErr(errors.New("leveldb/table: Reader: Prev: invalid block (block entries offset not aligned)")) + i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) return false } @@ -439,16 +413,17 @@ func (i *blockIter) Value() []byte { } func (i *blockIter) Release() { - if i.dir > dirReleased { + if i.dir != dirReleased { + i.tr = nil i.block = nil i.prevNode = nil i.prevKeys = nil i.key = nil i.value = nil i.dir = dirReleased - if i.cache != nil { - i.cache.Release() - i.cache = nil + if i.blockReleaser != nil { + i.blockReleaser.Release() + i.blockReleaser = nil } if i.releaser != nil { i.releaser.Release() @@ -458,9 +433,13 @@ func (i *blockIter) Release() { } func (i *blockIter) SetReleaser(releaser util.Releaser) { - if i.dir > dirReleased { - i.releaser = releaser + if i.dir == dirReleased { + panic(util.ErrReleased) } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser } func (i *blockIter) Valid() bool { @@ -472,21 +451,21 @@ func (i *blockIter) Error() error { } type filterBlock struct { - tr *Reader + bpool *util.BufferPool data []byte oOffset int baseLg uint filtersNum int } -func (b *filterBlock) contains(offset uint64, key []byte) bool { +func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool { i := int(offset >> b.baseLg) if i < b.filtersNum { o := b.data[b.oOffset+i*4:] n := int(binary.LittleEndian.Uint32(o)) m := int(binary.LittleEndian.Uint32(o[4:])) if n < m && m <= b.oOffset { - return b.tr.filter.Contains(b.data[n:m], key) + return filter.Contains(b.data[n:m], key) } else if n == m { return false } @@ -495,18 +474,16 @@ func (b *filterBlock) contains(offset uint64, key []byte) bool { } func (b *filterBlock) Release() { - if b.tr.bpool != nil { - b.tr.bpool.Put(b.data) - } - b.tr = nil + b.bpool.Put(b.data) + b.bpool = nil b.data = nil } type indexIter struct { *blockIter + tr *Reader slice *util.Range // 
Options - checksum bool fillCache bool } @@ -517,87 +494,124 @@ func (i *indexIter) Get() iterator.Iterator { } dataBH, n := decodeBlockHandle(value) if n == 0 { - return iterator.NewEmptyIterator(errors.New("leveldb/table: Reader: invalid table (bad data block handle)")) + return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle")) } + var slice *util.Range if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) { slice = i.slice } - return i.blockIter.block.tr.getDataIter(dataBH, slice, i.checksum, i.fillCache) + return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache) } // Reader is a table reader. type Reader struct { + mu sync.RWMutex + fi *storage.FileInfo reader io.ReaderAt cache cache.Namespace err error bpool *util.BufferPool // Options - cmp comparer.Comparer - filter filter.Filter - checksum bool - strictIter bool + o *opt.Options + cmp comparer.Comparer + filter filter.Filter + verifyChecksum bool - dataEnd int64 - indexBH, filterBH blockHandle + dataEnd int64 + metaBH, indexBH, filterBH blockHandle + indexBlock *block + filterBlock *filterBlock } -func verifyChecksum(data []byte) bool { - n := len(data) - 4 - checksum0 := binary.LittleEndian.Uint32(data[n:]) - checksum1 := util.NewCRC(data[:n]).Value() - return checksum0 == checksum1 +func (r *Reader) blockKind(bh blockHandle) string { + switch bh.offset { + case r.metaBH.offset: + return "meta-block" + case r.indexBH.offset: + return "index-block" + case r.filterBH.offset: + if r.filterBH.length > 0 { + return "filter-block" + } + } + return "data-block" } -func (r *Reader) readRawBlock(bh blockHandle, checksum bool) ([]byte, error) { +func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error { + return &errors.ErrCorrupted{File: r.fi, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}} +} + +func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error { + return 
r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason) +} + +func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error { + if cerr, ok := err.(*ErrCorrupted); ok { + cerr.Pos = int64(bh.offset) + cerr.Size = int64(bh.length) + cerr.Kind = r.blockKind(bh) + return &errors.ErrCorrupted{File: r.fi, Err: cerr} + } + return err +} + +func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) { data := r.bpool.Get(int(bh.length + blockTrailerLen)) if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF { return nil, err } - if checksum || r.checksum { - if !verifyChecksum(data) { + + if verifyChecksum { + n := bh.length + 1 + checksum0 := binary.LittleEndian.Uint32(data[n:]) + checksum1 := util.NewCRC(data[:n]).Value() + if checksum0 != checksum1 { r.bpool.Put(data) - return nil, errors.New("leveldb/table: Reader: invalid block (checksum mismatch)") + return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1)) } } + switch data[bh.length] { case blockTypeNoCompression: data = data[:bh.length] case blockTypeSnappyCompression: decLen, err := snappy.DecodedLen(data[:bh.length]) if err != nil { - return nil, err + return nil, r.newErrCorruptedBH(bh, err.Error()) } - tmp := data - data, err = snappy.Decode(r.bpool.Get(decLen), tmp[:bh.length]) - r.bpool.Put(tmp) + decData := r.bpool.Get(decLen) + decData, err = snappy.Decode(decData, data[:bh.length]) + r.bpool.Put(data) if err != nil { - return nil, err + r.bpool.Put(decData) + return nil, r.newErrCorruptedBH(bh, err.Error()) } + data = decData default: r.bpool.Put(data) - return nil, fmt.Errorf("leveldb/table: Reader: unknown block compression type: %d", data[bh.length]) + return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length])) } return data, nil } -func (r *Reader) readBlock(bh blockHandle, checksum bool) (*block, error) { - data, err := 
r.readRawBlock(bh, checksum) +func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) { + data, err := r.readRawBlock(bh, verifyChecksum) if err != nil { return nil, err } restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) b := &block{ - tr: r, + bpool: r.bpool, + bh: bh, data: data, restartsLen: restartsLen, restartsOffset: len(data) - (restartsLen+1)*4, - checksum: checksum || r.checksum, } return b, nil } -func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*block, util.Releaser, error) { +func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) { if r.cache != nil { var err error ch := r.cache.Get(bh.offset, func() (charge int, value interface{}) { @@ -605,7 +619,7 @@ func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*blo return 0, nil } var b *block - b, err = r.readBlock(bh, checksum) + b, err = r.readBlock(bh, verifyChecksum) if err != nil { return 0, nil } @@ -615,14 +629,7 @@ func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*blo b, ok := ch.Value().(*block) if !ok { ch.Release() - return nil, nil, errors.New("leveldb/table: Reader: inconsistent block type") - } - if !b.checksum && (r.checksum || checksum) { - if !verifyChecksum(b.data) { - ch.Release() - return nil, nil, errors.New("leveldb/table: Reader: invalid block (checksum mismatch)") - } - b.checksum = true + return nil, nil, errors.New("leveldb/table: inconsistent block type") } return b, ch, err } else if err != nil { @@ -630,7 +637,7 @@ func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*blo } } - b, err := r.readBlock(bh, checksum) + b, err := r.readBlock(bh, verifyChecksum) return b, b, err } @@ -641,15 +648,15 @@ func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) { } n := len(data) if n < 5 { - return nil, errors.New("leveldb/table: Reader: invalid filter block (too short)") + 
return nil, r.newErrCorruptedBH(bh, "too short") } m := n - 5 oOffset := int(binary.LittleEndian.Uint32(data[m:])) if oOffset > m { - return nil, errors.New("leveldb/table: Reader: invalid filter block (invalid offset)") + return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset") } b := &filterBlock{ - tr: r, + bpool: r.bpool, data: data, oOffset: oOffset, baseLg: uint(data[n-1]), @@ -676,7 +683,7 @@ func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterB b, ok := ch.Value().(*filterBlock) if !ok { ch.Release() - return nil, nil, errors.New("leveldb/table: Reader: inconsistent block type") + return nil, nil, errors.New("leveldb/table: inconsistent block type") } return b, ch, err } else if err != nil { @@ -688,12 +695,77 @@ func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterB return b, b, err } -func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fillCache bool) iterator.Iterator { - b, rel, err := r.readBlockCached(dataBH, checksum, fillCache) +func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) { + if r.indexBlock == nil { + return r.readBlockCached(r.indexBH, true, fillCache) + } + return r.indexBlock, util.NoopReleaser{}, nil +} + +func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) { + if r.filterBlock == nil { + return r.readFilterBlockCached(r.filterBH, fillCache) + } + return r.filterBlock, util.NoopReleaser{}, nil +} + +func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter { + bi := &blockIter{ + tr: r, + block: b, + blockReleaser: bReleaser, + // Valid key should never be nil. 
+ key: make([]byte, 0), + dir: dirSOI, + riStart: 0, + riLimit: b.restartsLen, + offsetStart: 0, + offsetRealStart: 0, + offsetLimit: b.restartsOffset, + } + if slice != nil { + if slice.Start != nil { + if bi.Seek(slice.Start) { + bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset) + bi.offsetStart = b.restartOffset(bi.riStart) + bi.offsetRealStart = bi.prevOffset + } else { + bi.riStart = b.restartsLen + bi.offsetStart = b.restartsOffset + bi.offsetRealStart = b.restartsOffset + } + } + if slice.Limit != nil { + if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) { + bi.offsetLimit = bi.prevOffset + bi.riLimit = bi.restartIndex + 1 + } + } + bi.reset() + if bi.offsetStart > bi.offsetLimit { + bi.sErr(errors.New("leveldb/table: invalid slice range")) + } + } + return bi +} + +func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator { + b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache) if err != nil { return iterator.NewEmptyIterator(err) } - return b.newIterator(slice, false, rel) + return r.newBlockIter(b, rel, slice, false) +} + +func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + return iterator.NewEmptyIterator(r.err) + } + + return r.getDataIter(dataBH, slice, verifyChecksum, fillCache) } // NewIterator creates an iterator from the table. @@ -708,43 +780,43 @@ func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fi // // Also read Iterator documentation of the leveldb/iterator package. 
func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + r.mu.RLock() + defer r.mu.RUnlock() + if r.err != nil { return iterator.NewEmptyIterator(r.err) } fillCache := !ro.GetDontFillCache() - b, rel, err := r.readBlockCached(r.indexBH, true, fillCache) + indexBlock, rel, err := r.getIndexBlock(fillCache) if err != nil { return iterator.NewEmptyIterator(err) } index := &indexIter{ - blockIter: b.newIterator(slice, true, rel), + blockIter: r.newBlockIter(indexBlock, rel, slice, true), + tr: r, slice: slice, - checksum: ro.GetStrict(opt.StrictBlockChecksum), fillCache: !ro.GetDontFillCache(), } - return iterator.NewIndexedIterator(index, r.strictIter || ro.GetStrict(opt.StrictIterator), false) + return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader)) } -// Find finds key/value pair whose key is greater than or equal to the -// given key. It returns ErrNotFound if the table doesn't contain -// such pair. -// -// The caller should not modify the contents of the returned slice, but -// it is safe to modify the contents of the argument after Find returns. 
-func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err error) { +func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + if r.err != nil { err = r.err return } - indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true) + indexBlock, rel, err := r.getIndexBlock(true) if err != nil { return } defer rel.Release() - index := indexBlock.newIterator(nil, true, nil) + index := r.newBlockIter(indexBlock, nil, nil, true) defer index.Release() if !index.Seek(key) { err = index.Error() @@ -755,20 +827,23 @@ func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err } dataBH, n := decodeBlockHandle(index.Value()) if n == 0 { - err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)") + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") return } - if r.filter != nil { - filterBlock, rel, ferr := r.readFilterBlockCached(r.filterBH, true) + if filtered && r.filter != nil { + filterBlock, frel, ferr := r.getFilterBlock(true) if ferr == nil { - if !filterBlock.contains(dataBH.offset, key) { - rel.Release() + if !filterBlock.contains(r.filter, dataBH.offset, key) { + frel.Release() return nil, nil, ErrNotFound } - rel.Release() + frel.Release() + } else if !errors.IsCorrupted(ferr) { + err = ferr + return } } - data := r.getDataIter(dataBH, nil, ro.GetStrict(opt.StrictBlockChecksum), !ro.GetDontFillCache()) + data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) defer data.Release() if !data.Seek(key) { err = data.Error() @@ -779,24 +854,62 @@ func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err } // Don't use block buffer, no need to copy the buffer. rkey = data.Key() - // Use block buffer, and since the buffer will be recycled, the buffer - // need to be copied. - value = append([]byte{}, data.Value()...) 
+ if !noValue { + if r.bpool == nil { + value = data.Value() + } else { + // Use block buffer, and since the buffer will be recycled, the buffer + // need to be copied. + value = append([]byte{}, data.Value()...) + } + } + return +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// If filtered is true then the nearest 'block' will be checked against +// 'filter data' (if present) and will immediately return ErrNotFound if +// 'filter data' indicates that such pair doesn't exist. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. +func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) { + return r.find(key, filtered, ro, false) +} + +// Find finds key that is greater than or equal to the given key. +// It returns ErrNotFound if the table doesn't contain such key. +// If filtered is true then the nearest 'block' will be checked against +// 'filter data' (if present) and will immediately return ErrNotFound if +// 'filter data' indicates that such key doesn't exist. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. +func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) { + rkey, _, err = r.find(key, filtered, ro, true) return } // Get gets the value for the given key. It returns errors.ErrNotFound // if the table does not contain the key. // -// The caller should not modify the contents of the returned slice, but -// it is safe to modify the contents of the argument after Get returns. +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. 
func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + if r.err != nil { err = r.err return } - rkey, value, err := r.Find(key, ro) + rkey, value, err := r.find(key, false, ro, false) if err == nil && r.cmp.Compare(rkey, key) != 0 { value = nil err = ErrNotFound @@ -808,6 +921,9 @@ func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) // // It is safe to modify the contents of the argument after Get returns. func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + if r.err != nil { err = r.err return @@ -819,12 +935,12 @@ func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { } defer rel.Release() - index := indexBlock.newIterator(nil, true, nil) + index := r.newBlockIter(indexBlock, nil, nil, true) defer index.Release() if index.Seek(key) { dataBH, n := decodeBlockHandle(index.Value()) if n == 0 { - err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)") + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") return } offset = int64(dataBH.offset) @@ -840,67 +956,91 @@ func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { // Release implements util.Releaser. // It also close the file if it is an io.Closer. func (r *Reader) Release() { + r.mu.Lock() + defer r.mu.Unlock() + if closer, ok := r.reader.(io.Closer); ok { closer.Close() } + if r.indexBlock != nil { + r.indexBlock.Release() + r.indexBlock = nil + } + if r.filterBlock != nil { + r.filterBlock.Release() + r.filterBlock = nil + } r.reader = nil r.cache = nil r.bpool = nil + r.err = ErrReaderReleased } // NewReader creates a new initialized table reader for the file. -// The cache and bpool is optional and can be nil. +// The fi, cache and bpool is optional and can be nil. // // The returned table reader instance is goroutine-safe. 
-func NewReader(f io.ReaderAt, size int64, cache cache.Namespace, bpool *util.BufferPool, o *opt.Options) *Reader { - if bpool == nil { - bpool = util.NewBufferPool(o.GetBlockSize() + blockTrailerLen) - } - r := &Reader{ - reader: f, - cache: cache, - bpool: bpool, - cmp: o.GetComparer(), - checksum: o.GetStrict(opt.StrictBlockChecksum), - strictIter: o.GetStrict(opt.StrictIterator), - } +func NewReader(f io.ReaderAt, size int64, fi *storage.FileInfo, cache cache.Namespace, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { if f == nil { - r.err = errors.New("leveldb/table: Reader: nil file") - return r + return nil, errors.New("leveldb/table: nil file") } + + r := &Reader{ + fi: fi, + reader: f, + cache: cache, + bpool: bpool, + o: o, + cmp: o.GetComparer(), + verifyChecksum: o.GetStrict(opt.StrictBlockChecksum), + } + if size < footerLen { - r.err = errors.New("leveldb/table: Reader: invalid table (file size is too small)") - return r + r.err = r.newErrCorrupted(0, size, "table", "too small") + return r, nil } + + footerPos := size - footerLen var footer [footerLen]byte - if _, err := r.reader.ReadAt(footer[:], size-footerLen); err != nil && err != io.EOF { - r.err = fmt.Errorf("leveldb/table: Reader: invalid table (could not read footer): %v", err) + if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF { + return nil, err } if string(footer[footerLen-len(magic):footerLen]) != magic { - r.err = errors.New("leveldb/table: Reader: invalid table (bad magic number)") - return r + r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number") + return r, nil } + + var n int // Decode the metaindex block handle. 
- metaBH, n := decodeBlockHandle(footer[:]) + r.metaBH, n = decodeBlockHandle(footer[:]) if n == 0 { - r.err = errors.New("leveldb/table: Reader: invalid table (bad metaindex block handle)") - return r + r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle") + return r, nil } + // Decode the index block handle. r.indexBH, n = decodeBlockHandle(footer[n:]) if n == 0 { - r.err = errors.New("leveldb/table: Reader: invalid table (bad index block handle)") - return r + r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle") + return r, nil } + // Read metaindex block. - metaBlock, err := r.readBlock(metaBH, true) + metaBlock, err := r.readBlock(r.metaBH, true) if err != nil { - r.err = err - return r + if errors.IsCorrupted(err) { + r.err = err + return r, nil + } else { + return nil, err + } } + // Set data end. - r.dataEnd = int64(metaBH.offset) - metaIter := metaBlock.newIterator(nil, false, nil) + r.dataEnd = int64(r.metaBH.offset) + + // Read metaindex. + metaIter := r.newBlockIter(metaBlock, nil, nil, true) for metaIter.Next() { key := string(metaIter.Key()) if !strings.HasPrefix(key, "filter.") { @@ -930,5 +1070,30 @@ func NewReader(f io.ReaderAt, size int64, cache cache.Namespace, bpool *util.Buf } metaIter.Release() metaBlock.Release() - return r + + // Cache index and filter block locally, since we don't have global cache. + if cache == nil { + r.indexBlock, err = r.readBlock(r.indexBH, true) + if err != nil { + if errors.IsCorrupted(err) { + r.err = err + return r, nil + } else { + return nil, err + } + } + if r.filter != nil { + r.filterBlock, err = r.readFilterBlock(r.filterBH) + if err != nil { + if !errors.IsCorrupted(err) { + return nil, err + } + + // Don't use filter then. 
+ r.filter = nil + } + } + } + + return r, nil } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go index c0ac70d9e..beacdc1f0 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go @@ -133,9 +133,9 @@ Filter block trailer: +- 4-bytes -+ / \ - +---------------+---------------+---------------+-------------------------+------------------+ - | offset 1 | .... | offset n | filter offset (4-bytes) | base Lg (1-byte) | - +-------------- +---------------+---------------+-------------------------+------------------+ + +---------------+---------------+---------------+-------------------------------+------------------+ + | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) | + +-------------- +---------------+---------------+-------------------------------+------------------+ NOTE: All fixed-length integer are little-endian. diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go index 6ab892d54..dc251278a 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go @@ -3,15 +3,9 @@ package table import ( "testing" - . "github.com/onsi/ginkgo" - . 
"github.com/onsi/gomega" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil" ) func TestTable(t *testing.T) { - testutil.RunDefer() - - RegisterFailHandler(Fail) - RunSpecs(t, "Table Suite") + testutil.RunSuite(t, "Table Suite") } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go index 4aad03019..130f373cb 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go @@ -23,7 +23,7 @@ type tableWrapper struct { } func (t tableWrapper) TestFind(key []byte) (rkey, rvalue []byte, err error) { - return t.Reader.Find(key, nil) + return t.Reader.Find(key, false, nil) } func (t tableWrapper) TestGet(key []byte) (value []byte, err error) { @@ -59,7 +59,8 @@ var _ = testutil.Defer(func() { It("Should be able to approximate offset of a key correctly", func() { Expect(err).ShouldNot(HaveOccurred()) - tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, o) + tr, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, nil, o) + Expect(err).ShouldNot(HaveOccurred()) CheckOffset := func(key string, expect, threshold int) { offset, err := tr.OffsetOf([]byte(key)) Expect(err).ShouldNot(HaveOccurred()) @@ -95,7 +96,7 @@ var _ = testutil.Defer(func() { tw.Close() // Opening the table. 
- tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, o) + tr, _ := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, nil, o) return tableWrapper{tr} } Test := func(kv *testutil.KeyValue, body func(r *Reader)) func() { @@ -104,11 +105,11 @@ var _ = testutil.Defer(func() { if body != nil { body(db.(tableWrapper).Reader) } - testutil.KeyValueTesting(nil, db, *kv) + testutil.KeyValueTesting(nil, *kv, db, nil, nil) } } - testutil.AllKeyValueTesting(nil, Build) + testutil.AllKeyValueTesting(nil, Build, nil, nil) Describe("with one key per block", Test(testutil.KeyValue_Generate(nil, 9, 1, 10, 512, 512), func(r *Reader) { It("should have correct blocks number", func() { indexBlock, err := r.readBlock(r.indexBH, true) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go index 9dea5b87c..7a819d593 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go @@ -12,7 +12,7 @@ import ( "fmt" "io" - "github.com/jbenet/go-ipfs/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter" diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go index 5b6e0344e..aa317956a 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go @@ -12,6 +12,7 @@ import ( . 
"github.com/onsi/gomega" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) @@ -34,6 +35,10 @@ type Get interface { TestGet(key []byte) (value []byte, err error) } +type Has interface { + TestHas(key []byte) (ret bool, err error) +} + type NewIterator interface { TestNewIterator(slice *util.Range) iterator.Iterator } @@ -110,7 +115,7 @@ func (t *DBTesting) TestAllPresent() { func (t *DBTesting) TestDeletedKey(key []byte) { _, err := t.DB.TestGet(key) - Expect(err).Should(Equal(util.ErrNotFound), "Get on deleted key %q, %s", key, t.text()) + Expect(err).Should(Equal(errors.ErrNotFound), "Get on deleted key %q, %s", key, t.text()) } func (t *DBTesting) TestAllDeleted() { @@ -212,5 +217,6 @@ func DoDBTesting(t *DBTesting) { } DoIteratorTesting(&it) + iter.Release() } } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go new file mode 100644 index 000000000..82f3d0e81 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/ginkgo.go @@ -0,0 +1,21 @@ +package testutil + +import ( + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" +) + +func RunSuite(t GinkgoTestingT, name string) { + RunDefer() + + SynchronizedBeforeSuite(func() []byte { + RunDefer("setup") + return nil + }, func(data []byte) {}) + SynchronizedAfterSuite(func() { + RunDefer("teardown") + }, func() {}) + + RegisterFailHandler(Fail) + RunSpecs(t, name) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go index 77547aaa2..a8354d194 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go @@ -13,16 +13,28 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors" "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) -func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) { +func KeyValueTesting(rnd *rand.Rand, kv KeyValue, p DB, setup func(KeyValue) DB, teardown func(DB)) { if rnd == nil { rnd = NewRand() } - if db, ok := p.(Find); ok { - It("Should find all keys with Find", func() { + if p == nil { + BeforeEach(func() { + p = setup(kv) + }) + if teardown != nil { + AfterEach(func() { + teardown(p) + }) + } + } + + It("Should find all keys with Find", func() { + if db, ok := p.(Find); ok { ShuffledIndex(nil, kv.Len(), 1, func(i int) { key_, key, value := kv.IndexInexact(i) @@ -38,9 +50,11 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) { Expect(rkey).Should(Equal(key)) Expect(rvalue).Should(Equal(value), "Value for key %q (%q)", key_, key) }) - }) + } + }) - It("Should return error if the key is not present", func() { + It("Should return error if the key is not present", func() { + if db, ok := p.(Find); ok { var key []byte if kv.Len() > 0 { key_, _ := kv.Index(kv.Len() - 1) @@ -48,12 +62,12 @@ func KeyValueTesting(rnd 
*rand.Rand, p DB, kv KeyValue) { } rkey, _, err := db.TestFind(key) Expect(err).Should(HaveOccurred(), "Find for key %q yield key %q", key, rkey) - Expect(err).Should(Equal(util.ErrNotFound)) - }) - } + Expect(err).Should(Equal(errors.ErrNotFound)) + } + }) - if db, ok := p.(Get); ok { - It("Should only find exact key with Get", func() { + It("Should only find exact key with Get", func() { + if db, ok := p.(Get); ok { ShuffledIndex(nil, kv.Len(), 1, func(i int) { key_, key, value := kv.IndexInexact(i) @@ -66,14 +80,34 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) { if len(key_) > 0 { _, err = db.TestGet(key_) Expect(err).Should(HaveOccurred(), "Error for key %q", key_) - Expect(err).Should(Equal(util.ErrNotFound)) + Expect(err).Should(Equal(errors.ErrNotFound)) } }) - }) - } + } + }) - if db, ok := p.(NewIterator); ok { - TestIter := func(r *util.Range, _kv KeyValue) { + It("Should only find present key with Has", func() { + if db, ok := p.(Has); ok { + ShuffledIndex(nil, kv.Len(), 1, func(i int) { + key_, key, _ := kv.IndexInexact(i) + + // Using exact key. + ret, err := db.TestHas(key) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key) + Expect(ret).Should(BeTrue(), "False for key %q", key) + + // Using inexact key. 
+ if len(key_) > 0 { + ret, err = db.TestHas(key_) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key_) + Expect(ret).ShouldNot(BeTrue(), "True for key %q", key) + } + }) + } + }) + + TestIter := func(r *util.Range, _kv KeyValue) { + if db, ok := p.(NewIterator); ok { iter := db.TestNewIterator(r) Expect(iter.Error()).ShouldNot(HaveOccurred()) @@ -83,46 +117,62 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) { } DoIteratorTesting(&t) + iter.Release() + } + } + + It("Should iterates and seeks correctly", func(done Done) { + TestIter(nil, kv.Clone()) + done <- true + }, 3.0) + + RandomIndex(rnd, kv.Len(), Min(kv.Len(), 50), func(i int) { + type slice struct { + r *util.Range + start, limit int } - It("Should iterates and seeks correctly", func(done Done) { - TestIter(nil, kv.Clone()) - done <- true - }, 3.0) - - RandomIndex(rnd, kv.Len(), kv.Len(), func(i int) { - type slice struct { - r *util.Range - start, limit int - } - - key_, _, _ := kv.IndexInexact(i) - for _, x := range []slice{ - {&util.Range{Start: key_, Limit: nil}, i, kv.Len()}, - {&util.Range{Start: nil, Limit: key_}, 0, i}, - } { - It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", x.start, x.limit), func(done Done) { - TestIter(x.r, kv.Slice(x.start, x.limit)) - done <- true - }, 3.0) - } - }) - - RandomRange(rnd, kv.Len(), kv.Len(), func(start, limit int) { - It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) { - r := kv.Range(start, limit) - TestIter(&r, kv.Slice(start, limit)) + key_, _, _ := kv.IndexInexact(i) + for _, x := range []slice{ + {&util.Range{Start: key_, Limit: nil}, i, kv.Len()}, + {&util.Range{Start: nil, Limit: key_}, 0, i}, + } { + It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. 
%d", x.start, x.limit), func(done Done) { + TestIter(x.r, kv.Slice(x.start, x.limit)) done <- true }, 3.0) - }) - } + } + }) + + RandomRange(rnd, kv.Len(), Min(kv.Len(), 50), func(start, limit int) { + It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) { + r := kv.Range(start, limit) + TestIter(&r, kv.Slice(start, limit)) + done <- true + }, 3.0) + }) } -func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) { +func AllKeyValueTesting(rnd *rand.Rand, body, setup func(KeyValue) DB, teardown func(DB)) { Test := func(kv *KeyValue) func() { return func() { - db := body(*kv) - KeyValueTesting(rnd, db, *kv) + var p DB + if setup != nil { + Defer("setup", func() { + p = setup(*kv) + }) + } + if teardown != nil { + Defer("teardown", func() { + teardown(p) + }) + } + if body != nil { + p = body(*kv) + } + KeyValueTesting(rnd, *kv, p, func(KeyValue) DB { + return p + }, nil) } } @@ -133,4 +183,5 @@ func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) { Describe("with big value", Test(KeyValue_BigValue())) Describe("with special key", Test(KeyValue_SpecialKey())) Describe("with multiple key/value", Test(KeyValue_MultipleKeyValue())) + Describe("with generated key/value", Test(KeyValue_Generate(nil, 120, 1, 50, 10, 120))) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go index 4c8c3f0a0..74757e22e 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go @@ -397,6 +397,7 @@ func (s *Storage) logI(format string, args ...interface{}) { func (s *Storage) Log(str string) { s.log(1, "Log: "+str) + s.Storage.Log(str) } func (s *Storage) Lock() (r util.Releaser, err error) { diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go 
b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go index 516e4e50d..4a933c12c 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go @@ -155,3 +155,17 @@ func RandomRange(rnd *rand.Rand, n, round int, fn func(start, limit int)) { } return } + +func Max(x, y int) int { + if x > y { + return x + } + return y +} + +func Min(x, y int) int { + if x < y { + return x + } + return y +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go index 1678e9e6e..493f5948a 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go @@ -34,6 +34,10 @@ func (t *testingDB) TestGet(key []byte) (value []byte, err error) { return t.Get(key, t.ro) } +func (t *testingDB) TestHas(key []byte) (ret bool, err error) { + return t.Has(key, t.ro) +} + func (t *testingDB) TestNewIterator(slice *util.Range) iterator.Iterator { return t.NewIterator(slice, t.ro) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go index 95bfc4320..da72f99d0 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go @@ -14,10 +14,10 @@ import ( ) func shorten(str string) string { - if len(str) <= 4 { + if len(str) <= 8 { return str } - return str[:1] + ".." + str[len(str)-1:] + return str[:3] + ".." 
+ str[len(str)-3:] } var bunits = [...]string{"", "Ki", "Mi", "Gi"} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go index 554e28ebd..2b8453d75 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go @@ -8,6 +8,7 @@ package util import ( "fmt" + "sync" "sync/atomic" "time" ) @@ -19,17 +20,16 @@ type buffer struct { // BufferPool is a 'buffer pool'. type BufferPool struct { - pool [6]chan []byte - size [5]uint32 - sizeMiss [5]uint32 - sizeHalf [5]uint32 - baseline [4]int - baselinex0 int - baselinex1 int - baseline0 int - baseline1 int - baseline2 int - close chan struct{} + pool [6]chan []byte + size [5]uint32 + sizeMiss [5]uint32 + sizeHalf [5]uint32 + baseline [4]int + baseline0 int + + mu sync.RWMutex + closed bool + closeC chan struct{} get uint32 put uint32 @@ -54,6 +54,17 @@ func (p *BufferPool) poolNum(n int) int { // Get returns buffer with length of n. func (p *BufferPool) Get(n int) []byte { + if p == nil { + return make([]byte, n) + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.closed { + return make([]byte, n) + } + atomic.AddUint32(&p.get, 1) poolNum := p.poolNum(n) @@ -145,6 +156,17 @@ func (p *BufferPool) Get(n int) []byte { // Put adds given buffer to the pool. 
func (p *BufferPool) Put(b []byte) { + if p == nil { + return + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.closed { + return + } + atomic.AddUint32(&p.put, 1) pool := p.pool[p.poolNum(cap(b))] @@ -156,13 +178,23 @@ func (p *BufferPool) Put(b []byte) { } func (p *BufferPool) Close() { - select { - case p.close <- struct{}{}: - default: + if p == nil { + return } + + p.mu.Lock() + if !p.closed { + p.closed = true + p.closeC <- struct{}{} + } + p.mu.Unlock() } func (p *BufferPool) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}", p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss) } @@ -178,7 +210,8 @@ func (p *BufferPool) drain() { default: } } - case <-p.close: + case <-p.closeC: + close(p.closeC) for _, ch := range p.pool { close(ch) } @@ -195,7 +228,7 @@ func NewBufferPool(baseline int) *BufferPool { p := &BufferPool{ baseline0: baseline, baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4}, - close: make(chan struct{}, 1), + closeC: make(chan struct{}, 1), } for i, cap := range []int{2, 2, 4, 4, 2, 1} { p.pool[i] = make(chan []byte, cap) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go index 229c7d41f..f35976865 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go @@ -12,7 +12,8 @@ import ( ) var ( - ErrNotFound = errors.New("leveldb: not found") + ErrReleased = errors.New("leveldb: resource already relesed") + ErrHasReleaser = errors.New("leveldb: releaser already defined") ) // Releaser is the interface that wraps the basic Release method. @@ -27,23 +28,46 @@ type ReleaseSetter interface { // SetReleaser associates the given releaser to the resources. 
The // releaser will be called once coresponding resources released. // Calling SetReleaser with nil will clear the releaser. + // + // This will panic if a releaser already present or coresponding + // resource is already released. Releaser should be cleared first + // before assigned a new one. SetReleaser(releaser Releaser) } // BasicReleaser provides basic implementation of Releaser and ReleaseSetter. type BasicReleaser struct { releaser Releaser + released bool +} + +// Released returns whether Release method already called. +func (r *BasicReleaser) Released() bool { + return r.released } // Release implements Releaser.Release. func (r *BasicReleaser) Release() { - if r.releaser != nil { - r.releaser.Release() - r.releaser = nil + if !r.released { + if r.releaser != nil { + r.releaser.Release() + r.releaser = nil + } + r.released = true } } // SetReleaser implements ReleaseSetter.SetReleaser. func (r *BasicReleaser) SetReleaser(releaser Releaser) { + if r.released { + panic(ErrReleased) + } + if r.releaser != nil && releaser != nil { + panic(ErrHasReleaser) + } r.releaser = releaser } + +type NoopReleaser struct{} + +func (NoopReleaser) Release() {} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go index 3ab8daf4b..345015d14 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go @@ -7,7 +7,6 @@ package leveldb import ( - "errors" "sync/atomic" "unsafe" @@ -16,19 +15,6 @@ import ( "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util" ) -var levelMaxSize [kNumLevels]float64 - -func init() { - // Precompute max size of each level - for level := range levelMaxSize { - res := float64(10 * 1048576) - for n := level; n > 1; n-- { - res *= 10 - } - levelMaxSize[level] = res - } -} - type tSet struct { level int table *tFile @@ 
-37,7 +23,7 @@ type tSet struct { type version struct { s *session - tables [kNumLevels]tFiles + tables []tFiles // Level that should be compacted next and its compaction score. // Score < 1 means compaction is not strictly needed. These fields @@ -47,11 +33,16 @@ type version struct { cSeek unsafe.Pointer - ref int + ref int + // Succeeding version. next *version } -func (v *version) release_NB() { +func newVersion(s *session) *version { + return &version{s: s, tables: make([]tFiles, s.o.GetNumLevel())} +} + +func (v *version) releaseNB() { v.ref-- if v.ref > 0 { return @@ -77,13 +68,13 @@ func (v *version) release_NB() { } } - v.next.release_NB() + v.next.releaseNB() v.next = nil } func (v *version) release() { v.s.vmu.Lock() - v.release_NB() + v.releaseNB() v.s.vmu.Unlock() } @@ -123,17 +114,18 @@ func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, l } } -func (v *version) get(ikey iKey, ro *opt.ReadOptions) (value []byte, tcomp bool, err error) { +func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { ukey := ikey.ukey() var ( tset *tSet tseek bool - l0found bool - l0seq uint64 - l0vt vType - l0val []byte + // Level-0. 
+ zfound bool + zseq uint64 + zkt kType + zval []byte ) err = ErrNotFound @@ -150,55 +142,60 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions) (value []byte, tcomp bool, } } - ikey__, val_, err_ := v.s.tops.find(t, ikey, ro) - switch err_ { + var ( + fikey, fval []byte + ferr error + ) + if noValue { + fikey, ferr = v.s.tops.findKey(t, ikey, ro) + } else { + fikey, fval, ferr = v.s.tops.find(t, ikey, ro) + } + switch ferr { case nil: case ErrNotFound: return true default: - err = err_ + err = ferr return false } - ikey_ := iKey(ikey__) - if seq, vt, ok := ikey_.parseNum(); ok { - if v.s.icmp.uCompare(ukey, ikey_.ukey()) != 0 { - return true - } - - if level == 0 { - if seq >= l0seq { - l0found = true - l0seq = seq - l0vt = vt - l0val = val_ + if fukey, fseq, fkt, fkerr := parseIkey(fikey); fkerr == nil { + if v.s.icmp.uCompare(ukey, fukey) == 0 { + if level == 0 { + if fseq >= zseq { + zfound = true + zseq = fseq + zkt = fkt + zval = fval + } + } else { + switch fkt { + case ktVal: + value = fval + err = nil + case ktDel: + default: + panic("leveldb: invalid iKey type") + } + return false } - } else { - switch vt { - case tVal: - value = val_ - err = nil - case tDel: - default: - panic("leveldb: invalid internal key type") - } - return false } } else { - err = errors.New("leveldb: internal key corrupted") + err = fkerr return false } return true }, func(level int) bool { - if l0found { - switch l0vt { - case tVal: - value = l0val + if zfound { + switch zkt { + case ktVal: + value = zval err = nil - case tDel: + case ktDel: default: - panic("leveldb: invalid internal key type") + panic("leveldb: invalid iKey type") } return false } @@ -216,13 +213,13 @@ func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []it its = append(its, it) } - strict := v.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator) + strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader) for _, tables := range v.tables[1:] { if len(tables) == 0 { 
continue } - it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict, true) + it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict) its = append(its, it) } @@ -230,7 +227,7 @@ func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []it } func (v *version) newStaging() *versionStaging { - return &versionStaging{base: v} + return &versionStaging{base: v, tables: make([]tablesScratch, v.s.o.GetNumLevel())} } // Spawn a new version based on this version. @@ -285,12 +282,13 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) { func (v *version) pickLevel(umin, umax []byte) (level int) { if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) { var overlaps tFiles - for ; level < kMaxMemCompactLevel; level++ { + maxLevel := v.s.o.GetMaxMemCompationLevel() + for ; level < maxLevel; level++ { if v.tables[level+1].overlaps(v.s.icmp, umin, umax, false) { break } overlaps = v.tables[level+2].getOverlaps(overlaps, v.s.icmp, umin, umax, false) - if overlaps.size() > kMaxGrandParentOverlapBytes { + if overlaps.size() > uint64(v.s.o.GetCompactionGPOverlaps(level)) { break } } @@ -318,9 +316,9 @@ func (v *version) computeCompaction() { // file size is small (perhaps because of a small write-buffer // setting, or very high compression ratios, or lots of // overwrites/deletions). 
- score = float64(len(tables)) / kL0_CompactionTrigger + score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger()) } else { - score = float64(tables.size()) / levelMaxSize[level] + score = float64(tables.size()) / float64(v.s.o.GetCompactionTotalSize(level)) } if score > bestScore { @@ -337,12 +335,14 @@ func (v *version) needCompaction() bool { return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil } +type tablesScratch struct { + added map[uint64]atRecord + deleted map[uint64]struct{} +} + type versionStaging struct { base *version - tables [kNumLevels]struct { - added map[uint64]ntRecord - deleted map[uint64]struct{} - } + tables []tablesScratch } func (p *versionStaging) commit(r *sessionRecord) { @@ -367,7 +367,7 @@ func (p *versionStaging) commit(r *sessionRecord) { tm := &(p.tables[r.level]) if tm.added == nil { - tm.added = make(map[uint64]ntRecord) + tm.added = make(map[uint64]atRecord) } tm.added[r.num] = r @@ -379,7 +379,7 @@ func (p *versionStaging) commit(r *sessionRecord) { func (p *versionStaging) finish() *version { // Build new version. - nv := &version{s: p.base.s} + nv := newVersion(p.base.s) for level, tm := range p.tables { btables := p.base.tables[level] @@ -402,7 +402,7 @@ func (p *versionStaging) finish() *version { // New tables. for _, r := range tm.added { - nt = append(nt, r.makeFile(p.base.s)) + nt = append(nt, p.base.s.tableFileFromRecord(r)) } // Sort tables. 
@@ -429,7 +429,7 @@ func (vr *versionReleaser) Release() { v := vr.v v.s.vmu.Lock() if !vr.once { - v.release_NB() + v.releaseNB() vr.once = true } v.s.vmu.Unlock() diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/decode.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/decode.go similarity index 100% rename from Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/decode.go rename to Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/decode.go diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/encode.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/encode.go similarity index 100% rename from Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/encode.go rename to Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/encode.go diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy.go similarity index 100% rename from Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy.go rename to Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy.go diff --git a/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy_test.go b/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy_test.go similarity index 100% rename from Godeps/_workspace/src/code.google.com/p/snappy-go/snappy/snappy_test.go rename to Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy/snappy_test.go diff --git a/blocks/blockstore/blockstore.go b/blocks/blockstore/blockstore.go index 68ccc7c74..3fe742ef8 100644 --- a/blocks/blockstore/blockstore.go +++ b/blocks/blockstore/blockstore.go @@ -45,7 +45,13 @@ func (bs *blockstore) Get(k u.Key) (*blocks.Block, error) { } func (bs *blockstore) Put(block *blocks.Block) error { - return bs.datastore.Put(block.Key().DsKey(), block.Data) + // Has is cheaper than Put, so skip the write when Has succeeds and reports the key present; on a Has error fall through to Put. + k := block.Key().DsKey() + exists, err := bs.datastore.Has(k) + if err == nil && exists { + return nil 
// already stored. + } + return bs.datastore.Put(k, block.Data) } func (bs *blockstore) Has(k u.Key) (bool, error) { diff --git a/blockservice/blockservice.go b/blockservice/blockservice.go index 0ebe30a4d..f44eaa0f5 100644 --- a/blockservice/blockservice.go +++ b/blockservice/blockservice.go @@ -44,25 +44,10 @@ func New(bs blockstore.Blockstore, rem exchange.Interface) (*BlockService, error func (s *BlockService) AddBlock(b *blocks.Block) (u.Key, error) { k := b.Key() log.Debugf("blockservice: storing [%s] in datastore", k) - // TODO(brian): define a block datastore with a Put method which accepts a - // block parameter - - // check if we have it before adding. this is an extra read, but large writes - // are more expensive. - // TODO(jbenet) cheaper has. https://github.com/jbenet/go-datastore/issues/6 - has, err := s.Blockstore.Has(k) + err := s.Blockstore.Put(b) if err != nil { return k, err } - if has { - log.Debugf("blockservice: storing [%s] in datastore (already stored)", k) - } else { - log.Debugf("blockservice: storing [%s] in datastore", k) - err := s.Blockstore.Put(b) - if err != nil { - return k, err - } - } // TODO this operation rate-limits blockservice operations, we should // consider moving this to an sync process.