Merge pull request #408 from jbenet/update-datastore

Updated Datastore, to have a Has.
This commit is contained in:
Juan Batiz-Benet 2014-12-06 16:29:38 -08:00
commit 6d488bd9f6
65 changed files with 4105 additions and 1805 deletions

13
Godeps/Godeps.json generated
View File

@ -43,11 +43,6 @@
"Comment": "go.r60-152",
"Rev": "36be16571e14f67e114bb0af619e5de2c1591679"
},
{
"ImportPath": "code.google.com/p/snappy-go/snappy",
"Comment": "null-15",
"Rev": "12e4b4183793ac4b061921e7980845e750679fd0"
},
{
"ImportPath": "github.com/braintree/manners",
"Comment": "0.3.1-2-g5280e25",
@ -99,7 +94,7 @@
},
{
"ImportPath": "github.com/jbenet/go-datastore",
"Rev": "b31aad9b9b22e46d99a270ed5aebb259fab64dcc"
"Rev": "6a1c83bda2a71a9bdc936749fdb507df958ed949"
},
{
"ImportPath": "github.com/jbenet/go-is-domain",
@ -150,7 +145,11 @@
},
{
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
"Rev": "99056d50e56252fbe0021d5c893defca5a76baf8"
"Rev": "871eee0a7546bb7d1b2795142e29c4534abc49b3"
},
{
"ImportPath": "github.com/syndtr/gosnappy/snappy",
"Rev": "ce8acff4829e0c2458a67ead32390ac0a381c862"
},
{
"ImportPath": "gopkg.in/natefinch/lumberjack.v2",

View File

@ -1,6 +1,6 @@
{
"ImportPath": "github.com/jbenet/go-datastore",
"GoVersion": "go1.3.3",
"GoVersion": "go1.3",
"Packages": [
"./..."
],
@ -10,11 +10,6 @@
"Comment": "null-12",
"Rev": "7dda39b2e7d5e265014674c5af696ba4186679e9"
},
{
"ImportPath": "code.google.com/p/snappy-go/snappy",
"Comment": "null-15",
"Rev": "12e4b4183793ac4b061921e7980845e750679fd0"
},
{
"ImportPath": "github.com/codahale/blake2",
"Rev": "3fa823583afba430e8fc7cdbcc670dbf90bfacc4"
@ -33,7 +28,11 @@
},
{
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
"Rev": "cd2b8f743192883ab9fbc5f070ebda1dc90f3732"
"Rev": "871eee0a7546bb7d1b2795142e29c4534abc49b3"
},
{
"ImportPath": "github.com/syndtr/gosnappy/snappy",
"Rev": "ce8acff4829e0c2458a67ead32390ac0a381c862"
},
{
"ImportPath": "gopkg.in/check.v1",

View File

@ -58,7 +58,7 @@ func (d *datastore) Get(key ds.Key) (value interface{}, err error) {
}
func (d *datastore) Has(key ds.Key) (exists bool, err error) {
return ds.GetBackedHas(d, key)
return d.DB.Has(key.Bytes(), nil)
}
func (d *datastore) Delete(key ds.Key) (err error) {

View File

@ -8,65 +8,84 @@ package leveldb
import (
"encoding/binary"
"errors"
"fmt"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb"
)
var (
errBatchTooShort = errors.New("leveldb: batch is too short")
errBatchBadRecord = errors.New("leveldb: bad record in batch")
type ErrBatchCorrupted struct {
Reason string
}
func (e *ErrBatchCorrupted) Error() string {
return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason)
}
func newErrBatchCorrupted(reason string) error {
return errors.NewErrCorrupted(nil, &ErrBatchCorrupted{reason})
}
const (
batchHdrLen = 8 + 4
batchGrowRec = 3000
)
const kBatchHdrLen = 8 + 4
type batchReplay interface {
put(key, value []byte, seq uint64)
delete(key []byte, seq uint64)
type BatchReplay interface {
Put(key, value []byte)
Delete(key []byte)
}
// Batch is a write batch.
type Batch struct {
buf []byte
data []byte
rLen, bLen int
seq uint64
sync bool
}
func (b *Batch) grow(n int) {
off := len(b.buf)
off := len(b.data)
if off == 0 {
// include headers
off = kBatchHdrLen
n += off
off = batchHdrLen
if b.data != nil {
b.data = b.data[:off]
}
}
if cap(b.buf)-off >= n {
return
if cap(b.data)-off < n {
if b.data == nil {
b.data = make([]byte, off, off+n)
} else {
odata := b.data
div := 1
if b.rLen > batchGrowRec {
div = b.rLen / batchGrowRec
}
b.data = make([]byte, off, off+n+(off-batchHdrLen)/div)
copy(b.data, odata)
}
}
buf := make([]byte, 2*cap(b.buf)+n)
copy(buf, b.buf)
b.buf = buf[:off]
}
func (b *Batch) appendRec(t vType, key, value []byte) {
func (b *Batch) appendRec(kt kType, key, value []byte) {
n := 1 + binary.MaxVarintLen32 + len(key)
if t == tVal {
if kt == ktVal {
n += binary.MaxVarintLen32 + len(value)
}
b.grow(n)
off := len(b.buf)
buf := b.buf[:off+n]
buf[off] = byte(t)
off := len(b.data)
data := b.data[:off+n]
data[off] = byte(kt)
off += 1
off += binary.PutUvarint(buf[off:], uint64(len(key)))
copy(buf[off:], key)
off += binary.PutUvarint(data[off:], uint64(len(key)))
copy(data[off:], key)
off += len(key)
if t == tVal {
off += binary.PutUvarint(buf[off:], uint64(len(value)))
copy(buf[off:], value)
if kt == ktVal {
off += binary.PutUvarint(data[off:], uint64(len(value)))
copy(data[off:], value)
off += len(value)
}
b.buf = buf[:off]
b.data = data[:off]
b.rLen++
// Include 8-byte ikey header
b.bLen += len(key) + len(value) + 8
@ -75,18 +94,51 @@ func (b *Batch) appendRec(t vType, key, value []byte) {
// Put appends 'put operation' of the given key/value pair to the batch.
// It is safe to modify the contents of the argument after Put returns.
func (b *Batch) Put(key, value []byte) {
b.appendRec(tVal, key, value)
b.appendRec(ktVal, key, value)
}
// Delete appends 'delete operation' of the given key to the batch.
// It is safe to modify the contents of the argument after Delete returns.
func (b *Batch) Delete(key []byte) {
b.appendRec(tDel, key, nil)
b.appendRec(ktDel, key, nil)
}
// Dump dumps batch contents. The returned slice can be loaded into the
// batch using Load method.
// The returned slice is not its own copy, so the contents should not be
// modified.
func (b *Batch) Dump() []byte {
return b.encode()
}
// Load loads given slice into the batch. Previous contents of the batch
// will be discarded.
// The given slice will not be copied and will be used as batch buffer, so
// it is not safe to modify the contents of the slice.
func (b *Batch) Load(data []byte) error {
return b.decode(0, data)
}
// Replay replays batch contents.
func (b *Batch) Replay(r BatchReplay) error {
return b.decodeRec(func(i int, kt kType, key, value []byte) {
switch kt {
case ktVal:
r.Put(key, value)
case ktDel:
r.Delete(key)
}
})
}
// Len returns number of records in the batch.
func (b *Batch) Len() int {
return b.rLen
}
// Reset resets the batch.
func (b *Batch) Reset() {
b.buf = nil
b.data = b.data[:0]
b.seq = 0
b.rLen = 0
b.bLen = 0
@ -97,24 +149,10 @@ func (b *Batch) init(sync bool) {
b.sync = sync
}
func (b *Batch) put(key, value []byte, seq uint64) {
if b.rLen == 0 {
b.seq = seq
}
b.Put(key, value)
}
func (b *Batch) delete(key []byte, seq uint64) {
if b.rLen == 0 {
b.seq = seq
}
b.Delete(key)
}
func (b *Batch) append(p *Batch) {
if p.rLen > 0 {
b.grow(len(p.buf) - kBatchHdrLen)
b.buf = append(b.buf, p.buf[kBatchHdrLen:]...)
b.grow(len(p.data) - batchHdrLen)
b.data = append(b.data, p.data[batchHdrLen:]...)
b.rLen += p.rLen
}
if p.sync {
@ -122,95 +160,93 @@ func (b *Batch) append(p *Batch) {
}
}
func (b *Batch) len() int {
return b.rLen
}
// size returns sums of key/value pair length plus 8-bytes ikey.
func (b *Batch) size() int {
return b.bLen
}
func (b *Batch) encode() []byte {
b.grow(0)
binary.LittleEndian.PutUint64(b.buf, b.seq)
binary.LittleEndian.PutUint32(b.buf[8:], uint32(b.rLen))
binary.LittleEndian.PutUint64(b.data, b.seq)
binary.LittleEndian.PutUint32(b.data[8:], uint32(b.rLen))
return b.buf
return b.data
}
func (b *Batch) decode(buf []byte) error {
if len(buf) < kBatchHdrLen {
return errBatchTooShort
func (b *Batch) decode(prevSeq uint64, data []byte) error {
if len(data) < batchHdrLen {
return newErrBatchCorrupted("too short")
}
b.seq = binary.LittleEndian.Uint64(buf)
b.rLen = int(binary.LittleEndian.Uint32(buf[8:]))
b.seq = binary.LittleEndian.Uint64(data)
if b.seq < prevSeq {
return newErrBatchCorrupted("invalid sequence number")
}
b.rLen = int(binary.LittleEndian.Uint32(data[8:]))
if b.rLen < 0 {
return newErrBatchCorrupted("invalid records length")
}
// No need to be precise at this point, it won't be used anyway
b.bLen = len(buf) - kBatchHdrLen
b.buf = buf
b.bLen = len(data) - batchHdrLen
b.data = data
return nil
}
func (b *Batch) decodeRec(f func(i int, t vType, key, value []byte)) error {
off := kBatchHdrLen
func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error) {
off := batchHdrLen
for i := 0; i < b.rLen; i++ {
if off >= len(b.buf) {
return errors.New("leveldb: invalid batch record length")
if off >= len(b.data) {
return newErrBatchCorrupted("invalid records length")
}
t := vType(b.buf[off])
if t > tVal {
return errors.New("leveldb: invalid batch record type in batch")
kt := kType(b.data[off])
if kt > ktVal {
return newErrBatchCorrupted("bad record: invalid type")
}
off += 1
x, n := binary.Uvarint(b.buf[off:])
x, n := binary.Uvarint(b.data[off:])
off += n
if n <= 0 || off+int(x) > len(b.buf) {
return errBatchBadRecord
if n <= 0 || off+int(x) > len(b.data) {
return newErrBatchCorrupted("bad record: invalid key length")
}
key := b.buf[off : off+int(x)]
key := b.data[off : off+int(x)]
off += int(x)
var value []byte
if t == tVal {
x, n := binary.Uvarint(b.buf[off:])
if kt == ktVal {
x, n := binary.Uvarint(b.data[off:])
off += n
if n <= 0 || off+int(x) > len(b.buf) {
return errBatchBadRecord
if n <= 0 || off+int(x) > len(b.data) {
return newErrBatchCorrupted("bad record: invalid value length")
}
value = b.buf[off : off+int(x)]
value = b.data[off : off+int(x)]
off += int(x)
}
f(i, t, key, value)
f(i, kt, key, value)
}
return nil
}
func (b *Batch) replay(to batchReplay) error {
return b.decodeRec(func(i int, t vType, key, value []byte) {
switch t {
case tVal:
to.put(key, value, b.seq+uint64(i))
case tDel:
to.delete(key, b.seq+uint64(i))
}
})
}
func (b *Batch) memReplay(to *memdb.DB) error {
return b.decodeRec(func(i int, t vType, key, value []byte) {
ikey := newIKey(key, b.seq+uint64(i), t)
return b.decodeRec(func(i int, kt kType, key, value []byte) {
ikey := newIkey(key, b.seq+uint64(i), kt)
to.Put(ikey, value)
})
}
func (b *Batch) memDecodeAndReplay(prevSeq uint64, data []byte, to *memdb.DB) error {
if err := b.decode(prevSeq, data); err != nil {
return err
}
return b.memReplay(to)
}
func (b *Batch) revertMemReplay(to *memdb.DB) error {
return b.decodeRec(func(i int, t vType, key, value []byte) {
ikey := newIKey(key, b.seq+uint64(i), t)
return b.decodeRec(func(i int, kt kType, key, value []byte) {
ikey := newIkey(key, b.seq+uint64(i), kt)
to.Delete(ikey)
})
}

View File

@ -15,7 +15,7 @@ import (
)
type tbRec struct {
t vType
kt kType
key, value []byte
}
@ -23,39 +23,39 @@ type testBatch struct {
rec []*tbRec
}
func (p *testBatch) put(key, value []byte, seq uint64) {
p.rec = append(p.rec, &tbRec{tVal, key, value})
func (p *testBatch) Put(key, value []byte) {
p.rec = append(p.rec, &tbRec{ktVal, key, value})
}
func (p *testBatch) delete(key []byte, seq uint64) {
p.rec = append(p.rec, &tbRec{tDel, key, nil})
func (p *testBatch) Delete(key []byte) {
p.rec = append(p.rec, &tbRec{ktDel, key, nil})
}
func compareBatch(t *testing.T, b1, b2 *Batch) {
if b1.seq != b2.seq {
t.Errorf("invalid seq number want %d, got %d", b1.seq, b2.seq)
}
if b1.len() != b2.len() {
t.Fatalf("invalid record length want %d, got %d", b1.len(), b2.len())
if b1.Len() != b2.Len() {
t.Fatalf("invalid record length want %d, got %d", b1.Len(), b2.Len())
}
p1, p2 := new(testBatch), new(testBatch)
err := b1.replay(p1)
err := b1.Replay(p1)
if err != nil {
t.Fatal("error when replaying batch 1: ", err)
}
err = b2.replay(p2)
err = b2.Replay(p2)
if err != nil {
t.Fatal("error when replaying batch 2: ", err)
}
for i := range p1.rec {
r1, r2 := p1.rec[i], p2.rec[i]
if r1.t != r2.t {
t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.t, r2.t)
if r1.kt != r2.kt {
t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.kt, r2.kt)
}
if !bytes.Equal(r1.key, r2.key) {
t.Errorf("invalid key on record '%d' want %s, got %s", i, string(r1.key), string(r2.key))
}
if r1.t == tVal {
if r1.kt == ktVal {
if !bytes.Equal(r1.value, r2.value) {
t.Errorf("invalid value on record '%d' want %s, got %s", i, string(r1.value), string(r2.value))
}
@ -75,7 +75,7 @@ func TestBatch_EncodeDecode(t *testing.T) {
b1.Delete([]byte("k"))
buf := b1.encode()
b2 := new(Batch)
err := b2.decode(buf)
err := b2.decode(0, buf)
if err != nil {
t.Error("error when decoding batch: ", err)
}

View File

@ -128,7 +128,8 @@ const (
type nodeState int
const (
nodeEffective nodeState = iota
nodeZero nodeState = iota
nodeEffective
nodeEvicted
nodeDeleted
)

View File

@ -7,10 +7,8 @@
package cache
import (
"fmt"
"math/rand"
"runtime"
"strings"
"sync"
"sync/atomic"
"testing"
@ -225,16 +223,16 @@ func TestLRUCache_Purge(t *testing.T) {
}
type testingCacheObjectCounter struct {
created uint32
released uint32
created uint
released uint
}
func (c *testingCacheObjectCounter) createOne() {
atomic.AddUint32(&c.created, 1)
c.created++
}
func (c *testingCacheObjectCounter) releaseOne() {
atomic.AddUint32(&c.released, 1)
c.released++
}
type testingCacheObject struct {
@ -243,17 +241,75 @@ type testingCacheObject struct {
ns, key uint64
releaseCalled uint32
releaseCalled bool
}
func (x *testingCacheObject) Release() {
if atomic.CompareAndSwapUint32(&x.releaseCalled, 0, 1) {
if !x.releaseCalled {
x.releaseCalled = true
x.cnt.releaseOne()
} else {
x.t.Errorf("duplicate setfin NS#%d KEY#%s", x.ns, x.key)
x.t.Errorf("duplicate setfin NS#%d KEY#%d", x.ns, x.key)
}
}
func TestLRUCache_ConcurrentSetGet(t *testing.T) {
runtime.GOMAXPROCS(runtime.NumCPU())
seed := time.Now().UnixNano()
t.Logf("seed=%d", seed)
const (
N = 2000000
M = 4000
C = 3
)
var set, get uint32
wg := &sync.WaitGroup{}
c := NewLRUCache(M / 4)
for ni := uint64(0); ni < C; ni++ {
r0 := rand.New(rand.NewSource(seed + int64(ni)))
r1 := rand.New(rand.NewSource(seed + int64(ni) + 1))
ns := c.GetNamespace(ni)
wg.Add(2)
go func(ns Namespace, r *rand.Rand) {
for i := 0; i < N; i++ {
x := uint64(r.Int63n(M))
o := ns.Get(x, func() (int, interface{}) {
atomic.AddUint32(&set, 1)
return 1, x
})
if v := o.Value().(uint64); v != x {
t.Errorf("#%d invalid value, got=%d", x, v)
}
o.Release()
}
wg.Done()
}(ns, r0)
go func(ns Namespace, r *rand.Rand) {
for i := 0; i < N; i++ {
x := uint64(r.Int63n(M))
o := ns.Get(x, nil)
if o != nil {
atomic.AddUint32(&get, 1)
if v := o.Value().(uint64); v != x {
t.Errorf("#%d invalid value, got=%d", x, v)
}
o.Release()
}
}
wg.Done()
}(ns, r1)
}
wg.Wait()
t.Logf("set=%d get=%d", set, get)
}
func TestLRUCache_Finalizer(t *testing.T) {
const (
capacity = 100
@ -262,10 +318,6 @@ func TestLRUCache_Finalizer(t *testing.T) {
keymax = 8000
)
runtime.GOMAXPROCS(runtime.NumCPU())
defer runtime.GOMAXPROCS(1)
wg := &sync.WaitGroup{}
cnt := &testingCacheObjectCounter{}
c := NewLRUCache(capacity)
@ -273,38 +325,40 @@ func TestLRUCache_Finalizer(t *testing.T) {
type instance struct {
seed int64
rnd *rand.Rand
ns uint64
effective int32
nsid uint64
ns Namespace
effective int
handles []Handle
handlesMap map[uint64]int
delete bool
purge bool
zap bool
wantDel int32
delfinCalledAll int32
delfinCalledEff int32
purgefinCalled int32
wantDel int
delfinCalled int
delfinCalledAll int
delfinCalledEff int
purgefinCalled int
}
instanceGet := func(p *instance, ns Namespace, key uint64) {
h := ns.Get(key, func() (charge int, value interface{}) {
instanceGet := func(p *instance, key uint64) {
h := p.ns.Get(key, func() (charge int, value interface{}) {
to := &testingCacheObject{
t: t, cnt: cnt,
ns: p.ns,
ns: p.nsid,
key: key,
}
atomic.AddInt32(&p.effective, 1)
p.effective++
cnt.createOne()
return 1, releaserFunc{func() {
to.Release()
atomic.AddInt32(&p.effective, -1)
p.effective--
}, to}
})
p.handles = append(p.handles, h)
p.handlesMap[key] = p.handlesMap[key] + 1
}
instanceRelease := func(p *instance, ns Namespace, i int) {
instanceRelease := func(p *instance, i int) {
h := p.handles[i]
key := h.Value().(releaserFunc).value.(*testingCacheObject).key
if n := p.handlesMap[key]; n == 0 {
@ -319,55 +373,71 @@ func TestLRUCache_Finalizer(t *testing.T) {
p.handles[len(p.handles) : len(p.handles)+1][0] = nil
}
seeds := make([]int64, goroutines)
instances := make([]instance, goroutines)
seed := time.Now().UnixNano()
t.Logf("seed=%d", seed)
instances := make([]*instance, goroutines)
for i := range instances {
p := &instances[i]
p := &instance{}
p.handlesMap = make(map[uint64]int)
if seeds[i] == 0 {
seeds[i] = time.Now().UnixNano()
}
p.seed = seeds[i]
p.seed = seed + int64(i)
p.rnd = rand.New(rand.NewSource(p.seed))
p.ns = uint64(i)
p.nsid = uint64(i)
p.ns = c.GetNamespace(p.nsid)
p.delete = i%6 == 0
p.purge = i%8 == 0
p.zap = i%12 == 0 || i%3 == 0
instances[i] = p
}
seedsStr := make([]string, len(seeds))
for i, seed := range seeds {
seedsStr[i] = fmt.Sprint(seed)
}
t.Logf("seeds := []int64{%s}", strings.Join(seedsStr, ", "))
// Get and release.
for i := range instances {
p := &instances[i]
wg.Add(1)
go func(p *instance) {
defer wg.Done()
ns := c.GetNamespace(p.ns)
for i := 0; i < iterations; i++ {
if len(p.handles) == 0 || p.rnd.Int()%2 == 0 {
instanceGet(p, ns, uint64(p.rnd.Intn(keymax)))
} else {
instanceRelease(p, ns, p.rnd.Intn(len(p.handles)))
runr := rand.New(rand.NewSource(seed - 1))
run := func(rnd *rand.Rand, x []*instance, init func(p *instance) bool, fn func(p *instance, i int) bool) {
var (
rx []*instance
rn []int
)
if init == nil {
rx = append([]*instance{}, x...)
rn = make([]int, len(x))
} else {
for _, p := range x {
if init(p) {
rx = append(rx, p)
rn = append(rn, 0)
}
}
}(p)
}
for len(rx) > 0 {
i := rand.Intn(len(rx))
if fn(rx[i], rn[i]) {
rn[i]++
} else {
rx = append(rx[:i], rx[i+1:]...)
rn = append(rn[:i], rn[i+1:]...)
}
}
}
wg.Wait()
// Get and release.
run(runr, instances, nil, func(p *instance, i int) bool {
if i < iterations {
if len(p.handles) == 0 || p.rnd.Int()%2 == 0 {
instanceGet(p, uint64(p.rnd.Intn(keymax)))
} else {
instanceRelease(p, p.rnd.Intn(len(p.handles)))
}
return true
} else {
return false
}
})
if used, cap := c.Used(), c.Capacity(); used > cap {
t.Errorf("Used > capacity, used=%d cap=%d", used, cap)
}
// Check effective objects.
for i := range instances {
p := &instances[i]
for i, p := range instances {
if int(p.effective) < len(p.handlesMap) {
t.Errorf("#%d effective objects < acquired handle, eo=%d ah=%d", i, p.effective, len(p.handlesMap))
}
@ -377,103 +447,93 @@ func TestLRUCache_Finalizer(t *testing.T) {
t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size())
}
// Delete and purge.
for i := range instances {
p := &instances[i]
// First delete.
run(runr, instances, func(p *instance) bool {
p.wantDel = p.effective
wg.Add(1)
go func(p *instance) {
defer wg.Done()
ns := c.GetNamespace(p.ns)
if p.delete {
for key := uint64(0); key < keymax; key++ {
_, wantExist := p.handlesMap[key]
gotExist := ns.Delete(key, func(exist, pending bool) {
atomic.AddInt32(&p.delfinCalledAll, 1)
if exist {
atomic.AddInt32(&p.delfinCalledEff, 1)
}
})
if !gotExist && wantExist {
t.Errorf("delete on NS#%d KEY#%d not found", p.ns, key)
}
}
var delfinCalled int
for key := uint64(0); key < keymax; key++ {
func(key uint64) {
gotExist := ns.Delete(key, func(exist, pending bool) {
if exist && !pending {
t.Errorf("delete fin on NS#%d KEY#%d exist and not pending for deletion", p.ns, key)
}
delfinCalled++
})
if gotExist {
t.Errorf("delete on NS#%d KEY#%d found", p.ns, key)
}
}(key)
}
if delfinCalled != keymax {
t.Errorf("(2) #%d not all delete fin called, diff=%d", p.ns, keymax-delfinCalled)
return p.delete
}, func(p *instance, i int) bool {
key := uint64(i)
if key < keymax {
_, wantExist := p.handlesMap[key]
gotExist := p.ns.Delete(key, func(exist, pending bool) {
p.delfinCalledAll++
if exist {
p.delfinCalledEff++
}
})
if !gotExist && wantExist {
t.Errorf("delete on NS#%d KEY#%d not found", p.nsid, key)
}
return true
} else {
return false
}
})
if p.purge {
ns.Purge(func(ns, key uint64) {
atomic.AddInt32(&p.purgefinCalled, 1)
})
// Second delete.
run(runr, instances, func(p *instance) bool {
p.delfinCalled = 0
return p.delete
}, func(p *instance, i int) bool {
key := uint64(i)
if key < keymax {
gotExist := p.ns.Delete(key, func(exist, pending bool) {
if exist && !pending {
t.Errorf("delete fin on NS#%d KEY#%d exist and not pending for deletion", p.nsid, key)
}
p.delfinCalled++
})
if gotExist {
t.Errorf("delete on NS#%d KEY#%d found", p.nsid, key)
}
}(p)
}
wg.Wait()
return true
} else {
if p.delfinCalled != keymax {
t.Errorf("(2) NS#%d not all delete fin called, diff=%d", p.nsid, keymax-p.delfinCalled)
}
return false
}
})
// Purge.
run(runr, instances, func(p *instance) bool {
return p.purge
}, func(p *instance, i int) bool {
p.ns.Purge(func(ns, key uint64) {
p.purgefinCalled++
})
return false
})
if want := int(cnt.created - cnt.released); c.Size() != want {
t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size())
}
// Release.
for i := range instances {
p := &instances[i]
if !p.zap {
wg.Add(1)
go func(p *instance) {
defer wg.Done()
ns := c.GetNamespace(p.ns)
for i := len(p.handles) - 1; i >= 0; i-- {
instanceRelease(p, ns, i)
}
}(p)
run(runr, instances, func(p *instance) bool {
return !p.zap
}, func(p *instance, i int) bool {
if len(p.handles) > 0 {
instanceRelease(p, len(p.handles)-1)
return true
} else {
return false
}
}
wg.Wait()
})
if want := int(cnt.created - cnt.released); c.Size() != want {
t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size())
}
// Zap.
for i := range instances {
p := &instances[i]
if p.zap {
wg.Add(1)
go func(p *instance) {
defer wg.Done()
ns := c.GetNamespace(p.ns)
ns.Zap()
p.handles = nil
p.handlesMap = nil
}(p)
}
}
wg.Wait()
run(runr, instances, func(p *instance) bool {
return p.zap
}, func(p *instance, i int) bool {
p.ns.Zap()
p.handles = nil
p.handlesMap = nil
return false
})
if want := int(cnt.created - cnt.released); c.Size() != want {
t.Errorf("Invalid cache size, want=%d got=%d", want, c.Size())
@ -485,23 +545,21 @@ func TestLRUCache_Finalizer(t *testing.T) {
c.Purge(nil)
for i := range instances {
p := &instances[i]
for _, p := range instances {
if p.delete {
if p.delfinCalledAll != keymax {
t.Errorf("#%d not all delete fin called, purge=%v zap=%v diff=%d", p.ns, p.purge, p.zap, keymax-p.delfinCalledAll)
t.Errorf("#%d not all delete fin called, purge=%v zap=%v diff=%d", p.nsid, p.purge, p.zap, keymax-p.delfinCalledAll)
}
if p.delfinCalledEff != p.wantDel {
t.Errorf("#%d not all effective delete fin called, diff=%d", p.ns, p.wantDel-p.delfinCalledEff)
t.Errorf("#%d not all effective delete fin called, diff=%d", p.nsid, p.wantDel-p.delfinCalledEff)
}
if p.purge && p.purgefinCalled > 0 {
t.Errorf("#%d some purge fin called, delete=%v zap=%v n=%d", p.ns, p.delete, p.zap, p.purgefinCalled)
t.Errorf("#%d some purge fin called, delete=%v zap=%v n=%d", p.nsid, p.delete, p.zap, p.purgefinCalled)
}
} else {
if p.purge {
if p.purgefinCalled != p.wantDel {
t.Errorf("#%d not all purge fin called, delete=%v zap=%v diff=%d", p.ns, p.delete, p.zap, p.wantDel-p.purgefinCalled)
t.Errorf("#%d not all purge fin called, delete=%v zap=%v diff=%d", p.nsid, p.delete, p.zap, p.wantDel-p.purgefinCalled)
}
}
}
@ -512,6 +570,56 @@ func TestLRUCache_Finalizer(t *testing.T) {
}
}
func BenchmarkLRUCache_Set(b *testing.B) {
c := NewLRUCache(0)
ns := c.GetNamespace(0)
b.ResetTimer()
for i := uint64(0); i < uint64(b.N); i++ {
set(ns, i, "", 1, nil)
}
}
func BenchmarkLRUCache_Get(b *testing.B) {
c := NewLRUCache(0)
ns := c.GetNamespace(0)
b.ResetTimer()
for i := uint64(0); i < uint64(b.N); i++ {
set(ns, i, "", 1, nil)
}
b.ResetTimer()
for i := uint64(0); i < uint64(b.N); i++ {
ns.Get(i, nil)
}
}
func BenchmarkLRUCache_Get2(b *testing.B) {
c := NewLRUCache(0)
ns := c.GetNamespace(0)
b.ResetTimer()
for i := uint64(0); i < uint64(b.N); i++ {
set(ns, i, "", 1, nil)
}
b.ResetTimer()
for i := uint64(0); i < uint64(b.N); i++ {
ns.Get(i, func() (charge int, value interface{}) {
return 0, nil
})
}
}
func BenchmarkLRUCache_Release(b *testing.B) {
c := NewLRUCache(0)
ns := c.GetNamespace(0)
handles := make([]Handle, b.N)
for i := uint64(0); i < uint64(b.N); i++ {
handles[i] = set(ns, i, "", 1, nil)
}
b.ResetTimer()
for _, h := range handles {
h.Release()
}
}
func BenchmarkLRUCache_SetRelease(b *testing.B) {
capacity := b.N / 100
if capacity <= 0 {
@ -521,7 +629,7 @@ func BenchmarkLRUCache_SetRelease(b *testing.B) {
ns := c.GetNamespace(0)
b.ResetTimer()
for i := uint64(0); i < uint64(b.N); i++ {
set(ns, i, nil, 1, nil).Release()
set(ns, i, "", 1, nil).Release()
}
}
@ -538,10 +646,10 @@ func BenchmarkLRUCache_SetReleaseTwice(b *testing.B) {
nb := b.N - na
for i := uint64(0); i < uint64(na); i++ {
set(ns, i, nil, 1, nil).Release()
set(ns, i, "", 1, nil).Release()
}
for i := uint64(0); i < uint64(nb); i++ {
set(ns, i, nil, 1, nil).Release()
set(ns, i, "", 1, nil).Release()
}
}

View File

@ -13,6 +13,13 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
// The LLRB implementation were taken from https://github.com/petar/GoLLRB.
// Which contains the following header:
//
// Copyright 2010 Petar Maymounkov. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// lruCache represent a LRU cache state.
type lruCache struct {
mu sync.Mutex
@ -74,11 +81,7 @@ func (c *lruCache) GetNamespace(id uint64) Namespace {
return ns
}
ns := &lruNs{
lru: c,
id: id,
table: make(map[uint64]*lruNode),
}
ns := &lruNs{lru: c, id: id}
c.table[id] = ns
return ns
}
@ -121,6 +124,9 @@ func (c *lruCache) Zap() {
func (c *lruCache) evict() {
top := &c.recent
for n := c.recent.rPrev; c.used > c.capacity && n != top; {
if n.state != nodeEffective {
panic("evicting non effective node")
}
n.state = nodeEvicted
n.rRemove()
n.derefNB()
@ -130,130 +136,271 @@ func (c *lruCache) evict() {
}
type lruNs struct {
lru *lruCache
id uint64
table map[uint64]*lruNode
state nsState
lru *lruCache
id uint64
rbRoot *lruNode
state nsState
}
func (ns *lruNs) rbGetOrCreateNode(h *lruNode, key uint64) (hn, n *lruNode) {
if h == nil {
n = &lruNode{ns: ns, key: key}
return n, n
}
if key < h.key {
hn, n = ns.rbGetOrCreateNode(h.rbLeft, key)
if hn != nil {
h.rbLeft = hn
} else {
return nil, n
}
} else if key > h.key {
hn, n = ns.rbGetOrCreateNode(h.rbRight, key)
if hn != nil {
h.rbRight = hn
} else {
return nil, n
}
} else {
return nil, h
}
if rbIsRed(h.rbRight) && !rbIsRed(h.rbLeft) {
h = rbRotLeft(h)
}
if rbIsRed(h.rbLeft) && rbIsRed(h.rbLeft.rbLeft) {
h = rbRotRight(h)
}
if rbIsRed(h.rbLeft) && rbIsRed(h.rbRight) {
rbFlip(h)
}
return h, n
}
func (ns *lruNs) getOrCreateNode(key uint64) *lruNode {
hn, n := ns.rbGetOrCreateNode(ns.rbRoot, key)
if hn != nil {
ns.rbRoot = hn
ns.rbRoot.rbBlack = true
}
return n
}
func (ns *lruNs) rbGetNode(key uint64) *lruNode {
h := ns.rbRoot
for h != nil {
switch {
case key < h.key:
h = h.rbLeft
case key > h.key:
h = h.rbRight
default:
return h
}
}
return nil
}
func (ns *lruNs) getNode(key uint64) *lruNode {
return ns.rbGetNode(key)
}
func (ns *lruNs) rbDeleteNode(h *lruNode, key uint64) *lruNode {
if h == nil {
return nil
}
if key < h.key {
if h.rbLeft == nil { // key not present. Nothing to delete
return h
}
if !rbIsRed(h.rbLeft) && !rbIsRed(h.rbLeft.rbLeft) {
h = rbMoveLeft(h)
}
h.rbLeft = ns.rbDeleteNode(h.rbLeft, key)
} else {
if rbIsRed(h.rbLeft) {
h = rbRotRight(h)
}
// If @key equals @h.key and no right children at @h
if h.key == key && h.rbRight == nil {
return nil
}
if h.rbRight != nil && !rbIsRed(h.rbRight) && !rbIsRed(h.rbRight.rbLeft) {
h = rbMoveRight(h)
}
// If @key equals @h.key, and (from above) 'h.Right != nil'
if h.key == key {
var x *lruNode
h.rbRight, x = rbDeleteMin(h.rbRight)
if x == nil {
panic("logic")
}
x.rbLeft, h.rbLeft = h.rbLeft, nil
x.rbRight, h.rbRight = h.rbRight, nil
x.rbBlack = h.rbBlack
h = x
} else { // Else, @key is bigger than @h.key
h.rbRight = ns.rbDeleteNode(h.rbRight, key)
}
}
return rbFixup(h)
}
func (ns *lruNs) deleteNode(key uint64) {
ns.rbRoot = ns.rbDeleteNode(ns.rbRoot, key)
if ns.rbRoot != nil {
ns.rbRoot.rbBlack = true
}
}
func (ns *lruNs) rbIterateNodes(h *lruNode, pivot uint64, iter func(n *lruNode) bool) bool {
if h == nil {
return true
}
if h.key >= pivot {
if !ns.rbIterateNodes(h.rbLeft, pivot, iter) {
return false
}
if !iter(h) {
return false
}
}
return ns.rbIterateNodes(h.rbRight, pivot, iter)
}
func (ns *lruNs) iterateNodes(iter func(n *lruNode) bool) {
ns.rbIterateNodes(ns.rbRoot, 0, iter)
}
func (ns *lruNs) Get(key uint64, setf SetFunc) Handle {
ns.lru.mu.Lock()
defer ns.lru.mu.Unlock()
if ns.state != nsEffective {
ns.lru.mu.Unlock()
return nil
}
node, ok := ns.table[key]
if ok {
switch node.state {
case nodeEvicted:
// Insert to recent list.
node.state = nodeEffective
node.ref++
ns.lru.used += node.charge
ns.lru.evict()
fallthrough
case nodeEffective:
// Bump to front.
node.rRemove()
node.rInsert(&ns.lru.recent)
}
node.ref++
} else {
if setf == nil {
ns.lru.mu.Unlock()
var n *lruNode
if setf == nil {
n = ns.getNode(key)
if n == nil {
return nil
}
} else {
n = ns.getOrCreateNode(key)
}
switch n.state {
case nodeZero:
charge, value := setf()
if value == nil {
ns.lru.mu.Unlock()
ns.deleteNode(key)
return nil
}
node = &lruNode{
ns: ns,
key: key,
value: value,
charge: charge,
ref: 1,
if charge < 0 {
charge = 0
}
ns.table[key] = node
n.value = value
n.charge = charge
n.state = nodeEvicted
ns.lru.size += charge
ns.lru.alive++
if charge > 0 {
node.ref++
node.rInsert(&ns.lru.recent)
ns.lru.used += charge
ns.lru.evict()
}
}
ns.lru.mu.Unlock()
return &lruHandle{node: node}
fallthrough
case nodeEvicted:
if n.charge == 0 {
break
}
// Insert to recent list.
n.state = nodeEffective
n.ref++
ns.lru.used += n.charge
ns.lru.evict()
fallthrough
case nodeEffective:
// Bump to front.
n.rRemove()
n.rInsert(&ns.lru.recent)
case nodeDeleted:
// Do nothing.
default:
panic("invalid state")
}
n.ref++
return &lruHandle{node: n}
}
func (ns *lruNs) Delete(key uint64, fin DelFin) bool {
ns.lru.mu.Lock()
defer ns.lru.mu.Unlock()
if ns.state != nsEffective {
if fin != nil {
fin(false, false)
}
ns.lru.mu.Unlock()
return false
}
node, exist := ns.table[key]
if !exist {
n := ns.getNode(key)
if n == nil {
if fin != nil {
fin(false, false)
}
ns.lru.mu.Unlock()
return false
}
switch node.state {
switch n.state {
case nodeEffective:
ns.lru.used -= n.charge
n.state = nodeDeleted
n.delfin = fin
n.rRemove()
n.derefNB()
case nodeEvicted:
n.state = nodeDeleted
n.delfin = fin
case nodeDeleted:
if fin != nil {
fin(true, true)
}
ns.lru.mu.Unlock()
return false
case nodeEffective:
ns.lru.used -= node.charge
node.state = nodeDeleted
node.delfin = fin
node.rRemove()
node.derefNB()
default:
node.state = nodeDeleted
node.delfin = fin
panic("invalid state")
}
ns.lru.mu.Unlock()
return true
}
func (ns *lruNs) purgeNB(fin PurgeFin) {
if ns.state != nsEffective {
return
}
for _, node := range ns.table {
switch node.state {
case nodeDeleted:
case nodeEffective:
ns.lru.used -= node.charge
node.state = nodeDeleted
node.purgefin = fin
node.rRemove()
node.derefNB()
default:
node.state = nodeDeleted
node.purgefin = fin
if ns.state == nsEffective {
var nodes []*lruNode
ns.iterateNodes(func(n *lruNode) bool {
nodes = append(nodes, n)
return true
})
for _, n := range nodes {
switch n.state {
case nodeEffective:
ns.lru.used -= n.charge
n.state = nodeDeleted
n.purgefin = fin
n.rRemove()
n.derefNB()
case nodeEvicted:
n.state = nodeDeleted
n.purgefin = fin
case nodeDeleted:
default:
panic("invalid state")
}
}
}
}
@ -265,22 +412,22 @@ func (ns *lruNs) Purge(fin PurgeFin) {
}
func (ns *lruNs) zapNB() {
if ns.state != nsEffective {
return
}
if ns.state == nsEffective {
ns.state = nsZapped
ns.state = nsZapped
ns.iterateNodes(func(n *lruNode) bool {
if n.state == nodeEffective {
ns.lru.used -= n.charge
n.rRemove()
}
ns.lru.size -= n.charge
n.state = nodeDeleted
n.fin()
for _, node := range ns.table {
if node.state == nodeEffective {
ns.lru.used -= node.charge
node.rRemove()
}
ns.lru.size -= node.charge
node.state = nodeDeleted
node.fin()
return true
})
ns.rbRoot = nil
}
ns.table = nil
}
func (ns *lruNs) Zap() {
@ -293,7 +440,9 @@ func (ns *lruNs) Zap() {
type lruNode struct {
ns *lruNs
rNext, rPrev *lruNode
rNext, rPrev *lruNode
rbLeft, rbRight *lruNode
rbBlack bool
key uint64
value interface{}
@ -330,8 +479,10 @@ func (n *lruNode) fin() {
r.Release()
}
if n.purgefin != nil {
if n.delfin != nil {
panic("conflicting delete and purge fin")
}
n.purgefin(n.ns.id, n.key)
n.delfin = nil
n.purgefin = nil
} else if n.delfin != nil {
n.delfin(true, false)
@ -344,7 +495,7 @@ func (n *lruNode) derefNB() {
if n.ref == 0 {
if n.ns.state == nsEffective {
// Remove elemement.
delete(n.ns.table, n.key)
n.ns.deleteNode(n.key)
n.ns.lru.size -= n.charge
n.ns.lru.alive--
n.fin()
@ -380,3 +531,92 @@ func (h *lruHandle) Release() {
h.node.deref()
h.node = nil
}
func rbIsRed(h *lruNode) bool {
if h == nil {
return false
}
return !h.rbBlack
}
func rbRotLeft(h *lruNode) *lruNode {
x := h.rbRight
if x.rbBlack {
panic("rotating a black link")
}
h.rbRight = x.rbLeft
x.rbLeft = h
x.rbBlack = h.rbBlack
h.rbBlack = false
return x
}
func rbRotRight(h *lruNode) *lruNode {
x := h.rbLeft
if x.rbBlack {
panic("rotating a black link")
}
h.rbLeft = x.rbRight
x.rbRight = h
x.rbBlack = h.rbBlack
h.rbBlack = false
return x
}
func rbFlip(h *lruNode) {
h.rbBlack = !h.rbBlack
h.rbLeft.rbBlack = !h.rbLeft.rbBlack
h.rbRight.rbBlack = !h.rbRight.rbBlack
}
func rbMoveLeft(h *lruNode) *lruNode {
rbFlip(h)
if rbIsRed(h.rbRight.rbLeft) {
h.rbRight = rbRotRight(h.rbRight)
h = rbRotLeft(h)
rbFlip(h)
}
return h
}
func rbMoveRight(h *lruNode) *lruNode {
rbFlip(h)
if rbIsRed(h.rbLeft.rbLeft) {
h = rbRotRight(h)
rbFlip(h)
}
return h
}
func rbFixup(h *lruNode) *lruNode {
if rbIsRed(h.rbRight) {
h = rbRotLeft(h)
}
if rbIsRed(h.rbLeft) && rbIsRed(h.rbLeft.rbLeft) {
h = rbRotRight(h)
}
if rbIsRed(h.rbLeft) && rbIsRed(h.rbRight) {
rbFlip(h)
}
return h
}
func rbDeleteMin(h *lruNode) (hn, n *lruNode) {
if h == nil {
return nil, nil
}
if h.rbLeft == nil {
return nil, h
}
if !rbIsRed(h.rbLeft) && !rbIsRed(h.rbLeft.rbLeft) {
h = rbMoveLeft(h)
}
h.rbLeft, n = rbDeleteMin(h.rbLeft)
return rbFixup(h), n
}

View File

@ -1,40 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
const (
kNumLevels = 7
// Level-0 compaction is started when we hit this many files.
kL0_CompactionTrigger float64 = 4
// Soft limit on number of level-0 files. We slow down writes at this point.
kL0_SlowdownWritesTrigger = 8
// Maximum number of level-0 files. We stop writes at this point.
kL0_StopWritesTrigger = 12
// Maximum level to which a new compacted memdb is pushed if it
// does not create overlap. We try to push to level 2 to avoid the
// relatively expensive level 0=>1 compactions and to avoid some
// expensive manifest file operations. We do not push all the way to
// the largest level since that can generate a lot of wasted disk
// space if the same key space is being repeatedly overwritten.
kMaxMemCompactLevel = 2
// Maximum size of a table.
kMaxTableSize = 2 * 1048576
// Maximum bytes of overlaps in grandparent (i.e., level+2) before we
// stop building a single file in a level->level+1 compaction.
kMaxGrandParentOverlapBytes = 10 * kMaxTableSize
// Maximum number of bytes in all compacted files. We avoid expanding
// the lower level file set of a compaction if it would make the
// total compaction cover more than this many bytes.
kExpCompactionMaxBytes = 25 * kMaxTableSize
)

View File

@ -14,6 +14,7 @@ import (
"testing"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage"
)
@ -96,21 +97,22 @@ func (h *dbCorruptHarness) deleteRand(n, max int, rnd *rand.Rand) {
}
}
func (h *dbCorruptHarness) corrupt(ft storage.FileType, offset, n int) {
func (h *dbCorruptHarness) corrupt(ft storage.FileType, fi, offset, n int) {
p := &h.dbHarness
t := p.t
var file storage.File
ff, _ := p.stor.GetFiles(ft)
for _, f := range ff {
if file == nil || f.Num() > file.Num() {
file = f
}
sff := files(ff)
sff.sort()
if fi < 0 {
fi = len(sff) - 1
}
if file == nil {
t.Fatalf("no such file with type %q", ft)
if fi >= len(sff) {
t.Fatalf("no such file with type %q with index %d", ft, fi)
}
file := sff[fi]
r, err := file.Open()
if err != nil {
t.Fatal("cannot open file: ", err)
@ -225,8 +227,8 @@ func TestCorruptDB_Journal(t *testing.T) {
h.build(100)
h.check(100, 100)
h.closeDB()
h.corrupt(storage.TypeJournal, 19, 1)
h.corrupt(storage.TypeJournal, 32*1024+1000, 1)
h.corrupt(storage.TypeJournal, -1, 19, 1)
h.corrupt(storage.TypeJournal, -1, 32*1024+1000, 1)
h.openDB()
h.check(36, 36)
@ -242,7 +244,7 @@ func TestCorruptDB_Table(t *testing.T) {
h.compactRangeAt(0, "", "")
h.compactRangeAt(1, "", "")
h.closeDB()
h.corrupt(storage.TypeTable, 100, 1)
h.corrupt(storage.TypeTable, -1, 100, 1)
h.openDB()
h.check(99, 99)
@ -256,7 +258,7 @@ func TestCorruptDB_TableIndex(t *testing.T) {
h.build(10000)
h.compactMem()
h.closeDB()
h.corrupt(storage.TypeTable, -2000, 500)
h.corrupt(storage.TypeTable, -1, -2000, 500)
h.openDB()
h.check(5000, 9999)
@ -355,7 +357,7 @@ func TestCorruptDB_CorruptedManifest(t *testing.T) {
h.compactMem()
h.compactRange("", "")
h.closeDB()
h.corrupt(storage.TypeManifest, 0, 1000)
h.corrupt(storage.TypeManifest, -1, 0, 1000)
h.openAssert(false)
h.recover()
@ -370,7 +372,7 @@ func TestCorruptDB_CompactionInputError(t *testing.T) {
h.build(10)
h.compactMem()
h.closeDB()
h.corrupt(storage.TypeTable, 100, 1)
h.corrupt(storage.TypeTable, -1, 100, 1)
h.openDB()
h.check(9, 9)
@ -387,7 +389,7 @@ func TestCorruptDB_UnrelatedKeys(t *testing.T) {
h.build(10)
h.compactMem()
h.closeDB()
h.corrupt(storage.TypeTable, 100, 1)
h.corrupt(storage.TypeTable, -1, 100, 1)
h.openDB()
h.put(string(tkey(1000)), string(tval(1000, ctValSize)))
@ -470,3 +472,31 @@ func TestCorruptDB_MissingTableFiles(t *testing.T) {
h.close()
}
func TestCorruptDB_RecoverTable(t *testing.T) {
h := newDbCorruptHarnessWopt(t, &opt.Options{
WriteBuffer: 112 * opt.KiB,
CompactionTableSize: 90 * opt.KiB,
Filter: filter.NewBloomFilter(10),
})
h.build(1000)
h.compactMem()
h.compactRangeAt(0, "", "")
h.compactRangeAt(1, "", "")
seq := h.db.seq
h.closeDB()
h.corrupt(storage.TypeTable, 0, 1000, 1)
h.corrupt(storage.TypeTable, 3, 10000, 1)
// Corrupted filter shouldn't affect recovery.
h.corrupt(storage.TypeTable, 3, 113888, 10)
h.corrupt(storage.TypeTable, -1, 20000, 1)
h.recover()
if h.db.seq != seq {
t.Errorf("invalid seq, want=%d got=%d", seq, h.db.seq)
}
h.check(985, 985)
h.close()
}

View File

@ -7,7 +7,7 @@
package leveldb
import (
"errors"
"container/list"
"fmt"
"io"
"os"
@ -17,6 +17,7 @@ import (
"sync/atomic"
"time"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb"
@ -46,7 +47,7 @@ type DB struct {
// Snapshot.
snapsMu sync.Mutex
snapsRoot snapshotElement
snapsList *list.List
// Stats.
aliveSnaps, aliveIters int32
@ -56,18 +57,19 @@ type DB struct {
writeMergedC chan bool
writeLockC chan struct{}
writeAckC chan error
writeDelay time.Duration
writeDelayN int
journalC chan *Batch
journalAckC chan error
// Compaction.
tcompCmdC chan cCmd
tcompPauseC chan chan<- struct{}
tcompTriggerC chan struct{}
mcompCmdC chan cCmd
mcompTriggerC chan struct{}
compErrC chan error
compErrSetC chan error
compStats [kNumLevels]cStats
tcompCmdC chan cCmd
tcompPauseC chan chan<- struct{}
mcompCmdC chan cCmd
compErrC chan error
compPerErrC chan error
compErrSetC chan error
compStats []cStats
// Close.
closeW sync.WaitGroup
@ -82,9 +84,11 @@ func openDB(s *session) (*DB, error) {
db := &DB{
s: s,
// Initial sequence
seq: s.stSeq,
seq: s.stSeqNum,
// MemDB
memPool: make(chan *memdb.DB, 1),
// Snapshot
snapsList: list.New(),
// Write
writeC: make(chan *Batch),
writeMergedC: make(chan bool),
@ -93,17 +97,16 @@ func openDB(s *session) (*DB, error) {
journalC: make(chan *Batch),
journalAckC: make(chan error),
// Compaction
tcompCmdC: make(chan cCmd),
tcompPauseC: make(chan chan<- struct{}),
tcompTriggerC: make(chan struct{}, 1),
mcompCmdC: make(chan cCmd),
mcompTriggerC: make(chan struct{}, 1),
compErrC: make(chan error),
compErrSetC: make(chan error),
tcompCmdC: make(chan cCmd),
tcompPauseC: make(chan chan<- struct{}),
mcompCmdC: make(chan cCmd),
compErrC: make(chan error),
compPerErrC: make(chan error),
compErrSetC: make(chan error),
compStats: make([]cStats, s.o.GetNumLevel()),
// Close
closeC: make(chan struct{}),
}
db.initSnapshot()
if err := db.recoverJournal(); err != nil {
return nil, err
@ -119,14 +122,14 @@ func openDB(s *session) (*DB, error) {
return nil, err
}
// Don't include compaction error goroutine into wait group.
// Doesn't need to be included in the wait group.
go db.compactionError()
go db.mpoolDrain()
db.closeW.Add(3)
go db.tCompaction()
go db.mCompaction()
go db.jWriter()
go db.mpoolDrain()
s.logf("db@open done T·%v", time.Since(start))
@ -253,6 +256,10 @@ func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
}
func recoverTable(s *session, o *opt.Options) error {
o = dupOptions(o)
// Mask StrictReader, lets StrictRecovery doing its job.
o.Strict &= ^opt.StrictReader
// Get all tables and sort it by file number.
tableFiles_, err := s.getFiles(storage.TypeTable)
if err != nil {
@ -261,10 +268,16 @@ func recoverTable(s *session, o *opt.Options) error {
tableFiles := files(tableFiles_)
tableFiles.sort()
var mSeq uint64
var good, corrupted int
rec := new(sessionRecord)
bpool := util.NewBufferPool(o.GetBlockSize() + 5)
var (
maxSeq uint64
recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int
// We will drop corrupted table.
strict = o.GetStrict(opt.StrictRecovery)
rec = &sessionRecord{numLevel: o.GetNumLevel()}
bpool = util.NewBufferPool(o.GetBlockSize() + 5)
)
buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
tmp = s.newTemp()
writer, err := tmp.Create()
@ -311,7 +324,12 @@ func recoverTable(s *session, o *opt.Options) error {
if err != nil {
return err
}
defer reader.Close()
var closed bool
defer func() {
if !closed {
reader.Close()
}
}()
// Get file size.
size, err := reader.Seek(0, 2)
@ -319,25 +337,32 @@ func recoverTable(s *session, o *opt.Options) error {
return err
}
var tSeq uint64
var tgood, tcorrupted, blockerr int
var imin, imax []byte
tr := table.NewReader(reader, size, nil, bpool, o)
var (
tSeq uint64
tgoodKey, tcorruptedKey, tcorruptedBlock int
imin, imax []byte
)
tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o)
if err != nil {
return err
}
iter := tr.NewIterator(nil, nil)
iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) {
s.logf("table@recovery found error @%d %q", file.Num(), err)
blockerr++
if errors.IsCorrupted(err) {
s.logf("table@recovery block corruption @%d %q", file.Num(), err)
tcorruptedBlock++
}
})
// Scan the table.
for iter.Next() {
key := iter.Key()
_, seq, _, ok := parseIkey(key)
if !ok {
tcorrupted++
_, seq, _, kerr := parseIkey(key)
if kerr != nil {
tcorruptedKey++
continue
}
tgood++
tgoodKey++
if seq > tSeq {
tSeq = seq
}
@ -352,8 +377,18 @@ func recoverTable(s *session, o *opt.Options) error {
}
iter.Release()
if tgood > 0 {
if tcorrupted > 0 || blockerr > 0 {
goodKey += tgoodKey
corruptedKey += tcorruptedKey
corruptedBlock += tcorruptedBlock
if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
droppedTable++
s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
return nil
}
if tgoodKey > 0 {
if tcorruptedKey > 0 || tcorruptedBlock > 0 {
// Rebuild the table.
s.logf("table@recovery rebuilding @%d", file.Num())
iter := tr.NewIterator(nil, nil)
@ -362,25 +397,25 @@ func recoverTable(s *session, o *opt.Options) error {
if err != nil {
return err
}
closed = true
reader.Close()
if err := file.Replace(tmp); err != nil {
return err
}
size = newSize
}
if tSeq > mSeq {
mSeq = tSeq
if tSeq > maxSeq {
maxSeq = tSeq
}
recoveredKey += tgoodKey
// Add table to level 0.
rec.addTable(0, file.Num(), uint64(size), imin, imax)
s.logf("table@recovery recovered @%d N·%d C·%d B·%d S·%d Q·%d", file.Num(), tgood, tcorrupted, blockerr, size, tSeq)
s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
} else {
s.logf("table@recovery unrecoverable @%d C·%d B·%d S·%d", file.Num(), tcorrupted, blockerr, size)
droppedTable++
s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size)
}
good += tgood
corrupted += tcorrupted
return nil
}
@ -397,11 +432,11 @@ func recoverTable(s *session, o *opt.Options) error {
}
}
s.logf("table@recovery recovered F·%d N·%d C·%d Q·%d", len(tableFiles), good, corrupted, mSeq)
s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq)
}
// Set sequence number.
rec.setSeq(mSeq + 1)
rec.setSeqNum(maxSeq)
// Create new manifest.
if err := s.create(); err != nil {
@ -484,26 +519,30 @@ func (db *DB) recoverJournal() error {
if err == io.EOF {
break
}
return err
return errors.SetFile(err, file)
}
buf.Reset()
if _, err := buf.ReadFrom(r); err != nil {
if err == io.ErrUnexpectedEOF {
// This is error returned due to corruption, with strict == false.
continue
} else {
return err
return errors.SetFile(err, file)
}
}
if err := batch.decode(buf.Bytes()); err != nil {
return err
}
if err := batch.memReplay(mem); err != nil {
return err
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil {
if strict || !errors.IsCorrupted(err) {
return errors.SetFile(err, file)
} else {
db.s.logf("journal error: %v (skipped)", err)
// We won't apply sequence number as it might be corrupted.
continue
}
}
// Save sequence number.
db.seq = batch.seq + uint64(batch.len())
db.seq = batch.seq + uint64(batch.Len())
// Flush it if large enough.
if mem.Size() >= writeBuffer {
@ -564,7 +603,7 @@ func (db *DB) recoverJournal() error {
}
func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
ikey := newIKey(key, seq, tSeek)
ikey := newIkey(key, seq, ktSeek)
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
@ -575,9 +614,13 @@ func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, er
mk, mv, me := m.mdb.Find(ikey)
if me == nil {
ukey, _, t, ok := parseIkey(mk)
if ok && db.s.icmp.uCompare(ukey, key) == 0 {
if t == tDel {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
// Shouldn't have had happen.
panic(kerr)
}
if db.s.icmp.uCompare(ukey, key) == 0 {
if kt == ktDel {
return nil, ErrNotFound
}
return append([]byte{}, mv...), nil
@ -588,17 +631,60 @@ func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, er
}
v := db.s.version()
value, cSched, err := v.get(ikey, ro)
value, cSched, err := v.get(ikey, ro, false)
v.release()
if cSched {
// Trigger table compaction.
db.compTrigger(db.tcompTriggerC)
db.compSendTrigger(db.tcompCmdC)
}
return
}
func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
ikey := newIkey(key, seq, ktSeek)
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
if m == nil {
continue
}
defer m.decref()
mk, _, me := m.mdb.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
// Shouldn't have had happen.
panic(kerr)
}
if db.s.icmp.uCompare(ukey, key) == 0 {
if kt == ktDel {
return false, nil
}
return true, nil
}
} else if me != ErrNotFound {
return false, me
}
}
v := db.s.version()
_, cSched, err := v.get(ikey, ro, true)
v.release()
if cSched {
// Trigger table compaction.
db.compSendTrigger(db.tcompCmdC)
}
if err == nil {
ret = true
} else if err == ErrNotFound {
err = nil
}
return
}
// Get gets the value for the given key. It returns ErrNotFound if the
// DB does not contain the key.
// DB does not contains the key.
//
// The returned slice is its own copy, it is safe to modify the contents
// of the returned slice.
@ -609,7 +695,23 @@ func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
return
}
return db.get(key, db.getSeq(), ro)
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
return db.get(key, se.seq, ro)
}
// Has returns true if the DB does contains the given key.
//
// It is safe to modify the contents of the argument after Get returns.
func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
err = db.ok()
if err != nil {
return
}
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
return db.has(key, se.seq, ro)
}
// NewIterator returns an iterator for the latest snapshot of the
@ -633,9 +735,11 @@ func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Itera
return iterator.NewEmptyIterator(err)
}
snap := db.newSnapshot()
defer snap.Release()
return snap.NewIterator(slice, ro)
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
// Iterator holds 'version' lock, 'version' is immutable so snapshot
// can be released after iterator created.
return db.newIterator(se.seq, slice, ro)
}
// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
@ -655,7 +759,7 @@ func (db *DB) GetSnapshot() (*Snapshot, error) {
//
// Property names:
// leveldb.num-files-at-level{n}
// Returns the number of filer at level 'n'.
// Returns the number of files at level 'n'.
// leveldb.stats
// Returns statistics of the underlying DB.
// leveldb.sstables
@ -685,12 +789,13 @@ func (db *DB) GetProperty(name string) (value string, err error) {
v := db.s.version()
defer v.release()
numFilesPrefix := "num-files-at-level"
switch {
case strings.HasPrefix(p, "num-files-at-level"):
case strings.HasPrefix(p, numFilesPrefix):
var level uint
var rest string
n, _ := fmt.Scanf("%d%s", &level, &rest)
if n != 1 || level >= kNumLevels {
n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
if n != 1 || int(level) >= db.s.o.GetNumLevel() {
err = errors.New("leveldb: GetProperty: invalid property: " + name)
} else {
value = fmt.Sprint(v.tLen(int(level)))
@ -752,8 +857,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
sizes := make(Sizes, 0, len(ranges))
for _, r := range ranges {
imin := newIKey(r.Start, kMaxSeq, tSeek)
imax := newIKey(r.Limit, kMaxSeq, tSeek)
imin := newIkey(r.Start, kMaxSeq, ktSeek)
imax := newIkey(r.Limit, kMaxSeq, ktSeek)
start, err := v.offsetOf(imin)
if err != nil {
return nil, err
@ -796,18 +901,23 @@ func (db *DB) Close() error {
default:
}
// Signal all goroutines.
close(db.closeC)
// Wait for the close WaitGroup.
// Wait for all gorotines to exit.
db.closeW.Wait()
// Close journal.
// Lock writer and closes journal.
db.writeLockC <- struct{}{}
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
}
if db.writeDelayN > 0 {
db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
}
// Close session.
db.s.close()
db.logf("db@close done T·%v", time.Since(start))
@ -827,7 +937,6 @@ func (db *DB) Close() error {
db.journalWriter = nil
db.journalFile = nil
db.frozenJournalFile = nil
db.snapsRoot = snapshotElement{}
db.closer = nil
return err

View File

@ -7,11 +7,12 @@
package leveldb
import (
"errors"
"sync"
"time"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
)
var (
@ -68,7 +69,7 @@ type cMem struct {
}
func newCMem(s *session) *cMem {
return &cMem{s: s, rec: new(sessionRecord)}
return &cMem{s: s, rec: &sessionRecord{numLevel: s.o.GetNumLevel()}}
}
func (c *cMem) flush(mem *memdb.DB, level int) error {
@ -84,7 +85,9 @@ func (c *cMem) flush(mem *memdb.DB, level int) error {
// Pick level.
if level < 0 {
level = s.version_NB().pickLevel(t.imin.ukey(), t.imax.ukey())
v := s.version()
level = v.pickLevel(t.imin.ukey(), t.imax.ukey())
v.release()
}
c.rec.addTableFile(level, t)
@ -95,24 +98,32 @@ func (c *cMem) flush(mem *memdb.DB, level int) error {
}
func (c *cMem) reset() {
c.rec = new(sessionRecord)
c.rec = &sessionRecord{numLevel: c.s.o.GetNumLevel()}
}
func (c *cMem) commit(journal, seq uint64) error {
c.rec.setJournalNum(journal)
c.rec.setSeq(seq)
c.rec.setSeqNum(seq)
// Commit changes.
return c.s.commit(c.rec)
}
func (db *DB) compactionError() {
var err error
var (
err error
wlocked bool
)
noerr:
// No error.
for {
select {
case err = <-db.compErrSetC:
if err != nil {
switch {
case err == nil:
case errors.IsCorrupted(err):
goto hasperr
default:
goto haserr
}
case _, _ = <-db.closeC:
@ -120,17 +131,39 @@ noerr:
}
}
haserr:
// Transient error.
for {
select {
case db.compErrC <- err:
case err = <-db.compErrSetC:
if err == nil {
switch {
case err == nil:
goto noerr
case errors.IsCorrupted(err):
goto hasperr
default:
}
case _, _ = <-db.closeC:
return
}
}
hasperr:
// Persistent error.
for {
select {
case db.compErrC <- err:
case db.compPerErrC <- err:
case db.writeLockC <- struct{}{}:
// Hold write lock, so that write won't pass-through.
wlocked = true
case _, _ = <-db.closeC:
if wlocked {
// We should release the lock or Close will hang.
<-db.writeLockC
}
return
}
}
}
type compactionTransactCounter int
@ -139,12 +172,17 @@ func (cnt *compactionTransactCounter) incr() {
*cnt++
}
func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactCounter) error, rollback func() error) {
type compactionTransactInterface interface {
run(cnt *compactionTransactCounter) error
revert() error
}
func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
defer func() {
if x := recover(); x != nil {
if x == errCompactionTransactExiting && rollback != nil {
if err := rollback(); err != nil {
db.logf("%s rollback error %q", name, err)
if x == errCompactionTransactExiting {
if err := t.revert(); err != nil {
db.logf("%s revert error %q", name, err)
}
}
panic(x)
@ -156,9 +194,13 @@ func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactC
backoffMax = 8 * time.Second
backoffMul = 2 * time.Second
)
backoff := backoffMin
backoffT := time.NewTimer(backoff)
lastCnt := compactionTransactCounter(0)
var (
backoff = backoffMin
backoffT = time.NewTimer(backoff)
lastCnt = compactionTransactCounter(0)
disableBackoff = db.s.o.GetDisableCompactionBackoff()
)
for n := 0; ; n++ {
// Check wether the DB is closed.
if db.isClosed() {
@ -170,11 +212,19 @@ func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactC
// Execute.
cnt := compactionTransactCounter(0)
err := exec(&cnt)
err := t.run(&cnt)
if err != nil {
db.logf("%s error I·%d %q", name, cnt, err)
}
// Set compaction error status.
select {
case db.compErrSetC <- err:
case perr := <-db.compPerErrC:
if err != nil {
db.logf("%s exiting (persistent error %q)", name, perr)
db.compactionExitTransact()
}
case _, _ = <-db.closeC:
db.logf("%s exiting", name)
db.compactionExitTransact()
@ -182,31 +232,56 @@ func (db *DB) compactionTransact(name string, exec func(cnt *compactionTransactC
if err == nil {
return
}
db.logf("%s error I·%d %q", name, cnt, err)
// Reset backoff duration if counter is advancing.
if cnt > lastCnt {
backoff = backoffMin
lastCnt = cnt
}
// Backoff.
backoffT.Reset(backoff)
if backoff < backoffMax {
backoff *= backoffMul
if backoff > backoffMax {
backoff = backoffMax
}
}
select {
case <-backoffT.C:
case _, _ = <-db.closeC:
db.logf("%s exiting", name)
if errors.IsCorrupted(err) {
db.logf("%s exiting (corruption detected)", name)
db.compactionExitTransact()
}
if !disableBackoff {
// Reset backoff duration if counter is advancing.
if cnt > lastCnt {
backoff = backoffMin
lastCnt = cnt
}
// Backoff.
backoffT.Reset(backoff)
if backoff < backoffMax {
backoff *= backoffMul
if backoff > backoffMax {
backoff = backoffMax
}
}
select {
case <-backoffT.C:
case _, _ = <-db.closeC:
db.logf("%s exiting", name)
db.compactionExitTransact()
}
}
}
}
type compactionTransactFunc struct {
runFunc func(cnt *compactionTransactCounter) error
revertFunc func() error
}
func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error {
return t.runFunc(cnt)
}
func (t *compactionTransactFunc) revert() error {
if t.revertFunc != nil {
return t.revertFunc()
}
return nil
}
func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) {
db.compactionTransact(name, &compactionTransactFunc{run, revert})
}
func (db *DB) compactionExitTransact() {
panic(errCompactionTransactExiting)
}
@ -232,20 +307,23 @@ func (db *DB) memCompaction() {
}
// Pause table compaction.
ch := make(chan struct{})
resumeC := make(chan struct{})
select {
case db.tcompPauseC <- (chan<- struct{})(ch):
case db.tcompPauseC <- (chan<- struct{})(resumeC):
case <-db.compPerErrC:
close(resumeC)
resumeC = nil
case _, _ = <-db.closeC:
return
}
db.compactionTransact("mem@flush", func(cnt *compactionTransactCounter) (err error) {
db.compactionTransactFunc("mem@flush", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
return c.flush(mem.mdb, -1)
}, func() error {
for _, r := range c.rec.addedTables {
db.logf("mem@flush rollback @%d", r.num)
db.logf("mem@flush revert @%d", r.num)
f := db.s.getTableFile(r.num)
if err := f.Remove(); err != nil {
return err
@ -254,13 +332,13 @@ func (db *DB) memCompaction() {
return nil
})
db.compactionTransact("mem@commit", func(cnt *compactionTransactCounter) (err error) {
db.compactionTransactFunc("mem@commit", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
defer stats.stopTimer()
return c.commit(db.journalFile.Num(), db.frozenSeq)
}, nil)
db.logf("mem@flush commited F·%d T·%v", len(c.rec.addedTables), stats.duration)
db.logf("mem@flush committed F·%d T·%v", len(c.rec.addedTables), stats.duration)
for _, r := range c.rec.addedTables {
stats.write += r.size
@ -271,26 +349,223 @@ func (db *DB) memCompaction() {
db.dropFrozenMem()
// Resume table compaction.
select {
case <-ch:
case _, _ = <-db.closeC:
return
if resumeC != nil {
select {
case <-resumeC:
close(resumeC)
case _, _ = <-db.closeC:
return
}
}
// Trigger table compaction.
db.compTrigger(db.mcompTriggerC)
db.compSendTrigger(db.tcompCmdC)
}
type tableCompactionBuilder struct {
db *DB
s *session
c *compaction
rec *sessionRecord
stat0, stat1 *cStatsStaging
snapHasLastUkey bool
snapLastUkey []byte
snapLastSeq uint64
snapIter int
snapKerrCnt int
snapDropCnt int
kerrCnt int
dropCnt int
minSeq uint64
strict bool
tableSize int
tw *tWriter
}
func (b *tableCompactionBuilder) appendKV(key, value []byte) error {
// Create new table if not already.
if b.tw == nil {
// Check for pause event.
if b.db != nil {
select {
case ch := <-b.db.tcompPauseC:
b.db.pauseCompaction(ch)
case _, _ = <-b.db.closeC:
b.db.compactionExitTransact()
default:
}
}
// Create new table.
var err error
b.tw, err = b.s.tops.create()
if err != nil {
return err
}
}
// Write key/value into table.
return b.tw.append(key, value)
}
func (b *tableCompactionBuilder) needFlush() bool {
return b.tw.tw.BytesLen() >= b.tableSize
}
func (b *tableCompactionBuilder) flush() error {
t, err := b.tw.finish()
if err != nil {
return err
}
b.rec.addTableFile(b.c.level+1, t)
b.stat1.write += t.size
b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.level+1, t.file.Num(), b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
b.tw = nil
return nil
}
func (b *tableCompactionBuilder) cleanup() {
if b.tw != nil {
b.tw.drop()
b.tw = nil
}
}
func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
snapResumed := b.snapIter > 0
hasLastUkey := b.snapHasLastUkey // The key might has zero length, so this is necessary.
lastUkey := append([]byte{}, b.snapLastUkey...)
lastSeq := b.snapLastSeq
b.kerrCnt = b.snapKerrCnt
b.dropCnt = b.snapDropCnt
// Restore compaction state.
b.c.restore()
defer b.cleanup()
b.stat1.startTimer()
defer b.stat1.stopTimer()
iter := b.c.newIterator()
defer iter.Release()
for i := 0; iter.Next(); i++ {
// Incr transact counter.
cnt.incr()
// Skip until last state.
if i < b.snapIter {
continue
}
resumed := false
if snapResumed {
resumed = true
snapResumed = false
}
ikey := iter.Key()
ukey, seq, kt, kerr := parseIkey(ikey)
if kerr == nil {
shouldStop := !resumed && b.c.shouldStopBefore(ikey)
if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 {
// First occurrence of this user key.
// Only rotate tables if ukey doesn't hop across.
if b.tw != nil && (shouldStop || b.needFlush()) {
if err := b.flush(); err != nil {
return err
}
// Creates snapshot of the state.
b.c.save()
b.snapHasLastUkey = hasLastUkey
b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...)
b.snapLastSeq = lastSeq
b.snapIter = i
b.snapKerrCnt = b.kerrCnt
b.snapDropCnt = b.dropCnt
}
hasLastUkey = true
lastUkey = append(lastUkey[:0], ukey...)
lastSeq = kMaxSeq
}
switch {
case lastSeq <= b.minSeq:
// Dropped because newer entry for same user key exist
fallthrough // (A)
case kt == ktDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
// For this user key:
// (1) there is no data in higher levels
// (2) data in lower levels will have larger seq numbers
// (3) data in layers that are being compacted here and have
// smaller seq numbers will be dropped in the next
// few iterations of this loop (by rule (A) above).
// Therefore this deletion marker is obsolete and can be dropped.
lastSeq = seq
b.dropCnt++
continue
default:
lastSeq = seq
}
} else {
if b.strict {
return kerr
}
// Don't drop corrupted keys.
hasLastUkey = false
lastUkey = lastUkey[:0]
lastSeq = kMaxSeq
b.kerrCnt++
}
if err := b.appendKV(ikey, iter.Value()); err != nil {
return err
}
}
if err := iter.Error(); err != nil {
return err
}
// Finish last table.
if b.tw != nil && !b.tw.empty() {
return b.flush()
}
return nil
}
func (b *tableCompactionBuilder) revert() error {
for _, at := range b.rec.addedTables {
b.s.logf("table@build revert @%d", at.num)
f := b.s.getTableFile(at.num)
if err := f.Remove(); err != nil {
return err
}
}
return nil
}
func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
rec := new(sessionRecord)
rec.addCompactionPointer(c.level, c.imax)
defer c.release()
rec := &sessionRecord{numLevel: db.s.o.GetNumLevel()}
rec.addCompPtr(c.level, c.imax)
if !noTrivial && c.trivial() {
t := c.tables[0][0]
db.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1)
rec.deleteTable(c.level, t.file.Num())
rec.delTable(c.level, t.file.Num())
rec.addTableFile(c.level+1, t)
db.compactionTransact("table@move", func(cnt *compactionTransactCounter) (err error) {
db.compactionTransactFunc("table@move", func(cnt *compactionTransactCounter) (err error) {
return db.s.commit(rec)
}, nil)
return
@ -301,184 +576,34 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
for _, t := range tables {
stats[i].read += t.size
// Insert deleted tables into record
rec.deleteTable(c.level+i, t.file.Num())
rec.delTable(c.level+i, t.file.Num())
}
}
sourceSize := int(stats[0].read + stats[1].read)
minSeq := db.minSeq()
db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq)
var snapUkey []byte
var snapHasUkey bool
var snapSeq uint64
var snapIter int
var snapDropCnt int
var dropCnt int
db.compactionTransact("table@build", func(cnt *compactionTransactCounter) (err error) {
ukey := append([]byte{}, snapUkey...)
hasUkey := snapHasUkey
lseq := snapSeq
dropCnt = snapDropCnt
snapSched := snapIter == 0
var tw *tWriter
finish := func() error {
t, err := tw.finish()
if err != nil {
return err
}
rec.addTableFile(c.level+1, t)
stats[1].write += t.size
db.logf("table@build created L%d@%d N·%d S·%s %q:%q", c.level+1, t.file.Num(), tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
return nil
}
defer func() {
stats[1].stopTimer()
if tw != nil {
tw.drop()
tw = nil
}
}()
stats[1].startTimer()
iter := c.newIterator()
defer iter.Release()
for i := 0; iter.Next(); i++ {
// Incr transact counter.
cnt.incr()
// Skip until last state.
if i < snapIter {
continue
}
ikey := iKey(iter.Key())
if c.shouldStopBefore(ikey) && tw != nil {
err = finish()
if err != nil {
return
}
snapSched = true
tw = nil
}
// Scheduled for snapshot, snapshot will used to retry compaction
// if error occured.
if snapSched {
snapUkey = append(snapUkey[:0], ukey...)
snapHasUkey = hasUkey
snapSeq = lseq
snapIter = i
snapDropCnt = dropCnt
snapSched = false
}
if seq, vt, ok := ikey.parseNum(); !ok {
// Don't drop error keys
ukey = ukey[:0]
hasUkey = false
lseq = kMaxSeq
} else {
if !hasUkey || db.s.icmp.uCompare(ikey.ukey(), ukey) != 0 {
// First occurrence of this user key
ukey = append(ukey[:0], ikey.ukey()...)
hasUkey = true
lseq = kMaxSeq
}
drop := false
if lseq <= minSeq {
// Dropped because newer entry for same user key exist
drop = true // (A)
} else if vt == tDel && seq <= minSeq && c.baseLevelForKey(ukey) {
// For this user key:
// (1) there is no data in higher levels
// (2) data in lower levels will have larger seq numbers
// (3) data in layers that are being compacted here and have
// smaller seq numbers will be dropped in the next
// few iterations of this loop (by rule (A) above).
// Therefore this deletion marker is obsolete and can be dropped.
drop = true
}
lseq = seq
if drop {
dropCnt++
continue
}
}
// Create new table if not already
if tw == nil {
// Check for pause event.
select {
case ch := <-db.tcompPauseC:
db.pauseCompaction(ch)
case _, _ = <-db.closeC:
db.compactionExitTransact()
default:
}
// Create new table.
tw, err = db.s.tops.create()
if err != nil {
return
}
}
// Write key/value into table
err = tw.append(ikey, iter.Value())
if err != nil {
return
}
// Finish table if it is big enough
if tw.tw.BytesLen() >= kMaxTableSize {
err = finish()
if err != nil {
return
}
snapSched = true
tw = nil
}
}
err = iter.Error()
if err != nil {
return
}
// Finish last table
if tw != nil && !tw.empty() {
err = finish()
if err != nil {
return
}
tw = nil
}
return
}, func() error {
for _, r := range rec.addedTables {
db.logf("table@build rollback @%d", r.num)
f := db.s.getTableFile(r.num)
if err := f.Remove(); err != nil {
return err
}
}
return nil
})
b := &tableCompactionBuilder{
db: db,
s: db.s,
c: c,
rec: rec,
stat1: &stats[1],
minSeq: minSeq,
strict: db.s.o.GetStrict(opt.StrictCompaction),
tableSize: db.s.o.GetCompactionTableSize(c.level + 1),
}
db.compactionTransact("table@build", b)
// Commit changes
db.compactionTransact("table@commit", func(cnt *compactionTransactCounter) (err error) {
db.compactionTransactFunc("table@commit", func(cnt *compactionTransactCounter) (err error) {
stats[1].startTimer()
defer stats[1].stopTimer()
return db.s.commit(rec)
}, nil)
resultSize := int(stats[1].write)
db.logf("table@compaction commited F%s S%s D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), dropCnt, stats[1].duration)
db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
// Save compaction stats
for i := range stats {
@ -494,14 +619,14 @@ func (db *DB) tableRangeCompaction(level int, umin, umax []byte) {
db.tableCompaction(c, true)
}
} else {
v := db.s.version_NB()
v := db.s.version()
m := 1
for i, t := range v.tables[1:] {
if t.overlaps(db.s.icmp, umin, umax, false) {
m = i + 1
}
}
v.release()
for level := 0; level < m; level++ {
if c := db.s.getCompactionRange(level, umin, umax); c != nil {
@ -518,7 +643,9 @@ func (db *DB) tableAutoCompaction() {
}
func (db *DB) tableNeedCompaction() bool {
return db.s.version_NB().needCompaction()
v := db.s.version()
defer v.release()
return v.needCompaction()
}
func (db *DB) pauseCompaction(ch chan<- struct{}) {
@ -538,7 +665,12 @@ type cIdle struct {
}
func (r cIdle) ack(err error) {
r.ackC <- err
if r.ackC != nil {
defer func() {
recover()
}()
r.ackC <- err
}
}
type cRange struct {
@ -548,29 +680,45 @@ type cRange struct {
}
func (r cRange) ack(err error) {
defer func() {
recover()
}()
if r.ackC != nil {
defer func() {
recover()
}()
r.ackC <- err
}
}
func (db *DB) compSendIdle(compC chan<- cCmd) error {
// This will trigger auto compation and/or wait for all compaction to be done.
func (db *DB) compSendIdle(compC chan<- cCmd) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
select {
case compC <- cIdle{ch}:
case err := <-db.compErrC:
return err
case err = <-db.compErrC:
return
case _, _ = <-db.closeC:
return ErrClosed
}
// Wait cmd.
return <-ch
select {
case err = <-ch:
case err = <-db.compErrC:
case _, _ = <-db.closeC:
return ErrClosed
}
return err
}
// This will trigger auto compaction but will not wait for it.
func (db *DB) compSendTrigger(compC chan<- cCmd) {
select {
case compC <- cIdle{}:
default:
}
}
// Send range compaction request.
func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
ch := make(chan error)
defer close(ch)
@ -584,19 +732,14 @@ func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err
}
// Wait cmd.
select {
case err = <-db.compErrC:
case err = <-ch:
case err = <-db.compErrC:
case _, _ = <-db.closeC:
return ErrClosed
}
return err
}
func (db *DB) compTrigger(compTriggerC chan struct{}) {
select {
case compTriggerC <- struct{}{}:
default:
}
}
func (db *DB) mCompaction() {
var x cCmd
@ -615,11 +758,14 @@ func (db *DB) mCompaction() {
for {
select {
case x = <-db.mcompCmdC:
db.memCompaction()
x.ack(nil)
x = nil
case <-db.mcompTriggerC:
db.memCompaction()
switch x.(type) {
case cIdle:
db.memCompaction()
x.ack(nil)
x = nil
default:
panic("leveldb: unknown command")
}
case _, _ = <-db.closeC:
return
}
@ -650,7 +796,6 @@ func (db *DB) tCompaction() {
if db.tableNeedCompaction() {
select {
case x = <-db.tcompCmdC:
case <-db.tcompTriggerC:
case ch := <-db.tcompPauseC:
db.pauseCompaction(ch)
continue
@ -666,7 +811,6 @@ func (db *DB) tCompaction() {
ackQ = ackQ[:0]
select {
case x = <-db.tcompCmdC:
case <-db.tcompTriggerC:
case ch := <-db.tcompPauseC:
db.pauseCompaction(ch)
continue
@ -681,6 +825,8 @@ func (db *DB) tCompaction() {
case cRange:
db.tableRangeCompaction(cmd.level, cmd.min, cmd.max)
x.ack(nil)
default:
panic("leveldb: unknown command")
}
x = nil
}

View File

@ -48,7 +48,7 @@ func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.It
i = append(i, fmi)
}
i = append(i, ti...)
strict := db.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator)
strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
mi := iterator.NewMergedIterator(i, db.s.icmp, strict)
mi.SetReleaser(&versionReleaser{v: v})
return mi
@ -59,10 +59,10 @@ func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *d
if slice != nil {
islice = &util.Range{}
if slice.Start != nil {
islice.Start = newIKey(slice.Start, kMaxSeq, tSeek)
islice.Start = newIkey(slice.Start, kMaxSeq, ktSeek)
}
if slice.Limit != nil {
islice.Limit = newIKey(slice.Limit, kMaxSeq, tSeek)
islice.Limit = newIkey(slice.Limit, kMaxSeq, ktSeek)
}
}
rawIter := db.newRawIterator(islice, ro)
@ -71,7 +71,7 @@ func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *d
icmp: db.s.icmp,
iter: rawIter,
seq: seq,
strict: db.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator),
strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader),
key: make([]byte, 0),
value: make([]byte, 0),
}
@ -162,7 +162,7 @@ func (i *dbIter) Seek(key []byte) bool {
return false
}
ikey := newIKey(key, i.seq, tSeek)
ikey := newIkey(key, i.seq, ktSeek)
if i.iter.Seek(ikey) {
i.dir = dirSOI
return i.next()
@ -174,15 +174,14 @@ func (i *dbIter) Seek(key []byte) bool {
func (i *dbIter) next() bool {
for {
ukey, seq, t, ok := parseIkey(i.iter.Key())
if ok {
if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil {
if seq <= i.seq {
switch t {
case tDel:
switch kt {
case ktDel:
// Skip deleted key.
i.key = append(i.key[:0], ukey...)
i.dir = dirForward
case tVal:
case ktVal:
if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
@ -192,7 +191,7 @@ func (i *dbIter) next() bool {
}
}
} else if i.strict {
i.setErr(errInvalidIkey)
i.setErr(kerr)
break
}
if !i.iter.Next() {
@ -225,20 +224,19 @@ func (i *dbIter) prev() bool {
del := true
if i.iter.Valid() {
for {
ukey, seq, t, ok := parseIkey(i.iter.Key())
if ok {
if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil {
if seq <= i.seq {
if !del && i.icmp.uCompare(ukey, i.key) < 0 {
return true
}
del = (t == tDel)
del = (kt == ktDel)
if !del {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
}
}
} else if i.strict {
i.setErr(errInvalidIkey)
i.setErr(kerr)
return false
}
if !i.iter.Prev() {
@ -267,13 +265,12 @@ func (i *dbIter) Prev() bool {
return i.Last()
case dirForward:
for i.iter.Prev() {
ukey, _, _, ok := parseIkey(i.iter.Key())
if ok {
if ukey, _, _, kerr := parseIkey(i.iter.Key()); kerr == nil {
if i.icmp.uCompare(ukey, i.key) < 0 {
goto cont
}
} else if i.strict {
i.setErr(errInvalidIkey)
i.setErr(kerr)
return false
}
}
@ -321,9 +318,13 @@ func (i *dbIter) Release() {
}
func (i *dbIter) SetReleaser(releaser util.Releaser) {
if i.dir != dirReleased {
i.releaser = releaser
if i.dir == dirReleased {
panic(util.ErrReleased)
}
if i.releaser != nil && releaser != nil {
panic(util.ErrHasReleaser)
}
i.releaser = releaser
}
func (i *dbIter) Error() error {

View File

@ -7,6 +7,8 @@
package leveldb
import (
"container/list"
"fmt"
"runtime"
"sync"
"sync/atomic"
@ -19,51 +21,41 @@ import (
type snapshotElement struct {
seq uint64
ref int
// Next and previous pointers in the doubly-linked list of elements.
next, prev *snapshotElement
}
// Initialize the snapshot.
func (db *DB) initSnapshot() {
db.snapsRoot.next = &db.snapsRoot
db.snapsRoot.prev = &db.snapsRoot
e *list.Element
}
// Acquires a snapshot, based on latest sequence.
func (db *DB) acquireSnapshot() *snapshotElement {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
seq := db.getSeq()
elem := db.snapsRoot.prev
if elem == &db.snapsRoot || elem.seq != seq {
at := db.snapsRoot.prev
next := at.next
elem = &snapshotElement{
seq: seq,
prev: at,
next: next,
if e := db.snapsList.Back(); e != nil {
se := e.Value.(*snapshotElement)
if se.seq == seq {
se.ref++
return se
} else if seq < se.seq {
panic("leveldb: sequence number is not increasing")
}
at.next = elem
next.prev = elem
}
elem.ref++
db.snapsMu.Unlock()
return elem
se := &snapshotElement{seq: seq, ref: 1}
se.e = db.snapsList.PushBack(se)
return se
}
// Releases given snapshot element.
func (db *DB) releaseSnapshot(elem *snapshotElement) {
if !db.isClosed() {
db.snapsMu.Lock()
elem.ref--
if elem.ref == 0 {
elem.prev.next = elem.next
elem.next.prev = elem.prev
elem.next = nil
elem.prev = nil
} else if elem.ref < 0 {
panic("leveldb: Snapshot: negative element reference")
}
db.snapsMu.Unlock()
func (db *DB) releaseSnapshot(se *snapshotElement) {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
se.ref--
if se.ref == 0 {
db.snapsList.Remove(se.e)
se.e = nil
} else if se.ref < 0 {
panic("leveldb: Snapshot: negative element reference")
}
}
@ -71,10 +63,11 @@ func (db *DB) releaseSnapshot(elem *snapshotElement) {
func (db *DB) minSeq() uint64 {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
elem := db.snapsRoot.prev
if elem != &db.snapsRoot {
return elem.seq
if e := db.snapsList.Front(); e != nil {
return e.Value.(*snapshotElement).seq
}
return db.getSeq()
}
@ -97,8 +90,12 @@ func (db *DB) newSnapshot() *Snapshot {
return snap
}
func (snap *Snapshot) String() string {
return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq)
}
// Get gets the value for the given key. It returns ErrNotFound if
// the DB does not contain the key.
// the DB does not contains the key.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Get returns.
@ -116,6 +113,23 @@ func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err er
return snap.db.get(key, snap.elem.seq, ro)
}
// Has returns true if the DB does contains the given key.
//
// It is safe to modify the contents of the argument after Get returns.
func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
err = snap.db.ok()
if err != nil {
return
}
snap.mu.RLock()
defer snap.mu.RUnlock()
if snap.released {
err = ErrSnapshotReleased
return
}
return snap.db.has(key, snap.elem.seq, ro)
}
// NewIterator returns an iterator for the snapshot of the uderlying DB.
// The returned iterator is not goroutine-safe, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.

File diff suppressed because it is too large Load Diff

View File

@ -7,8 +7,7 @@
package leveldb
import (
"errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage"
@ -38,7 +37,9 @@ func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) }
// Check and clean files.
func (db *DB) checkAndCleanFiles() error {
v := db.s.version_NB()
v := db.s.version()
defer v.release()
tablesMap := make(map[uint64]bool)
for _, tables := range v.tables {
for _, t := range tables {
@ -78,12 +79,14 @@ func (db *DB) checkAndCleanFiles() error {
}
if nTables != len(tablesMap) {
var missing []*storage.FileInfo
for num, present := range tablesMap {
if !present {
missing = append(missing, &storage.FileInfo{Type: storage.TypeTable, Num: num})
db.logf("db@janitor table missing @%d", num)
}
}
return ErrCorrupted{Type: MissingFiles, Err: errors.New("leveldb: table files missing")}
return errors.NewErrCorrupted(nil, &errors.ErrMissingFiles{Files: missing})
}
db.logf("db@janitor F·%d G·%d", len(files), len(rem))

View File

@ -59,7 +59,7 @@ func (db *DB) rotateMem(n int) (mem *memDB, err error) {
}
// Schedule memdb compaction.
db.compTrigger(db.mcompTriggerC)
db.compSendTrigger(db.mcompCmdC)
return
}
@ -77,12 +77,12 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
}()
nn = mem.mdb.Free()
switch {
case v.tLen(0) >= kL0_SlowdownWritesTrigger && !delayed:
case v.tLen(0) >= db.s.o.GetWriteL0SlowdownTrigger() && !delayed:
delayed = true
time.Sleep(time.Millisecond)
case nn >= n:
return false
case v.tLen(0) >= kL0_StopWritesTrigger:
case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger():
delayed = true
err = db.compSendIdle(db.tcompCmdC)
if err != nil {
@ -109,7 +109,12 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
for flush() {
}
if delayed {
db.logf("db@write delayed T·%v", time.Since(start))
db.writeDelay += time.Since(start)
db.writeDelayN++
} else if db.writeDelayN > 0 {
db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
db.writeDelay = 0
db.writeDelayN = 0
}
return
}
@ -120,28 +125,33 @@ func (db *DB) flush(n int) (mem *memDB, nn int, err error) {
// It is safe to modify the contents of the arguments after Write returns.
func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
err = db.ok()
if err != nil || b == nil || b.len() == 0 {
if err != nil || b == nil || b.Len() == 0 {
return
}
b.init(wo.GetSync())
// The write happen synchronously.
retry:
select {
case db.writeC <- b:
if <-db.writeMergedC {
return <-db.writeAckC
}
goto retry
case db.writeLockC <- struct{}{}:
case err = <-db.compPerErrC:
return
case _, _ = <-db.closeC:
return ErrClosed
}
merged := 0
danglingMerge := false
defer func() {
<-db.writeLockC
if danglingMerge {
db.writeMergedC <- false
} else {
<-db.writeLockC
}
for i := 0; i < merged; i++ {
db.writeAckC <- err
}
@ -170,7 +180,7 @@ drain:
db.writeMergedC <- true
merged++
} else {
db.writeMergedC <- false
danglingMerge = true
break drain
}
default:
@ -185,35 +195,43 @@ drain:
if b.size() >= (128 << 10) {
// Push the write batch to the journal writer
select {
case db.journalC <- b:
// Write into memdb
if berr := b.memReplay(mem.mdb); berr != nil {
panic(berr)
}
case err = <-db.compPerErrC:
return
case _, _ = <-db.closeC:
err = ErrClosed
return
case db.journalC <- b:
// Write into memdb
b.memReplay(mem.mdb)
}
// Wait for journal writer
select {
case _, _ = <-db.closeC:
err = ErrClosed
return
case err = <-db.journalAckC:
if err != nil {
// Revert memdb if error detected
b.revertMemReplay(mem.mdb)
if berr := b.revertMemReplay(mem.mdb); berr != nil {
panic(berr)
}
return
}
case _, _ = <-db.closeC:
err = ErrClosed
return
}
} else {
err = db.writeJournal(b)
if err != nil {
return
}
b.memReplay(mem.mdb)
if berr := b.memReplay(mem.mdb); berr != nil {
panic(berr)
}
}
// Set last seq number.
db.addSeq(uint64(b.len()))
db.addSeq(uint64(b.Len()))
if b.size() >= memFree {
db.rotateMem(0)
@ -262,8 +280,11 @@ func (db *DB) CompactRange(r util.Range) error {
return err
}
// Lock writer.
select {
case db.writeLockC <- struct{}{}:
case err := <-db.compPerErrC:
return err
case _, _ = <-db.closeC:
return ErrClosed
}

View File

@ -7,32 +7,12 @@
package leveldb
import (
"errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
)
var (
ErrNotFound = util.ErrNotFound
ErrNotFound = errors.ErrNotFound
ErrSnapshotReleased = errors.New("leveldb: snapshot released")
ErrIterReleased = errors.New("leveldb: iterator released")
ErrClosed = errors.New("leveldb: closed")
)
type CorruptionType int
const (
CorruptedManifest CorruptionType = iota
MissingFiles
)
// ErrCorrupted is the type that wraps errors that indicate corruption in
// the database.
type ErrCorrupted struct {
Type CorruptionType
Err error
}
func (e ErrCorrupted) Error() string {
return e.Err.Error()
}

View File

@ -0,0 +1,76 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package errors provides common error types used throughout leveldb.
package errors
import (
"errors"
"fmt"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
var (
ErrNotFound = New("leveldb: not found")
ErrReleased = util.ErrReleased
ErrHasReleaser = util.ErrHasReleaser
)
// New returns an error that formats as the given text.
func New(text string) error {
return errors.New(text)
}
// ErrCorrupted is the type that wraps errors that indicate corruption in
// the database.
type ErrCorrupted struct {
File *storage.FileInfo
Err error
}
func (e *ErrCorrupted) Error() string {
if e.File != nil {
return fmt.Sprintf("%v [file=%v]", e.Err, e.File)
} else {
return e.Err.Error()
}
}
// NewErrCorrupted creates new ErrCorrupted error.
func NewErrCorrupted(f storage.File, err error) error {
return &ErrCorrupted{storage.NewFileInfo(f), err}
}
// IsCorrupted returns a boolean indicating whether the error is indicating
// a corruption.
func IsCorrupted(err error) bool {
switch err.(type) {
case *ErrCorrupted:
return true
}
return false
}
// ErrMissingFiles is the type that indicating a corruption due to missing
// files.
type ErrMissingFiles struct {
Files []*storage.FileInfo
}
func (e *ErrMissingFiles) Error() string { return "file missing" }
// SetFile sets 'file info' of the given error with the given file.
// Currently only ErrCorrupted is supported, otherwise will do nothing.
func SetFile(err error, f storage.File) error {
switch x := err.(type) {
case *ErrCorrupted:
x.File = storage.NewFileInfo(f)
return x
}
return err
}

View File

@ -19,11 +19,12 @@ var _ = testutil.Defer(func() {
o := &opt.Options{
BlockCache: opt.NoCache,
BlockRestartInterval: 5,
BlockSize: 50,
BlockSize: 80,
Compression: opt.NoCompression,
CachedOpenFiles: -1,
Strict: opt.StrictAll,
WriteBuffer: 1000,
CompactionTableSize: 2000,
}
Describe("write test", func() {
@ -40,18 +41,17 @@ var _ = testutil.Defer(func() {
})
Describe("read test", func() {
testutil.AllKeyValueTesting(nil, func(kv testutil.KeyValue) testutil.DB {
testutil.AllKeyValueTesting(nil, nil, func(kv testutil.KeyValue) testutil.DB {
// Building the DB.
db := newTestingDB(o, nil, nil)
kv.IterateShuffled(nil, func(i int, key, value []byte) {
err := db.TestPut(key, value)
Expect(err).NotTo(HaveOccurred())
})
testutil.Defer("teardown", func() {
db.TestClose()
})
return db
}, func(db testutil.DB) {
db.(*testingDB).TestClose()
})
})
})

View File

@ -40,13 +40,19 @@ type basicArrayIterator struct {
util.BasicReleaser
array BasicArray
pos int
err error
}
func (i *basicArrayIterator) Valid() bool {
return i.pos >= 0 && i.pos < i.array.Len()
return i.pos >= 0 && i.pos < i.array.Len() && !i.Released()
}
func (i *basicArrayIterator) First() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
if i.array.Len() == 0 {
i.pos = -1
return false
@ -56,6 +62,11 @@ func (i *basicArrayIterator) First() bool {
}
func (i *basicArrayIterator) Last() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
n := i.array.Len()
if n == 0 {
i.pos = 0
@ -66,6 +77,11 @@ func (i *basicArrayIterator) Last() bool {
}
func (i *basicArrayIterator) Seek(key []byte) bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
n := i.array.Len()
if n == 0 {
i.pos = 0
@ -79,6 +95,11 @@ func (i *basicArrayIterator) Seek(key []byte) bool {
}
func (i *basicArrayIterator) Next() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.pos++
if n := i.array.Len(); i.pos >= n {
i.pos = n
@ -88,6 +109,11 @@ func (i *basicArrayIterator) Next() bool {
}
func (i *basicArrayIterator) Prev() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.pos--
if i.pos < 0 {
i.pos = -1
@ -96,7 +122,7 @@ func (i *basicArrayIterator) Prev() bool {
return true
}
func (i *basicArrayIterator) Error() error { return nil }
func (i *basicArrayIterator) Error() error { return i.err }
type arrayIterator struct {
basicArrayIterator

View File

@ -7,6 +7,7 @@
package iterator
import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
@ -22,13 +23,13 @@ type IteratorIndexer interface {
type indexedIterator struct {
util.BasicReleaser
index IteratorIndexer
strict bool
strictGet bool
index IteratorIndexer
strict bool
data Iterator
err error
errf func(err error)
data Iterator
err error
errf func(err error)
closed bool
}
func (i *indexedIterator) setData() {
@ -36,11 +37,6 @@ func (i *indexedIterator) setData() {
i.data.Release()
}
i.data = i.index.Get()
if i.strictGet {
if err := i.data.Error(); err != nil {
i.err = err
}
}
}
func (i *indexedIterator) clearData() {
@ -50,14 +46,21 @@ func (i *indexedIterator) clearData() {
i.data = nil
}
func (i *indexedIterator) dataErr() bool {
if i.errf != nil {
if err := i.data.Error(); err != nil {
func (i *indexedIterator) indexErr() {
if err := i.index.Error(); err != nil {
if i.errf != nil {
i.errf(err)
}
i.err = err
}
if i.strict {
if err := i.data.Error(); err != nil {
}
func (i *indexedIterator) dataErr() bool {
if err := i.data.Error(); err != nil {
if i.errf != nil {
i.errf(err)
}
if i.strict || !errors.IsCorrupted(err) {
i.err = err
return true
}
@ -72,9 +75,13 @@ func (i *indexedIterator) Valid() bool {
func (i *indexedIterator) First() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
if !i.index.First() {
i.indexErr()
i.clearData()
return false
}
@ -85,9 +92,13 @@ func (i *indexedIterator) First() bool {
func (i *indexedIterator) Last() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
if !i.index.Last() {
i.indexErr()
i.clearData()
return false
}
@ -105,9 +116,13 @@ func (i *indexedIterator) Last() bool {
func (i *indexedIterator) Seek(key []byte) bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
if !i.index.Seek(key) {
i.indexErr()
i.clearData()
return false
}
@ -125,6 +140,9 @@ func (i *indexedIterator) Seek(key []byte) bool {
func (i *indexedIterator) Next() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
switch {
@ -136,6 +154,7 @@ func (i *indexedIterator) Next() bool {
fallthrough
case i.data == nil:
if !i.index.Next() {
i.indexErr()
return false
}
i.setData()
@ -147,6 +166,9 @@ func (i *indexedIterator) Next() bool {
func (i *indexedIterator) Prev() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
switch {
@ -158,6 +180,7 @@ func (i *indexedIterator) Prev() bool {
fallthrough
case i.data == nil:
if !i.index.Prev() {
i.indexErr()
return false
}
i.setData()
@ -206,16 +229,14 @@ func (i *indexedIterator) SetErrorCallback(f func(err error)) {
i.errf = f
}
// NewIndexedIterator returns an indexed iterator. An index is iterator
// that returns another iterator, a data iterator. A data iterator is the
// NewIndexedIterator returns an 'indexed iterator'. An index is iterator
// that returns another iterator, a 'data iterator'. A 'data iterator' is the
// iterator that contains actual key/value pairs.
//
// If strict is true then error yield by data iterator will halt the indexed
// iterator, on contrary if strict is false then the indexed iterator will
// ignore those error and move on to the next index. If strictGet is true and
// index.Get() yield an 'error iterator' then the indexed iterator will be halted.
// An 'error iterator' is iterator which its Error() method always return non-nil
// even before any 'seeks method' is called.
func NewIndexedIterator(index IteratorIndexer, strict, strictGet bool) Iterator {
return &indexedIterator{index: index, strict: strict, strictGet: strictGet}
// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true)
// won't be ignored and will halt 'indexed iterator', otherwise the iterator will
// continue to the next 'data iterator'. Corruption on 'index iterator' will not be
// ignored and will halt the iterator.
func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator {
return &indexedIterator{index: index, strict: strict}
}

View File

@ -65,7 +65,7 @@ var _ = testutil.Defer(func() {
// Test the iterator.
t := testutil.IteratorTesting{
KeyValue: kv.Clone(),
Iter: NewIndexedIterator(NewArrayIndexer(index), true, true),
Iter: NewIndexedIterator(NewArrayIndexer(index), true),
}
testutil.DoIteratorTesting(&t)
done <- true

View File

@ -14,6 +14,10 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
var (
ErrIterReleased = errors.New("leveldb/iterator: iterator released")
)
// IteratorSeeker is the interface that wraps the 'seeks method'.
type IteratorSeeker interface {
// First moves the iterator to the first key/value pair. If the iterator
@ -100,28 +104,13 @@ type ErrorCallbackSetter interface {
}
type emptyIterator struct {
releaser util.Releaser
released bool
err error
util.BasicReleaser
err error
}
func (i *emptyIterator) rErr() {
if i.err == nil && i.released {
i.err = errors.New("leveldb/iterator: iterator released")
}
}
func (i *emptyIterator) Release() {
if i.releaser != nil {
i.releaser.Release()
i.releaser = nil
}
i.released = true
}
func (i *emptyIterator) SetReleaser(releaser util.Releaser) {
if !i.released {
i.releaser = releaser
if i.err == nil && i.Released() {
i.err = ErrIterReleased
}
}

View File

@ -3,15 +3,9 @@ package iterator_test
import (
"testing"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil"
)
func TestIterator(t *testing.T) {
testutil.RunDefer()
RegisterFailHandler(Fail)
RunSpecs(t, "Iterator Suite")
testutil.RunSuite(t, "Iterator Suite")
}

View File

@ -7,16 +7,11 @@
package iterator
import (
"errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
var (
ErrIterReleased = errors.New("leveldb/iterator: iterator released")
)
type dir int
const (
@ -48,13 +43,11 @@ func assertKey(key []byte) []byte {
}
func (i *mergedIterator) iterErr(iter Iterator) bool {
if i.errf != nil {
if err := iter.Error(); err != nil {
if err := iter.Error(); err != nil {
if i.errf != nil {
i.errf(err)
}
}
if i.strict {
if err := iter.Error(); err != nil {
if i.strict || !errors.IsCorrupted(err) {
i.err = err
return true
}
@ -274,9 +267,13 @@ func (i *mergedIterator) Release() {
}
func (i *mergedIterator) SetReleaser(releaser util.Releaser) {
if i.dir != dirReleased {
i.releaser = releaser
if i.dir == dirReleased {
panic(util.ErrReleased)
}
if i.releaser != nil && releaser != nil {
panic(util.ErrHasReleaser)
}
i.releaser = releaser
}
func (i *mergedIterator) Error() error {
@ -294,9 +291,9 @@ func (i *mergedIterator) SetErrorCallback(f func(err error)) {
// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
// None of the iters may be nil.
//
// If strict is true then error yield by any iterators will halt the merged
// iterator, on contrary if strict is false then the merged iterator will
// ignore those error and move on to the next iterator.
// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true)
// won't be ignored and will halt 'merged iterator', otherwise the iterator will
// continue to the next 'input iterator'.
func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator {
return &mergedIterator{
iters: iters,

View File

@ -79,10 +79,10 @@ package journal
import (
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
@ -109,7 +109,7 @@ type ErrCorrupted struct {
Reason string
}
func (e ErrCorrupted) Error() string {
func (e *ErrCorrupted) Error() string {
return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size)
}
@ -162,10 +162,10 @@ var errSkip = errors.New("leveldb/journal: skipped")
func (r *Reader) corrupt(n int, reason string, skip bool) error {
if r.dropper != nil {
r.dropper.Drop(ErrCorrupted{n, reason})
r.dropper.Drop(&ErrCorrupted{n, reason})
}
if r.strict && !skip {
r.err = ErrCorrupted{n, reason}
r.err = errors.NewErrCorrupted(nil, &ErrCorrupted{n, reason})
return r.err
}
return errSkip

View File

@ -9,15 +9,30 @@ package leveldb
import (
"encoding/binary"
"fmt"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
)
type vType int
type ErrIkeyCorrupted struct {
Ikey []byte
Reason string
}
func (t vType) String() string {
switch t {
case tDel:
func (e *ErrIkeyCorrupted) Error() string {
return fmt.Sprintf("leveldb: iKey %q corrupted: %s", e.Ikey, e.Reason)
}
func newErrIkeyCorrupted(ikey []byte, reason string) error {
return errors.NewErrCorrupted(nil, &ErrIkeyCorrupted{append([]byte{}, ikey...), reason})
}
type kType int
func (kt kType) String() string {
switch kt {
case ktDel:
return "d"
case tVal:
case ktVal:
return "v"
}
return "x"
@ -26,16 +41,16 @@ func (t vType) String() string {
// Value types encoded as the last component of internal keys.
// Don't modify; this value are saved to disk.
const (
tDel vType = iota
tVal
ktDel kType = iota
ktVal
)
// tSeek defines the vType that should be passed when constructing an
// ktSeek defines the kType that should be passed when constructing an
// internal key for seeking to a particular sequence number (since we
// sort sequence numbers in decreasing order and the value type is
// embedded as the low 8 bits in the sequence number in internal keys,
// we need to use the highest-numbered ValueType, not the lowest).
const tSeek = tVal
const ktSeek = ktVal
const (
// Maximum value possible for sequence number; the 8-bits are
@ -43,7 +58,7 @@ const (
// 64-bit integer.
kMaxSeq uint64 = (uint64(1) << 56) - 1
// Maximum value possible for packed sequence number and type.
kMaxNum uint64 = (kMaxSeq << 8) | uint64(tSeek)
kMaxNum uint64 = (kMaxSeq << 8) | uint64(ktSeek)
)
// Maximum number encoded in bytes.
@ -55,85 +70,73 @@ func init() {
type iKey []byte
func newIKey(ukey []byte, seq uint64, t vType) iKey {
if seq > kMaxSeq || t > tVal {
panic("invalid seq number or value type")
func newIkey(ukey []byte, seq uint64, kt kType) iKey {
if seq > kMaxSeq {
panic("leveldb: invalid sequence number")
} else if kt > ktVal {
panic("leveldb: invalid type")
}
b := make(iKey, len(ukey)+8)
copy(b, ukey)
binary.LittleEndian.PutUint64(b[len(ukey):], (seq<<8)|uint64(t))
return b
ik := make(iKey, len(ukey)+8)
copy(ik, ukey)
binary.LittleEndian.PutUint64(ik[len(ukey):], (seq<<8)|uint64(kt))
return ik
}
func parseIkey(p []byte) (ukey []byte, seq uint64, t vType, ok bool) {
if len(p) < 8 {
return
func parseIkey(ik []byte) (ukey []byte, seq uint64, kt kType, err error) {
if len(ik) < 8 {
return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid length")
}
num := binary.LittleEndian.Uint64(p[len(p)-8:])
seq, t = uint64(num>>8), vType(num&0xff)
if t > tVal {
return
num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
seq, kt = uint64(num>>8), kType(num&0xff)
if kt > ktVal {
return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid type")
}
ukey = p[:len(p)-8]
ok = true
ukey = ik[:len(ik)-8]
return
}
func validIkey(p []byte) bool {
_, _, _, ok := parseIkey(p)
return ok
func validIkey(ik []byte) bool {
_, _, _, err := parseIkey(ik)
return err == nil
}
func (p iKey) assert() {
if p == nil {
panic("nil iKey")
func (ik iKey) assert() {
if ik == nil {
panic("leveldb: nil iKey")
}
if len(p) < 8 {
panic(fmt.Sprintf("invalid iKey %q, len=%d", []byte(p), len(p)))
if len(ik) < 8 {
panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid length", []byte(ik), len(ik)))
}
}
func (p iKey) ok() bool {
if len(p) < 8 {
return false
}
_, _, ok := p.parseNum()
return ok
func (ik iKey) ukey() []byte {
ik.assert()
return ik[:len(ik)-8]
}
func (p iKey) ukey() []byte {
p.assert()
return p[:len(p)-8]
func (ik iKey) num() uint64 {
ik.assert()
return binary.LittleEndian.Uint64(ik[len(ik)-8:])
}
func (p iKey) num() uint64 {
p.assert()
return binary.LittleEndian.Uint64(p[len(p)-8:])
}
func (p iKey) parseNum() (seq uint64, t vType, ok bool) {
if p == nil {
panic("nil iKey")
func (ik iKey) parseNum() (seq uint64, kt kType) {
num := ik.num()
seq, kt = uint64(num>>8), kType(num&0xff)
if kt > ktVal {
panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
}
if len(p) < 8 {
return
}
num := p.num()
seq, t = uint64(num>>8), vType(num&0xff)
if t > tVal {
return 0, 0, false
}
ok = true
return
}
func (p iKey) String() string {
if len(p) == 0 {
func (ik iKey) String() string {
if ik == nil {
return "<nil>"
}
if seq, t, ok := p.parseNum(); ok {
return fmt.Sprintf("%s,%s%d", shorten(string(p.ukey())), t, seq)
if ukey, seq, kt, err := parseIkey(ik); err == nil {
return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
} else {
return "<invalid>"
}
return "<invalid>"
}

View File

@ -15,8 +15,8 @@ import (
var defaultIComparer = &iComparer{comparer.DefaultComparer}
func ikey(key string, seq uint64, t vType) iKey {
return newIKey([]byte(key), uint64(seq), t)
func ikey(key string, seq uint64, kt kType) iKey {
return newIkey([]byte(key), uint64(seq), kt)
}
func shortSep(a, b []byte) []byte {
@ -37,27 +37,37 @@ func shortSuccessor(b []byte) []byte {
return dst
}
func testSingleKey(t *testing.T, key string, seq uint64, vt vType) {
ik := ikey(key, seq, vt)
func testSingleKey(t *testing.T, key string, seq uint64, kt kType) {
ik := ikey(key, seq, kt)
if !bytes.Equal(ik.ukey(), []byte(key)) {
t.Errorf("user key does not equal, got %v, want %v", string(ik.ukey()), key)
}
if rseq, rt, ok := ik.parseNum(); ok {
rseq, rt := ik.parseNum()
if rseq != seq {
t.Errorf("seq number does not equal, got %v, want %v", rseq, seq)
}
if rt != kt {
t.Errorf("type does not equal, got %v, want %v", rt, kt)
}
if rukey, rseq, rt, kerr := parseIkey(ik); kerr == nil {
if !bytes.Equal(rukey, []byte(key)) {
t.Errorf("user key does not equal, got %v, want %v", string(ik.ukey()), key)
}
if rseq != seq {
t.Errorf("seq number does not equal, got %v, want %v", rseq, seq)
}
if rt != vt {
t.Errorf("type does not equal, got %v, want %v", rt, vt)
if rt != kt {
t.Errorf("type does not equal, got %v, want %v", rt, kt)
}
} else {
t.Error("cannot parse seq and type")
t.Errorf("key error: %v", kerr)
}
}
func TestIKey_EncodeDecode(t *testing.T) {
func TestIkey_EncodeDecode(t *testing.T) {
keys := []string{"", "k", "hello", "longggggggggggggggggggggg"}
seqs := []uint64{
1, 2, 3,
@ -67,8 +77,8 @@ func TestIKey_EncodeDecode(t *testing.T) {
}
for _, key := range keys {
for _, seq := range seqs {
testSingleKey(t, key, seq, tVal)
testSingleKey(t, "hello", 1, tDel)
testSingleKey(t, key, seq, ktVal)
testSingleKey(t, "hello", 1, ktDel)
}
}
}
@ -79,45 +89,45 @@ func assertBytes(t *testing.T, want, got []byte) {
}
}
func TestIKeyShortSeparator(t *testing.T) {
func TestIkeyShortSeparator(t *testing.T) {
// When user keys are same
assertBytes(t, ikey("foo", 100, tVal),
shortSep(ikey("foo", 100, tVal),
ikey("foo", 99, tVal)))
assertBytes(t, ikey("foo", 100, tVal),
shortSep(ikey("foo", 100, tVal),
ikey("foo", 101, tVal)))
assertBytes(t, ikey("foo", 100, tVal),
shortSep(ikey("foo", 100, tVal),
ikey("foo", 100, tVal)))
assertBytes(t, ikey("foo", 100, tVal),
shortSep(ikey("foo", 100, tVal),
ikey("foo", 100, tDel)))
assertBytes(t, ikey("foo", 100, ktVal),
shortSep(ikey("foo", 100, ktVal),
ikey("foo", 99, ktVal)))
assertBytes(t, ikey("foo", 100, ktVal),
shortSep(ikey("foo", 100, ktVal),
ikey("foo", 101, ktVal)))
assertBytes(t, ikey("foo", 100, ktVal),
shortSep(ikey("foo", 100, ktVal),
ikey("foo", 100, ktVal)))
assertBytes(t, ikey("foo", 100, ktVal),
shortSep(ikey("foo", 100, ktVal),
ikey("foo", 100, ktDel)))
// When user keys are misordered
assertBytes(t, ikey("foo", 100, tVal),
shortSep(ikey("foo", 100, tVal),
ikey("bar", 99, tVal)))
assertBytes(t, ikey("foo", 100, ktVal),
shortSep(ikey("foo", 100, ktVal),
ikey("bar", 99, ktVal)))
// When user keys are different, but correctly ordered
assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek),
shortSep(ikey("foo", 100, tVal),
ikey("hello", 200, tVal)))
assertBytes(t, ikey("g", uint64(kMaxSeq), ktSeek),
shortSep(ikey("foo", 100, ktVal),
ikey("hello", 200, ktVal)))
// When start user key is prefix of limit user key
assertBytes(t, ikey("foo", 100, tVal),
shortSep(ikey("foo", 100, tVal),
ikey("foobar", 200, tVal)))
assertBytes(t, ikey("foo", 100, ktVal),
shortSep(ikey("foo", 100, ktVal),
ikey("foobar", 200, ktVal)))
// When limit user key is prefix of start user key
assertBytes(t, ikey("foobar", 100, tVal),
shortSep(ikey("foobar", 100, tVal),
ikey("foo", 200, tVal)))
assertBytes(t, ikey("foobar", 100, ktVal),
shortSep(ikey("foobar", 100, ktVal),
ikey("foo", 200, ktVal)))
}
func TestIKeyShortestSuccessor(t *testing.T) {
assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek),
shortSuccessor(ikey("foo", 100, tVal)))
assertBytes(t, ikey("\xff\xff", 100, tVal),
shortSuccessor(ikey("\xff\xff", 100, tVal)))
func TestIkeyShortestSuccessor(t *testing.T) {
assertBytes(t, ikey("g", uint64(kMaxSeq), ktSeek),
shortSuccessor(ikey("foo", 100, ktVal)))
assertBytes(t, ikey("\xff\xff", 100, ktVal),
shortSuccessor(ikey("\xff\xff", 100, ktVal)))
}

View File

@ -3,18 +3,9 @@ package leveldb
import (
"testing"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil"
)
func TestLeveldb(t *testing.T) {
testutil.RunDefer()
RegisterFailHandler(Fail)
RunSpecs(t, "Leveldb Suite")
RegisterTestingT(t)
testutil.RunDefer("teardown")
func TestLevelDB(t *testing.T) {
testutil.RunSuite(t, "LevelDB Suite")
}

View File

@ -12,12 +12,14 @@ import (
"sync"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
var (
ErrNotFound = util.ErrNotFound
ErrNotFound = errors.ErrNotFound
ErrIterReleased = errors.New("leveldb/memdb: iterator released")
)
const tMaxHeight = 12
@ -29,6 +31,7 @@ type dbIter struct {
node int
forward bool
key, value []byte
err error
}
func (i *dbIter) fill(checkStart, checkLimit bool) bool {
@ -59,6 +62,11 @@ func (i *dbIter) Valid() bool {
}
func (i *dbIter) First() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
@ -71,9 +79,11 @@ func (i *dbIter) First() bool {
}
func (i *dbIter) Last() bool {
if i.p == nil {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.forward = false
i.p.mu.RLock()
defer i.p.mu.RUnlock()
@ -86,9 +96,11 @@ func (i *dbIter) Last() bool {
}
func (i *dbIter) Seek(key []byte) bool {
if i.p == nil {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
@ -100,9 +112,11 @@ func (i *dbIter) Seek(key []byte) bool {
}
func (i *dbIter) Next() bool {
if i.p == nil {
if i.Released() {
i.err = ErrIterReleased
return false
}
if i.node == 0 {
if !i.forward {
return i.First()
@ -117,9 +131,11 @@ func (i *dbIter) Next() bool {
}
func (i *dbIter) Prev() bool {
if i.p == nil {
if i.Released() {
i.err = ErrIterReleased
return false
}
if i.node == 0 {
if i.forward {
return i.Last()
@ -141,10 +157,10 @@ func (i *dbIter) Value() []byte {
return i.value
}
func (i *dbIter) Error() error { return nil }
func (i *dbIter) Error() error { return i.err }
func (i *dbIter) Release() {
if i.p != nil {
if !i.Released() {
i.p = nil
i.node = 0
i.key = nil

View File

@ -3,15 +3,9 @@ package memdb
import (
"testing"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil"
)
func TestMemdb(t *testing.T) {
testutil.RunDefer()
RegisterFailHandler(Fail)
RunSpecs(t, "Memdb Suite")
func TestMemDB(t *testing.T) {
testutil.RunSuite(t, "MemDB Suite")
}

View File

@ -129,7 +129,7 @@ var _ = testutil.Defer(func() {
}
return db
})
}, nil, nil)
})
})
})

View File

@ -11,6 +11,7 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
"math"
)
const (
@ -20,12 +21,24 @@ const (
)
const (
DefaultBlockCacheSize = 8 * MiB
DefaultBlockRestartInterval = 16
DefaultBlockSize = 4 * KiB
DefaultCompressionType = SnappyCompression
DefaultCachedOpenFiles = 500
DefaultWriteBuffer = 4 * MiB
DefaultBlockCacheSize = 8 * MiB
DefaultBlockRestartInterval = 16
DefaultBlockSize = 4 * KiB
DefaultCompactionExpandLimitFactor = 25
DefaultCompactionGPOverlapsFactor = 10
DefaultCompactionL0Trigger = 4
DefaultCompactionSourceLimitFactor = 1
DefaultCompactionTableSize = 2 * MiB
DefaultCompactionTableSizeMultiplier = 1.0
DefaultCompactionTotalSize = 10 * MiB
DefaultCompactionTotalSizeMultiplier = 10.0
DefaultCompressionType = SnappyCompression
DefaultCachedOpenFiles = 500
DefaultMaxMemCompationLevel = 2
DefaultNumLevel = 7
DefaultWriteBuffer = 4 * MiB
DefaultWriteL0PauseTrigger = 12
DefaultWriteL0SlowdownTrigger = 8
)
type noCache struct{}
@ -65,34 +78,47 @@ const (
nCompression
)
// Strict is the DB strict level.
// Strict is the DB 'strict level'.
type Strict uint
const (
// If present then a corrupted or invalid chunk or block in manifest
// journal will cause an error istead of being dropped.
// journal will cause an error instead of being dropped.
// This will prevent database with corrupted manifest to be opened.
StrictManifest Strict = 1 << iota
// If present then a corrupted or invalid chunk or block in journal
// will cause an error istead of being dropped.
StrictJournal
// If present then journal chunk checksum will be verified.
StrictJournalChecksum
// If present then an invalid key/value pair will cause an error
// instead of being skipped.
StrictIterator
// If present then a corrupted or invalid chunk or block in journal
// will cause an error instead of being dropped.
// This will prevent database with corrupted journal to be opened.
StrictJournal
// If present then 'sorted table' block checksum will be verified.
// This has effect on both 'read operation' and compaction.
StrictBlockChecksum
// If present then a corrupted 'sorted table' will fails compaction.
// The database will enter read-only mode.
StrictCompaction
// If present then a corrupted 'sorted table' will halts 'read operation'.
StrictReader
// If present then leveldb.Recover will drop corrupted 'sorted table'.
StrictRecovery
// This only applicable for ReadOptions, if present then this ReadOptions
// 'strict level' will override global ones.
StrictOverride
// StrictAll enables all strict flags.
StrictAll = StrictManifest | StrictJournal | StrictJournalChecksum | StrictIterator | StrictBlockChecksum
StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery
// DefaultStrict is the default strict flags. Specify any strict flags
// will override default strict flags as whole (i.e. not OR'ed).
DefaultStrict = StrictJournalChecksum | StrictBlockChecksum
DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader
// NoStrict disables all strict flags. Override default strict flags.
NoStrict = ^StrictAll
@ -110,9 +136,14 @@ type Options struct {
// BlockCache provides per-block caching for LevelDB. Specify NoCache to
// disable block caching.
//
// By default LevelDB will create LRU-cache with capacity of 8MiB.
// By default LevelDB will create LRU-cache with capacity of BlockCacheSize.
BlockCache cache.Cache
// BlockCacheSize defines the capacity of the default 'block cache'.
//
// The default value is 8MiB.
BlockCacheSize int
// BlockRestartInterval is the number of keys between restart points for
// delta encoding of keys.
//
@ -132,6 +163,73 @@ type Options struct {
// The default value is 500.
CachedOpenFiles int
// CompactionExpandLimitFactor limits compaction size after expanded.
// This will be multiplied by table size limit at compaction target level.
//
// The default value is 25.
CompactionExpandLimitFactor int
// CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a
// single 'sorted table' generates.
// This will be multiplied by table size limit at grandparent level.
//
// The default value is 10.
CompactionGPOverlapsFactor int
// CompactionL0Trigger defines number of 'sorted table' at level-0 that will
// trigger compaction.
//
// The default value is 4.
CompactionL0Trigger int
// CompactionSourceLimitFactor limits compaction source size. This doesn't apply to
// level-0.
// This will be multiplied by table size limit at compaction target level.
//
// The default value is 1.
CompactionSourceLimitFactor int
// CompactionTableSize limits size of 'sorted table' that compaction generates.
// The limits for each level will be calculated as:
// CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
// The multiplier for each level can also fine-tuned using CompactionTableSizeMultiplierPerLevel.
//
// The default value is 2MiB.
CompactionTableSize int
// CompactionTableSizeMultiplier defines multiplier for CompactionTableSize.
//
// The default value is 1.
CompactionTableSizeMultiplier float64
// CompactionTableSizeMultiplierPerLevel defines per-level multiplier for
// CompactionTableSize.
// Use zero to skip a level.
//
// The default value is nil.
CompactionTableSizeMultiplierPerLevel []float64
// CompactionTotalSize limits total size of 'sorted table' for each level.
// The limits for each level will be calculated as:
// CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
// The multiplier for each level can also fine-tuned using
// CompactionTotalSizeMultiplierPerLevel.
//
// The default value is 10MiB.
CompactionTotalSize int
// CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize.
//
// The default value is 10.
CompactionTotalSizeMultiplier float64
// CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for
// CompactionTotalSize.
// Use zero to skip a level.
//
// The default value is nil.
CompactionTotalSizeMultiplierPerLevel []float64
// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship. The same comparison algorithm must be used for reads
// and writes over the lifetime of the DB.
@ -144,6 +242,11 @@ type Options struct {
// The default value (DefaultCompression) uses snappy compression.
Compression Compression
// DisableCompactionBackoff allows disable compaction retry backoff.
//
// The default value is false.
DisableCompactionBackoff bool
// ErrorIfExist defines whether an error should returned if the DB already
// exist.
//
@ -172,6 +275,19 @@ type Options struct {
// The default value is nil.
Filter filter.Filter
// MaxMemCompationLevel defines maximum level a newly compacted 'memdb'
// will be pushed into if doesn't creates overlap. This should less than
// NumLevel. Use -1 for level-0.
//
// The default is 2.
MaxMemCompationLevel int
// NumLevel defines number of database level. The level shouldn't changed
// between opens, or the database will panic.
//
// The default is 7.
NumLevel int
// Strict defines the DB strict level.
Strict Strict
@ -183,6 +299,18 @@ type Options struct {
//
// The default value is 4MiB.
WriteBuffer int
// WriteL0StopTrigger defines number of 'sorted table' at level-0 that will
// pause write.
//
// The default value is 12.
WriteL0PauseTrigger int
// WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that
// will trigger write slowdown.
//
// The default value is 8.
WriteL0SlowdownTrigger int
}
func (o *Options) GetAltFilters() []filter.Filter {
@ -199,6 +327,13 @@ func (o *Options) GetBlockCache() cache.Cache {
return o.BlockCache
}
func (o *Options) GetBlockCacheSize() int {
if o == nil || o.BlockCacheSize <= 0 {
return DefaultBlockCacheSize
}
return o.BlockCacheSize
}
func (o *Options) GetBlockRestartInterval() int {
if o == nil || o.BlockRestartInterval <= 0 {
return DefaultBlockRestartInterval
@ -222,6 +357,79 @@ func (o *Options) GetCachedOpenFiles() int {
return o.CachedOpenFiles
}
func (o *Options) GetCompactionExpandLimit(level int) int {
factor := DefaultCompactionExpandLimitFactor
if o != nil && o.CompactionExpandLimitFactor > 0 {
factor = o.CompactionExpandLimitFactor
}
return o.GetCompactionTableSize(level+1) * factor
}
func (o *Options) GetCompactionGPOverlaps(level int) int {
factor := DefaultCompactionGPOverlapsFactor
if o != nil && o.CompactionGPOverlapsFactor > 0 {
factor = o.CompactionGPOverlapsFactor
}
return o.GetCompactionTableSize(level+2) * factor
}
func (o *Options) GetCompactionL0Trigger() int {
if o == nil || o.CompactionL0Trigger == 0 {
return DefaultCompactionL0Trigger
}
return o.CompactionL0Trigger
}
func (o *Options) GetCompactionSourceLimit(level int) int {
factor := DefaultCompactionSourceLimitFactor
if o != nil && o.CompactionSourceLimitFactor > 0 {
factor = o.CompactionSourceLimitFactor
}
return o.GetCompactionTableSize(level+1) * factor
}
func (o *Options) GetCompactionTableSize(level int) int {
var (
base = DefaultCompactionTableSize
mult float64
)
if o != nil {
if o.CompactionTableSize > 0 {
base = o.CompactionTableSize
}
if len(o.CompactionTableSizeMultiplierPerLevel) > level && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
mult = o.CompactionTableSizeMultiplierPerLevel[level]
} else if o.CompactionTableSizeMultiplier > 0 {
mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
}
}
if mult == 0 {
mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level))
}
return int(float64(base) * mult)
}
func (o *Options) GetCompactionTotalSize(level int) int64 {
var (
base = DefaultCompactionTotalSize
mult float64
)
if o != nil {
if o.CompactionTotalSize > 0 {
base = o.CompactionTotalSize
}
if len(o.CompactionTotalSizeMultiplierPerLevel) > level && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
mult = o.CompactionTotalSizeMultiplierPerLevel[level]
} else if o.CompactionTotalSizeMultiplier > 0 {
mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
}
}
if mult == 0 {
mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level))
}
return int64(float64(base) * mult)
}
func (o *Options) GetComparer() comparer.Comparer {
if o == nil || o.Comparer == nil {
return comparer.DefaultComparer
@ -236,6 +444,13 @@ func (o *Options) GetCompression() Compression {
return o.Compression
}
func (o *Options) GetDisableCompactionBackoff() bool {
if o == nil {
return false
}
return o.DisableCompactionBackoff
}
func (o *Options) GetErrorIfExist() bool {
if o == nil {
return false
@ -257,6 +472,28 @@ func (o *Options) GetFilter() filter.Filter {
return o.Filter
}
func (o *Options) GetMaxMemCompationLevel() int {
level := DefaultMaxMemCompationLevel
if o != nil {
if o.MaxMemCompationLevel > 0 {
level = o.MaxMemCompationLevel
} else if o.MaxMemCompationLevel == -1 {
level = 0
}
}
if level >= o.GetNumLevel() {
return o.GetNumLevel() - 1
}
return level
}
func (o *Options) GetNumLevel() int {
if o == nil || o.NumLevel <= 0 {
return DefaultNumLevel
}
return o.NumLevel
}
func (o *Options) GetStrict(strict Strict) bool {
if o == nil || o.Strict == 0 {
return DefaultStrict&strict != 0
@ -271,6 +508,20 @@ func (o *Options) GetWriteBuffer() int {
return o.WriteBuffer
}
func (o *Options) GetWriteL0PauseTrigger() int {
if o == nil || o.WriteL0PauseTrigger == 0 {
return DefaultWriteL0PauseTrigger
}
return o.WriteL0PauseTrigger
}
func (o *Options) GetWriteL0SlowdownTrigger() int {
if o == nil || o.WriteL0SlowdownTrigger == 0 {
return DefaultWriteL0SlowdownTrigger
}
return o.WriteL0SlowdownTrigger
}
// ReadOptions holds the optional parameters for 'read operation'. The
// 'read operation' includes Get, Find and NewIterator.
type ReadOptions struct {
@ -281,8 +532,8 @@ type ReadOptions struct {
// The default value is false.
DontFillCache bool
// Strict overrides global DB strict level. Only StrictIterator and
// StrictBlockChecksum that does have effects here.
// Strict will be OR'ed with global DB 'strict level' unless StrictOverride
// is present. Currently only StrictReader that has effect here.
Strict Strict
}
@ -324,3 +575,11 @@ func (wo *WriteOptions) GetSync() bool {
}
return wo.Sync
}
func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool {
if ro.GetStrict(StrictOverride) {
return ro.GetStrict(strict)
} else {
return o.GetStrict(strict) || ro.GetStrict(strict)
}
}

View File

@ -12,30 +12,89 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
)
func (s *session) setOptions(o *opt.Options) {
s.o = &opt.Options{}
func dupOptions(o *opt.Options) *opt.Options {
newo := &opt.Options{}
if o != nil {
*s.o = *o
*newo = *o
}
if newo.Strict == 0 {
newo.Strict = opt.DefaultStrict
}
return newo
}
func (s *session) setOptions(o *opt.Options) {
no := dupOptions(o)
// Alternative filters.
if filters := o.GetAltFilters(); len(filters) > 0 {
s.o.AltFilters = make([]filter.Filter, len(filters))
no.AltFilters = make([]filter.Filter, len(filters))
for i, filter := range filters {
s.o.AltFilters[i] = &iFilter{filter}
no.AltFilters[i] = &iFilter{filter}
}
}
// Block cache.
switch o.GetBlockCache() {
case nil:
s.o.BlockCache = cache.NewLRUCache(opt.DefaultBlockCacheSize)
no.BlockCache = cache.NewLRUCache(o.GetBlockCacheSize())
case opt.NoCache:
s.o.BlockCache = nil
no.BlockCache = nil
}
// Comparer.
s.icmp = &iComparer{o.GetComparer()}
s.o.Comparer = s.icmp
no.Comparer = s.icmp
// Filter.
if filter := o.GetFilter(); filter != nil {
s.o.Filter = &iFilter{filter}
no.Filter = &iFilter{filter}
}
s.o = &cachedOptions{Options: no}
s.o.cache()
}
type cachedOptions struct {
*opt.Options
compactionExpandLimit []int
compactionGPOverlaps []int
compactionSourceLimit []int
compactionTableSize []int
compactionTotalSize []int64
}
func (co *cachedOptions) cache() {
numLevel := co.Options.GetNumLevel()
co.compactionExpandLimit = make([]int, numLevel)
co.compactionGPOverlaps = make([]int, numLevel)
co.compactionSourceLimit = make([]int, numLevel)
co.compactionTableSize = make([]int, numLevel)
co.compactionTotalSize = make([]int64, numLevel)
for level := 0; level < numLevel; level++ {
co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level)
co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level)
co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level)
}
}
func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
return co.compactionExpandLimit[level]
}
func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
return co.compactionGPOverlaps[level]
}
func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
return co.compactionSourceLimit[level]
}
func (co *cachedOptions) GetCompactionTableSize(level int) int {
return co.compactionTableSize[level]
}
func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
return co.compactionTotalSize[level]
}

View File

@ -7,12 +7,13 @@
package leveldb
import (
"errors"
"fmt"
"io"
"os"
"sync"
"sync/atomic"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
@ -20,18 +21,31 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
type ErrManifestCorrupted struct {
Field string
Reason string
}
func (e *ErrManifestCorrupted) Error() string {
return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason)
}
func newErrManifestCorrupted(f storage.File, field, reason string) error {
return errors.NewErrCorrupted(f, &ErrManifestCorrupted{field, reason})
}
// session represent a persistent database session.
type session struct {
// Need 64-bit alignment.
stFileNum uint64 // current unused file number
stNextFileNum uint64 // current unused file number
stJournalNum uint64 // current journal file number; need external synchronization
stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb
stSeq uint64 // last mem compacted seq; need external synchronization
stSeqNum uint64 // last mem compacted seq; need external synchronization
stTempFileNum uint64
stor storage.Storage
storLock util.Releaser
o *opt.Options
o *cachedOptions
icmp *iComparer
tops *tOps
@ -39,9 +53,9 @@ type session struct {
manifestWriter storage.Writer
manifestFile storage.File
stCptrs [kNumLevels]iKey // compact pointers; need external synchronization
stVersion *version // current version
vmu sync.Mutex
stCompPtrs []iKey // compaction pointers; need external synchronization
stVersion *version // current version
vmu sync.Mutex
}
// Creates new initialized session instance.
@ -54,13 +68,14 @@ func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
return
}
s = &session{
stor: stor,
storLock: storLock,
stor: stor,
storLock: storLock,
stCompPtrs: make([]iKey, o.GetNumLevel()),
}
s.setOptions(o)
s.tops = newTableOps(s, s.o.GetCachedOpenFiles())
s.setVersion(&version{s: s})
s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock D·DeletedEntry L·Level Q·SeqNum T·TimeElapsed")
s.setVersion(newVersion(s))
s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed")
return
}
@ -100,26 +115,26 @@ func (s *session) recover() (err error) {
// Don't return os.ErrNotExist if the underlying storage contains
// other files that belong to LevelDB. So the DB won't get trashed.
if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 {
err = ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest file missing")}
err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
}
}
}()
file, err := s.stor.GetManifest()
m, err := s.stor.GetManifest()
if err != nil {
return
}
reader, err := file.Open()
reader, err := m.Open()
if err != nil {
return
}
defer reader.Close()
strict := s.o.GetStrict(opt.StrictManifest)
jr := journal.NewReader(reader, dropper{s, file}, strict, true)
jr := journal.NewReader(reader, dropper{s, m}, strict, true)
staging := s.version_NB().newStaging()
rec := &sessionRecord{}
staging := s.stVersion.newStaging()
rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
for {
var r io.Reader
r, err = jr.Next()
@ -128,51 +143,57 @@ func (s *session) recover() (err error) {
err = nil
break
}
return
return errors.SetFile(err, m)
}
err = rec.decode(r)
if err == nil {
// save compact pointers
for _, r := range rec.compactionPointers {
s.stCptrs[r.level] = iKey(r.ikey)
for _, r := range rec.compPtrs {
s.stCompPtrs[r.level] = iKey(r.ikey)
}
// commit record to version staging
staging.commit(rec)
} else if strict {
return ErrCorrupted{Type: CorruptedManifest, Err: err}
} else {
s.logf("manifest error: %v (skipped)", err)
err = errors.SetFile(err, m)
if strict || !errors.IsCorrupted(err) {
return
} else {
s.logf("manifest error: %v (skipped)", errors.SetFile(err, m))
}
}
rec.resetCompactionPointers()
rec.resetCompPtrs()
rec.resetAddedTables()
rec.resetDeletedTables()
}
switch {
case !rec.has(recComparer):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing comparer name")}
return newErrManifestCorrupted(m, "comparer", "missing")
case rec.comparer != s.icmp.uName():
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: comparer mismatch, " + "want '" + s.icmp.uName() + "', " + "got '" + rec.comparer + "'")}
case !rec.has(recNextNum):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing next file number")}
return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
case !rec.has(recNextFileNum):
return newErrManifestCorrupted(m, "next-file-num", "missing")
case !rec.has(recJournalNum):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing journal file number")}
case !rec.has(recSeq):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing seq number")}
return newErrManifestCorrupted(m, "journal-file-num", "missing")
case !rec.has(recSeqNum):
return newErrManifestCorrupted(m, "seq-num", "missing")
}
s.manifestFile = file
s.manifestFile = m
s.setVersion(staging.finish())
s.setFileNum(rec.nextNum)
s.setNextFileNum(rec.nextFileNum)
s.recordCommited(rec)
return nil
}
// Commit session; need external synchronization.
func (s *session) commit(r *sessionRecord) (err error) {
v := s.version()
defer v.release()
// spawn new version based on current version
nv := s.version_NB().spawn(r)
nv := v.spawn(r)
if s.manifest == nil {
// manifest journal writer not yet created, create one
@ -191,13 +212,13 @@ func (s *session) commit(r *sessionRecord) (err error) {
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version_NB()
v := s.version()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
cptr := s.stCptrs[level]
cptr := s.stCompPtrs[level]
tables := v.tables[level]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
@ -214,27 +235,21 @@ func (s *session) pickCompaction() *compaction {
level = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
return nil
}
}
c := &compaction{s: s, v: v, level: level}
if level == 0 {
imin, imax := t0.getRange(s.icmp)
t0 = v.tables[0].getOverlaps(t0[:0], s.icmp, imin.ukey(), imax.ukey(), true)
}
c.tables[0] = t0
c.expand()
return c
return newCompaction(s, v, level, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
v := s.version_NB()
v := s.version()
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if len(t0) == 0 {
v.release()
return nil
}
@ -243,7 +258,7 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(kMaxTableSize)
limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
for i, t := range t0 {
total += t.size
@ -255,9 +270,20 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
}
}
c := &compaction{s: s, v: v, level: level}
c.tables[0] = t0
return newCompaction(s, v, level, t0)
}
func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
level: level,
tables: [2]tFiles{t0, nil},
maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
tPtrs: make([]int, s.o.GetNumLevel()),
}
c.expand()
c.save()
return c
}
@ -266,25 +292,57 @@ type compaction struct {
s *session
v *version
level int
tables [2]tFiles
level int
tables [2]tFiles
maxGPOverlaps uint64
gp tFiles
gpidx int
seenKey bool
overlappedBytes uint64
imin, imax iKey
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes uint64
imin, imax iKey
tPtrs []int
released bool
tPtrs [kNumLevels]int
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes uint64
snapTPtrs []int
}
func (c *compaction) save() {
c.snapGPI = c.gpi
c.snapSeenKey = c.seenKey
c.snapGPOverlappedBytes = c.gpOverlappedBytes
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
}
func (c *compaction) restore() {
c.gpi = c.snapGPI
c.seenKey = c.snapSeenKey
c.gpOverlappedBytes = c.snapGPOverlappedBytes
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
}
func (c *compaction) release() {
if !c.released {
c.released = true
c.v.release()
}
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
level := c.level
vt0, vt1 := c.v.tables[level], c.v.tables[level+1]
limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
t0, t1 := c.tables[0], c.tables[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just incase ukey hop across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
if len(t0) != len(c.tables[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
@ -292,13 +350,13 @@ func (c *compaction) expand() {
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), level == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < kExpCompactionMaxBytes {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
level, level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
@ -309,8 +367,8 @@ func (c *compaction) expand() {
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if level+2 < kNumLevels {
c.gp = c.v.tables[level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
if c.level+2 < c.s.o.GetNumLevel() {
c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
@ -319,7 +377,7 @@ func (c *compaction) expand() {
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= kMaxGrandParentOverlapBytes
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
@ -341,20 +399,20 @@ func (c *compaction) baseLevelForKey(ukey []byte) bool {
}
func (c *compaction) shouldStopBefore(ikey iKey) bool {
for ; c.gpidx < len(c.gp); c.gpidx++ {
gp := c.gp[c.gpidx]
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
c.overlappedBytes += gp.size
c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
if c.overlappedBytes > kMaxGrandParentOverlapBytes {
if c.gpOverlappedBytes > c.maxGPOverlaps {
// Too much overlap for current output; start new output.
c.overlappedBytes = 0
c.gpOverlappedBytes = 0
return true
}
return false
@ -373,8 +431,12 @@ func (c *compaction) newIterator() iterator.Iterator {
// Options.
ro := &opt.ReadOptions{
DontFillCache: true,
Strict: opt.StrictOverride,
}
strict := c.s.o.GetStrict(opt.StrictCompaction)
if strict {
ro.Strict |= opt.StrictReader
}
strict := c.s.o.GetStrict(opt.StrictIterator)
for i, tables := range c.tables {
if len(tables) == 0 {
@ -387,10 +449,10 @@ func (c *compaction) newIterator() iterator.Iterator {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict, true)
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
return iterator.NewMergedIterator(its, c.s.icmp, true)
return iterator.NewMergedIterator(its, c.s.icmp, strict)
}

View File

@ -9,11 +9,11 @@ package leveldb
import (
"bufio"
"encoding/binary"
"errors"
"io"
)
"strings"
var errCorruptManifest = errors.New("leveldb: corrupt manifest")
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
)
type byteReader interface {
io.Reader
@ -22,13 +22,13 @@ type byteReader interface {
// These numbers are written to disk and should not be changed.
const (
recComparer = 1
recJournalNum = 2
recNextNum = 3
recSeq = 4
recCompactionPointer = 5
recDeletedTable = 6
recNewTable = 7
recComparer = 1
recJournalNum = 2
recNextFileNum = 3
recSeqNum = 4
recCompPtr = 5
recDelTable = 6
recAddTable = 7
// 8 was used for large value refs
recPrevJournalNum = 9
)
@ -38,7 +38,7 @@ type cpRecord struct {
ikey iKey
}
type ntRecord struct {
type atRecord struct {
level int
num uint64
size uint64
@ -46,27 +46,26 @@ type ntRecord struct {
imax iKey
}
func (r ntRecord) makeFile(s *session) *tFile {
return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax)
}
type dtRecord struct {
level int
num uint64
}
type sessionRecord struct {
hasRec int
comparer string
journalNum uint64
prevJournalNum uint64
nextNum uint64
seq uint64
compactionPointers []cpRecord
addedTables []ntRecord
deletedTables []dtRecord
scratch [binary.MaxVarintLen64]byte
err error
numLevel int
hasRec int
comparer string
journalNum uint64
prevJournalNum uint64
nextFileNum uint64
seqNum uint64
compPtrs []cpRecord
addedTables []atRecord
deletedTables []dtRecord
scratch [binary.MaxVarintLen64]byte
err error
}
func (p *sessionRecord) has(rec int) bool {
@ -88,29 +87,29 @@ func (p *sessionRecord) setPrevJournalNum(num uint64) {
p.prevJournalNum = num
}
func (p *sessionRecord) setNextNum(num uint64) {
p.hasRec |= 1 << recNextNum
p.nextNum = num
func (p *sessionRecord) setNextFileNum(num uint64) {
p.hasRec |= 1 << recNextFileNum
p.nextFileNum = num
}
func (p *sessionRecord) setSeq(seq uint64) {
p.hasRec |= 1 << recSeq
p.seq = seq
func (p *sessionRecord) setSeqNum(num uint64) {
p.hasRec |= 1 << recSeqNum
p.seqNum = num
}
func (p *sessionRecord) addCompactionPointer(level int, ikey iKey) {
p.hasRec |= 1 << recCompactionPointer
p.compactionPointers = append(p.compactionPointers, cpRecord{level, ikey})
func (p *sessionRecord) addCompPtr(level int, ikey iKey) {
p.hasRec |= 1 << recCompPtr
p.compPtrs = append(p.compPtrs, cpRecord{level, ikey})
}
func (p *sessionRecord) resetCompactionPointers() {
p.hasRec &= ^(1 << recCompactionPointer)
p.compactionPointers = p.compactionPointers[:0]
func (p *sessionRecord) resetCompPtrs() {
p.hasRec &= ^(1 << recCompPtr)
p.compPtrs = p.compPtrs[:0]
}
func (p *sessionRecord) addTable(level int, num, size uint64, imin, imax iKey) {
p.hasRec |= 1 << recNewTable
p.addedTables = append(p.addedTables, ntRecord{level, num, size, imin, imax})
p.hasRec |= 1 << recAddTable
p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax})
}
func (p *sessionRecord) addTableFile(level int, t *tFile) {
@ -118,17 +117,17 @@ func (p *sessionRecord) addTableFile(level int, t *tFile) {
}
func (p *sessionRecord) resetAddedTables() {
p.hasRec &= ^(1 << recNewTable)
p.hasRec &= ^(1 << recAddTable)
p.addedTables = p.addedTables[:0]
}
func (p *sessionRecord) deleteTable(level int, num uint64) {
p.hasRec |= 1 << recDeletedTable
func (p *sessionRecord) delTable(level int, num uint64) {
p.hasRec |= 1 << recDelTable
p.deletedTables = append(p.deletedTables, dtRecord{level, num})
}
func (p *sessionRecord) resetDeletedTables() {
p.hasRec &= ^(1 << recDeletedTable)
p.hasRec &= ^(1 << recDelTable)
p.deletedTables = p.deletedTables[:0]
}
@ -161,26 +160,26 @@ func (p *sessionRecord) encode(w io.Writer) error {
p.putUvarint(w, recJournalNum)
p.putUvarint(w, p.journalNum)
}
if p.has(recNextNum) {
p.putUvarint(w, recNextNum)
p.putUvarint(w, p.nextNum)
if p.has(recNextFileNum) {
p.putUvarint(w, recNextFileNum)
p.putUvarint(w, p.nextFileNum)
}
if p.has(recSeq) {
p.putUvarint(w, recSeq)
p.putUvarint(w, p.seq)
if p.has(recSeqNum) {
p.putUvarint(w, recSeqNum)
p.putUvarint(w, p.seqNum)
}
for _, r := range p.compactionPointers {
p.putUvarint(w, recCompactionPointer)
for _, r := range p.compPtrs {
p.putUvarint(w, recCompPtr)
p.putUvarint(w, uint64(r.level))
p.putBytes(w, r.ikey)
}
for _, r := range p.deletedTables {
p.putUvarint(w, recDeletedTable)
p.putUvarint(w, recDelTable)
p.putUvarint(w, uint64(r.level))
p.putUvarint(w, r.num)
}
for _, r := range p.addedTables {
p.putUvarint(w, recNewTable)
p.putUvarint(w, recAddTable)
p.putUvarint(w, uint64(r.level))
p.putUvarint(w, r.num)
p.putUvarint(w, r.size)
@ -190,14 +189,16 @@ func (p *sessionRecord) encode(w io.Writer) error {
return p.err
}
func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 {
func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 {
if p.err != nil {
return 0
}
x, err := binary.ReadUvarint(r)
if err != nil {
if err == io.EOF {
p.err = errCorruptManifest
if err == io.ErrUnexpectedEOF || (mayEOF == false && err == io.EOF) {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"})
} else if strings.HasPrefix(err.Error(), "binary:") {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, err.Error()})
} else {
p.err = err
}
@ -206,35 +207,39 @@ func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 {
return x
}
func (p *sessionRecord) readBytes(r byteReader) []byte {
func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 {
return p.readUvarintMayEOF(field, r, false)
}
func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
if p.err != nil {
return nil
}
n := p.readUvarint(r)
n := p.readUvarint(field, r)
if p.err != nil {
return nil
}
x := make([]byte, n)
_, p.err = io.ReadFull(r, x)
if p.err != nil {
if p.err == io.EOF {
p.err = errCorruptManifest
if p.err == io.ErrUnexpectedEOF {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"})
}
return nil
}
return x
}
func (p *sessionRecord) readLevel(r io.ByteReader) int {
func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
if p.err != nil {
return 0
}
x := p.readUvarint(r)
x := p.readUvarint(field, r)
if p.err != nil {
return 0
}
if x >= kNumLevels {
p.err = errCorruptManifest
if x >= uint64(p.numLevel) {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"})
return 0
}
return int(x)
@ -247,59 +252,59 @@ func (p *sessionRecord) decode(r io.Reader) error {
}
p.err = nil
for p.err == nil {
rec, err := binary.ReadUvarint(br)
if err != nil {
if err == io.EOF {
err = nil
rec := p.readUvarintMayEOF("field-header", br, true)
if p.err != nil {
if p.err == io.EOF {
return nil
}
return err
return p.err
}
switch rec {
case recComparer:
x := p.readBytes(br)
x := p.readBytes("comparer", br)
if p.err == nil {
p.setComparer(string(x))
}
case recJournalNum:
x := p.readUvarint(br)
x := p.readUvarint("journal-num", br)
if p.err == nil {
p.setJournalNum(x)
}
case recPrevJournalNum:
x := p.readUvarint(br)
x := p.readUvarint("prev-journal-num", br)
if p.err == nil {
p.setPrevJournalNum(x)
}
case recNextNum:
x := p.readUvarint(br)
case recNextFileNum:
x := p.readUvarint("next-file-num", br)
if p.err == nil {
p.setNextNum(x)
p.setNextFileNum(x)
}
case recSeq:
x := p.readUvarint(br)
case recSeqNum:
x := p.readUvarint("seq-num", br)
if p.err == nil {
p.setSeq(x)
p.setSeqNum(x)
}
case recCompactionPointer:
level := p.readLevel(br)
ikey := p.readBytes(br)
case recCompPtr:
level := p.readLevel("comp-ptr.level", br)
ikey := p.readBytes("comp-ptr.ikey", br)
if p.err == nil {
p.addCompactionPointer(level, iKey(ikey))
p.addCompPtr(level, iKey(ikey))
}
case recNewTable:
level := p.readLevel(br)
num := p.readUvarint(br)
size := p.readUvarint(br)
imin := p.readBytes(br)
imax := p.readBytes(br)
case recAddTable:
level := p.readLevel("add-table.level", br)
num := p.readUvarint("add-table.num", br)
size := p.readUvarint("add-table.size", br)
imin := p.readBytes("add-table.imin", br)
imax := p.readBytes("add-table.imax", br)
if p.err == nil {
p.addTable(level, num, size, imin, imax)
}
case recDeletedTable:
level := p.readLevel(br)
num := p.readUvarint(br)
case recDelTable:
level := p.readLevel("del-table.level", br)
num := p.readUvarint("del-table.num", br)
if p.err == nil {
p.deleteTable(level, num)
p.delTable(level, num)
}
}
}

View File

@ -9,6 +9,8 @@ package leveldb
import (
"bytes"
"testing"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
)
func decodeEncode(v *sessionRecord) (res bool, err error) {
@ -17,7 +19,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
if err != nil {
return
}
v2 := new(sessionRecord)
v2 := &sessionRecord{numLevel: opt.DefaultNumLevel}
err = v.decode(b)
if err != nil {
return
@ -32,7 +34,7 @@ func decodeEncode(v *sessionRecord) (res bool, err error) {
func TestSessionRecord_EncodeDecode(t *testing.T) {
big := uint64(1) << 50
v := new(sessionRecord)
v := &sessionRecord{numLevel: opt.DefaultNumLevel}
i := uint64(0)
test := func() {
res, err := decodeEncode(v)
@ -47,16 +49,16 @@ func TestSessionRecord_EncodeDecode(t *testing.T) {
for ; i < 4; i++ {
test()
v.addTable(3, big+300+i, big+400+i,
newIKey([]byte("foo"), big+500+1, tVal),
newIKey([]byte("zoo"), big+600+1, tDel))
v.deleteTable(4, big+700+i)
v.addCompactionPointer(int(i), newIKey([]byte("x"), big+900+1, tVal))
newIkey([]byte("foo"), big+500+1, ktVal),
newIkey([]byte("zoo"), big+600+1, ktDel))
v.delTable(4, big+700+i)
v.addCompPtr(int(i), newIkey([]byte("x"), big+900+1, ktVal))
}
v.setComparer("foo")
v.setJournalNum(big + 100)
v.setPrevJournalNum(big + 99)
v.setNextNum(big + 200)
v.setSeq(big + 1000)
v.setNextFileNum(big + 200)
v.setSeqNum(big + 1000)
test()
}

View File

@ -22,7 +22,7 @@ type dropper struct {
}
func (d dropper) Drop(err error) {
if e, ok := err.(journal.ErrCorrupted); ok {
if e, ok := err.(*journal.ErrCorrupted); ok {
d.s.logf("journal@drop %s-%d S·%s %q", d.file.Type(), d.file.Num(), shortenb(e.Size), e.Reason)
} else {
d.s.logf("journal@drop %s-%d %q", d.file.Type(), d.file.Num(), err)
@ -51,9 +51,14 @@ func (s *session) newTemp() storage.File {
return s.stor.GetFile(num, storage.TypeTemp)
}
func (s *session) tableFileFromRecord(r atRecord) *tFile {
return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax)
}
// Session state.
// Get current version.
// Get current version. This will incr version ref, must call
// version.release (exactly once) after use.
func (s *session) version() *version {
s.vmu.Lock()
defer s.vmu.Unlock()
@ -61,61 +66,56 @@ func (s *session) version() *version {
return s.stVersion
}
// Get current version; no barrier.
func (s *session) version_NB() *version {
return s.stVersion
}
// Set current version to v.
func (s *session) setVersion(v *version) {
s.vmu.Lock()
v.ref = 1
v.ref = 1 // Holds by session.
if old := s.stVersion; old != nil {
v.ref++
v.ref++ // Holds by old version.
old.next = v
old.release_NB()
old.releaseNB()
}
s.stVersion = v
s.vmu.Unlock()
}
// Get current unused file number.
func (s *session) fileNum() uint64 {
return atomic.LoadUint64(&s.stFileNum)
func (s *session) nextFileNum() uint64 {
return atomic.LoadUint64(&s.stNextFileNum)
}
// Get current unused file number to num.
func (s *session) setFileNum(num uint64) {
atomic.StoreUint64(&s.stFileNum, num)
// Set current unused file number to num.
func (s *session) setNextFileNum(num uint64) {
atomic.StoreUint64(&s.stNextFileNum, num)
}
// Mark file number as used.
func (s *session) markFileNum(num uint64) {
num += 1
nextFileNum := num + 1
for {
old, x := s.stFileNum, num
old, x := s.stNextFileNum, nextFileNum
if old > x {
x = old
}
if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) {
if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) {
break
}
}
}
// Allocate a file number.
func (s *session) allocFileNum() (num uint64) {
return atomic.AddUint64(&s.stFileNum, 1) - 1
func (s *session) allocFileNum() uint64 {
return atomic.AddUint64(&s.stNextFileNum, 1) - 1
}
// Reuse given file number.
func (s *session) reuseFileNum(num uint64) {
for {
old, x := s.stFileNum, num
old, x := s.stNextFileNum, num
if old != x+1 {
x = old
}
if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) {
if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) {
break
}
}
@ -126,20 +126,20 @@ func (s *session) reuseFileNum(num uint64) {
// Fill given session record obj with current states; need external
// synchronization.
func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
r.setNextNum(s.fileNum())
r.setNextFileNum(s.nextFileNum())
if snapshot {
if !r.has(recJournalNum) {
r.setJournalNum(s.stJournalNum)
}
if !r.has(recSeq) {
r.setSeq(s.stSeq)
if !r.has(recSeqNum) {
r.setSeqNum(s.stSeqNum)
}
for level, ik := range s.stCptrs {
for level, ik := range s.stCompPtrs {
if ik != nil {
r.addCompactionPointer(level, ik)
r.addCompPtr(level, ik)
}
}
@ -147,7 +147,7 @@ func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
}
}
// Mark if record has been commited, this will update session state;
// Mark if record has been committed, this will update session state;
// need external synchronization.
func (s *session) recordCommited(r *sessionRecord) {
if r.has(recJournalNum) {
@ -158,12 +158,12 @@ func (s *session) recordCommited(r *sessionRecord) {
s.stPrevJournalNum = r.prevJournalNum
}
if r.has(recSeq) {
s.stSeq = r.seq
if r.has(recSeqNum) {
s.stSeqNum = r.seqNum
}
for _, p := range r.compactionPointers {
s.stCptrs[p.level] = iKey(p.ikey)
for _, p := range r.compPtrs {
s.stCompPtrs[p.level] = iKey(p.ikey)
}
}
@ -178,10 +178,11 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
jw := journal.NewWriter(writer)
if v == nil {
v = s.version_NB()
v = s.version()
defer v.release()
}
if rec == nil {
rec = new(sessionRecord)
rec = &sessionRecord{numLevel: s.o.GetNumLevel()}
}
s.fillRecord(rec, true)
v.fillRecord(rec)

View File

@ -221,7 +221,7 @@ func (fs *fileStorage) GetManifest() (f File, err error) {
fs.log(fmt.Sprintf("skipping %s: invalid file name", fn))
continue
}
if _, e1 := strconv.ParseUint(fn[7:], 10, 0); e1 != nil {
if _, e1 := strconv.ParseUint(fn[8:], 10, 0); e1 != nil {
fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", fn, e1))
continue
}

View File

@ -125,3 +125,33 @@ type Storage interface {
// Other methods should not be called after the storage has been closed.
Close() error
}
// FileInfo wraps basic file info.
type FileInfo struct {
Type FileType
Num uint64
}
func (fi FileInfo) String() string {
switch fi.Type {
case TypeManifest:
return fmt.Sprintf("MANIFEST-%06d", fi.Num)
case TypeJournal:
return fmt.Sprintf("%06d.log", fi.Num)
case TypeTable:
return fmt.Sprintf("%06d.ldb", fi.Num)
case TypeTemp:
return fmt.Sprintf("%06d.tmp", fi.Num)
default:
return fmt.Sprintf("%#x-%d", fi.Type, fi.Num)
}
}
// NewFileInfo creates new FileInfo from the given File. It will returns nil
// if File is nil.
func NewFileInfo(f File) *FileInfo {
if f == nil {
return nil
}
return &FileInfo{f.Type(), f.Num()}
}

View File

@ -11,6 +11,7 @@ import (
"fmt"
"io"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"sync"
@ -28,11 +29,25 @@ var (
)
var (
tsFSEnv = os.Getenv("GOLEVELDB_USEFS")
tsKeepFS = tsFSEnv == "2"
tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1"
tsMU = &sync.Mutex{}
tsNum = 0
tsFSEnv = os.Getenv("GOLEVELDB_USEFS")
tsTempdir = os.Getenv("GOLEVELDB_TEMPDIR")
tsKeepFS = tsFSEnv == "2"
tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1"
tsMU = &sync.Mutex{}
tsNum = 0
)
type tsOp uint
const (
tsOpOpen tsOp = iota
tsOpCreate
tsOpRead
tsOpReadAt
tsOpWrite
tsOpSync
tsOpNum
)
type tsLock struct {
@ -53,6 +68,9 @@ type tsReader struct {
func (tr tsReader) Read(b []byte) (n int, err error) {
ts := tr.tf.ts
ts.countRead(tr.tf.Type())
if tr.tf.shouldErrLocked(tsOpRead) {
return 0, errors.New("leveldb.testStorage: emulated read error")
}
n, err = tr.Reader.Read(b)
if err != nil && err != io.EOF {
ts.t.Errorf("E: read error, num=%d type=%v n=%d: %v", tr.tf.Num(), tr.tf.Type(), n, err)
@ -63,6 +81,9 @@ func (tr tsReader) Read(b []byte) (n int, err error) {
func (tr tsReader) ReadAt(b []byte, off int64) (n int, err error) {
ts := tr.tf.ts
ts.countRead(tr.tf.Type())
if tr.tf.shouldErrLocked(tsOpReadAt) {
return 0, errors.New("leveldb.testStorage: emulated readAt error")
}
n, err = tr.Reader.ReadAt(b, off)
if err != nil && err != io.EOF {
ts.t.Errorf("E: readAt error, num=%d type=%v off=%d n=%d: %v", tr.tf.Num(), tr.tf.Type(), off, n, err)
@ -82,15 +103,12 @@ type tsWriter struct {
}
func (tw tsWriter) Write(b []byte) (n int, err error) {
ts := tw.tf.ts
ts.mu.Lock()
defer ts.mu.Unlock()
if ts.emuWriteErr&tw.tf.Type() != 0 {
if tw.tf.shouldErrLocked(tsOpWrite) {
return 0, errors.New("leveldb.testStorage: emulated write error")
}
n, err = tw.Writer.Write(b)
if err != nil {
ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err)
tw.tf.ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err)
}
return
}
@ -98,23 +116,23 @@ func (tw tsWriter) Write(b []byte) (n int, err error) {
func (tw tsWriter) Sync() (err error) {
ts := tw.tf.ts
ts.mu.Lock()
defer ts.mu.Unlock()
for ts.emuDelaySync&tw.tf.Type() != 0 {
ts.cond.Wait()
}
if ts.emuSyncErr&tw.tf.Type() != 0 {
ts.mu.Unlock()
if tw.tf.shouldErrLocked(tsOpSync) {
return errors.New("leveldb.testStorage: emulated sync error")
}
err = tw.Writer.Sync()
if err != nil {
ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err)
tw.tf.ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err)
}
return
}
func (tw tsWriter) Close() (err error) {
err = tw.Writer.Close()
tw.tf.close("reader", err)
tw.tf.close("writer", err)
return
}
@ -127,6 +145,16 @@ func (tf tsFile) x() uint64 {
return tf.Num()<<typeShift | uint64(tf.Type())
}
func (tf tsFile) shouldErr(op tsOp) bool {
return tf.ts.shouldErr(tf, op)
}
func (tf tsFile) shouldErrLocked(op tsOp) bool {
tf.ts.mu.Lock()
defer tf.ts.mu.Unlock()
return tf.shouldErr(op)
}
func (tf tsFile) checkOpen(m string) error {
ts := tf.ts
if writer, ok := ts.opens[tf.x()]; ok {
@ -163,7 +191,7 @@ func (tf tsFile) Open() (r storage.Reader, err error) {
if err != nil {
return
}
if ts.emuOpenErr&tf.Type() != 0 {
if tf.shouldErr(tsOpOpen) {
err = errors.New("leveldb.testStorage: emulated open error")
return
}
@ -190,7 +218,7 @@ func (tf tsFile) Create() (w storage.Writer, err error) {
if err != nil {
return
}
if ts.emuCreateErr&tf.Type() != 0 {
if tf.shouldErr(tsOpCreate) {
err = errors.New("leveldb.testStorage: emulated create error")
return
}
@ -205,6 +233,23 @@ func (tf tsFile) Create() (w storage.Writer, err error) {
return
}
func (tf tsFile) Replace(newfile storage.File) (err error) {
ts := tf.ts
ts.mu.Lock()
defer ts.mu.Unlock()
err = tf.checkOpen("replace")
if err != nil {
return
}
err = tf.File.Replace(newfile.(tsFile).File)
if err != nil {
ts.t.Errorf("E: cannot replace file, num=%d type=%v: %v", tf.Num(), tf.Type(), err)
} else {
ts.t.Logf("I: file replace, num=%d type=%v", tf.Num(), tf.Type())
}
return
}
func (tf tsFile) Remove() (err error) {
ts := tf.ts
ts.mu.Lock()
@ -231,25 +276,61 @@ type testStorage struct {
cond sync.Cond
// Open files, true=writer, false=reader
opens map[uint64]bool
emuOpenErr storage.FileType
emuCreateErr storage.FileType
emuDelaySync storage.FileType
emuWriteErr storage.FileType
emuSyncErr storage.FileType
ignoreOpenErr storage.FileType
readCnt uint64
readCntEn storage.FileType
emuErr [tsOpNum]storage.FileType
emuErrOnce [tsOpNum]storage.FileType
emuRandErr [tsOpNum]storage.FileType
emuRandErrProb int
emuErrOnceMap map[uint64]uint
emuRandRand *rand.Rand
}
func (ts *testStorage) SetOpenErr(t storage.FileType) {
func (ts *testStorage) shouldErr(tf tsFile, op tsOp) bool {
if ts.emuErr[op]&tf.Type() != 0 {
return true
} else if ts.emuRandErr[op]&tf.Type() != 0 || ts.emuErrOnce[op]&tf.Type() != 0 {
sop := uint(1) << op
eop := ts.emuErrOnceMap[tf.x()]
if eop&sop == 0 && (ts.emuRandRand.Int()%ts.emuRandErrProb == 0 || ts.emuErrOnce[op]&tf.Type() != 0) {
ts.emuErrOnceMap[tf.x()] = eop | sop
ts.t.Logf("I: emulated error: file=%d type=%v op=%v", tf.Num(), tf.Type(), op)
return true
}
}
return false
}
func (ts *testStorage) SetEmuErr(t storage.FileType, ops ...tsOp) {
ts.mu.Lock()
ts.emuOpenErr = t
for _, op := range ops {
ts.emuErr[op] = t
}
ts.mu.Unlock()
}
func (ts *testStorage) SetCreateErr(t storage.FileType) {
func (ts *testStorage) SetEmuErrOnce(t storage.FileType, ops ...tsOp) {
ts.mu.Lock()
ts.emuCreateErr = t
for _, op := range ops {
ts.emuErrOnce[op] = t
}
ts.mu.Unlock()
}
func (ts *testStorage) SetEmuRandErr(t storage.FileType, ops ...tsOp) {
ts.mu.Lock()
for _, op := range ops {
ts.emuRandErr[op] = t
}
ts.mu.Unlock()
}
func (ts *testStorage) SetEmuRandErrProb(prob int) {
ts.mu.Lock()
ts.emuRandErrProb = prob
ts.mu.Unlock()
}
@ -267,18 +348,6 @@ func (ts *testStorage) ReleaseSync(t storage.FileType) {
ts.mu.Unlock()
}
func (ts *testStorage) SetWriteErr(t storage.FileType) {
ts.mu.Lock()
ts.emuWriteErr = t
ts.mu.Unlock()
}
func (ts *testStorage) SetSyncErr(t storage.FileType) {
ts.mu.Lock()
ts.emuSyncErr = t
ts.mu.Unlock()
}
func (ts *testStorage) ReadCounter() uint64 {
ts.mu.Lock()
defer ts.mu.Unlock()
@ -413,7 +482,11 @@ func newTestStorage(t *testing.T) *testStorage {
num := tsNum
tsNum++
tsMU.Unlock()
path := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldb-test%d0%d0%d", os.Getuid(), os.Getpid(), num))
tempdir := tsTempdir
if tempdir == "" {
tempdir = os.TempDir()
}
path := filepath.Join(tempdir, fmt.Sprintf("goleveldb-test%d0%d0%d", os.Getuid(), os.Getpid(), num))
if _, err := os.Stat(path); err != nil {
stor, err = storage.OpenFile(path)
if err != nil {
@ -436,6 +509,10 @@ func newTestStorage(t *testing.T) *testStorage {
}
f.Close()
}
if t.Failed() {
t.Logf("testing failed, test DB preserved at %s", path)
return nil
}
if tsKeepFS {
return nil
}
@ -449,10 +526,13 @@ func newTestStorage(t *testing.T) *testStorage {
stor = storage.NewMemStorage()
}
ts := &testStorage{
t: t,
Storage: stor,
closeFn: closeFn,
opens: make(map[uint64]bool),
t: t,
Storage: stor,
closeFn: closeFn,
opens: make(map[uint64]bool),
emuErrOnceMap: make(map[uint64]uint),
emuRandErrProb: 0x999,
emuRandRand: rand.New(rand.NewSource(0xfacedead)),
}
ts.cond.L = &ts.mu
return ts

View File

@ -7,6 +7,7 @@
package leveldb
import (
"fmt"
"sort"
"sync/atomic"
@ -82,6 +83,18 @@ type tFiles []*tFile
func (tf tFiles) Len() int { return len(tf) }
func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf tFiles) nums() string {
x := "[ "
for i, f := range tf {
if i != 0 {
x += ", "
}
x += fmt.Sprint(f.file.Num())
}
x += " ]"
return x
}
// Returns true if i smallest key is less than j.
// This used for sort by key in ascending order.
func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
@ -149,7 +162,7 @@ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) boo
i := 0
if len(umin) > 0 {
// Find the earliest possible internal key for min.
i = tf.searchMax(icmp, newIKey(umin, kMaxSeq, tSeek))
i = tf.searchMax(icmp, newIkey(umin, kMaxSeq, ktSeek))
}
if i >= len(tf) {
// Beginning of range is after all files, so no overlap.
@ -159,24 +172,25 @@ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) boo
}
// Returns tables whose its key range overlaps with given key range.
// If overlapped is true then the search will be expanded to tables that
// overlaps with each other.
// Range will be expanded if ukey found hop across tables.
// If overlapped is true then the search will be restarted if umax
// expanded.
// The dst content will be overwritten.
func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
x := len(dst)
dst = dst[:0]
for i := 0; i < len(tf); {
t := tf[i]
if t.overlaps(icmp, umin, umax) {
if overlapped {
// For overlapped files, check if the newly added file has
// expanded the range. If so, restart search.
if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
umin = t.imin.ukey()
dst = dst[:x]
i = 0
continue
} else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
umax = t.imax.ukey()
dst = dst[:x]
if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
umin = t.imin.ukey()
dst = dst[:0]
i = 0
continue
} else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
umax = t.imax.ukey()
// Restart search if it is overlapped.
if overlapped {
dst = dst[:0]
i = 0
continue
}
@ -289,7 +303,7 @@ func (t *tOps) create() (*tWriter, error) {
t: t,
file: file,
w: fw,
tw: table.NewWriter(fw, t.s.o),
tw: table.NewWriter(fw, t.s.o.Options),
}, nil
}
@ -337,7 +351,13 @@ func (t *tOps) open(f *tFile) (ch cache.Handle, err error) {
if bc := t.s.o.GetBlockCache(); bc != nil {
bcacheNS = bc.GetNamespace(num)
}
return 1, table.NewReader(r, int64(f.size), bcacheNS, t.bpool, t.s.o)
var tr *table.Reader
tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcacheNS, t.bpool, t.s.o.Options)
if err != nil {
r.Close()
return 0, nil
}
return 1, tr
})
if ch == nil && err == nil {
err = ErrClosed
@ -353,7 +373,17 @@ func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []b
return nil, nil, err
}
defer ch.Release()
return ch.Value().(*table.Reader).Find(key, ro)
return ch.Value().(*table.Reader).Find(key, true, ro)
}
// Finds key that is greater than or equal to the given key.
func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
ch, err := t.open(f)
if err != nil {
return nil, err
}
defer ch.Release()
return ch.Value().(*table.Reader).FindKey(key, true, ro)
}
// Returns approximate offset of the given key.
@ -440,28 +470,34 @@ func (w *tWriter) empty() bool {
return w.first == nil
}
// Closes the storage.Writer.
func (w *tWriter) close() {
if w.w != nil {
w.w.Close()
w.w = nil
}
}
// Finalizes the table and returns table file.
func (w *tWriter) finish() (f *tFile, err error) {
defer w.close()
err = w.tw.Close()
if err != nil {
return
}
err = w.w.Sync()
if err != nil {
w.w.Close()
return
}
w.w.Close()
f = newTableFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last))
return
}
// Drops the table.
func (w *tWriter) drop() {
w.w.Close()
w.close()
w.file.Remove()
w.t.s.reuseFileNum(w.file.Num())
w.w = nil
w.file = nil
w.tw = nil
w.first = nil

View File

@ -19,13 +19,18 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
func (b *block) TestNewIterator(slice *util.Range) iterator.Iterator {
return b.newIterator(slice, false, nil)
type blockTesting struct {
tr *Reader
b *block
}
func (t *blockTesting) TestNewIterator(slice *util.Range) iterator.Iterator {
return t.tr.newBlockIter(t.b, nil, slice, false)
}
var _ = testutil.Defer(func() {
Describe("Block", func() {
Build := func(kv *testutil.KeyValue, restartInterval int) *block {
Build := func(kv *testutil.KeyValue, restartInterval int) *blockTesting {
// Building the block.
bw := &blockWriter{
restartInterval: restartInterval,
@ -39,11 +44,13 @@ var _ = testutil.Defer(func() {
// Opening the block.
data := bw.buf.Bytes()
restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
return &block{
tr: &Reader{cmp: comparer.DefaultComparer},
data: data,
restartsLen: restartsLen,
restartsOffset: len(data) - (restartsLen+1)*4,
return &blockTesting{
tr: &Reader{cmp: comparer.DefaultComparer},
b: &block{
data: data,
restartsLen: restartsLen,
restartsOffset: len(data) - (restartsLen+1)*4,
},
}
}
@ -59,7 +66,7 @@ var _ = testutil.Defer(func() {
// Make block.
br := Build(kv, restartInterval)
// Do testing.
testutil.KeyValueTesting(nil, br, kv.Clone())
testutil.KeyValueTesting(nil, kv.Clone(), br, nil, nil)
}
Describe(Text(), Test)
@ -102,11 +109,11 @@ var _ = testutil.Defer(func() {
for restartInterval := 1; restartInterval <= 5; restartInterval++ {
Describe(fmt.Sprintf("with restart interval of %d", restartInterval), func() {
// Make block.
br := Build(kv, restartInterval)
bt := Build(kv, restartInterval)
Test := func(r *util.Range) func(done Done) {
return func(done Done) {
iter := br.newIterator(r, false, nil)
iter := bt.TestNewIterator(r)
Expect(iter.Error()).ShouldNot(HaveOccurred())
t := testutil.IteratorTesting{
@ -115,6 +122,7 @@ var _ = testutil.Defer(func() {
}
testutil.DoIteratorTesting(&t)
iter.Release()
done <- true
}
}

View File

@ -8,27 +8,41 @@ package table
import (
"encoding/binary"
"errors"
"fmt"
"io"
"sort"
"strings"
"sync"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
var (
ErrNotFound = util.ErrNotFound
ErrIterReleased = errors.New("leveldb/table: iterator released")
ErrNotFound = errors.ErrNotFound
ErrReaderReleased = errors.New("leveldb/table: reader released")
ErrIterReleased = errors.New("leveldb/table: iterator released")
)
type ErrCorrupted struct {
Pos int64
Size int64
Kind string
Reason string
}
func (e *ErrCorrupted) Error() string {
return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason)
}
func max(x, y int) int {
if x > y {
return x
@ -37,22 +51,21 @@ func max(x, y int) int {
}
type block struct {
tr *Reader
bpool *util.BufferPool
bh blockHandle
data []byte
restartsLen int
restartsOffset int
// Whether checksum is verified and valid.
checksum bool
}
func (b *block) seek(rstart, rlimit int, key []byte) (index, offset int, err error) {
func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) {
index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):]))
offset += 1 // shared always zero, since this is a restart point
v1, n1 := binary.Uvarint(b.data[offset:]) // key length
_, n2 := binary.Uvarint(b.data[offset+n1:]) // value length
m := offset + n1 + n2
return b.tr.cmp.Compare(b.data[m:m+int(v1)], key) > 0
return cmp.Compare(b.data[m:m+int(v1)], key) > 0
}) + rstart - 1
if index < rstart {
// The smallest key is greater-than key sought.
@ -75,7 +88,7 @@ func (b *block) restartOffset(index int) int {
func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) {
if offset >= b.restartsOffset {
if offset != b.restartsOffset {
err = errors.New("leveldb/table: Reader: BlockEntry: invalid block (block entries offset not aligned)")
err = &ErrCorrupted{Reason: "entries offset not aligned"}
}
return
}
@ -85,7 +98,7 @@ func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error)
m := n0 + n1 + n2
n = m + int(v1) + int(v2)
if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset {
err = errors.New("leveldb/table: Reader: invalid block (block entries corrupted)")
err = &ErrCorrupted{Reason: "entries corrupted"}
return
}
key = b.data[offset+m : offset+m+int(v1)]
@ -94,50 +107,9 @@ func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error)
return
}
func (b *block) newIterator(slice *util.Range, inclLimit bool, cache util.Releaser) *blockIter {
bi := &blockIter{
block: b,
cache: cache,
// Valid key should never be nil.
key: make([]byte, 0),
dir: dirSOI,
riStart: 0,
riLimit: b.restartsLen,
offsetStart: 0,
offsetRealStart: 0,
offsetLimit: b.restartsOffset,
}
if slice != nil {
if slice.Start != nil {
if bi.Seek(slice.Start) {
bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
bi.offsetStart = b.restartOffset(bi.riStart)
bi.offsetRealStart = bi.prevOffset
} else {
bi.riStart = b.restartsLen
bi.offsetStart = b.restartsOffset
bi.offsetRealStart = b.restartsOffset
}
}
if slice.Limit != nil {
if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
bi.offsetLimit = bi.prevOffset
bi.riLimit = bi.restartIndex + 1
}
}
bi.reset()
if bi.offsetStart > bi.offsetLimit {
bi.sErr(errors.New("leveldb/table: Reader: invalid slice range"))
}
}
return bi
}
func (b *block) Release() {
if b.tr.bpool != nil {
b.tr.bpool.Put(b.data)
}
b.tr = nil
b.bpool.Put(b.data)
b.bpool = nil
b.data = nil
}
@ -152,10 +124,12 @@ const (
)
type blockIter struct {
block *block
cache, releaser util.Releaser
key, value []byte
offset int
tr *Reader
block *block
blockReleaser util.Releaser
releaser util.Releaser
key, value []byte
offset int
// Previous offset, only filled by Next.
prevOffset int
prevNode []int
@ -252,7 +226,7 @@ func (i *blockIter) Seek(key []byte) bool {
return false
}
ri, offset, err := i.block.seek(i.riStart, i.riLimit, key)
ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key)
if err != nil {
i.sErr(err)
return false
@ -263,7 +237,7 @@ func (i *blockIter) Seek(key []byte) bool {
i.dir = dirForward
}
for i.Next() {
if i.block.tr.cmp.Compare(i.key, key) >= 0 {
if i.tr.cmp.Compare(i.key, key) >= 0 {
return true
}
}
@ -288,7 +262,7 @@ func (i *blockIter) Next() bool {
for i.offset < i.offsetRealStart {
key, value, nShared, n, err := i.block.entry(i.offset)
if err != nil {
i.sErr(err)
i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
return false
}
if n == 0 {
@ -302,13 +276,13 @@ func (i *blockIter) Next() bool {
if i.offset >= i.offsetLimit {
i.dir = dirEOI
if i.offset != i.offsetLimit {
i.sErr(errors.New("leveldb/table: Reader: Next: invalid block (block entries offset not aligned)"))
i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
}
return false
}
key, value, nShared, n, err := i.block.entry(i.offset)
if err != nil {
i.sErr(err)
i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
return false
}
if n == 0 {
@ -393,7 +367,7 @@ func (i *blockIter) Prev() bool {
for {
key, value, nShared, n, err := i.block.entry(offset)
if err != nil {
i.sErr(err)
i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
return false
}
if offset >= i.offsetRealStart {
@ -412,7 +386,7 @@ func (i *blockIter) Prev() bool {
// Stop if target offset reached.
if offset >= i.offset {
if offset != i.offset {
i.sErr(errors.New("leveldb/table: Reader: Prev: invalid block (block entries offset not aligned)"))
i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
return false
}
@ -439,16 +413,17 @@ func (i *blockIter) Value() []byte {
}
func (i *blockIter) Release() {
if i.dir > dirReleased {
if i.dir != dirReleased {
i.tr = nil
i.block = nil
i.prevNode = nil
i.prevKeys = nil
i.key = nil
i.value = nil
i.dir = dirReleased
if i.cache != nil {
i.cache.Release()
i.cache = nil
if i.blockReleaser != nil {
i.blockReleaser.Release()
i.blockReleaser = nil
}
if i.releaser != nil {
i.releaser.Release()
@ -458,9 +433,13 @@ func (i *blockIter) Release() {
}
func (i *blockIter) SetReleaser(releaser util.Releaser) {
if i.dir > dirReleased {
i.releaser = releaser
if i.dir == dirReleased {
panic(util.ErrReleased)
}
if i.releaser != nil && releaser != nil {
panic(util.ErrHasReleaser)
}
i.releaser = releaser
}
func (i *blockIter) Valid() bool {
@ -472,21 +451,21 @@ func (i *blockIter) Error() error {
}
type filterBlock struct {
tr *Reader
bpool *util.BufferPool
data []byte
oOffset int
baseLg uint
filtersNum int
}
func (b *filterBlock) contains(offset uint64, key []byte) bool {
func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool {
i := int(offset >> b.baseLg)
if i < b.filtersNum {
o := b.data[b.oOffset+i*4:]
n := int(binary.LittleEndian.Uint32(o))
m := int(binary.LittleEndian.Uint32(o[4:]))
if n < m && m <= b.oOffset {
return b.tr.filter.Contains(b.data[n:m], key)
return filter.Contains(b.data[n:m], key)
} else if n == m {
return false
}
@ -495,18 +474,16 @@ func (b *filterBlock) contains(offset uint64, key []byte) bool {
}
func (b *filterBlock) Release() {
if b.tr.bpool != nil {
b.tr.bpool.Put(b.data)
}
b.tr = nil
b.bpool.Put(b.data)
b.bpool = nil
b.data = nil
}
type indexIter struct {
*blockIter
tr *Reader
slice *util.Range
// Options
checksum bool
fillCache bool
}
@ -517,87 +494,124 @@ func (i *indexIter) Get() iterator.Iterator {
}
dataBH, n := decodeBlockHandle(value)
if n == 0 {
return iterator.NewEmptyIterator(errors.New("leveldb/table: Reader: invalid table (bad data block handle)"))
return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle"))
}
var slice *util.Range
if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) {
slice = i.slice
}
return i.blockIter.block.tr.getDataIter(dataBH, slice, i.checksum, i.fillCache)
return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache)
}
// Reader is a table reader.
type Reader struct {
mu sync.RWMutex
fi *storage.FileInfo
reader io.ReaderAt
cache cache.Namespace
err error
bpool *util.BufferPool
// Options
cmp comparer.Comparer
filter filter.Filter
checksum bool
strictIter bool
o *opt.Options
cmp comparer.Comparer
filter filter.Filter
verifyChecksum bool
dataEnd int64
indexBH, filterBH blockHandle
dataEnd int64
metaBH, indexBH, filterBH blockHandle
indexBlock *block
filterBlock *filterBlock
}
func verifyChecksum(data []byte) bool {
n := len(data) - 4
checksum0 := binary.LittleEndian.Uint32(data[n:])
checksum1 := util.NewCRC(data[:n]).Value()
return checksum0 == checksum1
func (r *Reader) blockKind(bh blockHandle) string {
switch bh.offset {
case r.metaBH.offset:
return "meta-block"
case r.indexBH.offset:
return "index-block"
case r.filterBH.offset:
if r.filterBH.length > 0 {
return "filter-block"
}
}
return "data-block"
}
func (r *Reader) readRawBlock(bh blockHandle, checksum bool) ([]byte, error) {
func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
return &errors.ErrCorrupted{File: r.fi, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
}
func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason)
}
func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
if cerr, ok := err.(*ErrCorrupted); ok {
cerr.Pos = int64(bh.offset)
cerr.Size = int64(bh.length)
cerr.Kind = r.blockKind(bh)
return &errors.ErrCorrupted{File: r.fi, Err: cerr}
}
return err
}
func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) {
data := r.bpool.Get(int(bh.length + blockTrailerLen))
if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF {
return nil, err
}
if checksum || r.checksum {
if !verifyChecksum(data) {
if verifyChecksum {
n := bh.length + 1
checksum0 := binary.LittleEndian.Uint32(data[n:])
checksum1 := util.NewCRC(data[:n]).Value()
if checksum0 != checksum1 {
r.bpool.Put(data)
return nil, errors.New("leveldb/table: Reader: invalid block (checksum mismatch)")
return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1))
}
}
switch data[bh.length] {
case blockTypeNoCompression:
data = data[:bh.length]
case blockTypeSnappyCompression:
decLen, err := snappy.DecodedLen(data[:bh.length])
if err != nil {
return nil, err
return nil, r.newErrCorruptedBH(bh, err.Error())
}
tmp := data
data, err = snappy.Decode(r.bpool.Get(decLen), tmp[:bh.length])
r.bpool.Put(tmp)
decData := r.bpool.Get(decLen)
decData, err = snappy.Decode(decData, data[:bh.length])
r.bpool.Put(data)
if err != nil {
return nil, err
r.bpool.Put(decData)
return nil, r.newErrCorruptedBH(bh, err.Error())
}
data = decData
default:
r.bpool.Put(data)
return nil, fmt.Errorf("leveldb/table: Reader: unknown block compression type: %d", data[bh.length])
return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length]))
}
return data, nil
}
func (r *Reader) readBlock(bh blockHandle, checksum bool) (*block, error) {
data, err := r.readRawBlock(bh, checksum)
func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) {
data, err := r.readRawBlock(bh, verifyChecksum)
if err != nil {
return nil, err
}
restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
b := &block{
tr: r,
bpool: r.bpool,
bh: bh,
data: data,
restartsLen: restartsLen,
restartsOffset: len(data) - (restartsLen+1)*4,
checksum: checksum || r.checksum,
}
return b, nil
}
func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*block, util.Releaser, error) {
func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) {
if r.cache != nil {
var err error
ch := r.cache.Get(bh.offset, func() (charge int, value interface{}) {
@ -605,7 +619,7 @@ func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*blo
return 0, nil
}
var b *block
b, err = r.readBlock(bh, checksum)
b, err = r.readBlock(bh, verifyChecksum)
if err != nil {
return 0, nil
}
@ -615,14 +629,7 @@ func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*blo
b, ok := ch.Value().(*block)
if !ok {
ch.Release()
return nil, nil, errors.New("leveldb/table: Reader: inconsistent block type")
}
if !b.checksum && (r.checksum || checksum) {
if !verifyChecksum(b.data) {
ch.Release()
return nil, nil, errors.New("leveldb/table: Reader: invalid block (checksum mismatch)")
}
b.checksum = true
return nil, nil, errors.New("leveldb/table: inconsistent block type")
}
return b, ch, err
} else if err != nil {
@ -630,7 +637,7 @@ func (r *Reader) readBlockCached(bh blockHandle, checksum, fillCache bool) (*blo
}
}
b, err := r.readBlock(bh, checksum)
b, err := r.readBlock(bh, verifyChecksum)
return b, b, err
}
@ -641,15 +648,15 @@ func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) {
}
n := len(data)
if n < 5 {
return nil, errors.New("leveldb/table: Reader: invalid filter block (too short)")
return nil, r.newErrCorruptedBH(bh, "too short")
}
m := n - 5
oOffset := int(binary.LittleEndian.Uint32(data[m:]))
if oOffset > m {
return nil, errors.New("leveldb/table: Reader: invalid filter block (invalid offset)")
return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset")
}
b := &filterBlock{
tr: r,
bpool: r.bpool,
data: data,
oOffset: oOffset,
baseLg: uint(data[n-1]),
@ -676,7 +683,7 @@ func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterB
b, ok := ch.Value().(*filterBlock)
if !ok {
ch.Release()
return nil, nil, errors.New("leveldb/table: Reader: inconsistent block type")
return nil, nil, errors.New("leveldb/table: inconsistent block type")
}
return b, ch, err
} else if err != nil {
@ -688,12 +695,77 @@ func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterB
return b, b, err
}
func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fillCache bool) iterator.Iterator {
b, rel, err := r.readBlockCached(dataBH, checksum, fillCache)
func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) {
if r.indexBlock == nil {
return r.readBlockCached(r.indexBH, true, fillCache)
}
return r.indexBlock, util.NoopReleaser{}, nil
}
func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) {
if r.filterBlock == nil {
return r.readFilterBlockCached(r.filterBH, fillCache)
}
return r.filterBlock, util.NoopReleaser{}, nil
}
func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter {
bi := &blockIter{
tr: r,
block: b,
blockReleaser: bReleaser,
// Valid key should never be nil.
key: make([]byte, 0),
dir: dirSOI,
riStart: 0,
riLimit: b.restartsLen,
offsetStart: 0,
offsetRealStart: 0,
offsetLimit: b.restartsOffset,
}
if slice != nil {
if slice.Start != nil {
if bi.Seek(slice.Start) {
bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
bi.offsetStart = b.restartOffset(bi.riStart)
bi.offsetRealStart = bi.prevOffset
} else {
bi.riStart = b.restartsLen
bi.offsetStart = b.restartsOffset
bi.offsetRealStart = b.restartsOffset
}
}
if slice.Limit != nil {
if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
bi.offsetLimit = bi.prevOffset
bi.riLimit = bi.restartIndex + 1
}
}
bi.reset()
if bi.offsetStart > bi.offsetLimit {
bi.sErr(errors.New("leveldb/table: invalid slice range"))
}
}
return bi
}
func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
if err != nil {
return iterator.NewEmptyIterator(err)
}
return b.newIterator(slice, false, rel)
return r.newBlockIter(b, rel, slice, false)
}
func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
r.mu.RLock()
defer r.mu.RUnlock()
if r.err != nil {
return iterator.NewEmptyIterator(r.err)
}
return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
}
// NewIterator creates an iterator from the table.
@ -708,43 +780,43 @@ func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fi
//
// Also read Iterator documentation of the leveldb/iterator package.
func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
r.mu.RLock()
defer r.mu.RUnlock()
if r.err != nil {
return iterator.NewEmptyIterator(r.err)
}
fillCache := !ro.GetDontFillCache()
b, rel, err := r.readBlockCached(r.indexBH, true, fillCache)
indexBlock, rel, err := r.getIndexBlock(fillCache)
if err != nil {
return iterator.NewEmptyIterator(err)
}
index := &indexIter{
blockIter: b.newIterator(slice, true, rel),
blockIter: r.newBlockIter(indexBlock, rel, slice, true),
tr: r,
slice: slice,
checksum: ro.GetStrict(opt.StrictBlockChecksum),
fillCache: !ro.GetDontFillCache(),
}
return iterator.NewIndexedIterator(index, r.strictIter || ro.GetStrict(opt.StrictIterator), false)
return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader))
}
// Find finds key/value pair whose key is greater than or equal to the
// given key. It returns ErrNotFound if the table doesn't contain
// such pair.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Find returns.
func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err error) {
func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) {
r.mu.RLock()
defer r.mu.RUnlock()
if r.err != nil {
err = r.err
return
}
indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true)
indexBlock, rel, err := r.getIndexBlock(true)
if err != nil {
return
}
defer rel.Release()
index := indexBlock.newIterator(nil, true, nil)
index := r.newBlockIter(indexBlock, nil, nil, true)
defer index.Release()
if !index.Seek(key) {
err = index.Error()
@ -755,20 +827,23 @@ func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err
}
dataBH, n := decodeBlockHandle(index.Value())
if n == 0 {
err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)")
r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
return
}
if r.filter != nil {
filterBlock, rel, ferr := r.readFilterBlockCached(r.filterBH, true)
if filtered && r.filter != nil {
filterBlock, frel, ferr := r.getFilterBlock(true)
if ferr == nil {
if !filterBlock.contains(dataBH.offset, key) {
rel.Release()
if !filterBlock.contains(r.filter, dataBH.offset, key) {
frel.Release()
return nil, nil, ErrNotFound
}
rel.Release()
frel.Release()
} else if !errors.IsCorrupted(ferr) {
err = ferr
return
}
}
data := r.getDataIter(dataBH, nil, ro.GetStrict(opt.StrictBlockChecksum), !ro.GetDontFillCache())
data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
defer data.Release()
if !data.Seek(key) {
err = data.Error()
@ -779,24 +854,62 @@ func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err
}
// Don't use block buffer, no need to copy the buffer.
rkey = data.Key()
// Use block buffer, and since the buffer will be recycled, the buffer
// need to be copied.
value = append([]byte{}, data.Value()...)
if !noValue {
if r.bpool == nil {
value = data.Value()
} else {
// Use block buffer, and since the buffer will be recycled, the buffer
// need to be copied.
value = append([]byte{}, data.Value()...)
}
}
return
}
// Find finds key/value pair whose key is greater than or equal to the
// given key. It returns ErrNotFound if the table doesn't contain
// such pair.
// If filtered is true then the nearest 'block' will be checked against
// 'filter data' (if present) and will immediately return ErrNotFound if
// 'filter data' indicates that such pair doesn't exist.
//
// The caller may modify the contents of the returned slice as it is its
// own copy.
// It is safe to modify the contents of the argument after Find returns.
func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) {
return r.find(key, filtered, ro, false)
}
// Find finds key that is greater than or equal to the given key.
// It returns ErrNotFound if the table doesn't contain such key.
// If filtered is true then the nearest 'block' will be checked against
// 'filter data' (if present) and will immediately return ErrNotFound if
// 'filter data' indicates that such key doesn't exist.
//
// The caller may modify the contents of the returned slice as it is its
// own copy.
// It is safe to modify the contents of the argument after Find returns.
func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) {
rkey, _, err = r.find(key, filtered, ro, true)
return
}
// Get gets the value for the given key. It returns errors.ErrNotFound
// if the table does not contain the key.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Get returns.
// The caller may modify the contents of the returned slice as it is its
// own copy.
// It is safe to modify the contents of the argument after Find returns.
func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
r.mu.RLock()
defer r.mu.RUnlock()
if r.err != nil {
err = r.err
return
}
rkey, value, err := r.Find(key, ro)
rkey, value, err := r.find(key, false, ro, false)
if err == nil && r.cmp.Compare(rkey, key) != 0 {
value = nil
err = ErrNotFound
@ -808,6 +921,9 @@ func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error)
//
// It is safe to modify the contents of the argument after Get returns.
func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
r.mu.RLock()
defer r.mu.RUnlock()
if r.err != nil {
err = r.err
return
@ -819,12 +935,12 @@ func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
}
defer rel.Release()
index := indexBlock.newIterator(nil, true, nil)
index := r.newBlockIter(indexBlock, nil, nil, true)
defer index.Release()
if index.Seek(key) {
dataBH, n := decodeBlockHandle(index.Value())
if n == 0 {
err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)")
r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
return
}
offset = int64(dataBH.offset)
@ -840,67 +956,91 @@ func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
// Release implements util.Releaser.
// It also close the file if it is an io.Closer.
func (r *Reader) Release() {
r.mu.Lock()
defer r.mu.Unlock()
if closer, ok := r.reader.(io.Closer); ok {
closer.Close()
}
if r.indexBlock != nil {
r.indexBlock.Release()
r.indexBlock = nil
}
if r.filterBlock != nil {
r.filterBlock.Release()
r.filterBlock = nil
}
r.reader = nil
r.cache = nil
r.bpool = nil
r.err = ErrReaderReleased
}
// NewReader creates a new initialized table reader for the file.
// The cache and bpool is optional and can be nil.
// The fi, cache and bpool is optional and can be nil.
//
// The returned table reader instance is goroutine-safe.
func NewReader(f io.ReaderAt, size int64, cache cache.Namespace, bpool *util.BufferPool, o *opt.Options) *Reader {
if bpool == nil {
bpool = util.NewBufferPool(o.GetBlockSize() + blockTrailerLen)
}
r := &Reader{
reader: f,
cache: cache,
bpool: bpool,
cmp: o.GetComparer(),
checksum: o.GetStrict(opt.StrictBlockChecksum),
strictIter: o.GetStrict(opt.StrictIterator),
}
func NewReader(f io.ReaderAt, size int64, fi *storage.FileInfo, cache cache.Namespace, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
if f == nil {
r.err = errors.New("leveldb/table: Reader: nil file")
return r
return nil, errors.New("leveldb/table: nil file")
}
r := &Reader{
fi: fi,
reader: f,
cache: cache,
bpool: bpool,
o: o,
cmp: o.GetComparer(),
verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
}
if size < footerLen {
r.err = errors.New("leveldb/table: Reader: invalid table (file size is too small)")
return r
r.err = r.newErrCorrupted(0, size, "table", "too small")
return r, nil
}
footerPos := size - footerLen
var footer [footerLen]byte
if _, err := r.reader.ReadAt(footer[:], size-footerLen); err != nil && err != io.EOF {
r.err = fmt.Errorf("leveldb/table: Reader: invalid table (could not read footer): %v", err)
if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
return nil, err
}
if string(footer[footerLen-len(magic):footerLen]) != magic {
r.err = errors.New("leveldb/table: Reader: invalid table (bad magic number)")
return r
r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
return r, nil
}
var n int
// Decode the metaindex block handle.
metaBH, n := decodeBlockHandle(footer[:])
r.metaBH, n = decodeBlockHandle(footer[:])
if n == 0 {
r.err = errors.New("leveldb/table: Reader: invalid table (bad metaindex block handle)")
return r
r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
return r, nil
}
// Decode the index block handle.
r.indexBH, n = decodeBlockHandle(footer[n:])
if n == 0 {
r.err = errors.New("leveldb/table: Reader: invalid table (bad index block handle)")
return r
r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
return r, nil
}
// Read metaindex block.
metaBlock, err := r.readBlock(metaBH, true)
metaBlock, err := r.readBlock(r.metaBH, true)
if err != nil {
r.err = err
return r
if errors.IsCorrupted(err) {
r.err = err
return r, nil
} else {
return nil, err
}
}
// Set data end.
r.dataEnd = int64(metaBH.offset)
metaIter := metaBlock.newIterator(nil, false, nil)
r.dataEnd = int64(r.metaBH.offset)
// Read metaindex.
metaIter := r.newBlockIter(metaBlock, nil, nil, true)
for metaIter.Next() {
key := string(metaIter.Key())
if !strings.HasPrefix(key, "filter.") {
@ -930,5 +1070,30 @@ func NewReader(f io.ReaderAt, size int64, cache cache.Namespace, bpool *util.Buf
}
metaIter.Release()
metaBlock.Release()
return r
// Cache index and filter block locally, since we don't have global cache.
if cache == nil {
r.indexBlock, err = r.readBlock(r.indexBH, true)
if err != nil {
if errors.IsCorrupted(err) {
r.err = err
return r, nil
} else {
return nil, err
}
}
if r.filter != nil {
r.filterBlock, err = r.readFilterBlock(r.filterBH)
if err != nil {
if !errors.IsCorrupted(err) {
return nil, err
}
// Don't use filter then.
r.filter = nil
}
}
}
return r, nil
}

View File

@ -133,9 +133,9 @@ Filter block trailer:
+- 4-bytes -+
/ \
+---------------+---------------+---------------+-------------------------+------------------+
| offset 1 | .... | offset n | filter offset (4-bytes) | base Lg (1-byte) |
+-------------- +---------------+---------------+-------------------------+------------------+
+---------------+---------------+---------------+-------------------------------+------------------+
| data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
+-------------- +---------------+---------------+-------------------------------+------------------+
NOTE: All fixed-length integer are little-endian.

View File

@ -3,15 +3,9 @@ package table
import (
"testing"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil"
)
func TestTable(t *testing.T) {
testutil.RunDefer()
RegisterFailHandler(Fail)
RunSpecs(t, "Table Suite")
testutil.RunSuite(t, "Table Suite")
}

View File

@ -23,7 +23,7 @@ type tableWrapper struct {
}
func (t tableWrapper) TestFind(key []byte) (rkey, rvalue []byte, err error) {
return t.Reader.Find(key, nil)
return t.Reader.Find(key, false, nil)
}
func (t tableWrapper) TestGet(key []byte) (value []byte, err error) {
@ -59,7 +59,8 @@ var _ = testutil.Defer(func() {
It("Should be able to approximate offset of a key correctly", func() {
Expect(err).ShouldNot(HaveOccurred())
tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, o)
tr, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, nil, o)
Expect(err).ShouldNot(HaveOccurred())
CheckOffset := func(key string, expect, threshold int) {
offset, err := tr.OffsetOf([]byte(key))
Expect(err).ShouldNot(HaveOccurred())
@ -95,7 +96,7 @@ var _ = testutil.Defer(func() {
tw.Close()
// Opening the table.
tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, o)
tr, _ := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, nil, nil, o)
return tableWrapper{tr}
}
Test := func(kv *testutil.KeyValue, body func(r *Reader)) func() {
@ -104,11 +105,11 @@ var _ = testutil.Defer(func() {
if body != nil {
body(db.(tableWrapper).Reader)
}
testutil.KeyValueTesting(nil, db, *kv)
testutil.KeyValueTesting(nil, *kv, db, nil, nil)
}
}
testutil.AllKeyValueTesting(nil, Build)
testutil.AllKeyValueTesting(nil, Build, nil, nil)
Describe("with one key per block", Test(testutil.KeyValue_Generate(nil, 9, 1, 10, 512, 512), func(r *Reader) {
It("should have correct blocks number", func() {
indexBlock, err := r.readBlock(r.indexBH, true)

View File

@ -12,7 +12,7 @@ import (
"fmt"
"io"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/code.google.com/p/snappy-go/snappy"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/gosnappy/snappy"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"

View File

@ -12,6 +12,7 @@ import (
. "github.com/onsi/gomega"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
@ -34,6 +35,10 @@ type Get interface {
TestGet(key []byte) (value []byte, err error)
}
type Has interface {
TestHas(key []byte) (ret bool, err error)
}
type NewIterator interface {
TestNewIterator(slice *util.Range) iterator.Iterator
}
@ -110,7 +115,7 @@ func (t *DBTesting) TestAllPresent() {
func (t *DBTesting) TestDeletedKey(key []byte) {
_, err := t.DB.TestGet(key)
Expect(err).Should(Equal(util.ErrNotFound), "Get on deleted key %q, %s", key, t.text())
Expect(err).Should(Equal(errors.ErrNotFound), "Get on deleted key %q, %s", key, t.text())
}
func (t *DBTesting) TestAllDeleted() {
@ -212,5 +217,6 @@ func DoDBTesting(t *DBTesting) {
}
DoIteratorTesting(&it)
iter.Release()
}
}

View File

@ -0,0 +1,21 @@
package testutil
import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
func RunSuite(t GinkgoTestingT, name string) {
RunDefer()
SynchronizedBeforeSuite(func() []byte {
RunDefer("setup")
return nil
}, func(data []byte) {})
SynchronizedAfterSuite(func() {
RunDefer("teardown")
}, func() {})
RegisterFailHandler(Fail)
RunSpecs(t, name)
}

View File

@ -13,16 +13,28 @@ import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/errors"
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
func KeyValueTesting(rnd *rand.Rand, kv KeyValue, p DB, setup func(KeyValue) DB, teardown func(DB)) {
if rnd == nil {
rnd = NewRand()
}
if db, ok := p.(Find); ok {
It("Should find all keys with Find", func() {
if p == nil {
BeforeEach(func() {
p = setup(kv)
})
if teardown != nil {
AfterEach(func() {
teardown(p)
})
}
}
It("Should find all keys with Find", func() {
if db, ok := p.(Find); ok {
ShuffledIndex(nil, kv.Len(), 1, func(i int) {
key_, key, value := kv.IndexInexact(i)
@ -38,9 +50,11 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
Expect(rkey).Should(Equal(key))
Expect(rvalue).Should(Equal(value), "Value for key %q (%q)", key_, key)
})
})
}
})
It("Should return error if the key is not present", func() {
It("Should return error if the key is not present", func() {
if db, ok := p.(Find); ok {
var key []byte
if kv.Len() > 0 {
key_, _ := kv.Index(kv.Len() - 1)
@ -48,12 +62,12 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
}
rkey, _, err := db.TestFind(key)
Expect(err).Should(HaveOccurred(), "Find for key %q yield key %q", key, rkey)
Expect(err).Should(Equal(util.ErrNotFound))
})
}
Expect(err).Should(Equal(errors.ErrNotFound))
}
})
if db, ok := p.(Get); ok {
It("Should only find exact key with Get", func() {
It("Should only find exact key with Get", func() {
if db, ok := p.(Get); ok {
ShuffledIndex(nil, kv.Len(), 1, func(i int) {
key_, key, value := kv.IndexInexact(i)
@ -66,14 +80,34 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
if len(key_) > 0 {
_, err = db.TestGet(key_)
Expect(err).Should(HaveOccurred(), "Error for key %q", key_)
Expect(err).Should(Equal(util.ErrNotFound))
Expect(err).Should(Equal(errors.ErrNotFound))
}
})
})
}
}
})
if db, ok := p.(NewIterator); ok {
TestIter := func(r *util.Range, _kv KeyValue) {
It("Should only find present key with Has", func() {
if db, ok := p.(Has); ok {
ShuffledIndex(nil, kv.Len(), 1, func(i int) {
key_, key, _ := kv.IndexInexact(i)
// Using exact key.
ret, err := db.TestHas(key)
Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key)
Expect(ret).Should(BeTrue(), "False for key %q", key)
// Using inexact key.
if len(key_) > 0 {
ret, err = db.TestHas(key_)
Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key_)
Expect(ret).ShouldNot(BeTrue(), "True for key %q", key)
}
})
}
})
TestIter := func(r *util.Range, _kv KeyValue) {
if db, ok := p.(NewIterator); ok {
iter := db.TestNewIterator(r)
Expect(iter.Error()).ShouldNot(HaveOccurred())
@ -83,46 +117,62 @@ func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) {
}
DoIteratorTesting(&t)
iter.Release()
}
}
It("Should iterates and seeks correctly", func(done Done) {
TestIter(nil, kv.Clone())
done <- true
}, 3.0)
RandomIndex(rnd, kv.Len(), Min(kv.Len(), 50), func(i int) {
type slice struct {
r *util.Range
start, limit int
}
It("Should iterates and seeks correctly", func(done Done) {
TestIter(nil, kv.Clone())
done <- true
}, 3.0)
RandomIndex(rnd, kv.Len(), kv.Len(), func(i int) {
type slice struct {
r *util.Range
start, limit int
}
key_, _, _ := kv.IndexInexact(i)
for _, x := range []slice{
{&util.Range{Start: key_, Limit: nil}, i, kv.Len()},
{&util.Range{Start: nil, Limit: key_}, 0, i},
} {
It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", x.start, x.limit), func(done Done) {
TestIter(x.r, kv.Slice(x.start, x.limit))
done <- true
}, 3.0)
}
})
RandomRange(rnd, kv.Len(), kv.Len(), func(start, limit int) {
It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) {
r := kv.Range(start, limit)
TestIter(&r, kv.Slice(start, limit))
key_, _, _ := kv.IndexInexact(i)
for _, x := range []slice{
{&util.Range{Start: key_, Limit: nil}, i, kv.Len()},
{&util.Range{Start: nil, Limit: key_}, 0, i},
} {
It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", x.start, x.limit), func(done Done) {
TestIter(x.r, kv.Slice(x.start, x.limit))
done <- true
}, 3.0)
})
}
}
})
RandomRange(rnd, kv.Len(), Min(kv.Len(), 50), func(start, limit int) {
It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) {
r := kv.Range(start, limit)
TestIter(&r, kv.Slice(start, limit))
done <- true
}, 3.0)
})
}
func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) {
func AllKeyValueTesting(rnd *rand.Rand, body, setup func(KeyValue) DB, teardown func(DB)) {
Test := func(kv *KeyValue) func() {
return func() {
db := body(*kv)
KeyValueTesting(rnd, db, *kv)
var p DB
if setup != nil {
Defer("setup", func() {
p = setup(*kv)
})
}
if teardown != nil {
Defer("teardown", func() {
teardown(p)
})
}
if body != nil {
p = body(*kv)
}
KeyValueTesting(rnd, *kv, p, func(KeyValue) DB {
return p
}, nil)
}
}
@ -133,4 +183,5 @@ func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) {
Describe("with big value", Test(KeyValue_BigValue()))
Describe("with special key", Test(KeyValue_SpecialKey()))
Describe("with multiple key/value", Test(KeyValue_MultipleKeyValue()))
Describe("with generated key/value", Test(KeyValue_Generate(nil, 120, 1, 50, 10, 120)))
}

View File

@ -397,6 +397,7 @@ func (s *Storage) logI(format string, args ...interface{}) {
func (s *Storage) Log(str string) {
s.log(1, "Log: "+str)
s.Storage.Log(str)
}
func (s *Storage) Lock() (r util.Releaser, err error) {

View File

@ -155,3 +155,17 @@ func RandomRange(rnd *rand.Rand, n, round int, fn func(start, limit int)) {
}
return
}
func Max(x, y int) int {
if x > y {
return x
}
return y
}
func Min(x, y int) int {
if x < y {
return x
}
return y
}

View File

@ -34,6 +34,10 @@ func (t *testingDB) TestGet(key []byte) (value []byte, err error) {
return t.Get(key, t.ro)
}
func (t *testingDB) TestHas(key []byte) (ret bool, err error) {
return t.Has(key, t.ro)
}
func (t *testingDB) TestNewIterator(slice *util.Range) iterator.Iterator {
return t.NewIterator(slice, t.ro)
}

View File

@ -14,10 +14,10 @@ import (
)
func shorten(str string) string {
if len(str) <= 4 {
if len(str) <= 8 {
return str
}
return str[:1] + ".." + str[len(str)-1:]
return str[:3] + ".." + str[len(str)-3:]
}
var bunits = [...]string{"", "Ki", "Mi", "Gi"}

View File

@ -8,6 +8,7 @@ package util
import (
"fmt"
"sync"
"sync/atomic"
"time"
)
@ -19,17 +20,16 @@ type buffer struct {
// BufferPool is a 'buffer pool'.
type BufferPool struct {
pool [6]chan []byte
size [5]uint32
sizeMiss [5]uint32
sizeHalf [5]uint32
baseline [4]int
baselinex0 int
baselinex1 int
baseline0 int
baseline1 int
baseline2 int
close chan struct{}
pool [6]chan []byte
size [5]uint32
sizeMiss [5]uint32
sizeHalf [5]uint32
baseline [4]int
baseline0 int
mu sync.RWMutex
closed bool
closeC chan struct{}
get uint32
put uint32
@ -54,6 +54,17 @@ func (p *BufferPool) poolNum(n int) int {
// Get returns buffer with length of n.
func (p *BufferPool) Get(n int) []byte {
if p == nil {
return make([]byte, n)
}
p.mu.RLock()
defer p.mu.RUnlock()
if p.closed {
return make([]byte, n)
}
atomic.AddUint32(&p.get, 1)
poolNum := p.poolNum(n)
@ -145,6 +156,17 @@ func (p *BufferPool) Get(n int) []byte {
// Put adds given buffer to the pool.
func (p *BufferPool) Put(b []byte) {
if p == nil {
return
}
p.mu.RLock()
defer p.mu.RUnlock()
if p.closed {
return
}
atomic.AddUint32(&p.put, 1)
pool := p.pool[p.poolNum(cap(b))]
@ -156,13 +178,23 @@ func (p *BufferPool) Put(b []byte) {
}
func (p *BufferPool) Close() {
select {
case p.close <- struct{}{}:
default:
if p == nil {
return
}
p.mu.Lock()
if !p.closed {
p.closed = true
p.closeC <- struct{}{}
}
p.mu.Unlock()
}
func (p *BufferPool) String() string {
if p == nil {
return "<nil>"
}
return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}",
p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss)
}
@ -178,7 +210,8 @@ func (p *BufferPool) drain() {
default:
}
}
case <-p.close:
case <-p.closeC:
close(p.closeC)
for _, ch := range p.pool {
close(ch)
}
@ -195,7 +228,7 @@ func NewBufferPool(baseline int) *BufferPool {
p := &BufferPool{
baseline0: baseline,
baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4},
close: make(chan struct{}, 1),
closeC: make(chan struct{}, 1),
}
for i, cap := range []int{2, 2, 4, 4, 2, 1} {
p.pool[i] = make(chan []byte, cap)

View File

@ -12,7 +12,8 @@ import (
)
var (
ErrNotFound = errors.New("leveldb: not found")
ErrReleased = errors.New("leveldb: resource already relesed")
ErrHasReleaser = errors.New("leveldb: releaser already defined")
)
// Releaser is the interface that wraps the basic Release method.
@ -27,23 +28,46 @@ type ReleaseSetter interface {
// SetReleaser associates the given releaser to the resources. The
// releaser will be called once coresponding resources released.
// Calling SetReleaser with nil will clear the releaser.
//
// This will panic if a releaser already present or coresponding
// resource is already released. Releaser should be cleared first
// before assigned a new one.
SetReleaser(releaser Releaser)
}
// BasicReleaser provides basic implementation of Releaser and ReleaseSetter.
type BasicReleaser struct {
releaser Releaser
released bool
}
// Released returns whether Release method already called.
func (r *BasicReleaser) Released() bool {
return r.released
}
// Release implements Releaser.Release.
func (r *BasicReleaser) Release() {
if r.releaser != nil {
r.releaser.Release()
r.releaser = nil
if !r.released {
if r.releaser != nil {
r.releaser.Release()
r.releaser = nil
}
r.released = true
}
}
// SetReleaser implements ReleaseSetter.SetReleaser.
func (r *BasicReleaser) SetReleaser(releaser Releaser) {
if r.released {
panic(ErrReleased)
}
if r.releaser != nil && releaser != nil {
panic(ErrHasReleaser)
}
r.releaser = releaser
}
type NoopReleaser struct{}
func (NoopReleaser) Release() {}

View File

@ -7,7 +7,6 @@
package leveldb
import (
"errors"
"sync/atomic"
"unsafe"
@ -16,19 +15,6 @@ import (
"github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
var levelMaxSize [kNumLevels]float64
func init() {
// Precompute max size of each level
for level := range levelMaxSize {
res := float64(10 * 1048576)
for n := level; n > 1; n-- {
res *= 10
}
levelMaxSize[level] = res
}
}
type tSet struct {
level int
table *tFile
@ -37,7 +23,7 @@ type tSet struct {
type version struct {
s *session
tables [kNumLevels]tFiles
tables []tFiles
// Level that should be compacted next and its compaction score.
// Score < 1 means compaction is not strictly needed. These fields
@ -47,11 +33,16 @@ type version struct {
cSeek unsafe.Pointer
ref int
ref int
// Succeeding version.
next *version
}
func (v *version) release_NB() {
func newVersion(s *session) *version {
return &version{s: s, tables: make([]tFiles, s.o.GetNumLevel())}
}
func (v *version) releaseNB() {
v.ref--
if v.ref > 0 {
return
@ -77,13 +68,13 @@ func (v *version) release_NB() {
}
}
v.next.release_NB()
v.next.releaseNB()
v.next = nil
}
func (v *version) release() {
v.s.vmu.Lock()
v.release_NB()
v.releaseNB()
v.s.vmu.Unlock()
}
@ -123,17 +114,18 @@ func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, l
}
}
func (v *version) get(ikey iKey, ro *opt.ReadOptions) (value []byte, tcomp bool, err error) {
func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
ukey := ikey.ukey()
var (
tset *tSet
tseek bool
l0found bool
l0seq uint64
l0vt vType
l0val []byte
// Level-0.
zfound bool
zseq uint64
zkt kType
zval []byte
)
err = ErrNotFound
@ -150,55 +142,60 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions) (value []byte, tcomp bool,
}
}
ikey__, val_, err_ := v.s.tops.find(t, ikey, ro)
switch err_ {
var (
fikey, fval []byte
ferr error
)
if noValue {
fikey, ferr = v.s.tops.findKey(t, ikey, ro)
} else {
fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
}
switch ferr {
case nil:
case ErrNotFound:
return true
default:
err = err_
err = ferr
return false
}
ikey_ := iKey(ikey__)
if seq, vt, ok := ikey_.parseNum(); ok {
if v.s.icmp.uCompare(ukey, ikey_.ukey()) != 0 {
return true
}
if level == 0 {
if seq >= l0seq {
l0found = true
l0seq = seq
l0vt = vt
l0val = val_
if fukey, fseq, fkt, fkerr := parseIkey(fikey); fkerr == nil {
if v.s.icmp.uCompare(ukey, fukey) == 0 {
if level == 0 {
if fseq >= zseq {
zfound = true
zseq = fseq
zkt = fkt
zval = fval
}
} else {
switch fkt {
case ktVal:
value = fval
err = nil
case ktDel:
default:
panic("leveldb: invalid iKey type")
}
return false
}
} else {
switch vt {
case tVal:
value = val_
err = nil
case tDel:
default:
panic("leveldb: invalid internal key type")
}
return false
}
} else {
err = errors.New("leveldb: internal key corrupted")
err = fkerr
return false
}
return true
}, func(level int) bool {
if l0found {
switch l0vt {
case tVal:
value = l0val
if zfound {
switch zkt {
case ktVal:
value = zval
err = nil
case tDel:
case ktDel:
default:
panic("leveldb: invalid internal key type")
panic("leveldb: invalid iKey type")
}
return false
}
@ -216,13 +213,13 @@ func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []it
its = append(its, it)
}
strict := v.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator)
strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
for _, tables := range v.tables[1:] {
if len(tables) == 0 {
continue
}
it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict, true)
it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict)
its = append(its, it)
}
@ -230,7 +227,7 @@ func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []it
}
func (v *version) newStaging() *versionStaging {
return &versionStaging{base: v}
return &versionStaging{base: v, tables: make([]tablesScratch, v.s.o.GetNumLevel())}
}
// Spawn a new version based on this version.
@ -285,12 +282,13 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
func (v *version) pickLevel(umin, umax []byte) (level int) {
if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) {
var overlaps tFiles
for ; level < kMaxMemCompactLevel; level++ {
maxLevel := v.s.o.GetMaxMemCompationLevel()
for ; level < maxLevel; level++ {
if v.tables[level+1].overlaps(v.s.icmp, umin, umax, false) {
break
}
overlaps = v.tables[level+2].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
if overlaps.size() > kMaxGrandParentOverlapBytes {
if overlaps.size() > uint64(v.s.o.GetCompactionGPOverlaps(level)) {
break
}
}
@ -318,9 +316,9 @@ func (v *version) computeCompaction() {
// file size is small (perhaps because of a small write-buffer
// setting, or very high compression ratios, or lots of
// overwrites/deletions).
score = float64(len(tables)) / kL0_CompactionTrigger
score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
} else {
score = float64(tables.size()) / levelMaxSize[level]
score = float64(tables.size()) / float64(v.s.o.GetCompactionTotalSize(level))
}
if score > bestScore {
@ -337,12 +335,14 @@ func (v *version) needCompaction() bool {
return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
}
type tablesScratch struct {
added map[uint64]atRecord
deleted map[uint64]struct{}
}
type versionStaging struct {
base *version
tables [kNumLevels]struct {
added map[uint64]ntRecord
deleted map[uint64]struct{}
}
tables []tablesScratch
}
func (p *versionStaging) commit(r *sessionRecord) {
@ -367,7 +367,7 @@ func (p *versionStaging) commit(r *sessionRecord) {
tm := &(p.tables[r.level])
if tm.added == nil {
tm.added = make(map[uint64]ntRecord)
tm.added = make(map[uint64]atRecord)
}
tm.added[r.num] = r
@ -379,7 +379,7 @@ func (p *versionStaging) commit(r *sessionRecord) {
func (p *versionStaging) finish() *version {
// Build new version.
nv := &version{s: p.base.s}
nv := newVersion(p.base.s)
for level, tm := range p.tables {
btables := p.base.tables[level]
@ -402,7 +402,7 @@ func (p *versionStaging) finish() *version {
// New tables.
for _, r := range tm.added {
nt = append(nt, r.makeFile(p.base.s))
nt = append(nt, p.base.s.tableFileFromRecord(r))
}
// Sort tables.
@ -429,7 +429,7 @@ func (vr *versionReleaser) Release() {
v := vr.v
v.s.vmu.Lock()
if !vr.once {
v.release_NB()
v.releaseNB()
vr.once = true
}
v.s.vmu.Unlock()

View File

@ -45,7 +45,13 @@ func (bs *blockstore) Get(k u.Key) (*blocks.Block, error) {
}
func (bs *blockstore) Put(block *blocks.Block) error {
return bs.datastore.Put(block.Key().DsKey(), block.Data)
// Has is cheaper than
k := block.Key().DsKey()
exists, err := bs.datastore.Has(k)
if err != nil && exists {
return nil // already stored.
}
return bs.datastore.Put(k, block.Data)
}
func (bs *blockstore) Has(k u.Key) (bool, error) {

View File

@ -44,25 +44,10 @@ func New(bs blockstore.Blockstore, rem exchange.Interface) (*BlockService, error
func (s *BlockService) AddBlock(b *blocks.Block) (u.Key, error) {
k := b.Key()
log.Debugf("blockservice: storing [%s] in datastore", k)
// TODO(brian): define a block datastore with a Put method which accepts a
// block parameter
// check if we have it before adding. this is an extra read, but large writes
// are more expensive.
// TODO(jbenet) cheaper has. https://github.com/jbenet/go-datastore/issues/6
has, err := s.Blockstore.Has(k)
err := s.Blockstore.Put(b)
if err != nil {
return k, err
}
if has {
log.Debugf("blockservice: storing [%s] in datastore (already stored)", k)
} else {
log.Debugf("blockservice: storing [%s] in datastore", k)
err := s.Blockstore.Put(b)
if err != nil {
return k, err
}
}
// TODO this operation rate-limits blockservice operations, we should
// consider moving this to an sync process.