mirror of
https://github.com/ipfs/kubo.git
synced 2026-03-10 10:47:51 +08:00
Merge pull request #1021 from ipfs/bloom-filter-fix
Make bloom filters simpler
This commit is contained in:
commit
096420cb0d
12
Godeps/Godeps.json
generated
12
Godeps/Godeps.json
generated
@ -213,6 +213,10 @@
|
||||
"ImportPath": "github.com/mitchellh/go-homedir",
|
||||
"Rev": "7d2d8c8a4e078ce3c58736ab521a40b37a504c52"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/mtchavez/jenkins",
|
||||
"Rev": "5a816af6ef21ef401bff5e4b7dd255d63400f497"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
|
||||
"Rev": "87e4e645d80ae9c537e8f2dee52b28036a5dd75e"
|
||||
@ -221,6 +225,10 @@
|
||||
"ImportPath": "github.com/syndtr/gosnappy/snappy",
|
||||
"Rev": "156a073208e131d7d2e212cb749feae7c339e846"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/whyrusleeping/go-metrics",
|
||||
"Rev": "1cd8009604ec2238b5a71305a0ecd974066e0e16"
|
||||
},
|
||||
{
|
||||
"ImportPath": "golang.org/x/crypto/blowfish",
|
||||
"Rev": "b7d6bf2c61544745a02f83dec90393985fc3a065"
|
||||
@ -233,10 +241,6 @@
|
||||
"ImportPath": "golang.org/x/net/context",
|
||||
"Rev": "7dbad50ab5b31073856416cdcfeb2796d682f844"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/whyrusleeping/go-metrics",
|
||||
"Rev": "1cd8009604ec2238b5a71305a0ecd974066e0e16"
|
||||
},
|
||||
{
|
||||
"ImportPath": "gopkg.in/fsnotify.v1",
|
||||
"Comment": "v1.2.0",
|
||||
|
||||
23
Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore
generated
vendored
Normal file
23
Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
8
Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml
generated
vendored
Normal file
8
Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
go:
|
||||
- 1.1
|
||||
- tip
|
||||
install:
|
||||
- go get github.com/onsi/ginkgo
|
||||
- go get github.com/onsi/gomega
|
||||
before_script: go test -i ./...
|
||||
script: go test ./...
|
||||
11
Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile
generated
vendored
Normal file
11
Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
build:
|
||||
go build jenkins.go
|
||||
|
||||
run:
|
||||
go run jenkins.go
|
||||
|
||||
test:
|
||||
go test -cover
|
||||
|
||||
default:
|
||||
go run jenkins.go
|
||||
45
Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md
generated
vendored
Normal file
45
Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md
generated
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
Jenkins
|
||||
=================
|
||||
|
||||
Golang Jenkins hash
|
||||
|
||||
[Build status on Travis CI](https://travis-ci.org/mtchavez/go-jenkins-hashes)
|
||||
|
||||
## Install
|
||||
|
||||
`go get -u github.com/mtchavez/jenkins`
|
||||
|
||||
## Usage
|
||||
|
||||
Jenkins follows the [Hash32](http://golang.org/pkg/hash/#Hash32) interface from the Go standard library
|
||||
|
||||
```go
|
||||
// Create a new hash
|
||||
jenkhash := New()
|
||||
|
||||
// Write a string of bytes to hash
|
||||
key := []byte("my-random-key")
|
||||
length, err := jenkhash.Write(key)
|
||||
|
||||
// Get uint32 sum of hash
|
||||
sum := jenkhash.Sum32()
|
||||
|
||||
// Append the 4-byte hash (big-endian) to key and return the resulting slice
|
||||
sumbytes := jenkhash.Sum(key)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Uses [Ginkgo](http://onsi.github.io/ginkgo/) for testing.
|
||||
|
||||
Run via `make test` which will run `go test -cover`
|
||||
|
||||
## Documentation
|
||||
|
||||
Docs on [godoc](http://godoc.org/github.com/mtchavez/jenkins)
|
||||
|
||||
## License
|
||||
|
||||
Written by Chavez
|
||||
|
||||
Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
48
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go
generated
vendored
Normal file
48
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
package jenkins
|
||||
|
||||
import "hash"
|
||||
|
||||
// jenkhash is the running state of Bob Jenkins' one-at-a-time hash.
//
// The stored value is the mixed-but-not-yet-finalised accumulator. The
// final avalanche step is applied by Sum32 on a copy, never by Write, so
// feeding the same bytes through one Write or through several Writes
// yields the same sum, as the hash.Hash contract requires.
type jenkhash uint32

// New returns a hash.Hash32 computing the Jenkins one-at-a-time hash.
// A fresh hash has a Sum32 of 0.
func New() hash.Hash32 {
	return new(jenkhash)
}

// Write mixes every byte of key into the running state. It always
// consumes the whole slice and never returns an error.
func (j *jenkhash) Write(key []byte) (int, error) {
	h := *j

	for _, b := range key {
		h += jenkhash(b)
		h += h << 10
		h ^= h >> 6
	}

	// Finalisation is deliberately NOT done here: doing it per call would
	// make the result depend on how the input was chunked.
	*j = h
	return len(key), nil
}

// Reset returns the hash to its initial, empty state.
func (j *jenkhash) Reset() {
	*j = 0
}

// Size returns the number of bytes Sum appends: 4.
func (j *jenkhash) Size() int {
	return 4
}

// BlockSize returns the hash's block size in bytes: 1, since input is
// consumed one byte at a time.
func (j *jenkhash) BlockSize() int {
	return 1
}

// Sum32 returns the finalised 32-bit hash of everything written so far.
// It works on a copy of the state, so more data may be written afterwards
// and repeated calls return the same value.
func (j *jenkhash) Sum32() uint32 {
	h := uint32(*j)

	// Final avalanche of the one-at-a-time hash.
	h += h << 3
	h ^= h >> 11
	h += h << 15

	return h
}

// Sum appends the current hash to in as 4 big-endian bytes and returns
// the resulting slice. The hash state is left unchanged.
func (j *jenkhash) Sum(in []byte) []byte {
	v := j.Sum32()
	return append(in, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
|
||||
13
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go
generated
vendored
Normal file
13
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go
generated
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
package jenkins
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestJenkins(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Jenkins Suite")
|
||||
}
|
||||
101
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go
generated
vendored
Normal file
101
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go
generated
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
package jenkins
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
"hash"
|
||||
)
|
||||
|
||||
var _ = Describe("Jenkins", func() {
|
||||
|
||||
var jhash hash.Hash32
|
||||
var key []byte
|
||||
|
||||
BeforeEach(func() {
|
||||
jhash = New()
|
||||
key = []byte("Apple")
|
||||
})
|
||||
|
||||
Describe("New", func() {
|
||||
|
||||
It("returns jenkhash", func() {
|
||||
var h *jenkhash
|
||||
Expect(jhash).To(BeAssignableToTypeOf(h))
|
||||
})
|
||||
|
||||
It("initializes offset to 0", func() {
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Write", func() {
|
||||
|
||||
It("returns key length", func() {
|
||||
length, _ := jhash.Write(key)
|
||||
Expect(length).To(Equal(5))
|
||||
})
|
||||
|
||||
It("has no error", func() {
|
||||
_, err := jhash.Write(key)
|
||||
Expect(err).To(BeNil())
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Reset", func() {
|
||||
|
||||
It("sets back to 0", func() {
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
jhash.Write(key)
|
||||
Expect(jhash.Sum32()).NotTo(Equal(uint32(0)))
|
||||
jhash.Reset()
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Size", func() {
|
||||
|
||||
It("is 4", func() {
|
||||
Expect(jhash.Size()).To(Equal(4))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("BlockSize", func() {
|
||||
|
||||
It("is 1", func() {
|
||||
Expect(jhash.BlockSize()).To(Equal(1))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Sum32", func() {
|
||||
|
||||
It("defaults to 0", func() {
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
})
|
||||
|
||||
It("sums hash", func() {
|
||||
jhash.Write(key)
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(884782484)))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Sum", func() {
|
||||
|
||||
It("default 0 hash byte returned", func() {
|
||||
expected := []byte{0x41, 0x70, 0x70, 0x6c, 0x65, 0x0, 0x0, 0x0, 0x0}
|
||||
Expect(jhash.Sum(key)).To(Equal(expected))
|
||||
})
|
||||
|
||||
It("returns sum byte array", func() {
|
||||
jhash.Write(key)
|
||||
expected := []byte{0x41, 0x70, 0x70, 0x6c, 0x65, 0x34, 0xbc, 0xb5, 0x94}
|
||||
Expect(jhash.Sum(key)).To(Equal(expected))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
})
|
||||
@ -2,13 +2,11 @@
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
// Non crypto hash, because speed
|
||||
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
|
||||
"hash"
|
||||
"hash/adler32"
|
||||
"hash/crc32"
|
||||
"hash/fnv"
|
||||
"math/big"
|
||||
)
|
||||
|
||||
type Filter interface {
|
||||
@ -17,61 +15,66 @@ type Filter interface {
|
||||
Merge(Filter) (Filter, error)
|
||||
}
|
||||
|
||||
func BasicFilter() Filter {
|
||||
// Non crypto hashes, because speed
|
||||
return NewFilter(2048, adler32.New(), fnv.New32(), crc32.NewIEEE())
|
||||
}
|
||||
|
||||
func NewFilter(size int, hashes ...hash.Hash) Filter {
|
||||
func NewFilter(size int) Filter {
|
||||
return &filter{
|
||||
hash: jenkins.New(),
|
||||
filter: make([]byte, size),
|
||||
hashes: hashes,
|
||||
k: 3,
|
||||
}
|
||||
}
|
||||
|
||||
type filter struct {
|
||||
filter []byte
|
||||
hashes []hash.Hash
|
||||
hash hash.Hash32
|
||||
k int
|
||||
}
|
||||
|
||||
func (f *filter) Add(k []byte) {
|
||||
for _, h := range f.hashes {
|
||||
i := bytesMod(h.Sum(k), int64(len(f.filter)*8))
|
||||
f.setBit(i)
|
||||
func BasicFilter() Filter {
|
||||
return NewFilter(2048)
|
||||
}
|
||||
|
||||
func (f *filter) Add(bytes []byte) {
|
||||
for _, bit := range f.getBitIndicies(bytes) {
|
||||
f.setBit(bit)
|
||||
}
|
||||
}
|
||||
|
||||
func (f *filter) Find(k []byte) bool {
|
||||
for _, h := range f.hashes {
|
||||
i := bytesMod(h.Sum(k), int64(len(f.filter)*8))
|
||||
if !f.getBit(i) {
|
||||
func (f *filter) getBitIndicies(bytes []byte) []uint32 {
|
||||
indicies := make([]uint32, f.k)
|
||||
|
||||
f.hash.Write(bytes)
|
||||
b := make([]byte, 4)
|
||||
|
||||
for i := 0; i < f.k; i++ {
|
||||
res := f.hash.Sum32()
|
||||
indicies[i] = res % (uint32(len(f.filter)) * 8)
|
||||
|
||||
binary.LittleEndian.PutUint32(b, res)
|
||||
f.hash.Write(b)
|
||||
}
|
||||
|
||||
f.hash.Reset()
|
||||
|
||||
return indicies
|
||||
}
|
||||
|
||||
func (f *filter) Find(bytes []byte) bool {
|
||||
for _, bit := range f.getBitIndicies(bytes) {
|
||||
if !f.getBit(bit) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (f *filter) setBit(i int64) {
|
||||
fmt.Printf("setting bit %d\n", i)
|
||||
func (f *filter) setBit(i uint32) {
|
||||
f.filter[i/8] |= (1 << byte(i%8))
|
||||
}
|
||||
|
||||
func (f *filter) getBit(i int64) bool {
|
||||
fmt.Printf("getting bit %d\n", i)
|
||||
func (f *filter) getBit(i uint32) bool {
|
||||
return f.filter[i/8]&(1<<byte(i%8)) != 0
|
||||
}
|
||||
|
||||
func bytesMod(b []byte, modulo int64) int64 {
|
||||
i := big.NewInt(0)
|
||||
i = i.SetBytes(b)
|
||||
|
||||
bigmod := big.NewInt(int64(modulo))
|
||||
result := big.NewInt(0)
|
||||
result.Mod(i, bigmod)
|
||||
|
||||
return result.Int64()
|
||||
}
|
||||
|
||||
func (f *filter) Merge(o Filter) (Filter, error) {
|
||||
casfil, ok := o.(*filter)
|
||||
if !ok {
|
||||
@ -82,12 +85,15 @@ func (f *filter) Merge(o Filter) (Filter, error) {
|
||||
return nil, errors.New("filter lengths must match!")
|
||||
}
|
||||
|
||||
if casfil.k != f.k {
|
||||
return nil, errors.New("filter k-values must match!")
|
||||
}
|
||||
|
||||
nfilt := new(filter)
|
||||
|
||||
// this bit is sketchy, need a way of comparing hash functions
|
||||
nfilt.hashes = f.hashes
|
||||
|
||||
nfilt.hash = f.hash
|
||||
nfilt.filter = make([]byte, len(f.filter))
|
||||
nfilt.k = f.k
|
||||
|
||||
for i, v := range f.filter {
|
||||
nfilt.filter[i] = v | casfil.filter[i]
|
||||
}
|
||||
|
||||
@ -1,13 +1,19 @@
|
||||
package bloom
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFilter(t *testing.T) {
|
||||
f := BasicFilter()
|
||||
f := NewFilter(128)
|
||||
|
||||
keys := [][]byte{
|
||||
[]byte("hello"),
|
||||
[]byte("fish"),
|
||||
[]byte("ipfsrocks"),
|
||||
[]byte("i want ipfs socks"),
|
||||
}
|
||||
|
||||
f.Add(keys[0])
|
||||
@ -21,10 +27,54 @@ func TestFilter(t *testing.T) {
|
||||
}
|
||||
|
||||
f.Add(keys[2])
|
||||
f.Add(keys[3])
|
||||
|
||||
for _, k := range keys {
|
||||
if !f.Find(k) {
|
||||
t.Fatal("Couldnt find one of three keys")
|
||||
}
|
||||
}
|
||||
|
||||
if f.Find([]byte("beep boop")) {
|
||||
t.Fatal("Got false positive! Super unlikely!")
|
||||
}
|
||||
|
||||
fmt.Println(f)
|
||||
}
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
|
||||
f1 := NewFilter(128)
|
||||
f2 := NewFilter(128)
|
||||
|
||||
fbork := NewFilter(32)
|
||||
|
||||
_, err := f1.Merge(fbork)
|
||||
|
||||
if err == nil {
|
||||
t.Fatal("Merge should fail on filters with different lengths")
|
||||
}
|
||||
|
||||
b := make([]byte, 4)
|
||||
|
||||
var i uint32
|
||||
for i = 0; i < 10; i++ {
|
||||
binary.LittleEndian.PutUint32(b, i)
|
||||
f1.Add(b)
|
||||
}
|
||||
|
||||
for i = 10; i < 20; i++ {
|
||||
binary.LittleEndian.PutUint32(b, i)
|
||||
f2.Add(b)
|
||||
}
|
||||
|
||||
merged, _ := f1.Merge(f2)
|
||||
|
||||
for i = 0; i < 20; i++ {
|
||||
binary.LittleEndian.PutUint32(b, i)
|
||||
|
||||
if !merged.Find(b) {
|
||||
t.Fatal("Could not find all keys in merged filter")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user