From ec72547c8d25929155e6c797f965872fb8448b47 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 20 Aug 2021 01:12:52 -0700 Subject: [PATCH 01/30] started by copying from https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree --- weed/util/bptree/bpmap.go | 77 ++ weed/util/bptree/bptree.go | 160 ++++ weed/util/bptree/bptree_node.go | 752 ++++++++++++++++ weed/util/bptree/bptree_test.go | 1460 +++++++++++++++++++++++++++++++ weed/util/bptree/int.go | 357 ++++++++ weed/util/bptree/rand.go | 2 + weed/util/bptree/string.go | 71 ++ weed/util/bptree/types.go | 103 +++ 8 files changed, 2982 insertions(+) create mode 100644 weed/util/bptree/bpmap.go create mode 100644 weed/util/bptree/bptree.go create mode 100644 weed/util/bptree/bptree_node.go create mode 100644 weed/util/bptree/bptree_test.go create mode 100644 weed/util/bptree/int.go create mode 100644 weed/util/bptree/rand.go create mode 100644 weed/util/bptree/string.go create mode 100644 weed/util/bptree/types.go diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go new file mode 100644 index 000000000..37b2b25bb --- /dev/null +++ b/weed/util/bptree/bpmap.go @@ -0,0 +1,77 @@ +package bptree + +import ( + "fmt" +) + +/* A BpMap is a B+Tree with support for duplicate keys disabled. This makes it + * behave like a regular Map rather than a MultiMap. 
+ */ +type BpMap BpTree + +func NewBpMap(node_size int) *BpMap { + return &BpMap{ + root: NewLeaf(node_size, true), + size: 0, + } +} + +func (self *BpMap) Size() int { + return (*BpTree)(self).Size() +} + +func (self *BpMap) Has(key Hashable) bool { + return (*BpTree)(self).Has(key) +} + +func (self *BpMap) Put(key Hashable, value interface{}) (err error) { + had := self.Has(key) + new_root, err := self.root.put(key, value) + if err != nil { + return err + } + self.root = new_root + if !had { + self.size += 1 + } + return nil +} + +func (self *BpMap) Get(key Hashable) (value interface{}, err error) { + j, l := self.root.get_start(key) + if l.keys[j].Equals(key) { + return l.values[j], nil + } + return nil, fmt.Errorf("key not found: %s", key) +} + +func (self *BpMap) Remove(key Hashable) (value interface{}, err error) { + value, err = self.Get(key) + if err != nil { + return nil, err + } + ns := self.root.NodeSize() + new_root, err := self.root.remove(key, func(value interface{}) bool { return true }) + if err != nil { + return nil, err + } + if new_root == nil { + self.root = NewLeaf(ns, true) + } else { + self.root = new_root + } + self.size-- + return value, nil +} + +func (self *BpMap) Keys() (ki KIterator) { + return (*BpTree)(self).Keys() +} + +func (self *BpMap) Values() (vi Iterator) { + return (*BpTree)(self).Values() +} + +func (self *BpMap) Iterate() (kvi KVIterator) { + return (*BpTree)(self).Iterate() +} diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go new file mode 100644 index 000000000..4b68adb20 --- /dev/null +++ b/weed/util/bptree/bptree.go @@ -0,0 +1,160 @@ +package bptree + +// started by copying from https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree + +/* A BpTree is a B+Tree with support for duplicate keys. This makes it behave as + * a MultiMap. Additionally you can use the Range operator to select k/v in a + * range. If from > to it will iterate backwards. 
+ */ +type BpTree struct { + root *BpNode + size int +} + +type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) + +func NewBpTree(node_size int) *BpTree { + return &BpTree{ + root: NewLeaf(node_size, false), + size: 0, + } +} + +func (self *BpTree) Size() int { + return self.size +} + +func (self *BpTree) Has(key Hashable) bool { + if len(self.root.keys) == 0 { + return false + } + j, l := self.root.get_start(key) + return l.keys[j].Equals(key) +} + +func (self *BpTree) Count(key Hashable) int { + if len(self.root.keys) == 0 { + return 0 + } + j, l := self.root.get_start(key) + count := 0 + end := false + for !end && l.keys[j].Equals(key) { + count++ + j, l, end = next_location(j, l) + } + return count +} + +func (self *BpTree) Add(key Hashable, value interface{}) (err error) { + new_root, err := self.root.put(key, value) + if err != nil { + return err + } + self.root = new_root + self.size += 1 + return nil +} + +func (self *BpTree) Replace(key Hashable, where WhereFunc, value interface{}) (err error) { + li := self.root.forward(key, key) + for i, leaf, next := li(); next != nil; i, leaf, next = next() { + if where(leaf.values[i]) { + leaf.values[i] = value + } + } + return nil +} + +func (self *BpTree) Find(key Hashable) (kvi KVIterator) { + return self.Range(key, key) +} + +func (self *BpTree) Range(from, to Hashable) (kvi KVIterator) { + var li loc_iterator + if !to.Less(from) { + li = self.root.forward(from, to) + } else { + li = self.root.backward(from, to) + } + kvi = func() (key Hashable, value interface{}, next KVIterator) { + var i int + var leaf *BpNode + i, leaf, li = li() + if li == nil { + return nil, nil, nil + } + return leaf.keys[i], leaf.values[i], kvi + } + return kvi +} + +func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { + ns := self.root.NodeSize() + new_root, err := self.root.remove(key, where) + if err != nil { + return err + } + if new_root == nil { + self.root = NewLeaf(ns, false) + } else { + 
self.root = new_root + } + self.size -= 1 + return nil +} + +func (self *BpTree) Keys() (ki KIterator) { + li := self.root.all() + var prev Equatable + ki = func() (key Hashable, next KIterator) { + var i int + var leaf *BpNode + i, leaf, li = li() + if li == nil { + return nil, nil + } + if leaf.keys[i].Equals(prev) { + return ki() + } + prev = leaf.keys[i] + return leaf.keys[i], ki + } + return ki +} + +func (self *BpTree) Values() (vi Iterator) { + return MakeValuesIterator(self) +} + +func (self *BpTree) Items() (vi KIterator) { + return MakeItemsIterator(self) +} + +func (self *BpTree) Iterate() (kvi KVIterator) { + li := self.root.all() + kvi = func() (key Hashable, value interface{}, next KVIterator) { + var i int + var leaf *BpNode + i, leaf, li = li() + if li == nil { + return nil, nil, nil + } + return leaf.keys[i], leaf.values[i], kvi + } + return kvi +} + +func (self *BpTree) Backward() (kvi KVIterator) { + li := self.root.all_backward() + kvi = func() (key Hashable, value interface{}, next KVIterator) { + var i int + var leaf *BpNode + i, leaf, li = li() + if li == nil { + return nil, nil, nil + } + return leaf.keys[i], leaf.values[i], kvi + } + return kvi +} \ No newline at end of file diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go new file mode 100644 index 000000000..3574371f5 --- /dev/null +++ b/weed/util/bptree/bptree_node.go @@ -0,0 +1,752 @@ +package bptree + +type BpNode struct { + keys []Hashable + values []interface{} + pointers []*BpNode + next *BpNode + prev *BpNode + no_dup bool +} + +func NewInternal(size int) *BpNode { + if size < 0 { + panic(NegativeSize()) + } + return &BpNode{ + keys: make([]Hashable, 0, size), + pointers: make([]*BpNode, 0, size), + } +} + +func NewLeaf(size int, no_dup bool) *BpNode { + if size < 0 { + panic(NegativeSize()) + } + return &BpNode{ + keys: make([]Hashable, 0, size), + values: make([]interface{}, 0, size), + no_dup: no_dup, + } +} + +func (self *BpNode) Full() bool { + 
return len(self.keys) == cap(self.keys) +} + +func (self *BpNode) Pure() bool { + if len(self.keys) == 0 { + return true + } + k0 := self.keys[0] + for _, k := range self.keys { + if !k0.Equals(k) { + return false + } + } + return true +} + +func (self *BpNode) Internal() bool { + return cap(self.pointers) > 0 +} + +func (self *BpNode) NodeSize() int { + return cap(self.keys) +} + +func (self *BpNode) Height() int { + if !self.Internal() { + return 1 + } else if len(self.pointers) == 0 { + panic(BpTreeError("Internal node has no pointers but asked for height")) + } + return self.pointers[0].Height() + 1 +} + +func (self *BpNode) count(key Hashable) int { + i, _ := self.find(key) + count := 0 + for ; i < len(self.keys); i++ { + if self.keys[i].Equals(key) { + count++ + } else { + break + } + } + return count +} + +func (self *BpNode) has(key Hashable) bool { + _, has := self.find(key) + return has +} + +func (self *BpNode) left_most_leaf() *BpNode { + if self.Internal() { + return self.pointers[0].left_most_leaf() + } + return self +} + +func (self *BpNode) right_most_leaf() *BpNode { + if self.Internal() { + return self.pointers[len(self.pointers)-1].right_most_leaf() + } + return self +} + +/* returns the index and leaf-block of the first key greater than or equal to + * the search key. 
(unless the search key is greater than all the keys in the + * tree, in that case it will be the last key in the tree) + */ +func (self *BpNode) get_start(key Hashable) (i int, leaf *BpNode) { + if self.Internal() { + return self.internal_get_start(key) + } else { + return self.leaf_get_start(key) + } +} + +func next_location(i int, leaf *BpNode) (int, *BpNode, bool) { + j := i + 1 + for j >= len(leaf.keys) && leaf.next != nil { + j = 0 + leaf = leaf.next + } + if j >= len(leaf.keys) { + return -1, nil, true + } + return j, leaf, false +} + +func prev_location(i int, leaf *BpNode) (int, *BpNode, bool) { + j := i - 1 + for j < 0 && leaf.prev != nil { + leaf = leaf.prev + j = len(leaf.keys) - 1 + } + if j < 0 { + return -1, nil, true + } + return j, leaf, false +} + +/* returns the index and leaf-block of the last key equal to the search key or + * the first key greater than the search key. (unless the search key is greater + * than all the keys in the tree, in that case it will be the last key in the + * tree) + */ +func (self *BpNode) get_end(key Hashable) (i int, leaf *BpNode) { + end := false + i, leaf = self.get_start(key) + pi, pleaf := i, leaf + for !end && leaf.keys[i].Equals(key) { + pi, pleaf = i, leaf + i, leaf, end = next_location(i, leaf) + } + return pi, pleaf +} + +func (self *BpNode) internal_get_start(key Hashable) (i int, leaf *BpNode) { + if !self.Internal() { + panic(BpTreeError("Expected a internal node")) + } + i, has := self.find(key) + if !has && i > 0 { + // if it doesn't have it and the index > 0 then we have the next block + // so we have to subtract one from the index. 
+ i-- + } + child := self.pointers[i] + return child.get_start(key) +} + +func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) { + i, has := self.find(key) + if i >= len(self.keys) && i > 0 { + i = len(self.keys) - 1 + } + if !has && (len(self.keys) == 0 || self.keys[i].Less(key)) && self.next != nil { + return self.next.leaf_get_start(key) + } + return i, self +} + +/* This puts the k/v pair into the B+Tree rooted at this node and returns the + * (possibly) new root of the tree. + */ +func (self *BpNode) put(key Hashable, value interface{}) (root *BpNode, err error) { + a, b, err := self.insert(key, value) + if err != nil { + return nil, err + } else if b == nil { + return a, nil + } + // else we have root split + root = NewInternal(self.NodeSize()) + root.put_kp(a.keys[0], a) + root.put_kp(b.keys[0], b) + return root, nil +} + +// right is only set on split +// left is always set. When split is false left is the pointer to block +// When split is true left is the pointer to the new left +// block +func (self *BpNode) insert(key Hashable, value interface{}) (a, b *BpNode, err error) { + if self.Internal() { + return self.internal_insert(key, value) + } else { // leaf node + return self.leaf_insert(key, value) + } +} + +/* - first find the child to insert into + * - do the child insert + * - if there was a split: + * - if the block is full, split this block + * - else insert the new key/pointer into this block + */ +func (self *BpNode) internal_insert(key Hashable, value interface{}) (a, b *BpNode, err error) { + if !self.Internal() { + return nil, nil, BpTreeError("Expected a internal node") + } + i, has := self.find(key) + if !has && i > 0 { + // if it doesn't have it and the index > 0 then we have the next block + // so we have to subtract one from the index. 
+ i-- + } + child := self.pointers[i] + p, q, err := child.insert(key, value) + if err != nil { + return nil, nil, err + } + self.keys[i] = p.keys[0] + self.pointers[i] = p + if q != nil { + // we had a split + if self.Full() { + return self.internal_split(q.keys[0], q) + } else { + if err := self.put_kp(q.keys[0], q); err != nil { + return nil, nil, err + } + return self, nil, nil + } + } + return self, nil, nil +} + +/* On split + * - first assert that the key to be inserted is not already in the block. + * - Make a new block + * - balance the two blocks. + * - insert the new key/pointer combo into the correct block + */ +func (self *BpNode) internal_split(key Hashable, ptr *BpNode) (a, b *BpNode, err error) { + if !self.Internal() { + return nil, nil, BpTreeError("Expected a internal node") + } + if self.has(key) { + return nil, nil, BpTreeError("Tried to split an internal block on duplicate key") + } + a = self + b = NewInternal(self.NodeSize()) + balance_nodes(a, b) + if key.Less(b.keys[0]) { + if err := a.put_kp(key, ptr); err != nil { + return nil, nil, err + } + } else { + if err := b.put_kp(key, ptr); err != nil { + return nil, nil, err + } + } + return a, b, nil +} + +/* if the leaf is full then it will defer to a leaf_split + * (but in one case that will not actually split in the case of a insert into + * a pure block with a matching key) + * else this leaf will get a new entry. 
+ */ +func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, err error) { + if self.Internal() { + return nil, nil, BpTreeError("Expected a leaf node") + } + if self.no_dup { + i, has := self.find(key) + if has { + self.values[i] = value + return self, nil, nil + } + } + if self.Full() { + return self.leaf_split(key, value) + } else { + if err := self.put_kv(key, value); err != nil { + return nil, nil, err + } + return self, nil, nil + } +} + +/* on leaf split if the block is pure then it will defer to pure_leaf_split + * else + * - a new block will be made and inserted after this one + * - the two blocks will be balanced with balanced_nodes + * - if the key is less than b.keys[0] it will go in a else b + */ +func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) { + if self.Internal() { + return nil, nil, BpTreeError("Expected a leaf node") + } + if self.Pure() { + return self.pure_leaf_split(key, value) + } + a = self + b = NewLeaf(self.NodeSize(), self.no_dup) + insert_linked_list_node(b, a, a.next) + balance_nodes(a, b) + if key.Less(b.keys[0]) { + if err := a.put_kv(key, value); err != nil { + return nil, nil, err + } + } else { + if err := b.put_kv(key, value); err != nil { + return nil, nil, err + } + } + return a, b, nil +} + +/* a pure leaf split has two cases: + * 1) the inserted key is less than the current pure block. + * - a new block should be created before the current block + * - the key should be put in it + * 2) the inserted key is greater than or equal to the pure block. + * - the end of run of pure blocks should be found + * - if the key is equal to pure block and the last block is not full insert + * the new kv + * - else split by making a new block after the last block in the run + * and putting the new key there. 
+ * - always return the current block as "a" and the new block as "b" + */ +func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) { + if self.Internal() || !self.Pure() { + return nil, nil, BpTreeError("Expected a pure leaf node") + } + if key.Less(self.keys[0]) { + a = NewLeaf(self.NodeSize(), self.no_dup) + b = self + if err := a.put_kv(key, value); err != nil { + return nil, nil, err + } + insert_linked_list_node(a, b.prev, b) + return a, b, nil + } else { + a = self + e := self.find_end_of_pure_run() + if e.keys[0].Equals(key) && !e.Full() { + if err := e.put_kv(key, value); err != nil { + return nil, nil, err + } + return a, nil, nil + } else { + b = NewLeaf(self.NodeSize(), self.no_dup) + if err := b.put_kv(key, value); err != nil { + return nil, nil, err + } + insert_linked_list_node(b, e, e.next) + if e.keys[0].Equals(key) { + return a, nil, nil + } + return a, b, nil + } + } +} + +func (self *BpNode) put_kp(key Hashable, ptr *BpNode) error { + if self.Full() { + return BpTreeError("Block is full.") + } + if !self.Internal() { + return BpTreeError("Expected a internal node") + } + i, has := self.find(key) + if has { + return BpTreeError("Tried to insert a duplicate key into an internal node") + } else if i < 0 { + panic(BpTreeError("find returned a negative int")) + } else if i >= cap(self.keys) { + panic(BpTreeError("find returned a int > than cap(keys)")) + } + if err := self.put_key_at(i, key); err != nil { + return err + } + if err := self.put_pointer_at(i, ptr); err != nil { + return err + } + return nil +} + +func (self *BpNode) put_kv(key Hashable, value interface{}) error { + if self.Full() { + return BpTreeError("Block is full.") + } + if self.Internal() { + return BpTreeError("Expected a leaf node") + } + i, _ := self.find(key) + if i < 0 { + panic(BpTreeError("find returned a negative int")) + } else if i >= cap(self.keys) { + panic(BpTreeError("find returned a int > than cap(keys)")) + } + if err := 
self.put_key_at(i, key); err != nil { + return err + } + if err := self.put_value_at(i, value); err != nil { + return err + } + return nil +} + +func (self *BpNode) put_key_at(i int, key Hashable) error { + if self.Full() { + return BpTreeError("Block is full.") + } + self.keys = self.keys[:len(self.keys)+1] + for j := len(self.keys) - 1; j > i; j-- { + self.keys[j] = self.keys[j-1] + } + self.keys[i] = key + return nil +} + +func (self *BpNode) put_value_at(i int, value interface{}) error { + if len(self.values) == cap(self.values) { + return BpTreeError("Block is full.") + } + if self.Internal() { + return BpTreeError("Expected a leaf node") + } + self.values = self.values[:len(self.values)+1] + for j := len(self.values) - 1; j > i; j-- { + self.values[j] = self.values[j-1] + } + self.values[i] = value + return nil +} + +func (self *BpNode) put_pointer_at(i int, pointer *BpNode) error { + if len(self.pointers) == cap(self.pointers) { + return BpTreeError("Block is full.") + } + if !self.Internal() { + return BpTreeError("Expected a internal node") + } + self.pointers = self.pointers[:len(self.pointers)+1] + for j := len(self.pointers) - 1; j > i; j-- { + self.pointers[j] = self.pointers[j-1] + } + self.pointers[i] = pointer + return nil +} + +func (self *BpNode) remove(key Hashable, where WhereFunc) (a *BpNode, err error) { + if self.Internal() { + return self.internal_remove(key, nil, where) + } else { + return self.leaf_remove(key, self.keys[len(self.keys)-1], where) + } +} + +func (self *BpNode) internal_remove(key Hashable, sibling *BpNode, where WhereFunc) (a *BpNode, err error) { + if !self.Internal() { + panic(BpTreeError("Expected a internal node")) + } + i, has := self.find(key) + if !has && i > 0 { + // if it doesn't have it and the index > 0 then we have the next block + // so we have to subtract one from the index. 
+ i-- + } + if i+1 < len(self.keys) { + sibling = self.pointers[i+1] + } else if sibling != nil { + sibling = sibling.left_most_leaf() + } + child := self.pointers[i] + if child.Internal() { + child, err = child.internal_remove(key, sibling, where) + } else { + if sibling == nil { + child, err = child.leaf_remove(key, nil, where) + } else { + child, err = child.leaf_remove(key, sibling.keys[0], where) + } + } + if err != nil { + return nil, err + } + if child == nil { + if err := self.remove_key_at(i); err != nil { + return nil, err + } + if err := self.remove_ptr_at(i); err != nil { + return nil, err + } + } else { + self.keys[i] = child.keys[0] + self.pointers[i] = child + } + if len(self.keys) == 0 { + return nil, nil + } + return self, nil +} + +func (self *BpNode) leaf_remove(key, stop Hashable, where WhereFunc) (a *BpNode, err error) { + if self.Internal() { + return nil, BpTreeError("Expected a leaf node") + } + a = self + for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() { + if where(l.values[j]) { + if err := l.remove_key_at(j); err != nil { + return nil, err + } + if err := l.remove_value_at(j); err != nil { + return nil, err + } + } + if len(l.keys) == 0 { + remove_linked_list_node(l) + if l.next == nil { + a = nil + } else if stop == nil { + a = nil + } else if !l.next.keys[0].Equals(stop) { + a = l.next + } else { + a = nil + } + } + } + return a, nil +} + +func (self *BpNode) remove_key_at(i int) error { + if i >= len(self.keys) || i < 0 { + return BpTreeError("i, %v, is out of bounds, %v, %v %v.", i, len(self.keys), len(self.values), self) + } + for j := i; j < len(self.keys)-1; j++ { + self.keys[j] = self.keys[j+1] + } + self.keys = self.keys[:len(self.keys)-1] + return nil +} + +func (self *BpNode) remove_value_at(i int) error { + if i >= len(self.values) || i < 0 { + return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.values)) + } + for j := i; j < len(self.values)-1; j++ { + self.values[j] = self.values[j+1] 
+ } + self.values = self.values[:len(self.values)-1] + return nil +} + +func (self *BpNode) remove_ptr_at(i int) error { + if i >= len(self.pointers) || i < 0 { + return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.pointers)) + } + for j := i; j < len(self.pointers)-1; j++ { + self.pointers[j] = self.pointers[j+1] + } + self.pointers = self.pointers[:len(self.pointers)-1] + return nil +} + +func (self *BpNode) find(key Hashable) (int, bool) { + var l int = 0 + var r int = len(self.keys) - 1 + var m int + for l <= r { + m = ((r - l) >> 1) + l + if key.Less(self.keys[m]) { + r = m - 1 + } else if key.Equals(self.keys[m]) { + for j := m; j >= 0; j-- { + if j == 0 || !key.Equals(self.keys[j-1]) { + return j, true + } + } + } else { + l = m + 1 + } + } + return l, false +} + +func (self *BpNode) find_end_of_pure_run() *BpNode { + k := self.keys[0] + p := self + n := self.next + for n != nil && n.Pure() && k.Equals(n.keys[0]) { + p = n + n = n.next + } + return p +} + +func (self *BpNode) all() (li loc_iterator) { + j := -1 + l := self.left_most_leaf() + end := false + j, l, end = next_location(j, l) + li = func() (i int, leaf *BpNode, next loc_iterator) { + if end { + return -1, nil, nil + } + i = j + leaf = l + j, l, end = next_location(j, l) + return i, leaf, li + } + return li +} + +func (self *BpNode) all_backward() (li loc_iterator) { + l := self.right_most_leaf() + j := len(l.keys) + end := false + j, l, end = prev_location(j, l) + li = func() (i int, leaf *BpNode, next loc_iterator) { + if end { + return -1, nil, nil + } + i = j + leaf = l + j, l, end = prev_location(j, l) + return i, leaf, li + } + return li +} + +func (self *BpNode) forward(from, to Hashable) (li loc_iterator) { + j, l := self.get_start(from) + end := false + j-- + li = func() (i int, leaf *BpNode, next loc_iterator) { + j, l, end = next_location(j, l) + if end || to.Less(l.keys[j]) { + return -1, nil, nil + } + return j, l, li + } + return li +} + +func (self *BpNode) backward(from, 
to Hashable) (li loc_iterator) { + j, l := self.get_end(from) + end := false + li = func() (i int, leaf *BpNode, next loc_iterator) { + if end || l.keys[j].Less(to) { + return -1, nil, nil + } + i = j + leaf = l + j, l, end = prev_location(i, l) + return i, leaf, li + } + return li +} + +func insert_linked_list_node(n, prev, next *BpNode) { + if (prev != nil && prev.next != next) || (next != nil && next.prev != prev) { + panic(BpTreeError("prev and next not hooked up")) + } + n.prev = prev + n.next = next + if prev != nil { + prev.next = n + } + if next != nil { + next.prev = n + } +} + +func remove_linked_list_node(n *BpNode) { + if n.prev != nil { + n.prev.next = n.next + } + if n.next != nil { + n.next.prev = n.prev + } +} + +/* a must be full and b must be empty else there will be a panic + */ +func balance_nodes(a, b *BpNode) { + if len(b.keys) != 0 { + panic(BpTreeError("b was not empty")) + } + if !a.Full() { + panic(BpTreeError("a was not full", a)) + } + if cap(a.keys) != cap(b.keys) { + panic(BpTreeError("cap(a.keys) != cap(b.keys)")) + } + if cap(a.values) != cap(b.values) { + panic(BpTreeError("cap(a.values) != cap(b.values)")) + } + if cap(a.pointers) != cap(b.pointers) { + panic(BpTreeError("cap(a.pointers) != cap(b.pointers)")) + } + m := len(a.keys) / 2 + for m < len(a.keys) && a.keys[m-1].Equals(a.keys[m]) { + m++ + } + if m == len(a.keys) { + m-- + for m > 0 && a.keys[m-1].Equals(a.keys[m]) { + m-- + } + } + var lim int = len(a.keys) - m + b.keys = b.keys[:lim] + if cap(a.values) > 0 { + if cap(a.values) != cap(a.keys) { + panic(BpTreeError("cap(a.values) != cap(a.keys)")) + } + b.values = b.values[:lim] + } + if cap(a.pointers) > 0 { + if cap(a.pointers) != cap(a.keys) { + panic(BpTreeError("cap(a.pointers) != cap(a.keys)")) + } + b.pointers = b.pointers[:lim] + } + for i := 0; i < lim; i++ { + j := m + i + b.keys[i] = a.keys[j] + if cap(a.values) > 0 { + b.values[i] = a.values[j] + } + if cap(a.pointers) > 0 { + b.pointers[i] = a.pointers[j] + } 
+ } + a.keys = a.keys[:m] + if cap(a.values) > 0 { + a.values = a.values[:m] + } + if cap(a.pointers) > 0 { + a.pointers = a.pointers[:m] + } +} diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go new file mode 100644 index 000000000..cf978ede7 --- /dev/null +++ b/weed/util/bptree/bptree_test.go @@ -0,0 +1,1460 @@ +package bptree + +import ( + "encoding/hex" + "runtime/debug" + "sort" + "sync" + "testing" + + crand "crypto/rand" + "encoding/binary" + mrand "math/rand" + +) + +var rand *mrand.Rand + +func init() { + seed := make([]byte, 8) + if _, err := crand.Read(seed); err == nil { + rand = ThreadSafeRand(int64(binary.BigEndian.Uint64(seed))) + } else { + panic(err) + } +} + +func randslice(length int) []byte { + return RandSlice(length) +} + +func randstr(length int) String { + return String(RandStr(length)) +} + +type Strings []String + +func (self Strings) Len() int { + return len(self) +} + +func (self Strings) Less(i, j int) bool { + return self[i].Less(self[j]) +} + +func (self Strings) Swap(i, j int) { + self[i], self[j] = self[j], self[i] +} + +type record struct { + key String + value String +} + +type records []*record + +func (self records) Len() int { + return len(self) +} + +func (self records) Less(i, j int) bool { + return self[i].key.Less(self[j].key) +} + +func (self records) Swap(i, j int) { + self[i], self[j] = self[j], self[i] +} + +func BenchmarkBpTree(b *testing.B) { + b.StopTimer() + + recs := make(records, 100) + ranrec := func() *record { + return &record{randstr(20), randstr(20)} + } + + for i := range recs { + recs[i] = ranrec() + } + + b.StartTimer() + for i := 0; i < b.N; i++ { + t := NewBpTree(23) + for _, r := range recs { + t.Add(r.key, r.value) + } + for _, r := range recs { + t.RemoveWhere(r.key, func(value interface{}) bool { return true }) + } + } +} + +func TestAddHasCountFindIterateRemove(t *testing.T) { + + ranrec := func() *record { + return &record{ + randstr(12), + randstr(12), + } + } + + test 
:= func(bpt *BpTree) { + var err error + recs := make(records, 128) + new_recs := make(records, 128) + for i := range recs { + r := ranrec() + recs[i] = r + new_recs[i] = &record{r.key, randstr(12)} + err = bpt.Add(r.key, r.value) + if err != nil { + t.Error(err) + } + if bpt.Size() != (i + 1) { + t.Error("size was wrong", bpt.Size(), i+1) + } + } + + for i, r := range recs { + if has := bpt.Has(r.key); !has { + t.Error(bpt, "Missing key") + } + if has := bpt.Has(randstr(10)); has { + t.Error("Table has extra key") + } + if count := bpt.Count(r.key); count != 1 { + t.Error(bpt, "Missing key") + } + if count := bpt.Count(randstr(10)); count != 0 { + t.Error("Table has extra key") + } + for k, v, next := bpt.Find(r.key)(); next != nil; k, v, next = next() { + if !k.Equals(r.key) { + t.Error(bpt, "Find Failed Key Error") + } + if !v.(String).Equals(r.value) { + t.Error(bpt, "Find Failed Value Error") + } + } + err = bpt.Replace(r.key, func(value interface{}) bool { return true }, new_recs[i].value) + if err != nil { + t.Error(err) + } + } + sort.Sort(recs) + sort.Sort(new_recs) + i := 0 + for k, v, next := bpt.Iterate()(); next != nil; k, v, next = next() { + if !recs[i].key.Equals(k) { + t.Error("iterate error wrong key") + } + if !new_recs[i].value.Equals(v.(String)) { + t.Error("iterate error wrong value") + } + i++ + } + i = len(recs) - 1 + for k, v, next := bpt.Backward()(); next != nil; k, v, next = next() { + if !recs[i].key.Equals(k) { + t.Error("iterate error wrong key") + } + if !new_recs[i].value.Equals(v.(String)) { + t.Error("iterate error wrong value") + } + i-- + } + i = 0 + for k, next := bpt.Keys()(); next != nil; k, next = next() { + if !recs[i].key.Equals(k) { + t.Error("iterate error wrong key") + } + i++ + } + i = 7 + for k, v, next := bpt.Range(recs[i].key, recs[i+(len(recs)/2)].key)(); next != nil; k, v, next = next() { + if !recs[i].key.Equals(k) { + t.Error("iterate error wrong key") + } + if !new_recs[i].value.Equals(v.(String)) { + 
t.Error("iterate error wrong value") + } + i++ + } + for k, v, next := bpt.Range(recs[i].key, recs[7].key)(); next != nil; k, v, next = next() { + if !recs[i].key.Equals(k) { + t.Error("iterate error wrong key") + } + if !new_recs[i].value.Equals(v.(String)) { + t.Error("iterate error wrong value", k, v, recs[i].value, new_recs[i].value) + } + i-- + } + for i, r := range recs { + if has := bpt.Has(r.key); !has { + t.Error(bpt, "Missing key") + } + if count := bpt.Count(r.key); count != 1 { + t.Error(bpt, "Missing key") + } + if err := bpt.RemoveWhere(r.key, func(value interface{}) bool { return true }); err != nil { + t.Fatal(bpt, err) + } + if has := bpt.Has(r.key); has { + t.Error("Table has extra key") + } + for _, x := range recs[i+1:] { + if has := bpt.Has(x.key); !has { + t.Error(bpt, "Missing key", x.key) + } + } + } + } + for i := 2; i < 64; i++ { + test(NewBpTree(i)) + } +} + +func TestBpMap(t *testing.T) { + + ranrec := func() *record { + return &record{ + randstr(12), + randstr(12), + } + } + + test := func(table MapOperable) { + recs := make(records, 400) + for i := range recs { + r := ranrec() + recs[i] = r + err := table.Put(r.key, String("")) + if err != nil { + t.Error(err) + } + err = table.Put(r.key, r.value) + if err != nil { + t.Error(err) + } + if table.Size() != (i + 1) { + t.Error("size was wrong", table.Size(), i+1) + } + } + + for _, r := range recs { + if has := table.Has(r.key); !has { + t.Error(table, "Missing key") + } + if has := table.Has(randstr(12)); has { + t.Error("Table has extra key") + } + if val, err := table.Get(r.key); err != nil { + t.Error(err) + } else if !(val.(String)).Equals(r.value) { + t.Error("wrong value") + } + } + + for i, x := range recs { + if val, err := table.Remove(x.key); err != nil { + t.Error(err) + } else if !(val.(String)).Equals(x.value) { + t.Error("wrong value") + } + for _, r := range recs[i+1:] { + if has := table.Has(r.key); !has { + t.Error("Missing key") + } + if has := table.Has(randstr(12)); 
has { + t.Error("Table has extra key") + } + if val, err := table.Get(r.key); err != nil { + t.Error(err) + } else if !(val.(String)).Equals(r.value) { + t.Error("wrong value") + } + } + if table.Size() != (len(recs) - (i + 1)) { + t.Error("size was wrong", table.Size(), (len(recs) - (i + 1))) + } + } + } + + test(NewBpMap(23)) +} + +func Test_get_start(t *testing.T) { + root := NewLeaf(2, false) + root, err := root.put(Int(1), 1) + if err != nil { + t.Error(err) + } + root, err = root.put(Int(5), 3) + if err != nil { + t.Error(err) + } + root, err = root.put(Int(3), 2) + if err != nil { + t.Error(err) + } + t.Log(root) + t.Log(root.pointers[0]) + t.Log(root.pointers[1]) + i, n := root.get_start(Int(1)) + if n != root.pointers[0] { + t.Error("wrong node from get_start") + } + if i != 0 { + t.Error("wrong index from get_start") + } + i, n = root.get_start(Int(3)) + if n != root.pointers[0] { + t.Error("wrong node from get_start") + } + if i != 1 { + t.Error("wrong index from get_start") + } + i, n = root.get_start(Int(5)) + if n != root.pointers[1] { + t.Error("wrong node from get_start") + } + if i != 0 { + t.Error("wrong index from get_start") + } + i, n = root.get_start(Int(2)) + if n != root.pointers[0] { + t.Error("wrong node from get_start") + } + if i != 1 { + t.Error("wrong index from get_start") + } + i, n = root.get_start(Int(4)) + t.Log(n) + if n != root.pointers[1] { + t.Error("wrong node from get_start") + } + if i != 0 { + t.Error("wrong index from get_start") + } + i, n = root.get_start(Int(0)) + if n != root.pointers[0] { + t.Error("wrong node from get_start") + } + if i != 0 { + t.Error("wrong index from get_start") + } + i, n = root.get_start(Int(5)) + if n != root.pointers[1] { + t.Error("wrong node from get_start") + } + if i != 0 { + t.Error("wrong index from get_start") + } +} + +func Test_get_end(t *testing.T) { + root := NewLeaf(3, false) + root, err := root.put(Int(1), -1) + if err != nil { + t.Fatal(err) + } + root, err = root.put(Int(4), 
-1) + if err != nil { + t.Fatal(err) + } + root, err = root.put(Int(3), 1) + if err != nil { + t.Fatal(err) + } + root, err = root.put(Int(3), 2) + if err != nil { + t.Fatal(err) + } + root, err = root.put(Int(3), 3) + if err != nil { + t.Fatal(err) + } + root, err = root.put(Int(3), 4) + if err != nil { + t.Fatal(err) + } + root, err = root.put(Int(3), 5) + if err != nil { + t.Fatal(err) + } + t.Log(root) + t.Log(root.pointers[0]) + t.Log(root.pointers[1]) + t.Log(root.pointers[2]) + i, n := root.get_start(Int(3)) + t.Log(n) + if n != root.pointers[1] { + t.Error("wrong node from get_start") + } + if i != 0 { + t.Error("wrong index from get_start") + } + i, n = root.get_end(Int(3)) + t.Log(n) + if n != root.pointers[1].next { + t.Error("wrong node from get_end") + } + if i != 1 { + t.Error("wrong index from get_end") + } + i, n = root.get_end(Int(1)) + t.Log(n) + if n != root.pointers[0] { + t.Error("wrong node from get_end") + } + if i != 0 { + t.Error("wrong index from get_end") + } + i, n = root.get_end(Int(4)) + t.Log(n) + if n != root.pointers[2] { + t.Error("wrong node from get_end") + } + if i != 0 { + t.Error("wrong index from get_end") + } + i, n = root.get_end(Int(0)) + t.Log(n) + if n != root.pointers[0] { + t.Error("wrong node from get_end") + } + if i != 0 { + t.Error("wrong index from get_end") + } + i, n = root.get_end(Int(5)) + t.Log(n) + if n != root.pointers[2] { + t.Error("wrong node from get_end") + } + if i != 0 { + t.Error("wrong index from get_end") + } + i, n = root.get_end(Int(2)) + t.Log(n) + if n != root.pointers[1] { + t.Error("wrong node from get_end") + } + if i != 0 { + t.Error("wrong index from get_end") + } +} + +func Test_put_no_root_split(t *testing.T) { + a := NewLeaf(2, false) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + p, err := a.put(Int(1), 2) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if !p.has(Int(1)) { + t.Error("p didn't have the right keys", p) + } + } + 
p, err = a.put(Int(1), 3) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if !p.has(Int(1)) { + t.Error("p didn't have the right keys", p) + } + if p.next == nil { + t.Error("p.next should not be nil") + } + t.Log(p) + t.Log(p.next) + } +} + +func Test_put_root_split(t *testing.T) { + a := NewLeaf(2, false) + p, err := a.put(Int(1), 1) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if !p.has(Int(1)) { + t.Error("p didn't have the right keys", p) + } + } + p, err = a.put(Int(3), 3) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if !p.has(Int(1)) || !p.has(Int(3)) { + t.Error("p didn't have the right keys", p) + } + } + p, err = a.put(Int(2), 2) + if err != nil { + t.Error(err) + } else { + if p == a { + t.Errorf("p == a") + } + if !p.has(Int(1)) || !p.has(Int(3)) { + t.Error("p didn't have the right keys", p) + } + if len(p.pointers) != 2 { + t.Error("p didn't have right number of pointers", p) + } + if !p.pointers[0].has(Int(1)) || !p.pointers[0].has(Int(2)) { + t.Error("p.pointers[0] didn't have the right keys", p.pointers[0]) + } + if !p.pointers[1].has(Int(3)) { + t.Error("p.pointers[1] didn't have the right keys", p.pointers[1]) + } + t.Log(p) + t.Log(p.pointers[0]) + t.Log(p.pointers[1]) + } +} + +func Test_internal_insert_no_split(t *testing.T) { + a := NewInternal(3) + leaf := NewLeaf(1, false) + if err := leaf.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(1), leaf); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(5), nil); err != nil { + t.Error(err) + } + p, q, err := a.internal_insert(Int(2), nil) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q != nil { + t.Errorf("q != nil") + } + if !p.has(Int(1)) || !p.has(Int(2)) || !p.has(Int(5)) { + t.Error("p didn't have the right keys", p) + } + } +} + +func Test_internal_insert_split_less(t *testing.T) { + a := 
NewInternal(3) + leaf := NewLeaf(1, false) + if err := leaf.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(1), leaf); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(3), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(5), nil); err != nil { + t.Error(err) + } + p, q, err := a.internal_insert(Int(2), nil) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) || !p.has(Int(2)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q) + } + } +} + +func Test_internal_split_less(t *testing.T) { + a := NewInternal(3) + if err := a.put_kp(Int(1), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(3), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(5), nil); err != nil { + t.Error(err) + } + p, q, err := a.internal_split(Int(2), nil) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) || !p.has(Int(2)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q) + } + } +} + +func Test_internal_split_equal(t *testing.T) { + a := NewInternal(3) + if err := a.put_kp(Int(1), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(3), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(5), nil); err != nil { + t.Error(err) + } + p, q, err := a.internal_split(Int(3), nil) + if err == nil { + t.Error("split succeeded should have failed", p, q) + } +} + +func Test_internal_split_greater(t *testing.T) { + a := NewInternal(3) + if err := a.put_kp(Int(1), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(3), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(5), nil); err != nil { + t.Error(err) + } + p, q, err := 
a.internal_split(Int(4), nil) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || !q.has(Int(4)) || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q) + } + } +} + +func Test_leaf_insert_no_split(t *testing.T) { + a := NewLeaf(3, false) + insert_linked_list_node(a, nil, nil) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + p, q, err := a.leaf_insert(Int(2), 2) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q != nil { + t.Errorf("q != nil") + } + if !p.has(Int(1)) || !p.has(Int(2)) || !p.has(Int(3)) { + t.Error("p didn't have the right keys", p) + } + } +} + +// tests the defer to split logic +func Test_leaf_insert_split_less(t *testing.T) { + a := NewLeaf(3, false) + insert_linked_list_node(a, nil, nil) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(5), 5); err != nil { + t.Error(err) + } + p, q, err := a.leaf_insert(Int(2), 2) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) || !p.has(Int(2)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q) + } + } +} + +func Test_leaf_split_less(t *testing.T) { + a := NewLeaf(3, false) + insert_linked_list_node(a, nil, nil) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(5), 5); err != nil { + t.Error(err) + } + p, q, err := a.leaf_split(Int(2), 2) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == 
nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) || !p.has(Int(2)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q) + } + } +} + +func Test_leaf_split_equal(t *testing.T) { + a := NewLeaf(3, false) + insert_linked_list_node(a, nil, nil) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(5), 5); err != nil { + t.Error(err) + } + p, q, err := a.leaf_split(Int(3), 2) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || q.count(Int(3)) != 2 || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q, q.count(Int(3))) + } + } +} + +func Test_leaf_split_greater(t *testing.T) { + a := NewLeaf(3, false) + insert_linked_list_node(a, nil, nil) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(5), 5); err != nil { + t.Error(err) + } + p, q, err := a.leaf_split(Int(4), 2) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil { + t.Errorf("q == nil") + } + if !p.has(Int(1)) { + t.Error("p didn't have the right keys", p) + } + if !q.has(Int(3)) || !q.has(Int(4)) || !q.has(Int(5)) { + t.Error("q didn't have the right keys", q) + } + } +} + +// tests the defer logic +func Test_pure_leaf_insert_split_less(t *testing.T) { + a := NewLeaf(2, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(2, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(2, false) + insert_linked_list_node(d, c, nil) + if err := a.put_kv(Int(3), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 2); err != nil { + 
t.Error(err) + } + if err := b.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 4); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 5); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 6); err != nil { + t.Error(err) + } + if err := d.put_kv(Int(4), 6); err != nil { + t.Error(err) + } + p, q, err := a.leaf_insert(Int(2), 1) + if err != nil { + t.Error(err) + } else { + if q != a { + t.Errorf("q != a") + } + if p == nil || len(p.keys) != 1 || !p.keys[0].Equals(Int(2)) { + t.Errorf("p did not contain the right key") + } + if p.prev != nil { + t.Errorf("expected p.prev == nil") + } + if p.next != a { + t.Errorf("expected p.next == a") + } + if a.prev != p { + t.Errorf("expected a.prev == p") + } + if a.next != b { + t.Errorf("expected a.next == b") + } + if b.prev != a { + t.Errorf("expected b.prev == a") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != d { + t.Errorf("expected c.next == d") + } + if d.prev != c { + t.Errorf("expected d.prev == c") + } + if d.next != nil { + t.Errorf("expected d.next == nil") + } + } +} + +func Test_pure_leaf_split_less(t *testing.T) { + a := NewLeaf(2, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(2, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(2, false) + insert_linked_list_node(d, c, nil) + if err := a.put_kv(Int(3), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 2); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 4); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 5); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 6); err != nil { + t.Error(err) + } + if err := d.put_kv(Int(4), 6); err != nil { + t.Error(err) + } + p, q, err := a.pure_leaf_split(Int(2), 1) + if err != nil { + 
t.Error(err) + } else { + if q != a { + t.Errorf("q != a") + } + if p == nil || len(p.keys) != 1 || !p.keys[0].Equals(Int(2)) { + t.Errorf("p did not contain the right key") + } + if p.prev != nil { + t.Errorf("expected p.prev == nil") + } + if p.next != a { + t.Errorf("expected p.next == a") + } + if a.prev != p { + t.Errorf("expected a.prev == p") + } + if a.next != b { + t.Errorf("expected a.next == b") + } + if b.prev != a { + t.Errorf("expected b.prev == a") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != d { + t.Errorf("expected c.next == d") + } + if d.prev != c { + t.Errorf("expected d.prev == c") + } + if d.next != nil { + t.Errorf("expected d.next == nil") + } + } +} + +func Test_pure_leaf_split_equal(t *testing.T) { + a := NewLeaf(2, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(2, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(2, false) + insert_linked_list_node(d, c, nil) + if err := a.put_kv(Int(3), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 2); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 4); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 5); err != nil { + t.Error(err) + } + if err := d.put_kv(Int(4), 6); err != nil { + t.Error(err) + } + p, q, err := a.pure_leaf_split(Int(3), 1) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q != nil { + t.Errorf("q != nil") + } + if a.prev != nil { + t.Errorf("expected a.prev == nil") + } + if a.next != b { + t.Errorf("expected a.next == b") + } + if b.prev != a { + t.Errorf("expected b.prev == a") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != d { + t.Errorf("expected c.next == d") + } + if d.prev 
!= c { + t.Errorf("expected d.prev == c") + } + if d.next != nil { + t.Errorf("expected d.next == nil") + } + } +} + +func Test_pure_leaf_split_greater(t *testing.T) { + a := NewLeaf(2, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(2, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(2, false) + insert_linked_list_node(d, c, nil) + if err := a.put_kv(Int(3), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 2); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 4); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 5); err != nil { + t.Error(err) + } + if err := d.put_kv(Int(5), 6); err != nil { + t.Error(err) + } + p, q, err := a.pure_leaf_split(Int(4), 1) + if err != nil { + t.Error(err) + } else { + if p != a { + t.Errorf("p != a") + } + if q == nil || len(q.keys) != 1 || !q.keys[0].Equals(Int(4)) { + t.Errorf("q != nil") + } + if a.prev != nil { + t.Errorf("expected a.prev == nil") + } + if a.next != b { + t.Errorf("expected a.next == b") + } + if b.prev != a { + t.Errorf("expected b.prev == a") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != q { + t.Errorf("expected c.next == q") + } + if q.prev != c { + t.Errorf("expected q.prev == c") + } + if q.next != d { + t.Errorf("expected q.next == d") + } + if d.prev != q { + t.Errorf("expected d.prev == q") + } + if d.next != nil { + t.Errorf("expected d.next == nil") + } + } +} + +func Test_find_end_of_pure_run(t *testing.T) { + a := NewLeaf(2, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(2, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(2, false) + insert_linked_list_node(d, c, nil) + if err := a.put_kv(Int(3), 1); err != nil { + t.Error(err) + } + if err := 
a.put_kv(Int(3), 2); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := b.put_kv(Int(3), 4); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 5); err != nil { + t.Error(err) + } + if err := c.put_kv(Int(3), 6); err != nil { + t.Error(err) + } + if err := d.put_kv(Int(4), 6); err != nil { + t.Error(err) + } + e := a.find_end_of_pure_run() + if e != c { + t.Errorf("end of run should have been block c %v %v", e, c) + } +} + +func Test_insert_linked_list_node(t *testing.T) { + a := NewLeaf(1, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(3, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(4, false) + insert_linked_list_node(d, a, b) + if a.prev != nil { + t.Errorf("expected a.prev == nil") + } + if a.next != d { + t.Errorf("expected a.next == d") + } + if d.prev != a { + t.Errorf("expected d.prev == a") + } + if d.next != b { + t.Errorf("expected d.next == b") + } + if b.prev != d { + t.Errorf("expected b.prev == d") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != nil { + t.Errorf("expected c.next == nil") + } +} + +func Test_remove_linked_list_node(t *testing.T) { + a := NewLeaf(1, false) + insert_linked_list_node(a, nil, nil) + b := NewLeaf(2, false) + insert_linked_list_node(b, a, nil) + c := NewLeaf(3, false) + insert_linked_list_node(c, b, nil) + d := NewLeaf(4, false) + insert_linked_list_node(d, a, b) + if a.prev != nil { + t.Errorf("expected a.prev == nil") + } + if a.next != d { + t.Errorf("expected a.next == d") + } + if d.prev != a { + t.Errorf("expected d.prev == a") + } + if d.next != b { + t.Errorf("expected d.next == b") + } + if b.prev != d { + t.Errorf("expected b.prev == d") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != nil 
{ + t.Errorf("expected c.next == nil") + } + remove_linked_list_node(d) + if a.prev != nil { + t.Errorf("expected a.prev == nil") + } + if a.next != b { + t.Errorf("expected a.next == b") + } + if b.prev != a { + t.Errorf("expected b.prev == a") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != nil { + t.Errorf("expected c.next == nil") + } + remove_linked_list_node(a) + if b.prev != nil { + t.Errorf("expected b.prev == nil") + } + if b.next != c { + t.Errorf("expected b.next == c") + } + if c.prev != b { + t.Errorf("expected c.prev == b") + } + if c.next != nil { + t.Errorf("expected c.next == nil") + } + remove_linked_list_node(c) + if b.prev != nil { + t.Errorf("expected b.prev == nil") + } + if b.next != nil { + t.Errorf("expected b.next == nil") + } + remove_linked_list_node(b) +} + +func Test_balance_leaf_nodes_with_dup(t *testing.T) { + a := NewLeaf(3, false) + b := NewLeaf(3, false) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(2), 1); err != nil { + t.Error(err) + } + balance_nodes(a, b) + if !a.has(Int(1)) || a.count(Int(1)) != 2 || a.has(Int(2)) { + t.Error("a had wrong items", a) + } + if !b.has(Int(2)) || b.count(Int(2)) != 1 || b.has(Int(1)) { + t.Error("b had wrong items", b) + } +} + +func Test_balance_leaf_nodes(t *testing.T) { + a := NewLeaf(7, false) + b := NewLeaf(7, false) + if err := a.put_kv(Int(1), 1); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(2), 2); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(3), 3); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(4), 4); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(5), 5); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(6), 6); err != nil { + t.Error(err) + } + if err := a.put_kv(Int(7), 7); err != nil { + t.Error(err) + } + balance_nodes(a, b) + for i, k :=
range a.keys { + if int(k.(Int)) != i+1 { + t.Errorf("k != %d", i+1) + } + } + for i, k := range b.keys { + if int(k.(Int)) != 3+i+1 { + t.Errorf("k != %d", 3+i+1) + } + } + for i, v := range a.values { + if v.(int) != i+1 { + t.Errorf("v != %d", i+1) + } + } + for i, v := range b.values { + if v.(int) != 3+i+1 { + t.Errorf("v != %d", 3+i+1) + } + } + t.Log(a) + t.Log(b) +} + +func Test_balance_internal_nodes(t *testing.T) { + a := NewInternal(6) + b := NewInternal(6) + if err := a.put_kp(Int(1), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(2), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(3), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(4), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(5), nil); err != nil { + t.Error(err) + } + if err := a.put_kp(Int(6), nil); err != nil { + t.Error(err) + } + balance_nodes(a, b) + for i, k := range a.keys { + if int(k.(Int)) != i+1 { + t.Errorf("k != %d", i+1) + } + } + for i, k := range b.keys { + if int(k.(Int)) != 3+i+1 { + t.Errorf("k != %d", 3+i+1) + } + } + t.Log(a) + t.Log(b) +} + + +// copied from + +// ThreadSafeRand provides a thread safe version of math/rand.Rand using +// the same technique used in the math/rand package to make the top level +// functions thread safe. +func ThreadSafeRand(seed int64) *mrand.Rand { + return mrand.New(&lockedSource{src: mrand.NewSource(seed).(mrand.Source64)}) +} + +// from: https://golang.org/src/math/rand/rand.go?s=8161:8175#L317 +type lockedSource struct { + lk sync.Mutex + src mrand.Source64 +} + +func (r *lockedSource) Int63() (n int64) { + r.lk.Lock() + n = r.src.Int63() + r.lk.Unlock() + return +} + +func (r *lockedSource) Uint64() (n uint64) { + r.lk.Lock() + n = r.src.Uint64() + r.lk.Unlock() + return +} + +func (r *lockedSource) Seed(seed int64) { + r.lk.Lock() + r.src.Seed(seed) + r.lk.Unlock() +} + +// seedPos implements Seed for a lockedSource without a race condition.
+func (r *lockedSource) seedPos(seed int64, readPos *int8) { + r.lk.Lock() + r.src.Seed(seed) + *readPos = 0 + r.lk.Unlock() +} + +// read implements Read for a lockedSource without a race condition. +func (r *lockedSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) { + r.lk.Lock() + n, err = read(p, r.src.Int63, readVal, readPos) + r.lk.Unlock() + return +} + +func read(p []byte, int63 func() int64, readVal *int64, readPos *int8) (n int, err error) { + pos := *readPos + val := *readVal + for n = 0; n < len(p); n++ { + if pos == 0 { + val = int63() + pos = 7 + } + p[n] = byte(val) + val >>= 8 + pos-- + } + *readPos = pos + *readVal = val + return +} + +// copied from https://sourcegraph.com/github.com/timtadh/data-structures@master/-/blob/test/support.go + +type T testing.T + +func (t *T) Assert(ok bool, msg string, vars ...interface{}) { + if !ok { + t.Log("\n" + string(debug.Stack())) + t.Fatalf(msg, vars...) + } +} + +func (t *T) AssertNil(errors ...error) { + any := false + for _, err := range errors { + if err != nil { + any = true + t.Log("\n" + string(debug.Stack())) + t.Error(err) + } + } + if any { + t.Fatal("assert failed") + } +} + +func RandSlice(length int) []byte { + slice := make([]byte, length) + if _, err := crand.Read(slice); err != nil { + panic(err) + } + return slice +} + +func RandHex(length int) string { + return hex.EncodeToString(RandSlice(length / 2)) +} + +func RandStr(length int) string { + return string(RandSlice(length)) +} \ No newline at end of file diff --git a/weed/util/bptree/int.go b/weed/util/bptree/int.go new file mode 100644 index 000000000..e8fd9511c --- /dev/null +++ b/weed/util/bptree/int.go @@ -0,0 +1,357 @@ +package bptree + +import ( + "encoding/binary" + "fmt" +) + +type Int8 int8 +type UInt8 uint8 +type Int16 int16 +type UInt16 uint16 +type Int32 int32 +type UInt32 uint32 +type Int64 int64 +type UInt64 uint64 +type Int int +type UInt uint + +func (self *Int8) MarshalBinary() ([]byte, error) { + 
bytes := make([]byte, 1) // one byte is needed for bytes[0] + bytes[0] = uint8(*self) + return bytes, nil +} + +func (self *Int8) UnmarshalBinary(data []byte) error { + if len(data) != 1 { + return fmt.Errorf("data wrong size") + } + *self = Int8(data[0]) + return nil +} + +func (self Int8) Equals(other Equatable) bool { + if o, ok := other.(Int8); ok { + return self == o + } else { + return false + } +} + +func (self Int8) Less(other Sortable) bool { + if o, ok := other.(Int8); ok { + return self < o + } else { + return false + } +} + +func (self Int8) Hash() int { + return int(self) +} + +func (self *UInt8) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 1) // one byte is needed for bytes[0] + bytes[0] = uint8(*self) + return bytes, nil +} + +func (self *UInt8) UnmarshalBinary(data []byte) error { + if len(data) != 1 { + return fmt.Errorf("data wrong size") + } + *self = UInt8(data[0]) + return nil +} + +func (self UInt8) Equals(other Equatable) bool { + if o, ok := other.(UInt8); ok { + return self == o + } else { + return false + } +} + +func (self UInt8) Less(other Sortable) bool { + if o, ok := other.(UInt8); ok { + return self < o + } else { + return false + } +} + +func (self UInt8) Hash() int { + return int(self) +} + +func (self *Int16) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 2) + binary.BigEndian.PutUint16(bytes, uint16(*self)) + return bytes, nil +} + +func (self *Int16) UnmarshalBinary(data []byte) error { + if len(data) != 2 { + return fmt.Errorf("data wrong size") + } + *self = Int16(binary.BigEndian.Uint16(data)) + return nil +} + +func (self Int16) Equals(other Equatable) bool { + if o, ok := other.(Int16); ok { + return self == o + } else { + return false + } +} + +func (self Int16) Less(other Sortable) bool { + if o, ok := other.(Int16); ok { + return self < o + } else { + return false + } +} + +func (self Int16) Hash() int { + return int(self) +} + +func (self *UInt16) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 2) + binary.BigEndian.PutUint16(bytes, uint16(*self)) +
return bytes, nil +} + +func (self *UInt16) UnmarshalBinary(data []byte) error { + if len(data) != 2 { + return fmt.Errorf("data wrong size") + } + *self = UInt16(binary.BigEndian.Uint16(data)) + return nil +} + +func (self UInt16) Equals(other Equatable) bool { + if o, ok := other.(UInt16); ok { + return self == o + } else { + return false + } +} + +func (self UInt16) Less(other Sortable) bool { + if o, ok := other.(UInt16); ok { + return self < o + } else { + return false + } +} + +func (self UInt16) Hash() int { + return int(self) +} + +func (self *Int32) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, uint32(*self)) + return bytes, nil +} + +func (self *Int32) UnmarshalBinary(data []byte) error { + if len(data) != 4 { + return fmt.Errorf("data wrong size") + } + *self = Int32(binary.BigEndian.Uint32(data)) + return nil +} + +func (self Int32) Equals(other Equatable) bool { + if o, ok := other.(Int32); ok { + return self == o + } else { + return false + } +} + +func (self Int32) Less(other Sortable) bool { + if o, ok := other.(Int32); ok { + return self < o + } else { + return false + } +} + +func (self *UInt32) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, uint32(*self)) + return bytes, nil +} + +func (self *UInt32) UnmarshalBinary(data []byte) error { + if len(data) != 4 { + return fmt.Errorf("data wrong size") + } + *self = UInt32(binary.BigEndian.Uint32(data)) + return nil +} + +func (self Int32) Hash() int { + return int(self) +} + +func (self UInt32) Equals(other Equatable) bool { + if o, ok := other.(UInt32); ok { + return self == o + } else { + return false + } +} + +func (self UInt32) Less(other Sortable) bool { + if o, ok := other.(UInt32); ok { + return self < o + } else { + return false + } +} + +func (self UInt32) Hash() int { + return int(self) +} + +func (self *Int64) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 8) + 
binary.BigEndian.PutUint64(bytes, uint64(*self)) + return bytes, nil +} + +func (self *Int64) UnmarshalBinary(data []byte) error { + if len(data) != 8 { + return fmt.Errorf("data wrong size") + } + *self = Int64(binary.BigEndian.Uint64(data)) + return nil +} + +func (self Int64) Equals(other Equatable) bool { + if o, ok := other.(Int64); ok { + return self == o + } else { + return false + } +} + +func (self Int64) Less(other Sortable) bool { + if o, ok := other.(Int64); ok { + return self < o + } else { + return false + } +} + +func (self Int64) Hash() int { + return int(self>>32) ^ int(self) +} + +func (self *UInt64) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 8) + binary.BigEndian.PutUint64(bytes, uint64(*self)) + return bytes, nil +} + +func (self *UInt64) UnmarshalBinary(data []byte) error { + if len(data) != 8 { + return fmt.Errorf("data wrong size") + } + *self = UInt64(binary.BigEndian.Uint64(data)) + return nil +} + +func (self UInt64) Equals(other Equatable) bool { + if o, ok := other.(UInt64); ok { + return self == o + } else { + return false + } +} + +func (self UInt64) Less(other Sortable) bool { + if o, ok := other.(UInt64); ok { + return self < o + } else { + return false + } +} + +func (self UInt64) Hash() int { + return int(self>>32) ^ int(self) +} + +func (self *Int) MarshalBinary() ([]byte, error) { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, uint32(*self)) + return bytes, nil +} + +func (self *Int) UnmarshalBinary(data []byte) error { + if len(data) != 4 { + return fmt.Errorf("data wrong size") + } + *self = Int(binary.BigEndian.Uint32(data)) + return nil +} + +func (self Int) Equals(other Equatable) bool { + if o, ok := other.(Int); ok { + return self == o + } else { + return false + } +} + +func (self Int) Less(other Sortable) bool { + if o, ok := other.(Int); ok { + return self < o + } else { + return false + } +} + +func (self Int) Hash() int { + return int(self) +} + +func (self *UInt) MarshalBinary() 
([]byte, error) { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, uint32(*self)) + return bytes, nil +} + +func (self *UInt) UnmarshalBinary(data []byte) error { + if len(data) != 4 { + return fmt.Errorf("data wrong size") + } + *self = UInt(binary.BigEndian.Uint32(data)) + return nil +} + +func (self UInt) Equals(other Equatable) bool { + if o, ok := other.(UInt); ok { + return self == o + } else { + return false + } +} + +func (self UInt) Less(other Sortable) bool { + if o, ok := other.(UInt); ok { + return self < o + } else { + return false + } +} + +func (self UInt) Hash() int { + return int(self) +} diff --git a/weed/util/bptree/rand.go b/weed/util/bptree/rand.go new file mode 100644 index 000000000..08b2e50ab --- /dev/null +++ b/weed/util/bptree/rand.go @@ -0,0 +1,2 @@ +package bptree + diff --git a/weed/util/bptree/string.go b/weed/util/bptree/string.go new file mode 100644 index 000000000..262220878 --- /dev/null +++ b/weed/util/bptree/string.go @@ -0,0 +1,71 @@ +package bptree + +import ( + "bytes" + "hash/fnv" +) + +type String string +type ByteSlice []byte + +func (self *String) MarshalBinary() ([]byte, error) { + return []byte(*self), nil +} + +func (self *String) UnmarshalBinary(data []byte) error { + *self = String(data) + return nil +} + +func (self String) Equals(other Equatable) bool { + if o, ok := other.(String); ok { + return self == o + } else { + return false + } +} + +func (self String) Less(other Sortable) bool { + if o, ok := other.(String); ok { + return self < o + } else { + return false + } +} + +func (self String) Hash() int { + h := fnv.New32a() + h.Write([]byte(string(self))) + return int(h.Sum32()) +} + +func (self *ByteSlice) MarshalBinary() ([]byte, error) { + return []byte(*self), nil +} + +func (self *ByteSlice) UnmarshalBinary(data []byte) error { + *self = ByteSlice(data) + return nil +} + +func (self ByteSlice) Equals(other Equatable) bool { + if o, ok := other.(ByteSlice); ok { + return bytes.Equal(self, o) + } 
else { + return false + } +} + +func (self ByteSlice) Less(other Sortable) bool { + if o, ok := other.(ByteSlice); ok { + return bytes.Compare(self, o) < 0 // -1 if a < b + } else { + return false + } +} + +func (self ByteSlice) Hash() int { + h := fnv.New32a() + h.Write([]byte(self)) + return int(h.Sum32()) +} diff --git a/weed/util/bptree/types.go b/weed/util/bptree/types.go new file mode 100644 index 000000000..6a1d83098 --- /dev/null +++ b/weed/util/bptree/types.go @@ -0,0 +1,103 @@ +package bptree + +import ( + "errors" + "fmt" +) + +type Equatable interface { + Equals(b Equatable) bool +} + +type Sortable interface { + Equatable + Less(b Sortable) bool +} + +type Hashable interface { + Sortable + Hash() int +} + +var BpTreeError = fmt.Errorf + +func NegativeSize() error { + return errors.New("negative size") +} + +type Iterator func() (item interface{}, next Iterator) +type KIterator func() (key Hashable, next KIterator) +type KVIterator func() (key Hashable, value interface{}, next KVIterator) +type KVIterable interface { + Iterate() KVIterator +} + +type Sized interface { + Size() int +} + +type MapOperable interface { + Sized + Has(key Hashable) bool + Put(key Hashable, value interface{}) (err error) + Get(key Hashable) (value interface{}, err error) + Remove(key Hashable) (value interface{}, err error) +} + +type WhereFunc func(value interface{}) bool + +func MakeValuesIterator(obj KVIterable) Iterator { + kv_iterator := obj.Iterate() + var v_iterator Iterator + v_iterator = func() (value interface{}, next Iterator) { + _, value, kv_iterator = kv_iterator() + if kv_iterator == nil { + return nil, nil + } + return value, v_iterator + } + return v_iterator +} + +func MakeItemsIterator(obj KVIterable) (kit KIterator) { + kv_iterator := obj.Iterate() + kit = func() (item Hashable, next KIterator) { + var key Hashable + var value interface{} + key, value, kv_iterator = kv_iterator() + if kv_iterator == nil { + return nil, nil + } + return &MapEntry{key, 
value}, kit + } + return kit +} + +type MapEntry struct { + Key Hashable + Value interface{} +} + +func (m *MapEntry) Equals(other Equatable) bool { + if o, ok := other.(*MapEntry); ok { + return m.Key.Equals(o.Key) + } else { + return m.Key.Equals(other) + } +} + +func (m *MapEntry) Less(other Sortable) bool { + if o, ok := other.(*MapEntry); ok { + return m.Key.Less(o.Key) + } else { + return m.Key.Less(other) + } +} + +func (m *MapEntry) Hash() int { + return m.Key.Hash() +} + +func (m *MapEntry) String() string { + return fmt.Sprintf("<MapEntry %v: %v>", m.Key, m.Value) +} From 2d237da74a6e89ce932f88310d9fb6bcb2f2d586 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 20 Aug 2021 01:19:11 -0700 Subject: [PATCH 02/30] remove size since each put/get will have to update the root node --- weed/util/bptree/bpmap.go | 10 ---------- weed/util/bptree/bptree.go | 8 -------- weed/util/bptree/bptree_test.go | 9 --------- weed/util/bptree/types.go | 5 ----- 4 files changed, 32 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 37b2b25bb..43c07d71f 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -12,28 +12,19 @@ type BpMap BpTree func NewBpMap(node_size int) *BpMap { return &BpMap{ root: NewLeaf(node_size, true), - size: 0, } } -func (self *BpMap) Size() int { - return (*BpTree)(self).Size() -} - func (self *BpMap) Has(key Hashable) bool { return (*BpTree)(self).Has(key) } func (self *BpMap) Put(key Hashable, value interface{}) (err error) { - had := self.Has(key) new_root, err := self.root.put(key, value) if err != nil { return err } self.root = new_root - if !had { - self.size += 1 - } return nil } @@ -60,7 +51,6 @@ func (self *BpMap) Remove(key Hashable) (value interface{}, err error) { } else { self.root = new_root } - self.size-- return value, nil } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 4b68adb20..95f53ab29 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -8,7
+8,6 @@ package bptree */ type BpTree struct { root *BpNode - size int } type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) @@ -16,14 +15,9 @@ type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { return &BpTree{ root: NewLeaf(node_size, false), - size: 0, } } -func (self *BpTree) Size() int { - return self.size -} - func (self *BpTree) Has(key Hashable) bool { if len(self.root.keys) == 0 { return false @@ -52,7 +46,6 @@ func (self *BpTree) Add(key Hashable, value interface{}) (err error) { return err } self.root = new_root - self.size += 1 return nil } @@ -100,7 +93,6 @@ func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { } else { self.root = new_root } - self.size -= 1 return nil } diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go index cf978ede7..38663c543 100644 --- a/weed/util/bptree/bptree_test.go +++ b/weed/util/bptree/bptree_test.go @@ -110,9 +110,6 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { if err != nil { t.Error(err) } - if bpt.Size() != (i + 1) { - t.Error("size was wrong", bpt.Size(), i+1) - } } for i, r := range recs { @@ -236,9 +233,6 @@ func TestBpMap(t *testing.T) { if err != nil { t.Error(err) } - if table.Size() != (i + 1) { - t.Error("size was wrong", table.Size(), i+1) - } } for _, r := range recs { @@ -274,9 +268,6 @@ func TestBpMap(t *testing.T) { t.Error("wrong value") } } - if table.Size() != (len(recs) - (i + 1)) { - t.Error("size was wrong", table.Size(), (len(recs) - (i + 1))) - } } } diff --git a/weed/util/bptree/types.go b/weed/util/bptree/types.go index 6a1d83098..45084efdd 100644 --- a/weed/util/bptree/types.go +++ b/weed/util/bptree/types.go @@ -32,12 +32,7 @@ type KVIterable interface { Iterate() KVIterator } -type Sized interface { - Size() int -} - type MapOperable interface { - Sized Has(key Hashable) bool Put(key Hashable, value interface{}) (err error) Get(key Hashable) (value interface{}, err 
error) From 88d68cad872e6f0d10fadaa14949937b365d8545 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 20 Aug 2021 04:14:52 -0700 Subject: [PATCH 03/30] remove dedup --- weed/util/bptree/bpmap.go | 4 +- weed/util/bptree/bptree.go | 18 +------ weed/util/bptree/bptree_node.go | 17 ++---- weed/util/bptree/bptree_test.go | 95 +++++++++++++++------------------ 4 files changed, 51 insertions(+), 83 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 43c07d71f..1dff8643a 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -11,7 +11,7 @@ type BpMap BpTree func NewBpMap(node_size int) *BpMap { return &BpMap{ - root: NewLeaf(node_size, true), + root: NewLeaf(node_size), } } @@ -47,7 +47,7 @@ func (self *BpMap) Remove(key Hashable) (value interface{}, err error) { return nil, err } if new_root == nil { - self.root = NewLeaf(ns, true) + self.root = NewLeaf(ns) } else { self.root = new_root } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 95f53ab29..eff4f3238 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -14,7 +14,7 @@ type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { return &BpTree{ - root: NewLeaf(node_size, false), + root: NewLeaf(node_size), } } @@ -26,20 +26,6 @@ func (self *BpTree) Has(key Hashable) bool { return l.keys[j].Equals(key) } -func (self *BpTree) Count(key Hashable) int { - if len(self.root.keys) == 0 { - return 0 - } - j, l := self.root.get_start(key) - count := 0 - end := false - for !end && l.keys[j].Equals(key) { - count++ - j, l, end = next_location(j, l) - } - return count -} - func (self *BpTree) Add(key Hashable, value interface{}) (err error) { new_root, err := self.root.put(key, value) if err != nil { @@ -89,7 +75,7 @@ func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { return err } if new_root == nil { - self.root = NewLeaf(ns, false) + self.root = NewLeaf(ns) 
} else { self.root = new_root } diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 3574371f5..bd74e6327 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -6,7 +6,6 @@ type BpNode struct { pointers []*BpNode next *BpNode prev *BpNode - no_dup bool } func NewInternal(size int) *BpNode { @@ -19,14 +18,13 @@ func NewInternal(size int) *BpNode { } } -func NewLeaf(size int, no_dup bool) *BpNode { +func NewLeaf(size int) *BpNode { if size < 0 { panic(NegativeSize()) } return &BpNode{ keys: make([]Hashable, 0, size), values: make([]interface{}, 0, size), - no_dup: no_dup, } } @@ -276,13 +274,6 @@ func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } - if self.no_dup { - i, has := self.find(key) - if has { - self.values[i] = value - return self, nil, nil - } - } if self.Full() { return self.leaf_split(key, value) } else { @@ -307,7 +298,7 @@ func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, e return self.pure_leaf_split(key, value) } a = self - b = NewLeaf(self.NodeSize(), self.no_dup) + b = NewLeaf(self.NodeSize()) insert_linked_list_node(b, a, a.next) balance_nodes(a, b) if key.Less(b.keys[0]) { @@ -339,7 +330,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNo return nil, nil, BpTreeError("Expected a pure leaf node") } if key.Less(self.keys[0]) { - a = NewLeaf(self.NodeSize(), self.no_dup) + a = NewLeaf(self.NodeSize()) b = self if err := a.put_kv(key, value); err != nil { return nil, nil, err @@ -355,7 +346,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNo } return a, nil, nil } else { - b = NewLeaf(self.NodeSize(), self.no_dup) + b = NewLeaf(self.NodeSize()) if err := b.put_kv(key, value); err != nil { return nil, nil, err } diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go 
index 38663c543..ef73e862d 100644 --- a/weed/util/bptree/bptree_test.go +++ b/weed/util/bptree/bptree_test.go @@ -119,12 +119,6 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { if has := bpt.Has(randstr(10)); has { t.Error("Table has extra key") } - if count := bpt.Count(r.key); count != 1 { - t.Error(bpt, "Missing key") - } - if count := bpt.Count(randstr(10)); count != 0 { - t.Error("Table has extra key") - } for k, v, next := bpt.Find(r.key)(); next != nil; k, v, next = next() { if !k.Equals(r.key) { t.Error(bpt, "Find Failed Key Error") @@ -190,9 +184,6 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { if has := bpt.Has(r.key); !has { t.Error(bpt, "Missing key") } - if count := bpt.Count(r.key); count != 1 { - t.Error(bpt, "Missing key") - } if err := bpt.RemoveWhere(r.key, func(value interface{}) bool { return true }); err != nil { t.Fatal(bpt, err) } @@ -275,7 +266,7 @@ func TestBpMap(t *testing.T) { } func Test_get_start(t *testing.T) { - root := NewLeaf(2, false) + root := NewLeaf(2) root, err := root.put(Int(1), 1) if err != nil { t.Error(err) @@ -344,7 +335,7 @@ func Test_get_start(t *testing.T) { } func Test_get_end(t *testing.T) { - root := NewLeaf(3, false) + root := NewLeaf(3) root, err := root.put(Int(1), -1) if err != nil { t.Fatal(err) @@ -436,7 +427,7 @@ func Test_get_end(t *testing.T) { } func Test_put_no_root_split(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -470,7 +461,7 @@ func Test_put_no_root_split(t *testing.T) { } func Test_put_root_split(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) p, err := a.put(Int(1), 1) if err != nil { t.Error(err) @@ -520,7 +511,7 @@ func Test_put_root_split(t *testing.T) { func Test_internal_insert_no_split(t *testing.T) { a := NewInternal(3) - leaf := NewLeaf(1, false) + leaf := NewLeaf(1) if err := leaf.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -548,7 +539,7 @@ func Test_internal_insert_no_split(t 
*testing.T) { func Test_internal_insert_split_less(t *testing.T) { a := NewInternal(3) - leaf := NewLeaf(1, false) + leaf := NewLeaf(1) if err := leaf.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -658,7 +649,7 @@ func Test_internal_split_greater(t *testing.T) { } func Test_leaf_insert_no_split(t *testing.T) { - a := NewLeaf(3, false) + a := NewLeaf(3) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -684,7 +675,7 @@ func Test_leaf_insert_no_split(t *testing.T) { // tests the defer to split logic func Test_leaf_insert_split_less(t *testing.T) { - a := NewLeaf(3, false) + a := NewLeaf(3) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -715,7 +706,7 @@ func Test_leaf_insert_split_less(t *testing.T) { } func Test_leaf_split_less(t *testing.T) { - a := NewLeaf(3, false) + a := NewLeaf(3) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -746,7 +737,7 @@ func Test_leaf_split_less(t *testing.T) { } func Test_leaf_split_equal(t *testing.T) { - a := NewLeaf(3, false) + a := NewLeaf(3) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -777,7 +768,7 @@ func Test_leaf_split_equal(t *testing.T) { } func Test_leaf_split_greater(t *testing.T) { - a := NewLeaf(3, false) + a := NewLeaf(3) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -809,13 +800,13 @@ func Test_leaf_split_greater(t *testing.T) { // tests the defer logic func Test_pure_leaf_insert_split_less(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(2, false) + c := NewLeaf(2) insert_linked_list_node(c, b, nil) - d := NewLeaf(2, false) + d := NewLeaf(2) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) 
@@ -882,13 +873,13 @@ func Test_pure_leaf_insert_split_less(t *testing.T) { } func Test_pure_leaf_split_less(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(2, false) + c := NewLeaf(2) insert_linked_list_node(c, b, nil) - d := NewLeaf(2, false) + d := NewLeaf(2) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -955,13 +946,13 @@ func Test_pure_leaf_split_less(t *testing.T) { } func Test_pure_leaf_split_equal(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(2, false) + c := NewLeaf(2) insert_linked_list_node(c, b, nil) - d := NewLeaf(2, false) + d := NewLeaf(2) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -1019,13 +1010,13 @@ func Test_pure_leaf_split_equal(t *testing.T) { } func Test_pure_leaf_split_greater(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(2, false) + c := NewLeaf(2) insert_linked_list_node(c, b, nil) - d := NewLeaf(2, false) + d := NewLeaf(2) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -1089,13 +1080,13 @@ func Test_pure_leaf_split_greater(t *testing.T) { } func Test_find_end_of_pure_run(t *testing.T) { - a := NewLeaf(2, false) + a := NewLeaf(2) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(2, false) + c := NewLeaf(2) insert_linked_list_node(c, b, nil) - d := NewLeaf(2, false) + d := NewLeaf(2) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -1125,13 +1116,13 @@ 
func Test_find_end_of_pure_run(t *testing.T) { } func Test_insert_linked_list_node(t *testing.T) { - a := NewLeaf(1, false) + a := NewLeaf(1) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(3, false) + c := NewLeaf(3) insert_linked_list_node(c, b, nil) - d := NewLeaf(4, false) + d := NewLeaf(4) insert_linked_list_node(d, a, b) if a.prev != nil { t.Errorf("expected a.prev == nil") @@ -1160,13 +1151,13 @@ func Test_insert_linked_list_node(t *testing.T) { } func Test_remove_linked_list_node(t *testing.T) { - a := NewLeaf(1, false) + a := NewLeaf(1) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2, false) + b := NewLeaf(2) insert_linked_list_node(b, a, nil) - c := NewLeaf(3, false) + c := NewLeaf(3) insert_linked_list_node(c, b, nil) - d := NewLeaf(4, false) + d := NewLeaf(4) insert_linked_list_node(d, a, b) if a.prev != nil { t.Errorf("expected a.prev == nil") @@ -1235,8 +1226,8 @@ func Test_remove_linked_list_node(t *testing.T) { } func Test_balance_leaf_nodes_with_dup(t *testing.T) { - a := NewLeaf(3, false) - b := NewLeaf(3, false) + a := NewLeaf(3) + b := NewLeaf(3) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -1256,8 +1247,8 @@ func Test_balance_leaf_nodes_with_dup(t *testing.T) { } func Test_balance_leaf_nodes(t *testing.T) { - a := NewLeaf(7, false) - b := NewLeaf(7, false) + a := NewLeaf(7) + b := NewLeaf(7) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) } From 0c360eb6b25fe11c3db7c0fcc8df0f2923fc8902 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 20 Aug 2021 18:34:50 -0700 Subject: [PATCH 04/30] add getter and setter for root of tree and map --- weed/util/bptree/bpmap.go | 21 ++++++++++++++------- weed/util/bptree/bptree.go | 37 ++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 1dff8643a..9b391fd88 100644 --- 
a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -9,6 +9,13 @@ import ( */ type BpMap BpTree +func (self *BpMap) getRoot() *BpNode { + return self.root +} +func (self *BpMap) setRoot(root *BpNode) { + self.root = root +} + func NewBpMap(node_size int) *BpMap { return &BpMap{ root: NewLeaf(node_size), @@ -20,16 +27,16 @@ func (self *BpMap) Has(key Hashable) bool { } func (self *BpMap) Put(key Hashable, value interface{}) (err error) { - new_root, err := self.root.put(key, value) + new_root, err := self.getRoot().put(key, value) if err != nil { return err } - self.root = new_root + self.setRoot(new_root) return nil } func (self *BpMap) Get(key Hashable) (value interface{}, err error) { - j, l := self.root.get_start(key) + j, l := self.getRoot().get_start(key) if l.keys[j].Equals(key) { return l.values[j], nil } @@ -41,15 +48,15 @@ func (self *BpMap) Remove(key Hashable) (value interface{}, err error) { if err != nil { return nil, err } - ns := self.root.NodeSize() - new_root, err := self.root.remove(key, func(value interface{}) bool { return true }) + ns := self.getRoot().NodeSize() + new_root, err := self.getRoot().remove(key, func(value interface{}) bool { return true }) if err != nil { return nil, err } if new_root == nil { - self.root = NewLeaf(ns) + self.setRoot(NewLeaf(ns)) } else { - self.root = new_root + self.setRoot(new_root) } return value, nil } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index eff4f3238..68ee08720 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -10,6 +10,13 @@ type BpTree struct { root *BpNode } +func (self *BpTree) getRoot() *BpNode { + return self.root +} +func (self *BpTree) setRoot(root *BpNode) { + self.root = root +} + type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { @@ -19,24 +26,24 @@ func NewBpTree(node_size int) *BpTree { } func (self *BpTree) Has(key Hashable) bool { - if len(self.root.keys) == 0 { + if 
len(self.getRoot().keys) == 0 { return false } - j, l := self.root.get_start(key) + j, l := self.getRoot().get_start(key) return l.keys[j].Equals(key) } func (self *BpTree) Add(key Hashable, value interface{}) (err error) { - new_root, err := self.root.put(key, value) + new_root, err := self.getRoot().put(key, value) if err != nil { return err } - self.root = new_root + self.setRoot(new_root) return nil } func (self *BpTree) Replace(key Hashable, where WhereFunc, value interface{}) (err error) { - li := self.root.forward(key, key) + li := self.getRoot().forward(key, key) for i, leaf, next := li(); next != nil; i, leaf, next = next() { if where(leaf.values[i]) { leaf.values[i] = value @@ -52,9 +59,9 @@ func (self *BpTree) Find(key Hashable) (kvi KVIterator) { func (self *BpTree) Range(from, to Hashable) (kvi KVIterator) { var li loc_iterator if !to.Less(from) { - li = self.root.forward(from, to) + li = self.getRoot().forward(from, to) } else { - li = self.root.backward(from, to) + li = self.getRoot().backward(from, to) } kvi = func() (key Hashable, value interface{}, next KVIterator) { var i int @@ -69,21 +76,21 @@ func (self *BpTree) Range(from, to Hashable) (kvi KVIterator) { } func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { - ns := self.root.NodeSize() - new_root, err := self.root.remove(key, where) + ns := self.getRoot().NodeSize() + new_root, err := self.getRoot().remove(key, where) if err != nil { return err } if new_root == nil { - self.root = NewLeaf(ns) + self.setRoot(NewLeaf(ns)) } else { - self.root = new_root + self.setRoot(new_root) } return nil } func (self *BpTree) Keys() (ki KIterator) { - li := self.root.all() + li := self.getRoot().all() var prev Equatable ki = func() (key Hashable, next KIterator) { var i int @@ -110,7 +117,7 @@ func (self *BpTree) Items() (vi KIterator) { } func (self *BpTree) Iterate() (kvi KVIterator) { - li := self.root.all() + li := self.getRoot().all() kvi = func() (key Hashable, value 
interface{}, next KVIterator) { var i int var leaf *BpNode @@ -124,7 +131,7 @@ func (self *BpTree) Iterate() (kvi KVIterator) { } func (self *BpTree) Backward() (kvi KVIterator) { - li := self.root.all_backward() + li := self.getRoot().all_backward() kvi = func() (key Hashable, value interface{}, next KVIterator) { var i int var leaf *BpNode @@ -135,4 +142,4 @@ func (self *BpTree) Backward() (kvi KVIterator) { return leaf.keys[i], leaf.values[i], kvi } return kvi -} \ No newline at end of file +} From 01661ec6a77eeefc44466df1c0dd90ac320f38fa Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 20 Aug 2021 18:37:34 -0700 Subject: [PATCH 05/30] move to getter setter file --- weed/util/bptree/bpmap.go | 7 ------- weed/util/bptree/bptree.go | 7 ------- weed/util/bptree/getter_setter.go | 15 +++++++++++++++ 3 files changed, 15 insertions(+), 14 deletions(-) create mode 100644 weed/util/bptree/getter_setter.go diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 9b391fd88..a68eae255 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -9,13 +9,6 @@ import ( */ type BpMap BpTree -func (self *BpMap) getRoot() *BpNode { - return self.root -} -func (self *BpMap) setRoot(root *BpNode) { - self.root = root -} - func NewBpMap(node_size int) *BpMap { return &BpMap{ root: NewLeaf(node_size), diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 68ee08720..405e93ac7 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -10,13 +10,6 @@ type BpTree struct { root *BpNode } -func (self *BpTree) getRoot() *BpNode { - return self.root -} -func (self *BpTree) setRoot(root *BpNode) { - self.root = root -} - type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { diff --git a/weed/util/bptree/getter_setter.go b/weed/util/bptree/getter_setter.go new file mode 100644 index 000000000..cd5c8344b --- /dev/null +++ b/weed/util/bptree/getter_setter.go @@ -0,0 +1,15 @@ 
+package bptree + +func (self *BpMap) getRoot() *BpNode { + return self.root +} +func (self *BpMap) setRoot(root *BpNode) { + self.root = root +} + +func (self *BpTree) getRoot() *BpNode { + return self.root +} +func (self *BpTree) setRoot(root *BpNode) { + self.root = root +} From 172da83449e04641995eeb76ed015586fe6827f5 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 20 Aug 2021 18:50:16 -0700 Subject: [PATCH 06/30] bpnode use get prev and next --- weed/util/bptree/bptree_node.go | 46 +++++----- weed/util/bptree/bptree_test.go | 138 +++++++++++++++--------------- weed/util/bptree/getter_setter.go | 13 +++ 3 files changed, 105 insertions(+), 92 deletions(-) diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index bd74e6327..765a25cfa 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -108,9 +108,9 @@ func (self *BpNode) get_start(key Hashable) (i int, leaf *BpNode) { func next_location(i int, leaf *BpNode) (int, *BpNode, bool) { j := i + 1 - for j >= len(leaf.keys) && leaf.next != nil { + for j >= len(leaf.keys) && leaf.getNext() != nil { j = 0 - leaf = leaf.next + leaf = leaf.getNext() } if j >= len(leaf.keys) { return -1, nil, true @@ -120,8 +120,8 @@ func next_location(i int, leaf *BpNode) (int, *BpNode, bool) { func prev_location(i int, leaf *BpNode) (int, *BpNode, bool) { j := i - 1 - for j < 0 && leaf.prev != nil { - leaf = leaf.prev + for j < 0 && leaf.getPrev() != nil { + leaf = leaf.getPrev() j = len(leaf.keys) - 1 } if j < 0 { @@ -165,8 +165,8 @@ func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) { if i >= len(self.keys) && i > 0 { i = len(self.keys) - 1 } - if !has && (len(self.keys) == 0 || self.keys[i].Less(key)) && self.next != nil { - return self.next.leaf_get_start(key) + if !has && (len(self.keys) == 0 || self.keys[i].Less(key)) && self.getNext() != nil { + return self.getNext().leaf_get_start(key) } return i, self } @@ -299,7 +299,7 @@ func (self *BpNode) 
leaf_split(key Hashable, value interface{}) (a, b *BpNode, e } a = self b = NewLeaf(self.NodeSize()) - insert_linked_list_node(b, a, a.next) + insert_linked_list_node(b, a, a.getNext()) balance_nodes(a, b) if key.Less(b.keys[0]) { if err := a.put_kv(key, value); err != nil { @@ -335,7 +335,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNo if err := a.put_kv(key, value); err != nil { return nil, nil, err } - insert_linked_list_node(a, b.prev, b) + insert_linked_list_node(a, b.getPrev(), b) return a, b, nil } else { a = self @@ -350,7 +350,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNo if err := b.put_kv(key, value); err != nil { return nil, nil, err } - insert_linked_list_node(b, e, e.next) + insert_linked_list_node(b, e, e.getNext()) if e.keys[0].Equals(key) { return a, nil, nil } @@ -516,12 +516,12 @@ func (self *BpNode) leaf_remove(key, stop Hashable, where WhereFunc) (a *BpNode, } if len(l.keys) == 0 { remove_linked_list_node(l) - if l.next == nil { + if l.getNext() == nil { a = nil } else if stop == nil { a = nil - } else if !l.next.keys[0].Equals(stop) { - a = l.next + } else if !l.getNext().keys[0].Equals(stop) { + a = l.getNext() } else { a = nil } @@ -587,10 +587,10 @@ func (self *BpNode) find(key Hashable) (int, bool) { func (self *BpNode) find_end_of_pure_run() *BpNode { k := self.keys[0] p := self - n := self.next + n := self.getNext() for n != nil && n.Pure() && k.Equals(n.keys[0]) { p = n - n = n.next + n = n.getNext() } return p } @@ -659,25 +659,25 @@ func (self *BpNode) backward(from, to Hashable) (li loc_iterator) { } func insert_linked_list_node(n, prev, next *BpNode) { - if (prev != nil && prev.next != next) || (next != nil && next.prev != prev) { + if (prev != nil && prev.getNext() != next) || (next != nil && next.getPrev() != prev) { panic(BpTreeError("prev and next not hooked up")) } - n.prev = prev - n.next = next + n.setPrev(prev) + n.setNext(next) if prev != nil { - 
prev.next = n + prev.setNext(n) } if next != nil { - next.prev = n + next.setPrev(n) } } func remove_linked_list_node(n *BpNode) { - if n.prev != nil { - n.prev.next = n.next + if n.getPrev() != nil { + n.getPrev().setNext(n.getNext()) } - if n.next != nil { - n.next.prev = n.prev + if n.getNext() != nil { + n.getNext().setPrev(n.getPrev()) } } diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go index ef73e862d..3c99c616a 100644 --- a/weed/util/bptree/bptree_test.go +++ b/weed/util/bptree/bptree_test.go @@ -378,7 +378,7 @@ func Test_get_end(t *testing.T) { } i, n = root.get_end(Int(3)) t.Log(n) - if n != root.pointers[1].next { + if n != root.pointers[1].getNext() { t.Error("wrong node from get_end") } if i != 1 { @@ -452,11 +452,11 @@ func Test_put_no_root_split(t *testing.T) { if !p.has(Int(1)) { t.Error("p didn't have the right keys", p) } - if p.next == nil { + if p.getNext() == nil { t.Error("p.next should not be nil") } t.Log(p) - t.Log(p.next) + t.Log(p.getNext()) } } @@ -839,34 +839,34 @@ func Test_pure_leaf_insert_split_less(t *testing.T) { if p == nil || len(p.keys) != 1 || !p.keys[0].Equals(Int(2)) { t.Errorf("p did not contain the right key") } - if p.prev != nil { + if p.getPrev() != nil { t.Errorf("expected p.prev == nil") } - if p.next != a { + if p.getNext() != a { t.Errorf("expected p.next == a") } - if a.prev != p { + if a.getPrev() != p { t.Errorf("expected a.prev == p") } - if a.next != b { + if a.getNext() != b { t.Errorf("expected a.next == b") } - if b.prev != a { + if b.getPrev() != a { t.Errorf("expected b.prev == a") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != d { + if c.getNext() != d { t.Errorf("expected c.next == d") } - if d.prev != c { + if d.getPrev() != c { t.Errorf("expected d.prev == c") } - if d.next != nil { + if d.getNext() != nil { t.Errorf("expected d.next == nil") } } @@ 
-912,34 +912,34 @@ func Test_pure_leaf_split_less(t *testing.T) { if p == nil || len(p.keys) != 1 || !p.keys[0].Equals(Int(2)) { t.Errorf("p did not contain the right key") } - if p.prev != nil { + if p.getPrev() != nil { t.Errorf("expected p.prev == nil") } - if p.next != a { + if p.getNext() != a { t.Errorf("expected p.next == a") } - if a.prev != p { + if a.getPrev() != p { t.Errorf("expected a.prev == p") } - if a.next != b { + if a.getNext() != b { t.Errorf("expected a.next == b") } - if b.prev != a { + if b.getPrev() != a { t.Errorf("expected b.prev == a") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != d { + if c.getNext() != d { t.Errorf("expected c.next == d") } - if d.prev != c { + if d.getPrev() != c { t.Errorf("expected d.prev == c") } - if d.next != nil { + if d.getNext() != nil { t.Errorf("expected d.next == nil") } } @@ -982,28 +982,28 @@ func Test_pure_leaf_split_equal(t *testing.T) { if q != nil { t.Errorf("q != nil") } - if a.prev != nil { + if a.getPrev() != nil { t.Errorf("expected a.prev == nil") } - if a.next != b { + if a.getNext() != b { t.Errorf("expected a.next == b") } - if b.prev != a { + if b.getPrev() != a { t.Errorf("expected b.prev == a") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != d { + if c.getNext() != d { t.Errorf("expected c.next == d") } - if d.prev != c { + if d.getPrev() != c { t.Errorf("expected d.prev == c") } - if d.next != nil { + if d.getNext() != nil { t.Errorf("expected d.next == nil") } } @@ -1046,34 +1046,34 @@ func Test_pure_leaf_split_greater(t *testing.T) { if q == nil || len(q.keys) != 1 || !q.keys[0].Equals(Int(4)) { t.Errorf("q != nil") } - if a.prev != nil { + if a.getPrev() != nil { t.Errorf("expected a.prev == nil") } - if a.next != b { + if a.getNext() != b 
{ t.Errorf("expected a.next == b") } - if b.prev != a { + if b.getPrev() != a { t.Errorf("expected b.prev == a") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != q { + if c.getNext() != q { t.Errorf("expected c.next == q") } - if q.prev != c { + if q.getPrev() != c { t.Errorf("expected q.prev == c") } - if q.next != d { + if q.getNext() != d { t.Errorf("expected q.next == d") } - if d.prev != q { + if d.getPrev() != q { t.Errorf("expected d.prev == q") } - if d.next != nil { + if d.getNext() != nil { t.Errorf("expected d.next == nil") } } @@ -1124,28 +1124,28 @@ func Test_insert_linked_list_node(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(4) insert_linked_list_node(d, a, b) - if a.prev != nil { + if a.getPrev() != nil { t.Errorf("expected a.prev == nil") } - if a.next != d { + if a.getNext() != d { t.Errorf("expected a.next == d") } - if d.prev != a { + if d.getPrev() != a { t.Errorf("expected d.prev == a") } - if d.next != b { + if d.getNext() != b { t.Errorf("expected d.next == b") } - if b.prev != d { + if b.getPrev() != d { t.Errorf("expected b.prev == d") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != nil { + if c.getNext() != nil { t.Errorf("expected c.next == nil") } } @@ -1159,67 +1159,67 @@ func Test_remove_linked_list_node(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(4) insert_linked_list_node(d, a, b) - if a.prev != nil { + if a.getPrev() != nil { t.Errorf("expected a.prev == nil") } - if a.next != d { + if a.getNext() != d { t.Errorf("expected a.next == d") } - if d.prev != a { + if d.getPrev() != a { t.Errorf("expected d.prev == a") } - if d.next != b { + if d.getNext() != b { t.Errorf("expected d.next == b") } - if b.prev != d { + if b.getPrev() != d { 
t.Errorf("expected b.prev == d") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != nil { + if c.getNext() != nil { t.Errorf("expected c.next == nil") } remove_linked_list_node(d) - if a.prev != nil { + if a.getPrev() != nil { t.Errorf("expected a.prev == nil") } - if a.next != b { + if a.getNext() != b { t.Errorf("expected a.next == b") } - if b.prev != a { + if b.getPrev() != a { t.Errorf("expected b.prev == a") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != nil { + if c.getNext() != nil { t.Errorf("expected c.next == nil") } remove_linked_list_node(a) - if b.prev != nil { + if b.getPrev() != nil { t.Errorf("expected b.prev == nil") } - if b.next != c { + if b.getNext() != c { t.Errorf("expected b.next == c") } - if c.prev != b { + if c.getPrev() != b { t.Errorf("expected c.prev == b") } - if c.next != nil { + if c.getNext() != nil { t.Errorf("expected c.next == nil") } remove_linked_list_node(c) - if b.prev != nil { + if b.getPrev() != nil { t.Errorf("expected b.prev == nil") } - if b.next != nil { + if b.getNext() != nil { t.Errorf("expected b.next == nil") } remove_linked_list_node(b) diff --git a/weed/util/bptree/getter_setter.go b/weed/util/bptree/getter_setter.go index cd5c8344b..da9d69b05 100644 --- a/weed/util/bptree/getter_setter.go +++ b/weed/util/bptree/getter_setter.go @@ -13,3 +13,16 @@ func (self *BpTree) getRoot() *BpNode { func (self *BpTree) setRoot(root *BpNode) { self.root = root } + +func (self *BpNode) getNext() *BpNode { + return self.next +} +func (self *BpNode) setNext(next *BpNode) { + self.next = next +} +func (self *BpNode) getPrev() *BpNode { + return self.prev +} +func (self *BpNode) setPrev(prev *BpNode) { + self.prev = prev +} From 5f6cc9a8145f79c0f679c8b3fda34c3c76a6d411 Mon Sep 17 
00:00:00 2001 From: Chris Lu Date: Sat, 21 Aug 2021 13:36:52 -0700 Subject: [PATCH 07/30] make proto node --- weed/util/bptree/Makefile | 6 ++ weed/util/bptree/bptree.pb.go | 186 ++++++++++++++++++++++++++++++++++ weed/util/bptree/bptree.proto | 14 +++ 3 files changed, 206 insertions(+) create mode 100644 weed/util/bptree/Makefile create mode 100644 weed/util/bptree/bptree.pb.go create mode 100644 weed/util/bptree/bptree.proto diff --git a/weed/util/bptree/Makefile b/weed/util/bptree/Makefile new file mode 100644 index 000000000..a98f39a08 --- /dev/null +++ b/weed/util/bptree/Makefile @@ -0,0 +1,6 @@ +all: gen + +.PHONY : gen + +gen: + protoc bptree.proto --go_out=plugins=grpc:. --go_opt=paths=source_relative diff --git a/weed/util/bptree/bptree.pb.go b/weed/util/bptree/bptree.pb.go new file mode 100644 index 000000000..e7d155a36 --- /dev/null +++ b/weed/util/bptree/bptree.pb.go @@ -0,0 +1,186 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.25.0 +// protoc v3.12.3 +// source: bptree.proto + +package bptree + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. 
+const _ = proto.ProtoPackageIsVersion4 + +type ProtoNode struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys [][]byte `protobuf:"bytes,1,rep,name=keys,proto3" json:"keys,omitempty"` + Values [][]byte `protobuf:"bytes,2,rep,name=values,proto3" json:"values,omitempty"` + Pointers []int64 `protobuf:"varint,3,rep,packed,name=pointers,proto3" json:"pointers,omitempty"` + Next int64 `protobuf:"varint,4,opt,name=next,proto3" json:"next,omitempty"` + Prev int64 `protobuf:"varint,5,opt,name=prev,proto3" json:"prev,omitempty"` +} + +func (x *ProtoNode) Reset() { + *x = ProtoNode{} + if protoimpl.UnsafeEnabled { + mi := &file_bptree_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ProtoNode) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ProtoNode) ProtoMessage() {} + +func (x *ProtoNode) ProtoReflect() protoreflect.Message { + mi := &file_bptree_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ProtoNode.ProtoReflect.Descriptor instead. 
+func (*ProtoNode) Descriptor() ([]byte, []int) { + return file_bptree_proto_rawDescGZIP(), []int{0} +} + +func (x *ProtoNode) GetKeys() [][]byte { + if x != nil { + return x.Keys + } + return nil +} + +func (x *ProtoNode) GetValues() [][]byte { + if x != nil { + return x.Values + } + return nil +} + +func (x *ProtoNode) GetPointers() []int64 { + if x != nil { + return x.Pointers + } + return nil +} + +func (x *ProtoNode) GetNext() int64 { + if x != nil { + return x.Next + } + return 0 +} + +func (x *ProtoNode) GetPrev() int64 { + if x != nil { + return x.Prev + } + return 0 +} + +var File_bptree_proto protoreflect.FileDescriptor + +var file_bptree_proto_rawDesc = []byte{ + 0x0a, 0x0c, 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x06, + 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x22, 0x7b, 0x0a, 0x09, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x4e, + 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0c, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, + 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, + 0x03, 0x52, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, + 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x6e, 0x65, 0x78, 0x74, 0x12, + 0x12, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x76, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x70, + 0x72, 0x65, 0x76, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, 0x73, 0x65, 0x61, 0x77, + 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, 0x74, 0x69, 0x6c, 0x2f, + 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_bptree_proto_rawDescOnce sync.Once + 
file_bptree_proto_rawDescData = file_bptree_proto_rawDesc +) + +func file_bptree_proto_rawDescGZIP() []byte { + file_bptree_proto_rawDescOnce.Do(func() { + file_bptree_proto_rawDescData = protoimpl.X.CompressGZIP(file_bptree_proto_rawDescData) + }) + return file_bptree_proto_rawDescData +} + +var file_bptree_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_bptree_proto_goTypes = []interface{}{ + (*ProtoNode)(nil), // 0: bptree.ProtoNode +} +var file_bptree_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_bptree_proto_init() } +func file_bptree_proto_init() { + if File_bptree_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_bptree_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ProtoNode); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_bptree_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_bptree_proto_goTypes, + DependencyIndexes: file_bptree_proto_depIdxs, + MessageInfos: file_bptree_proto_msgTypes, + }.Build() + File_bptree_proto = out.File + file_bptree_proto_rawDesc = nil + file_bptree_proto_goTypes = nil + file_bptree_proto_depIdxs = nil +} diff --git a/weed/util/bptree/bptree.proto b/weed/util/bptree/bptree.proto new file mode 100644 index 000000000..1d55096a2 --- /dev/null +++ b/weed/util/bptree/bptree.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +package bptree; + +option go_package = "github.com/chrislusf/seaweedfs/weed/util/bptree"; + 
+message ProtoNode { + repeated bytes keys = 1; + repeated bytes values = 2; + repeated int64 pointers = 3; + int64 next = 4; + int64 prev = 5; + int64 id = 6; +} From 849f185a20ea2216f2f780f872b6cd8bac854d2b Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 21 Aug 2021 15:00:44 -0700 Subject: [PATCH 08/30] add memory kv store --- weed/util/bptree/tree_store/memory_store.go | 29 ++++++++++++++++++++ weed/util/bptree/tree_store/tree_store.go.go | 6 ++++ 2 files changed, 35 insertions(+) create mode 100644 weed/util/bptree/tree_store/memory_store.go create mode 100644 weed/util/bptree/tree_store/tree_store.go.go diff --git a/weed/util/bptree/tree_store/memory_store.go b/weed/util/bptree/tree_store/memory_store.go new file mode 100644 index 000000000..467455664 --- /dev/null +++ b/weed/util/bptree/tree_store/memory_store.go @@ -0,0 +1,29 @@ +package tree_store + +import "errors" + +var ( + NotFound = errors.New("not found") +) + +type MemoryTreeStore struct { + m map[int64][]byte +} + +func NewMemoryTreeStore() *MemoryTreeStore{ + return &MemoryTreeStore{ + m: make(map[int64][]byte), + } +} + +func (m *MemoryTreeStore) Put(k int64, v []byte) error { + m.m[k] = v + return nil +} + +func (m *MemoryTreeStore) Get(k int64) ([]byte, error) { + if v, found := m.m[k]; found { + return v, nil + } + return nil, NotFound +} diff --git a/weed/util/bptree/tree_store/tree_store.go.go b/weed/util/bptree/tree_store/tree_store.go.go new file mode 100644 index 000000000..6a0af6ae6 --- /dev/null +++ b/weed/util/bptree/tree_store/tree_store.go.go @@ -0,0 +1,6 @@ +package tree_store + +type TreeStore interface { + Put(k int64, v []byte) error + Get(k int64) ([]byte, error) +} From 38c8470d1d8e3687c01b4eb6a9d9ebd9a988eb43 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 21 Aug 2021 15:13:13 -0700 Subject: [PATCH 09/30] add back non_dedup --- weed/util/bptree/bpmap.go | 4 +- weed/util/bptree/bptree.go | 18 ++++++- weed/util/bptree/bptree_node.go | 17 ++++-- 
weed/util/bptree/bptree_test.go | 95 ++++++++++++++++++--------------- 4 files changed, 83 insertions(+), 51 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index a68eae255..5dd95070e 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -11,7 +11,7 @@ type BpMap BpTree func NewBpMap(node_size int) *BpMap { return &BpMap{ - root: NewLeaf(node_size), + root: NewLeaf(node_size, true), } } @@ -47,7 +47,7 @@ func (self *BpMap) Remove(key Hashable) (value interface{}, err error) { return nil, err } if new_root == nil { - self.setRoot(NewLeaf(ns)) + self.setRoot(NewLeaf(ns, true)) } else { self.setRoot(new_root) } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 405e93ac7..12a4bfb0d 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -14,7 +14,7 @@ type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { return &BpTree{ - root: NewLeaf(node_size), + root: NewLeaf(node_size, false), } } @@ -26,6 +26,20 @@ func (self *BpTree) Has(key Hashable) bool { return l.keys[j].Equals(key) } +func (self *BpTree) Count(key Hashable) int { + if len(self.root.keys) == 0 { + return 0 + } + j, l := self.root.get_start(key) + count := 0 + end := false + for !end && l.keys[j].Equals(key) { + count++ + j, l, end = next_location(j, l) + } + return count +} + func (self *BpTree) Add(key Hashable, value interface{}) (err error) { new_root, err := self.getRoot().put(key, value) if err != nil { @@ -75,7 +89,7 @@ func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { return err } if new_root == nil { - self.setRoot(NewLeaf(ns)) + self.setRoot(NewLeaf(ns, false)) } else { self.setRoot(new_root) } diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 765a25cfa..3337292ba 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -6,6 +6,7 @@ type BpNode struct { pointers 
[]*BpNode next *BpNode prev *BpNode + no_dup bool } func NewInternal(size int) *BpNode { @@ -18,13 +19,14 @@ func NewInternal(size int) *BpNode { } } -func NewLeaf(size int) *BpNode { +func NewLeaf(size int, no_dup bool) *BpNode { if size < 0 { panic(NegativeSize()) } return &BpNode{ keys: make([]Hashable, 0, size), values: make([]interface{}, 0, size), + no_dup: no_dup, } } @@ -274,6 +276,13 @@ func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } + if self.no_dup { + i, has := self.find(key) + if has { + self.values[i] = value + return self, nil, nil + } + } if self.Full() { return self.leaf_split(key, value) } else { @@ -298,7 +307,7 @@ func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, e return self.pure_leaf_split(key, value) } a = self - b = NewLeaf(self.NodeSize()) + b = NewLeaf(self.NodeSize(), self.no_dup) insert_linked_list_node(b, a, a.getNext()) balance_nodes(a, b) if key.Less(b.keys[0]) { @@ -330,7 +339,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNo return nil, nil, BpTreeError("Expected a pure leaf node") } if key.Less(self.keys[0]) { - a = NewLeaf(self.NodeSize()) + a = NewLeaf(self.NodeSize(), self.no_dup) b = self if err := a.put_kv(key, value); err != nil { return nil, nil, err @@ -346,7 +355,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNo } return a, nil, nil } else { - b = NewLeaf(self.NodeSize()) + b = NewLeaf(self.NodeSize(), self.no_dup) if err := b.put_kv(key, value); err != nil { return nil, nil, err } diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go index 3c99c616a..d1df0749e 100644 --- a/weed/util/bptree/bptree_test.go +++ b/weed/util/bptree/bptree_test.go @@ -119,6 +119,12 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { if has := bpt.Has(randstr(10)); has { t.Error("Table has extra key") } + if 
count := bpt.Count(r.key); count != 1 { + t.Error(bpt, "Missing key") + } + if count := bpt.Count(randstr(10)); count != 0 { + t.Error("Table has extra key") + } for k, v, next := bpt.Find(r.key)(); next != nil; k, v, next = next() { if !k.Equals(r.key) { t.Error(bpt, "Find Failed Key Error") @@ -184,6 +190,9 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { if has := bpt.Has(r.key); !has { t.Error(bpt, "Missing key") } + if count := bpt.Count(r.key); count != 1 { + t.Error(bpt, "Missing key") + } if err := bpt.RemoveWhere(r.key, func(value interface{}) bool { return true }); err != nil { t.Fatal(bpt, err) } @@ -266,7 +275,7 @@ func TestBpMap(t *testing.T) { } func Test_get_start(t *testing.T) { - root := NewLeaf(2) + root := NewLeaf(2, false) root, err := root.put(Int(1), 1) if err != nil { t.Error(err) @@ -335,7 +344,7 @@ func Test_get_start(t *testing.T) { } func Test_get_end(t *testing.T) { - root := NewLeaf(3) + root := NewLeaf(3, false) root, err := root.put(Int(1), -1) if err != nil { t.Fatal(err) @@ -427,7 +436,7 @@ func Test_get_end(t *testing.T) { } func Test_put_no_root_split(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -461,7 +470,7 @@ func Test_put_no_root_split(t *testing.T) { } func Test_put_root_split(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) p, err := a.put(Int(1), 1) if err != nil { t.Error(err) @@ -511,7 +520,7 @@ func Test_put_root_split(t *testing.T) { func Test_internal_insert_no_split(t *testing.T) { a := NewInternal(3) - leaf := NewLeaf(1) + leaf := NewLeaf(1, false) if err := leaf.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -539,7 +548,7 @@ func Test_internal_insert_no_split(t *testing.T) { func Test_internal_insert_split_less(t *testing.T) { a := NewInternal(3) - leaf := NewLeaf(1) + leaf := NewLeaf(1, false) if err := leaf.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -649,7 +658,7 @@ func Test_internal_split_greater(t *testing.T) 
{ } func Test_leaf_insert_no_split(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -675,7 +684,7 @@ func Test_leaf_insert_no_split(t *testing.T) { // tests the defer to split logic func Test_leaf_insert_split_less(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -706,7 +715,7 @@ func Test_leaf_insert_split_less(t *testing.T) { } func Test_leaf_split_less(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -737,7 +746,7 @@ func Test_leaf_split_less(t *testing.T) { } func Test_leaf_split_equal(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -768,7 +777,7 @@ func Test_leaf_split_equal(t *testing.T) { } func Test_leaf_split_greater(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) @@ -800,13 +809,13 @@ func Test_leaf_split_greater(t *testing.T) { // tests the defer logic func Test_pure_leaf_insert_split_less(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -873,13 +882,13 @@ func Test_pure_leaf_insert_split_less(t *testing.T) { } func Test_pure_leaf_split_less(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) 
insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -946,13 +955,13 @@ func Test_pure_leaf_split_less(t *testing.T) { } func Test_pure_leaf_split_equal(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -1010,13 +1019,13 @@ func Test_pure_leaf_split_equal(t *testing.T) { } func Test_pure_leaf_split_greater(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -1080,13 +1089,13 @@ func Test_pure_leaf_split_greater(t *testing.T) { } func Test_find_end_of_pure_run(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), 1); err != nil { t.Error(err) @@ -1116,13 +1125,13 @@ func Test_find_end_of_pure_run(t *testing.T) { } func Test_insert_linked_list_node(t *testing.T) { - a := NewLeaf(1) + a := NewLeaf(1, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) insert_linked_list_node(b, 
a, nil) - c := NewLeaf(3) + c := NewLeaf(3, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(4) + d := NewLeaf(4, false) insert_linked_list_node(d, a, b) if a.getPrev() != nil { t.Errorf("expected a.prev == nil") @@ -1151,13 +1160,13 @@ func Test_insert_linked_list_node(t *testing.T) { } func Test_remove_linked_list_node(t *testing.T) { - a := NewLeaf(1) + a := NewLeaf(1, false) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, false) insert_linked_list_node(b, a, nil) - c := NewLeaf(3) + c := NewLeaf(3, false) insert_linked_list_node(c, b, nil) - d := NewLeaf(4) + d := NewLeaf(4, false) insert_linked_list_node(d, a, b) if a.getPrev() != nil { t.Errorf("expected a.prev == nil") @@ -1226,8 +1235,8 @@ func Test_remove_linked_list_node(t *testing.T) { } func Test_balance_leaf_nodes_with_dup(t *testing.T) { - a := NewLeaf(3) - b := NewLeaf(3) + a := NewLeaf(3, false) + b := NewLeaf(3, false) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) } @@ -1247,8 +1256,8 @@ func Test_balance_leaf_nodes_with_dup(t *testing.T) { } func Test_balance_leaf_nodes(t *testing.T) { - a := NewLeaf(7) - b := NewLeaf(7) + a := NewLeaf(7, false) + b := NewLeaf(7, false) if err := a.put_kv(Int(1), 1); err != nil { t.Error(err) } From b3e49d27583a0e5346f270ccaae87049b368b215 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 21 Aug 2021 15:52:17 -0700 Subject: [PATCH 10/30] change value type to ItemValue --- weed/util/bptree/bpmap.go | 8 +- weed/util/bptree/bptree.go | 10 +- weed/util/bptree/bptree.pb.go | 33 ++++-- weed/util/bptree/bptree_node.go | 39 ++++--- weed/util/bptree/bptree_test.go | 188 ++++++++++++++++---------------- weed/util/bptree/types.go | 18 +-- 6 files changed, 156 insertions(+), 140 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 5dd95070e..50bedb980 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -19,7 +19,7 @@ func (self *BpMap) Has(key Hashable) bool { return 
(*BpTree)(self).Has(key) } -func (self *BpMap) Put(key Hashable, value interface{}) (err error) { +func (self *BpMap) Put(key Hashable, value ItemValue) (err error) { new_root, err := self.getRoot().put(key, value) if err != nil { return err @@ -28,7 +28,7 @@ func (self *BpMap) Put(key Hashable, value interface{}) (err error) { return nil } -func (self *BpMap) Get(key Hashable) (value interface{}, err error) { +func (self *BpMap) Get(key Hashable) (value ItemValue, err error) { j, l := self.getRoot().get_start(key) if l.keys[j].Equals(key) { return l.values[j], nil @@ -36,13 +36,13 @@ func (self *BpMap) Get(key Hashable) (value interface{}, err error) { return nil, fmt.Errorf("key not found: %s", key) } -func (self *BpMap) Remove(key Hashable) (value interface{}, err error) { +func (self *BpMap) Remove(key Hashable) (value ItemValue, err error) { value, err = self.Get(key) if err != nil { return nil, err } ns := self.getRoot().NodeSize() - new_root, err := self.getRoot().remove(key, func(value interface{}) bool { return true }) + new_root, err := self.getRoot().remove(key, func(value ItemValue) bool { return true }) if err != nil { return nil, err } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 12a4bfb0d..06e3f514e 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -40,7 +40,7 @@ func (self *BpTree) Count(key Hashable) int { return count } -func (self *BpTree) Add(key Hashable, value interface{}) (err error) { +func (self *BpTree) Add(key Hashable, value ItemValue) (err error) { new_root, err := self.getRoot().put(key, value) if err != nil { return err @@ -49,7 +49,7 @@ func (self *BpTree) Add(key Hashable, value interface{}) (err error) { return nil } -func (self *BpTree) Replace(key Hashable, where WhereFunc, value interface{}) (err error) { +func (self *BpTree) Replace(key Hashable, where WhereFunc, value ItemValue) (err error) { li := self.getRoot().forward(key, key) for i, leaf, next := li(); next != nil; 
i, leaf, next = next() { if where(leaf.values[i]) { @@ -70,7 +70,7 @@ func (self *BpTree) Range(from, to Hashable) (kvi KVIterator) { } else { li = self.getRoot().backward(from, to) } - kvi = func() (key Hashable, value interface{}, next KVIterator) { + kvi = func() (key Hashable, value ItemValue, next KVIterator) { var i int var leaf *BpNode i, leaf, li = li() @@ -125,7 +125,7 @@ func (self *BpTree) Items() (vi KIterator) { func (self *BpTree) Iterate() (kvi KVIterator) { li := self.getRoot().all() - kvi = func() (key Hashable, value interface{}, next KVIterator) { + kvi = func() (key Hashable, value ItemValue, next KVIterator) { var i int var leaf *BpNode i, leaf, li = li() @@ -139,7 +139,7 @@ func (self *BpTree) Iterate() (kvi KVIterator) { func (self *BpTree) Backward() (kvi KVIterator) { li := self.getRoot().all_backward() - kvi = func() (key Hashable, value interface{}, next KVIterator) { + kvi = func() (key Hashable, value ItemValue, next KVIterator) { var i int var leaf *BpNode i, leaf, li = li() diff --git a/weed/util/bptree/bptree.pb.go b/weed/util/bptree/bptree.pb.go index e7d155a36..078a54717 100644 --- a/weed/util/bptree/bptree.pb.go +++ b/weed/util/bptree/bptree.pb.go @@ -35,6 +35,7 @@ type ProtoNode struct { Pointers []int64 `protobuf:"varint,3,rep,packed,name=pointers,proto3" json:"pointers,omitempty"` Next int64 `protobuf:"varint,4,opt,name=next,proto3" json:"next,omitempty"` Prev int64 `protobuf:"varint,5,opt,name=prev,proto3" json:"prev,omitempty"` + Id int64 `protobuf:"varint,6,opt,name=id,proto3" json:"id,omitempty"` } func (x *ProtoNode) Reset() { @@ -104,22 +105,30 @@ func (x *ProtoNode) GetPrev() int64 { return 0 } +func (x *ProtoNode) GetId() int64 { + if x != nil { + return x.Id + } + return 0 +} + var File_bptree_proto protoreflect.FileDescriptor var file_bptree_proto_rawDesc = []byte{ 0x0a, 0x0c, 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x06, - 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x22, 0x7b, 0x0a, 
0x09, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x4e, - 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0c, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, - 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, - 0x03, 0x52, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, - 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x6e, 0x65, 0x78, 0x74, 0x12, - 0x12, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x76, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x70, - 0x72, 0x65, 0x76, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, - 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, 0x73, 0x65, 0x61, 0x77, - 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, 0x74, 0x69, 0x6c, 0x2f, - 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x22, 0x8b, 0x01, 0x0a, 0x09, 0x50, 0x72, 0x6f, 0x74, 0x6f, + 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0c, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, + 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x03, + 0x28, 0x03, 0x52, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x6e, 0x65, 0x78, 0x74, + 0x12, 0x12, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x76, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, + 0x70, 0x72, 0x65, 0x76, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, + 0x52, 
0x02, 0x69, 0x64, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, 0x73, 0x65, 0x61, + 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, 0x74, 0x69, 0x6c, + 0x2f, 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 3337292ba..8fb7c8854 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -1,12 +1,15 @@ package bptree +type ItemValue Equatable + type BpNode struct { - keys []Hashable - values []interface{} - pointers []*BpNode - next *BpNode - prev *BpNode - no_dup bool + keys []Hashable + values []ItemValue + pointers []*BpNode + next *BpNode + prev *BpNode + no_dup bool + protoNode *ProtoNode } func NewInternal(size int) *BpNode { @@ -25,7 +28,7 @@ func NewLeaf(size int, no_dup bool) *BpNode { } return &BpNode{ keys: make([]Hashable, 0, size), - values: make([]interface{}, 0, size), + values: make([]ItemValue, 0, size), no_dup: no_dup, } } @@ -176,7 +179,7 @@ func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) { /* This puts the k/v pair into the B+Tree rooted at this node and returns the * (possibly) new root of the tree. */ -func (self *BpNode) put(key Hashable, value interface{}) (root *BpNode, err error) { +func (self *BpNode) put(key Hashable, value ItemValue) (root *BpNode, err error) { a, b, err := self.insert(key, value) if err != nil { return nil, err @@ -194,7 +197,7 @@ func (self *BpNode) put(key Hashable, value interface{}) (root *BpNode, err erro // left is always set. 
When split is false left is the pointer to block // When split is true left is the pointer to the new left // block -func (self *BpNode) insert(key Hashable, value interface{}) (a, b *BpNode, err error) { +func (self *BpNode) insert(key Hashable, value ItemValue) (a, b *BpNode, err error) { if self.Internal() { return self.internal_insert(key, value) } else { // leaf node @@ -208,7 +211,7 @@ func (self *BpNode) insert(key Hashable, value interface{}) (a, b *BpNode, err e * - if the block is full, split this block * - else insert the new key/pointer into this block */ -func (self *BpNode) internal_insert(key Hashable, value interface{}) (a, b *BpNode, err error) { +func (self *BpNode) internal_insert(key Hashable, value ItemValue) (a, b *BpNode, err error) { if !self.Internal() { return nil, nil, BpTreeError("Expected a internal node") } @@ -272,7 +275,7 @@ func (self *BpNode) internal_split(key Hashable, ptr *BpNode) (a, b *BpNode, err * a pure block with a matching key) * else this leaf will get a new entry. */ -func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, err error) { +func (self *BpNode) leaf_insert(key Hashable, value ItemValue) (a, b *BpNode, err error) { if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } @@ -299,7 +302,7 @@ func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, * - the two blocks will be balanced with balanced_nodes * - if the key is less than b.keys[0] it will go in a else b */ -func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) { +func (self *BpNode) leaf_split(key Hashable, value ItemValue) (a, b *BpNode, err error) { if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } @@ -334,7 +337,7 @@ func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, e * and putting the new key there. 
* - always return the current block as "a" and the new block as "b" */ -func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) { +func (self *BpNode) pure_leaf_split(key Hashable, value ItemValue) (a, b *BpNode, err error) { if self.Internal() || !self.Pure() { return nil, nil, BpTreeError("Expected a pure leaf node") } @@ -392,7 +395,7 @@ func (self *BpNode) put_kp(key Hashable, ptr *BpNode) error { return nil } -func (self *BpNode) put_kv(key Hashable, value interface{}) error { +func (self *BpNode) put_kv(key Hashable, value ItemValue) error { if self.Full() { return BpTreeError("Block is full.") } @@ -426,7 +429,7 @@ func (self *BpNode) put_key_at(i int, key Hashable) error { return nil } -func (self *BpNode) put_value_at(i int, value interface{}) error { +func (self *BpNode) put_value_at(i int, value ItemValue) error { if len(self.values) == cap(self.values) { return BpTreeError("Block is full.") } @@ -573,8 +576,8 @@ func (self *BpNode) remove_ptr_at(i int) error { } func (self *BpNode) find(key Hashable) (int, bool) { - var l int = 0 - var r int = len(self.keys) - 1 + var l = 0 + var r = len(self.keys) - 1 var m int for l <= r { m = ((r - l) >> 1) + l @@ -718,7 +721,7 @@ func balance_nodes(a, b *BpNode) { m-- } } - var lim int = len(a.keys) - m + var lim = len(a.keys) - m b.keys = b.keys[:lim] if cap(a.values) > 0 { if cap(a.values) != cap(a.keys) { diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go index d1df0749e..5fdf817e9 100644 --- a/weed/util/bptree/bptree_test.go +++ b/weed/util/bptree/bptree_test.go @@ -48,7 +48,7 @@ func (self Strings) Swap(i, j int) { type record struct { key String - value String + value ItemValue } type records []*record @@ -84,7 +84,7 @@ func BenchmarkBpTree(b *testing.B) { t.Add(r.key, r.value) } for _, r := range recs { - t.RemoveWhere(r.key, func(value interface{}) bool { return true }) + t.RemoveWhere(r.key, func(value ItemValue) bool { return true }) } } } 
@@ -133,7 +133,7 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { t.Error(bpt, "Find Failed Value Error") } } - err = bpt.Replace(r.key, func(value interface{}) bool { return true }, new_recs[i].value) + err = bpt.Replace(r.key, func(value ItemValue) bool { return true }, new_recs[i].value) if err != nil { t.Error(err) } @@ -193,7 +193,7 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { if count := bpt.Count(r.key); count != 1 { t.Error(bpt, "Missing key") } - if err := bpt.RemoveWhere(r.key, func(value interface{}) bool { return true }); err != nil { + if err := bpt.RemoveWhere(r.key, func(value ItemValue) bool { return true }); err != nil { t.Fatal(bpt, err) } if has := bpt.Has(r.key); has { @@ -276,15 +276,15 @@ func TestBpMap(t *testing.T) { func Test_get_start(t *testing.T) { root := NewLeaf(2, false) - root, err := root.put(Int(1), 1) + root, err := root.put(Int(1), Int(1)) if err != nil { t.Error(err) } - root, err = root.put(Int(5), 3) + root, err = root.put(Int(5), Int(3)) if err != nil { t.Error(err) } - root, err = root.put(Int(3), 2) + root, err = root.put(Int(3), Int(2)) if err != nil { t.Error(err) } @@ -345,31 +345,31 @@ func Test_get_start(t *testing.T) { func Test_get_end(t *testing.T) { root := NewLeaf(3, false) - root, err := root.put(Int(1), -1) + root, err := root.put(Int(1), Int(-1)) if err != nil { t.Fatal(err) } - root, err = root.put(Int(4), -1) + root, err = root.put(Int(4), Int(-1)) if err != nil { t.Fatal(err) } - root, err = root.put(Int(3), 1) + root, err = root.put(Int(3), Int(1)) if err != nil { t.Fatal(err) } - root, err = root.put(Int(3), 2) + root, err = root.put(Int(3), Int(2)) if err != nil { t.Fatal(err) } - root, err = root.put(Int(3), 3) + root, err = root.put(Int(3), Int(3)) if err != nil { t.Fatal(err) } - root, err = root.put(Int(3), 4) + root, err = root.put(Int(3), Int(4)) if err != nil { t.Fatal(err) } - root, err = root.put(Int(3), 5) + root, err = root.put(Int(3), Int(5)) if err != nil { t.Fatal(err) } 
@@ -437,10 +437,10 @@ func Test_get_end(t *testing.T) { func Test_put_no_root_split(t *testing.T) { a := NewLeaf(2, false) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - p, err := a.put(Int(1), 2) + p, err := a.put(Int(1), Int(2)) if err != nil { t.Error(err) } else { @@ -451,7 +451,7 @@ func Test_put_no_root_split(t *testing.T) { t.Error("p didn't have the right keys", p) } } - p, err = a.put(Int(1), 3) + p, err = a.put(Int(1), Int(3)) if err != nil { t.Error(err) } else { @@ -471,7 +471,7 @@ func Test_put_no_root_split(t *testing.T) { func Test_put_root_split(t *testing.T) { a := NewLeaf(2, false) - p, err := a.put(Int(1), 1) + p, err := a.put(Int(1), Int(1)) if err != nil { t.Error(err) } else { @@ -482,7 +482,7 @@ func Test_put_root_split(t *testing.T) { t.Error("p didn't have the right keys", p) } } - p, err = a.put(Int(3), 3) + p, err = a.put(Int(3), Int(3)) if err != nil { t.Error(err) } else { @@ -493,7 +493,7 @@ func Test_put_root_split(t *testing.T) { t.Error("p didn't have the right keys", p) } } - p, err = a.put(Int(2), 2) + p, err = a.put(Int(2), Int(2)) if err != nil { t.Error(err) } else { @@ -521,7 +521,7 @@ func Test_put_root_split(t *testing.T) { func Test_internal_insert_no_split(t *testing.T) { a := NewInternal(3) leaf := NewLeaf(1, false) - if err := leaf.put_kv(Int(1), 1); err != nil { + if err := leaf.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } if err := a.put_kp(Int(1), leaf); err != nil { @@ -549,7 +549,7 @@ func Test_internal_insert_no_split(t *testing.T) { func Test_internal_insert_split_less(t *testing.T) { a := NewInternal(3) leaf := NewLeaf(1, false) - if err := leaf.put_kv(Int(1), 1); err != nil { + if err := leaf.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } if err := a.put_kp(Int(1), leaf); err != nil { @@ -660,13 +660,13 @@ func Test_internal_split_greater(t *testing.T) { func Test_leaf_insert_no_split(t *testing.T) { a := NewLeaf(3, false) 
insert_linked_list_node(a, nil, nil) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 3); err != nil { + if err := a.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - p, q, err := a.leaf_insert(Int(2), 2) + p, q, err := a.leaf_insert(Int(2), Int(2)) if err != nil { t.Error(err) } else { @@ -686,16 +686,16 @@ func Test_leaf_insert_no_split(t *testing.T) { func Test_leaf_insert_split_less(t *testing.T) { a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 3); err != nil { + if err := a.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := a.put_kv(Int(5), 5); err != nil { + if err := a.put_kv(Int(5), Int(5)); err != nil { t.Error(err) } - p, q, err := a.leaf_insert(Int(2), 2) + p, q, err := a.leaf_insert(Int(2), Int(2)) if err != nil { t.Error(err) } else { @@ -717,16 +717,16 @@ func Test_leaf_insert_split_less(t *testing.T) { func Test_leaf_split_less(t *testing.T) { a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 3); err != nil { + if err := a.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := a.put_kv(Int(5), 5); err != nil { + if err := a.put_kv(Int(5), Int(5)); err != nil { t.Error(err) } - p, q, err := a.leaf_split(Int(2), 2) + p, q, err := a.leaf_split(Int(2), Int(2)) if err != nil { t.Error(err) } else { @@ -748,16 +748,16 @@ func Test_leaf_split_less(t *testing.T) { func Test_leaf_split_equal(t *testing.T) { a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 3); err != nil { + if err := 
a.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := a.put_kv(Int(5), 5); err != nil { + if err := a.put_kv(Int(5), Int(5)); err != nil { t.Error(err) } - p, q, err := a.leaf_split(Int(3), 2) + p, q, err := a.leaf_split(Int(3), Int(2)) if err != nil { t.Error(err) } else { @@ -779,16 +779,16 @@ func Test_leaf_split_equal(t *testing.T) { func Test_leaf_split_greater(t *testing.T) { a := NewLeaf(3, false) insert_linked_list_node(a, nil, nil) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 3); err != nil { + if err := a.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := a.put_kv(Int(5), 5); err != nil { + if err := a.put_kv(Int(5), Int(5)); err != nil { t.Error(err) } - p, q, err := a.leaf_split(Int(4), 2) + p, q, err := a.leaf_split(Int(4), Int(2)) if err != nil { t.Error(err) } else { @@ -817,28 +817,28 @@ func Test_pure_leaf_insert_split_less(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) - if err := a.put_kv(Int(3), 1); err != nil { + if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 2); err != nil { + if err := a.put_kv(Int(3), Int(2)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 3); err != nil { + if err := b.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 4); err != nil { + if err := b.put_kv(Int(3), Int(4)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 5); err != nil { + if err := c.put_kv(Int(3), Int(5)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 6); err != nil { + if err := c.put_kv(Int(3), Int(6)); err != nil { t.Error(err) } - if err := d.put_kv(Int(4), 6); err != nil { + if err := d.put_kv(Int(4), Int(6)); err != nil { t.Error(err) } - p, q, err := a.leaf_insert(Int(2), 1) + p, q, err := a.leaf_insert(Int(2), Int(1)) if err != nil { t.Error(err) } else { @@ 
-890,28 +890,28 @@ func Test_pure_leaf_split_less(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) - if err := a.put_kv(Int(3), 1); err != nil { + if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 2); err != nil { + if err := a.put_kv(Int(3), Int(2)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 3); err != nil { + if err := b.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 4); err != nil { + if err := b.put_kv(Int(3), Int(4)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 5); err != nil { + if err := c.put_kv(Int(3), Int(5)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 6); err != nil { + if err := c.put_kv(Int(3), Int(6)); err != nil { t.Error(err) } - if err := d.put_kv(Int(4), 6); err != nil { + if err := d.put_kv(Int(4), Int(6)); err != nil { t.Error(err) } - p, q, err := a.pure_leaf_split(Int(2), 1) + p, q, err := a.pure_leaf_split(Int(2), Int(1)) if err != nil { t.Error(err) } else { @@ -963,25 +963,25 @@ func Test_pure_leaf_split_equal(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) - if err := a.put_kv(Int(3), 1); err != nil { + if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 2); err != nil { + if err := a.put_kv(Int(3), Int(2)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 3); err != nil { + if err := b.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 4); err != nil { + if err := b.put_kv(Int(3), Int(4)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 5); err != nil { + if err := c.put_kv(Int(3), Int(5)); err != nil { t.Error(err) } - if err := d.put_kv(Int(4), 6); err != nil { + if err := d.put_kv(Int(4), Int(6)); err != nil { t.Error(err) } - p, q, err := a.pure_leaf_split(Int(3), 1) + p, q, err := a.pure_leaf_split(Int(3), 
Int(1)) if err != nil { t.Error(err) } else { @@ -1027,25 +1027,25 @@ func Test_pure_leaf_split_greater(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) - if err := a.put_kv(Int(3), 1); err != nil { + if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 2); err != nil { + if err := a.put_kv(Int(3), Int(2)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 3); err != nil { + if err := b.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 4); err != nil { + if err := b.put_kv(Int(3), Int(4)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 5); err != nil { + if err := c.put_kv(Int(3), Int(5)); err != nil { t.Error(err) } - if err := d.put_kv(Int(5), 6); err != nil { + if err := d.put_kv(Int(5), Int(6)); err != nil { t.Error(err) } - p, q, err := a.pure_leaf_split(Int(4), 1) + p, q, err := a.pure_leaf_split(Int(4), Int(1)) if err != nil { t.Error(err) } else { @@ -1097,25 +1097,25 @@ func Test_find_end_of_pure_run(t *testing.T) { insert_linked_list_node(c, b, nil) d := NewLeaf(2, false) insert_linked_list_node(d, c, nil) - if err := a.put_kv(Int(3), 1); err != nil { + if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 2); err != nil { + if err := a.put_kv(Int(3), Int(2)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 3); err != nil { + if err := b.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := b.put_kv(Int(3), 4); err != nil { + if err := b.put_kv(Int(3), Int(4)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 5); err != nil { + if err := c.put_kv(Int(3), Int(5)); err != nil { t.Error(err) } - if err := c.put_kv(Int(3), 6); err != nil { + if err := c.put_kv(Int(3), Int(6)); err != nil { t.Error(err) } - if err := d.put_kv(Int(4), 6); err != nil { + if err := d.put_kv(Int(4), Int(6)); err != nil { t.Error(err) } e := a.find_end_of_pure_run() @@ 
-1237,13 +1237,13 @@ func Test_remove_linked_list_node(t *testing.T) { func Test_balance_leaf_nodes_with_dup(t *testing.T) { a := NewLeaf(3, false) b := NewLeaf(3, false) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(2), 1); err != nil { + if err := a.put_kv(Int(2), Int(1)); err != nil { t.Error(err) } balance_nodes(a, b) @@ -1258,25 +1258,25 @@ func Test_balance_leaf_nodes_with_dup(t *testing.T) { func Test_balance_leaf_nodes(t *testing.T) { a := NewLeaf(7, false) b := NewLeaf(7, false) - if err := a.put_kv(Int(1), 1); err != nil { + if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } - if err := a.put_kv(Int(2), 2); err != nil { + if err := a.put_kv(Int(2), Int(2)); err != nil { t.Error(err) } - if err := a.put_kv(Int(3), 3); err != nil { + if err := a.put_kv(Int(3), Int(3)); err != nil { t.Error(err) } - if err := a.put_kv(Int(4), 4); err != nil { + if err := a.put_kv(Int(4), Int(4)); err != nil { t.Error(err) } - if err := a.put_kv(Int(5), 5); err != nil { + if err := a.put_kv(Int(5), Int(5)); err != nil { t.Error(err) } - if err := a.put_kv(Int(6), 6); err != nil { + if err := a.put_kv(Int(6), Int(6)); err != nil { t.Error(err) } - if err := a.put_kv(Int(7), 7); err != nil { + if err := a.put_kv(Int(7), Int(7)); err != nil { t.Error(err) } balance_nodes(a, b) @@ -1291,12 +1291,12 @@ func Test_balance_leaf_nodes(t *testing.T) { } } for i, v := range a.values { - if v.(int) != i+1 { + if int(v.(Int)) != i+1 { t.Errorf("k != %d", i+1) } } for i, v := range b.values { - if v.(int) != 3+i+1 { + if int(v.(Int)) != 3+i+1 { t.Errorf("v != %d", 3+i+1) } } @@ -1413,10 +1413,14 @@ func read(p []byte, int63 func() int64, readVal *int64, readPos *int8) (n int, e type T testing.T -func (t *T) Assert(ok bool, msg string, vars ...interface{}) { +func (t *T) 
Assert(ok bool, msg string, vars ...ItemValue) { if !ok { t.Log("\n" + string(debug.Stack())) - t.Fatalf(msg, vars...) + var objects []interface{} + for _, t := range vars { + objects = append(objects, t) + } + t.Fatalf(msg, objects...) } } diff --git a/weed/util/bptree/types.go b/weed/util/bptree/types.go index 45084efdd..f828f7065 100644 --- a/weed/util/bptree/types.go +++ b/weed/util/bptree/types.go @@ -25,26 +25,26 @@ func NegativeSize() error { return errors.New("negative size") } -type Iterator func() (item interface{}, next Iterator) +type Iterator func() (item ItemValue, next Iterator) type KIterator func() (key Hashable, next KIterator) -type KVIterator func() (key Hashable, value interface{}, next KVIterator) +type KVIterator func() (key Hashable, value ItemValue, next KVIterator) type KVIterable interface { Iterate() KVIterator } type MapOperable interface { Has(key Hashable) bool - Put(key Hashable, value interface{}) (err error) - Get(key Hashable) (value interface{}, err error) - Remove(key Hashable) (value interface{}, err error) + Put(key Hashable, value ItemValue) (err error) + Get(key Hashable) (value ItemValue, err error) + Remove(key Hashable) (value ItemValue, err error) } -type WhereFunc func(value interface{}) bool +type WhereFunc func(value ItemValue) bool func MakeValuesIterator(obj KVIterable) Iterator { kv_iterator := obj.Iterate() var v_iterator Iterator - v_iterator = func() (value interface{}, next Iterator) { + v_iterator = func() (value ItemValue, next Iterator) { _, value, kv_iterator = kv_iterator() if kv_iterator == nil { return nil, nil @@ -58,7 +58,7 @@ func MakeItemsIterator(obj KVIterable) (kit KIterator) { kv_iterator := obj.Iterate() kit = func() (item Hashable, next KIterator) { var key Hashable - var value interface{} + var value ItemValue key, value, kv_iterator = kv_iterator() if kv_iterator == nil { return nil, nil @@ -70,7 +70,7 @@ func MakeItemsIterator(obj KVIterable) (kit KIterator) { type MapEntry struct { Key 
Hashable - Value interface{} + Value ItemValue } func (m *MapEntry) Equals(other Equatable) bool { From 51c8f2518f8515b51553854faf1c926f032387a1 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 21 Aug 2021 15:54:42 -0700 Subject: [PATCH 11/30] change key type to ItemKey --- weed/util/bptree/bpmap.go | 8 +++--- weed/util/bptree/bptree.go | 22 +++++++------- weed/util/bptree/bptree_node.go | 51 +++++++++++++++++---------------- weed/util/bptree/types.go | 18 ++++++------ 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 50bedb980..e7509b179 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -15,11 +15,11 @@ func NewBpMap(node_size int) *BpMap { } } -func (self *BpMap) Has(key Hashable) bool { +func (self *BpMap) Has(key ItemKey) bool { return (*BpTree)(self).Has(key) } -func (self *BpMap) Put(key Hashable, value ItemValue) (err error) { +func (self *BpMap) Put(key ItemKey, value ItemValue) (err error) { new_root, err := self.getRoot().put(key, value) if err != nil { return err @@ -28,7 +28,7 @@ func (self *BpMap) Put(key Hashable, value ItemValue) (err error) { return nil } -func (self *BpMap) Get(key Hashable) (value ItemValue, err error) { +func (self *BpMap) Get(key ItemKey) (value ItemValue, err error) { j, l := self.getRoot().get_start(key) if l.keys[j].Equals(key) { return l.values[j], nil @@ -36,7 +36,7 @@ func (self *BpMap) Get(key Hashable) (value ItemValue, err error) { return nil, fmt.Errorf("key not found: %s", key) } -func (self *BpMap) Remove(key Hashable) (value ItemValue, err error) { +func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { value, err = self.Get(key) if err != nil { return nil, err diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 06e3f514e..2d09026c3 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -18,7 +18,7 @@ func NewBpTree(node_size int) *BpTree { } } -func (self 
*BpTree) Has(key Hashable) bool { +func (self *BpTree) Has(key ItemKey) bool { if len(self.getRoot().keys) == 0 { return false } @@ -26,7 +26,7 @@ func (self *BpTree) Has(key Hashable) bool { return l.keys[j].Equals(key) } -func (self *BpTree) Count(key Hashable) int { +func (self *BpTree) Count(key ItemKey) int { if len(self.root.keys) == 0 { return 0 } @@ -40,7 +40,7 @@ func (self *BpTree) Count(key Hashable) int { return count } -func (self *BpTree) Add(key Hashable, value ItemValue) (err error) { +func (self *BpTree) Add(key ItemKey, value ItemValue) (err error) { new_root, err := self.getRoot().put(key, value) if err != nil { return err @@ -49,7 +49,7 @@ func (self *BpTree) Add(key Hashable, value ItemValue) (err error) { return nil } -func (self *BpTree) Replace(key Hashable, where WhereFunc, value ItemValue) (err error) { +func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err error) { li := self.getRoot().forward(key, key) for i, leaf, next := li(); next != nil; i, leaf, next = next() { if where(leaf.values[i]) { @@ -59,18 +59,18 @@ func (self *BpTree) Replace(key Hashable, where WhereFunc, value ItemValue) (err return nil } -func (self *BpTree) Find(key Hashable) (kvi KVIterator) { +func (self *BpTree) Find(key ItemKey) (kvi KVIterator) { return self.Range(key, key) } -func (self *BpTree) Range(from, to Hashable) (kvi KVIterator) { +func (self *BpTree) Range(from, to ItemKey) (kvi KVIterator) { var li loc_iterator if !to.Less(from) { li = self.getRoot().forward(from, to) } else { li = self.getRoot().backward(from, to) } - kvi = func() (key Hashable, value ItemValue, next KVIterator) { + kvi = func() (key ItemKey, value ItemValue, next KVIterator) { var i int var leaf *BpNode i, leaf, li = li() @@ -82,7 +82,7 @@ func (self *BpTree) Range(from, to Hashable) (kvi KVIterator) { return kvi } -func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { +func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err 
error) { ns := self.getRoot().NodeSize() new_root, err := self.getRoot().remove(key, where) if err != nil { @@ -99,7 +99,7 @@ func (self *BpTree) RemoveWhere(key Hashable, where WhereFunc) (err error) { func (self *BpTree) Keys() (ki KIterator) { li := self.getRoot().all() var prev Equatable - ki = func() (key Hashable, next KIterator) { + ki = func() (key ItemKey, next KIterator) { var i int var leaf *BpNode i, leaf, li = li() @@ -125,7 +125,7 @@ func (self *BpTree) Items() (vi KIterator) { func (self *BpTree) Iterate() (kvi KVIterator) { li := self.getRoot().all() - kvi = func() (key Hashable, value ItemValue, next KVIterator) { + kvi = func() (key ItemKey, value ItemValue, next KVIterator) { var i int var leaf *BpNode i, leaf, li = li() @@ -139,7 +139,7 @@ func (self *BpTree) Iterate() (kvi KVIterator) { func (self *BpTree) Backward() (kvi KVIterator) { li := self.getRoot().all_backward() - kvi = func() (key Hashable, value ItemValue, next KVIterator) { + kvi = func() (key ItemKey, value ItemValue, next KVIterator) { var i int var leaf *BpNode i, leaf, li = li() diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 8fb7c8854..160dfad74 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -1,9 +1,10 @@ package bptree +type ItemKey Hashable type ItemValue Equatable type BpNode struct { - keys []Hashable + keys []ItemKey values []ItemValue pointers []*BpNode next *BpNode @@ -17,7 +18,7 @@ func NewInternal(size int) *BpNode { panic(NegativeSize()) } return &BpNode{ - keys: make([]Hashable, 0, size), + keys: make([]ItemKey, 0, size), pointers: make([]*BpNode, 0, size), } } @@ -27,7 +28,7 @@ func NewLeaf(size int, no_dup bool) *BpNode { panic(NegativeSize()) } return &BpNode{ - keys: make([]Hashable, 0, size), + keys: make([]ItemKey, 0, size), values: make([]ItemValue, 0, size), no_dup: no_dup, } @@ -67,7 +68,7 @@ func (self *BpNode) Height() int { return self.pointers[0].Height() + 1 } -func (self 
*BpNode) count(key Hashable) int { +func (self *BpNode) count(key ItemKey) int { i, _ := self.find(key) count := 0 for ; i < len(self.keys); i++ { @@ -80,7 +81,7 @@ func (self *BpNode) count(key Hashable) int { return count } -func (self *BpNode) has(key Hashable) bool { +func (self *BpNode) has(key ItemKey) bool { _, has := self.find(key) return has } @@ -103,7 +104,7 @@ func (self *BpNode) right_most_leaf() *BpNode { * the search key. (unless the search key is greater than all the keys in the * tree, in that case it will be the last key in the tree) */ -func (self *BpNode) get_start(key Hashable) (i int, leaf *BpNode) { +func (self *BpNode) get_start(key ItemKey) (i int, leaf *BpNode) { if self.Internal() { return self.internal_get_start(key) } else { @@ -140,7 +141,7 @@ func prev_location(i int, leaf *BpNode) (int, *BpNode, bool) { * than all the keys in the tree, in that case it will be the last key in the * tree) */ -func (self *BpNode) get_end(key Hashable) (i int, leaf *BpNode) { +func (self *BpNode) get_end(key ItemKey) (i int, leaf *BpNode) { end := false i, leaf = self.get_start(key) pi, pleaf := i, leaf @@ -151,7 +152,7 @@ func (self *BpNode) get_end(key Hashable) (i int, leaf *BpNode) { return pi, pleaf } -func (self *BpNode) internal_get_start(key Hashable) (i int, leaf *BpNode) { +func (self *BpNode) internal_get_start(key ItemKey) (i int, leaf *BpNode) { if !self.Internal() { panic(BpTreeError("Expected a internal node")) } @@ -165,7 +166,7 @@ func (self *BpNode) internal_get_start(key Hashable) (i int, leaf *BpNode) { return child.get_start(key) } -func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) { +func (self *BpNode) leaf_get_start(key ItemKey) (i int, leaf *BpNode) { i, has := self.find(key) if i >= len(self.keys) && i > 0 { i = len(self.keys) - 1 @@ -179,7 +180,7 @@ func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) { /* This puts the k/v pair into the B+Tree rooted at this node and returns the * 
(possibly) new root of the tree. */ -func (self *BpNode) put(key Hashable, value ItemValue) (root *BpNode, err error) { +func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error) { a, b, err := self.insert(key, value) if err != nil { return nil, err @@ -197,7 +198,7 @@ func (self *BpNode) put(key Hashable, value ItemValue) (root *BpNode, err error) // left is always set. When split is false left is the pointer to block // When split is true left is the pointer to the new left // block -func (self *BpNode) insert(key Hashable, value ItemValue) (a, b *BpNode, err error) { +func (self *BpNode) insert(key ItemKey, value ItemValue) (a, b *BpNode, err error) { if self.Internal() { return self.internal_insert(key, value) } else { // leaf node @@ -211,7 +212,7 @@ func (self *BpNode) insert(key Hashable, value ItemValue) (a, b *BpNode, err err * - if the block is full, split this block * - else insert the new key/pointer into this block */ -func (self *BpNode) internal_insert(key Hashable, value ItemValue) (a, b *BpNode, err error) { +func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode, err error) { if !self.Internal() { return nil, nil, BpTreeError("Expected a internal node") } @@ -248,7 +249,7 @@ func (self *BpNode) internal_insert(key Hashable, value ItemValue) (a, b *BpNode * - balance the two blocks. * - insert the new key/pointer combo into the correct block */ -func (self *BpNode) internal_split(key Hashable, ptr *BpNode) (a, b *BpNode, err error) { +func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err error) { if !self.Internal() { return nil, nil, BpTreeError("Expected a internal node") } @@ -275,7 +276,7 @@ func (self *BpNode) internal_split(key Hashable, ptr *BpNode) (a, b *BpNode, err * a pure block with a matching key) * else this leaf will get a new entry. 
*/ -func (self *BpNode) leaf_insert(key Hashable, value ItemValue) (a, b *BpNode, err error) { +func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err error) { if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } @@ -302,7 +303,7 @@ func (self *BpNode) leaf_insert(key Hashable, value ItemValue) (a, b *BpNode, er * - the two blocks will be balanced with balanced_nodes * - if the key is less than b.keys[0] it will go in a else b */ -func (self *BpNode) leaf_split(key Hashable, value ItemValue) (a, b *BpNode, err error) { +func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err error) { if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } @@ -337,7 +338,7 @@ func (self *BpNode) leaf_split(key Hashable, value ItemValue) (a, b *BpNode, err * and putting the new key there. * - always return the current block as "a" and the new block as "b" */ -func (self *BpNode) pure_leaf_split(key Hashable, value ItemValue) (a, b *BpNode, err error) { +func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err error) { if self.Internal() || !self.Pure() { return nil, nil, BpTreeError("Expected a pure leaf node") } @@ -371,7 +372,7 @@ func (self *BpNode) pure_leaf_split(key Hashable, value ItemValue) (a, b *BpNode } } -func (self *BpNode) put_kp(key Hashable, ptr *BpNode) error { +func (self *BpNode) put_kp(key ItemKey, ptr *BpNode) error { if self.Full() { return BpTreeError("Block is full.") } @@ -395,7 +396,7 @@ func (self *BpNode) put_kp(key Hashable, ptr *BpNode) error { return nil } -func (self *BpNode) put_kv(key Hashable, value ItemValue) error { +func (self *BpNode) put_kv(key ItemKey, value ItemValue) error { if self.Full() { return BpTreeError("Block is full.") } @@ -417,7 +418,7 @@ func (self *BpNode) put_kv(key Hashable, value ItemValue) error { return nil } -func (self *BpNode) put_key_at(i int, key Hashable) error { +func (self *BpNode) put_key_at(i 
int, key ItemKey) error { if self.Full() { return BpTreeError("Block is full.") } @@ -459,7 +460,7 @@ func (self *BpNode) put_pointer_at(i int, pointer *BpNode) error { return nil } -func (self *BpNode) remove(key Hashable, where WhereFunc) (a *BpNode, err error) { +func (self *BpNode) remove(key ItemKey, where WhereFunc) (a *BpNode, err error) { if self.Internal() { return self.internal_remove(key, nil, where) } else { @@ -467,7 +468,7 @@ func (self *BpNode) remove(key Hashable, where WhereFunc) (a *BpNode, err error) } } -func (self *BpNode) internal_remove(key Hashable, sibling *BpNode, where WhereFunc) (a *BpNode, err error) { +func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFunc) (a *BpNode, err error) { if !self.Internal() { panic(BpTreeError("Expected a internal node")) } @@ -512,7 +513,7 @@ func (self *BpNode) internal_remove(key Hashable, sibling *BpNode, where WhereFu return self, nil } -func (self *BpNode) leaf_remove(key, stop Hashable, where WhereFunc) (a *BpNode, err error) { +func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) { if self.Internal() { return nil, BpTreeError("Expected a leaf node") } @@ -575,7 +576,7 @@ func (self *BpNode) remove_ptr_at(i int) error { return nil } -func (self *BpNode) find(key Hashable) (int, bool) { +func (self *BpNode) find(key ItemKey) (int, bool) { var l = 0 var r = len(self.keys) - 1 var m int @@ -641,7 +642,7 @@ func (self *BpNode) all_backward() (li loc_iterator) { return li } -func (self *BpNode) forward(from, to Hashable) (li loc_iterator) { +func (self *BpNode) forward(from, to ItemKey) (li loc_iterator) { j, l := self.get_start(from) end := false j-- @@ -655,7 +656,7 @@ func (self *BpNode) forward(from, to Hashable) (li loc_iterator) { return li } -func (self *BpNode) backward(from, to Hashable) (li loc_iterator) { +func (self *BpNode) backward(from, to ItemKey) (li loc_iterator) { j, l := self.get_end(from) end := false li = func() (i int, 
leaf *BpNode, next loc_iterator) { diff --git a/weed/util/bptree/types.go b/weed/util/bptree/types.go index f828f7065..f987e0419 100644 --- a/weed/util/bptree/types.go +++ b/weed/util/bptree/types.go @@ -26,17 +26,17 @@ func NegativeSize() error { } type Iterator func() (item ItemValue, next Iterator) -type KIterator func() (key Hashable, next KIterator) -type KVIterator func() (key Hashable, value ItemValue, next KVIterator) +type KIterator func() (key ItemKey, next KIterator) +type KVIterator func() (key ItemKey, value ItemValue, next KVIterator) type KVIterable interface { Iterate() KVIterator } type MapOperable interface { - Has(key Hashable) bool - Put(key Hashable, value ItemValue) (err error) - Get(key Hashable) (value ItemValue, err error) - Remove(key Hashable) (value ItemValue, err error) + Has(key ItemKey) bool + Put(key ItemKey, value ItemValue) (err error) + Get(key ItemKey) (value ItemValue, err error) + Remove(key ItemKey) (value ItemValue, err error) } type WhereFunc func(value ItemValue) bool @@ -56,8 +56,8 @@ func MakeValuesIterator(obj KVIterable) Iterator { func MakeItemsIterator(obj KVIterable) (kit KIterator) { kv_iterator := obj.Iterate() - kit = func() (item Hashable, next KIterator) { - var key Hashable + kit = func() (item ItemKey, next KIterator) { + var key ItemKey var value ItemValue key, value, kv_iterator = kv_iterator() if kv_iterator == nil { @@ -69,7 +69,7 @@ func MakeItemsIterator(obj KVIterable) (kit KIterator) { } type MapEntry struct { - Key Hashable + Key ItemKey Value ItemValue } From df1d6133a82680d3b58c922ad02a14fc7ee017ba Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 22 Aug 2021 18:19:26 -0700 Subject: [PATCH 12/30] bptree does not work well for auto-increasing keys --- weed/util/bptree/README.md | 60 ++++++++++++++++++++++++ weed/util/bptree/bpmap.go | 4 +- weed/util/bptree/bptree.go | 12 +++-- weed/util/bptree/bptree_node.go | 67 +++++++++++++++++---------- weed/util/bptree/bptree_store_test.go | 34 ++++++++++++++ 
weed/util/bptree/getter_setter.go | 44 ++++++++++++++++++ 6 files changed, 193 insertions(+), 28 deletions(-) create mode 100644 weed/util/bptree/README.md create mode 100644 weed/util/bptree/bptree_store_test.go diff --git a/weed/util/bptree/README.md b/weed/util/bptree/README.md new file mode 100644 index 000000000..1dddae940 --- /dev/null +++ b/weed/util/bptree/README.md @@ -0,0 +1,60 @@ +This adapts one b+ tree implementation +https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree +to persist changes to disk. + +# When does a node need to persist itself? + +* A node changed its key or value + * When an item is added. + * When an item is updated. + * When an item is deleted. + +* When a node is split. + * 2 new nodes are created (they should persist themselves). + * The parent node needs to point to the new nodes. + +* When a node is merged. + * delete one node + * persist the merged node + + +In general, if one node is returned from a function, the node should have already been persisted. +The parent node may need to delete the old node. 
+ +BpTree + Add(key ItemKey, value ItemValue) + new_root = self.getRoot().put(key,value) + a, b, err := self.insert(key, value) + self.internal_insert(key, value) + self.internal_split(q.keys[0], q) + persist(a,b) + self.persist() // child add q node + self.maybePersist(child == p) + self.leaf_insert(key, value) + self.persist() // if dedup + self.leaf_split(key, value) + self.pure_leaf_split(key, value) + persist(a,b) + a.persist() + persist(a,b) + self.put_kv(key, value) + new_root.persist() + self.setRoot(new_root) + oldroot.destroy() + // maybe persist BpTree new root + + Replace(key ItemKey, where WhereFunc, value ItemValue) + leaf.persist() + RemoveWhere(key ItemKey, where WhereFunc) + self.getRoot().remove(key, where) + self.internal_remove(key, nil, where) + child.leaf_remove(key, nil, where) + child.leaf_remove(key, sibling.keys[0], where) + l.destroy() // when the node is empty + a.maybePersist(hasChange) + self.destroy() // when no keys left + self.persist() // when some keys are left + self.leaf_remove(key, self.keys[len(self.keys)-1], where) + new_root.persist() // when new root is added + // maybe persist BpTree new root + \ No newline at end of file diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index e7509b179..cbf363c95 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -47,7 +47,9 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { return nil, err } if new_root == nil { - self.setRoot(NewLeaf(ns, true)) + new_root = NewLeaf(ns, false) + err = new_root.persist() + self.setRoot(new_root) } else { self.setRoot(new_root) } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 2d09026c3..f9a5cf058 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -54,9 +54,13 @@ func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err for i, leaf, next := li(); next != nil; i, leaf, next = next() { if where(leaf.values[i]) { 
leaf.values[i] = value + if persistErr := leaf.persist(); persistErr != nil && err == nil { + err = persistErr + break + } } } - return nil + return err } func (self *BpTree) Find(key ItemKey) (kvi KVIterator) { @@ -89,11 +93,13 @@ func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) { return err } if new_root == nil { - self.setRoot(NewLeaf(ns, false)) + new_root = NewLeaf(ns, false) + err = new_root.persist() + self.setRoot(new_root) } else { self.setRoot(new_root) } - return nil + return err } func (self *BpTree) Keys() (ki KIterator) { diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 160dfad74..4e6d63ac6 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -2,15 +2,23 @@ package bptree type ItemKey Hashable type ItemValue Equatable +type PersistFunc func(node *BpNode) error +type DestroyFunc func(node *BpNode) error + +var ( + PersistFn PersistFunc + DestroyFn DestroyFunc +) type BpNode struct { - keys []ItemKey - values []ItemValue - pointers []*BpNode - next *BpNode - prev *BpNode - no_dup bool - protoNode *ProtoNode + keys []ItemKey + values []ItemValue + pointers []*BpNode + next *BpNode + prev *BpNode + no_dup bool + protoNodeId int64 + protoNode *ProtoNode } func NewInternal(size int) *BpNode { @@ -18,8 +26,9 @@ func NewInternal(size int) *BpNode { panic(NegativeSize()) } return &BpNode{ - keys: make([]ItemKey, 0, size), - pointers: make([]*BpNode, 0, size), + keys: make([]ItemKey, 0, size), + pointers: make([]*BpNode, 0, size), + protoNodeId: GetProtoNodeId(), } } @@ -28,9 +37,10 @@ func NewLeaf(size int, no_dup bool) *BpNode { panic(NegativeSize()) } return &BpNode{ - keys: make([]ItemKey, 0, size), - values: make([]ItemValue, 0, size), - no_dup: no_dup, + keys: make([]ItemKey, 0, size), + values: make([]ItemValue, 0, size), + no_dup: no_dup, + protoNodeId: GetProtoNodeId(), } } @@ -191,7 +201,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root 
*BpNode, err error) root = NewInternal(self.NodeSize()) root.put_kp(a.keys[0], a) root.put_kp(b.keys[0], b) - return root, nil + return root, root.persist() } // right is only set on split @@ -237,10 +247,10 @@ func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode, if err := self.put_kp(q.keys[0], q); err != nil { return nil, nil, err } - return self, nil, nil + return self, nil, self.persist() } } - return self, nil, nil + return self, nil, self.maybePersist(child != p) } /* On split @@ -268,7 +278,7 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err return nil, nil, err } } - return a, b, nil + return a, b, persist(a, b) } /* if the leaf is full then it will defer to a leaf_split @@ -284,7 +294,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err i, has := self.find(key) if has { self.values[i] = value - return self, nil, nil + return self, nil, self.persist() } } if self.Full() { @@ -293,7 +303,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err if err := self.put_kv(key, value); err != nil { return nil, nil, err } - return self, nil, nil + return self, nil, self.persist() } } @@ -323,7 +333,7 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err return nil, nil, err } } - return a, b, nil + return a, b, persist(a, b) } /* a pure leaf split has two cases: @@ -349,7 +359,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, return nil, nil, err } insert_linked_list_node(a, b.getPrev(), b) - return a, b, nil + return a, b, persist(a, b) } else { a = self e := self.find_end_of_pure_run() @@ -357,7 +367,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, if err := e.put_kv(key, value); err != nil { return nil, nil, err } - return a, nil, nil + return a, nil, a.persist() } else { b = NewLeaf(self.NodeSize(), self.no_dup) if err := b.put_kv(key, 
value); err != nil { @@ -367,7 +377,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, if e.keys[0].Equals(key) { return a, nil, nil } - return a, b, nil + return a, b, persist(a, b) } } } @@ -484,6 +494,7 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun sibling = sibling.left_most_leaf() } child := self.pointers[i] + oldChild := child if child.Internal() { child, err = child.internal_remove(key, sibling, where) } else { @@ -508,9 +519,9 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun self.pointers[i] = child } if len(self.keys) == 0 { - return nil, nil + return nil, self.destroy() } - return self, nil + return self, self.maybePersist(oldChild != child) } func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) { @@ -518,8 +529,10 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, return nil, BpTreeError("Expected a leaf node") } a = self + hasChange := false for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() { if where(l.values[j]) { + hasChange = true if err := l.remove_key_at(j); err != nil { return nil, err } @@ -538,8 +551,14 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, } else { a = nil } + if err := l.destroy(); err != nil { + return nil, err + } } } + if a != nil { + return a, a.maybePersist(hasChange) + } return a, nil } diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go new file mode 100644 index 000000000..a5e330aa9 --- /dev/null +++ b/weed/util/bptree/bptree_store_test.go @@ -0,0 +1,34 @@ +package bptree + +import ( + "fmt" + "testing" +) + +func TestAddRemove(t *testing.T) { + tree := NewBpTree(32) + PersistFn = func(node *BpNode) error { + println("saving", node.protoNodeId) + return nil + } + DestroyFn = func(node *BpNode) error { + println("delete", node.protoNodeId) + return nil 
+ } + for i:=0;i<1024;i++{ + println("++++++++++", i) + tree.Add(String(fmt.Sprintf("%02d", i)), String(fmt.Sprintf("%02d", i))) + printTree(tree.root, "") + } +} + +func printTree(node *BpNode, prefix string) { + fmt.Printf("%sNode %d\n", prefix, node.protoNodeId) + prefix += " " + for i:=0;i Date: Sat, 18 Sep 2021 01:29:47 -0700 Subject: [PATCH 13/30] split node based on the last inserted key --- weed/util/bptree/bpmap.go | 6 +- weed/util/bptree/bptree.go | 6 +- weed/util/bptree/bptree_node.go | 79 +++++----- weed/util/bptree/bptree_store_test.go | 8 +- weed/util/bptree/bptree_test.go | 203 ++++++++++---------------- 5 files changed, 123 insertions(+), 179 deletions(-) diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index cbf363c95..399ac7b86 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -11,7 +11,7 @@ type BpMap BpTree func NewBpMap(node_size int) *BpMap { return &BpMap{ - root: NewLeaf(node_size, true), + root: NewLeaf(node_size), } } @@ -41,13 +41,13 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { if err != nil { return nil, err } - ns := self.getRoot().NodeSize() + ns := self.getRoot().Capacity() new_root, err := self.getRoot().remove(key, func(value ItemValue) bool { return true }) if err != nil { return nil, err } if new_root == nil { - new_root = NewLeaf(ns, false) + new_root = NewLeaf(ns) err = new_root.persist() self.setRoot(new_root) } else { diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index f9a5cf058..3ad73ad30 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -14,7 +14,7 @@ type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { return &BpTree{ - root: NewLeaf(node_size, false), + root: NewLeaf(node_size), } } @@ -87,13 +87,13 @@ func (self *BpTree) Range(from, to ItemKey) (kvi KVIterator) { } func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) { - ns := 
self.getRoot().NodeSize() + ns := self.getRoot().Capacity() new_root, err := self.getRoot().remove(key, where) if err != nil { return err } if new_root == nil { - new_root = NewLeaf(ns, false) + new_root = NewLeaf(ns) err = new_root.persist() self.setRoot(new_root) } else { diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 4e6d63ac6..5c3461cfd 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -16,7 +16,6 @@ type BpNode struct { pointers []*BpNode next *BpNode prev *BpNode - no_dup bool protoNodeId int64 protoNode *ProtoNode } @@ -32,14 +31,13 @@ func NewInternal(size int) *BpNode { } } -func NewLeaf(size int, no_dup bool) *BpNode { +func NewLeaf(size int) *BpNode { if size < 0 { panic(NegativeSize()) } return &BpNode{ keys: make([]ItemKey, 0, size), values: make([]ItemValue, 0, size), - no_dup: no_dup, protoNodeId: GetProtoNodeId(), } } @@ -65,7 +63,11 @@ func (self *BpNode) Internal() bool { return cap(self.pointers) > 0 } -func (self *BpNode) NodeSize() int { +func (self *BpNode) Len() int { + return len(self.keys) +} + +func (self *BpNode) Capacity() int { return cap(self.keys) } @@ -78,19 +80,6 @@ func (self *BpNode) Height() int { return self.pointers[0].Height() + 1 } -func (self *BpNode) count(key ItemKey) int { - i, _ := self.find(key) - count := 0 - for ; i < len(self.keys); i++ { - if self.keys[i].Equals(key) { - count++ - } else { - break - } - } - return count -} - func (self *BpNode) has(key ItemKey) bool { _, has := self.find(key) return has @@ -198,7 +187,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error) return a, nil } // else we have root split - root = NewInternal(self.NodeSize()) + root = NewInternal(self.Capacity()) root.put_kp(a.keys[0], a) root.put_kp(b.keys[0], b) return root, root.persist() @@ -267,9 +256,9 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err return nil, nil, BpTreeError("Tried to split an 
internal block on duplicate key") } a = self - b = NewInternal(self.NodeSize()) - balance_nodes(a, b) - if key.Less(b.keys[0]) { + b = NewInternal(self.Capacity()) + balance_nodes(a, b, key) + if b.Len() > 0 && key.Less(b.keys[0]) { if err := a.put_kp(key, ptr); err != nil { return nil, nil, err } @@ -290,7 +279,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } - if self.no_dup { + if true { // no_dup = true i, has := self.find(key) if has { self.values[i] = value @@ -321,10 +310,10 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err return self.pure_leaf_split(key, value) } a = self - b = NewLeaf(self.NodeSize(), self.no_dup) + b = NewLeaf(self.Capacity()) insert_linked_list_node(b, a, a.getNext()) - balance_nodes(a, b) - if key.Less(b.keys[0]) { + balance_nodes(a, b, key) + if b.Len() > 0 && key.Less(b.keys[0]) { if err := a.put_kv(key, value); err != nil { return nil, nil, err } @@ -353,7 +342,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, return nil, nil, BpTreeError("Expected a pure leaf node") } if key.Less(self.keys[0]) { - a = NewLeaf(self.NodeSize(), self.no_dup) + a = NewLeaf(self.Capacity()) b = self if err := a.put_kv(key, value); err != nil { return nil, nil, err @@ -369,7 +358,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, } return a, nil, a.persist() } else { - b = NewLeaf(self.NodeSize(), self.no_dup) + b = NewLeaf(self.Capacity()) if err := b.put_kv(key, value); err != nil { return nil, nil, err } @@ -604,11 +593,7 @@ func (self *BpNode) find(key ItemKey) (int, bool) { if key.Less(self.keys[m]) { r = m - 1 } else if key.Equals(self.keys[m]) { - for j := m; j >= 0; j-- { - if j == 0 || !key.Equals(self.keys[j-1]) { - return j, true - } - } + return m, true } else { l = m + 1 } @@ -713,9 +698,15 @@ func remove_linked_list_node(n 
*BpNode) { } } -/* a must be full and b must be empty else there will be a panic +/** + * a must be full and b must be empty else there will be a panic + * + * Different from common btree implementation, this splits the nodes by the inserted key. + * Items less than the splitKey stays in a, or moved to b if otherwise. + * This should help for monotonically increasing inserts. + * */ -func balance_nodes(a, b *BpNode) { +func balance_nodes(a, b *BpNode, splitKey ItemKey) { if len(b.keys) != 0 { panic(BpTreeError("b was not empty")) } @@ -731,16 +722,8 @@ func balance_nodes(a, b *BpNode) { if cap(a.pointers) != cap(b.pointers) { panic(BpTreeError("cap(a.pointers) != cap(b.pointers)")) } - m := len(a.keys) / 2 - for m < len(a.keys) && a.keys[m-1].Equals(a.keys[m]) { - m++ - } - if m == len(a.keys) { - m-- - for m > 0 && a.keys[m-1].Equals(a.keys[m]) { - m-- - } - } + + m := find_split_index(a, b, splitKey) var lim = len(a.keys) - m b.keys = b.keys[:lim] if cap(a.values) > 0 { @@ -773,3 +756,11 @@ func balance_nodes(a, b *BpNode) { a.pointers = a.pointers[:m] } } + +func find_split_index(a, b *BpNode, splitKey ItemKey) int { + m := len(a.keys) + for m > 0 && !a.keys[m-1].Less(splitKey) { + m-- + } + return m +} diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go index a5e330aa9..6ed4abca8 100644 --- a/weed/util/bptree/bptree_store_test.go +++ b/weed/util/bptree/bptree_store_test.go @@ -6,7 +6,7 @@ import ( ) func TestAddRemove(t *testing.T) { - tree := NewBpTree(32) + tree := NewBpTree(5) PersistFn = func(node *BpNode) error { println("saving", node.protoNodeId) return nil @@ -24,11 +24,11 @@ func TestAddRemove(t *testing.T) { func printTree(node *BpNode, prefix string) { fmt.Printf("%sNode %d\n", prefix, node.protoNodeId) - prefix += " " + prefix += " " for i:=0;i Date: Sat, 18 Sep 2021 14:04:30 -0700 Subject: [PATCH 14/30] wip --- weed/filer/redis3/kv_directory_children.go | 49 +++++ weed/filer/redis3/redis_cluster_store.go | 42 
+++++ weed/filer/redis3/redis_store.go | 36 ++++ weed/filer/redis3/universal_redis_store.go | 175 ++++++++++++++++++ weed/filer/redis3/universal_redis_store_kv.go | 42 +++++ weed/util/bptree/bptree_store_test.go | 14 +- weed/util/bptree/serde.go | 10 + 7 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 weed/filer/redis3/kv_directory_children.go create mode 100644 weed/filer/redis3/redis_cluster_store.go create mode 100644 weed/filer/redis3/redis_store.go create mode 100644 weed/filer/redis3/universal_redis_store.go create mode 100644 weed/filer/redis3/universal_redis_store_kv.go create mode 100644 weed/util/bptree/serde.go diff --git a/weed/filer/redis3/kv_directory_children.go b/weed/filer/redis3/kv_directory_children.go new file mode 100644 index 000000000..f3152c970 --- /dev/null +++ b/weed/filer/redis3/kv_directory_children.go @@ -0,0 +1,49 @@ +package redis3 + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/util/bptree" + "github.com/go-redis/redis/v8" + "github.com/golang/protobuf/proto" +) + +func insertChild(ctx context.Context, client redis.UniversalClient, key string, name string) error { + data, err := client.Get(ctx, key).Result() + if err != nil { + if err != redis.Nil { + return fmt.Errorf("read %s: %v", key, err) + } + } + rootNode := &bptree.ProtoNode{} + if err := proto.UnmarshalMerge([]byte(data), rootNode); err != nil { + return fmt.Errorf("decoding root for %s: %v", key, err) + } + tree := rootNode.ToBpTree() + tree.Add(bptree.String(name), nil) + return nil +} + +func removeChild(ctx context.Context, client redis.UniversalClient, key string, name string) error { + data, err := client.Get(ctx, key).Result() + if err != nil { + if err != redis.Nil { + return fmt.Errorf("read %s: %v", key, err) + } + } + rootNode := &bptree.ProtoNode{} + if err := proto.UnmarshalMerge([]byte(data), rootNode); err != nil { + return fmt.Errorf("decoding root for %s: %v", key, err) + } + tree := rootNode.ToBpTree() + 
tree.Add(bptree.String(name), nil) + return nil +} + +func removeChildren(ctx context.Context, client redis.UniversalClient, key string, onDeleteFn func(name string) error) error { + return nil +} + +func iterateChildren(ctx context.Context, client redis.UniversalClient, key string, eachFn func(name string) error) error { + return nil +} diff --git a/weed/filer/redis3/redis_cluster_store.go b/weed/filer/redis3/redis_cluster_store.go new file mode 100644 index 000000000..e0c620450 --- /dev/null +++ b/weed/filer/redis3/redis_cluster_store.go @@ -0,0 +1,42 @@ +package redis3 + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/go-redis/redis/v8" +) + +func init() { + filer.Stores = append(filer.Stores, &RedisCluster3Store{}) +} + +type RedisCluster3Store struct { + UniversalRedis3Store +} + +func (store *RedisCluster3Store) GetName() string { + return "redis_cluster3" +} + +func (store *RedisCluster3Store) Initialize(configuration util.Configuration, prefix string) (err error) { + + configuration.SetDefault(prefix+"useReadOnly", false) + configuration.SetDefault(prefix+"routeByLatency", false) + + return store.initialize( + configuration.GetStringSlice(prefix+"addresses"), + configuration.GetString(prefix+"password"), + configuration.GetBool(prefix+"useReadOnly"), + configuration.GetBool(prefix+"routeByLatency"), + ) +} + +func (store *RedisCluster3Store) initialize(addresses []string, password string, readOnly, routeByLatency bool) (err error) { + store.Client = redis.NewClusterClient(&redis.ClusterOptions{ + Addrs: addresses, + Password: password, + ReadOnly: readOnly, + RouteByLatency: routeByLatency, + }) + return +} diff --git a/weed/filer/redis3/redis_store.go b/weed/filer/redis3/redis_store.go new file mode 100644 index 000000000..fdbf994ec --- /dev/null +++ b/weed/filer/redis3/redis_store.go @@ -0,0 +1,36 @@ +package redis3 + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + 
"github.com/chrislusf/seaweedfs/weed/util" + "github.com/go-redis/redis/v8" +) + +func init() { + filer.Stores = append(filer.Stores, &Redis3Store{}) +} + +type Redis3Store struct { + UniversalRedis3Store +} + +func (store *Redis3Store) GetName() string { + return "redis3" +} + +func (store *Redis3Store) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"address"), + configuration.GetString(prefix+"password"), + configuration.GetInt(prefix+"database"), + ) +} + +func (store *Redis3Store) initialize(hostPort string, password string, database int) (err error) { + store.Client = redis.NewClient(&redis.Options{ + Addr: hostPort, + Password: password, + DB: database, + }) + return +} diff --git a/weed/filer/redis3/universal_redis_store.go b/weed/filer/redis3/universal_redis_store.go new file mode 100644 index 000000000..958338afe --- /dev/null +++ b/weed/filer/redis3/universal_redis_store.go @@ -0,0 +1,175 @@ +package redis3 + +import ( + "context" + "fmt" + "time" + + "github.com/go-redis/redis/v8" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DIR_LIST_MARKER = "\x00" +) + +type UniversalRedis3Store struct { + Client redis.UniversalClient +} + +func (store *UniversalRedis3Store) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *UniversalRedis3Store) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *UniversalRedis3Store) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *UniversalRedis3Store) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if 
len(entry.Chunks) > 50 { + value = util.MaybeGzipData(value) + } + + if err = store.Client.Set(ctx, string(entry.FullPath), value, time.Duration(entry.TtlSec)*time.Second).Err(); err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + dir, name := entry.FullPath.DirAndName() + + if name != "" { + if err = insertChild(ctx, store.Client, genDirectoryListKey(dir), name); err != nil { + return fmt.Errorf("persisting %s in parent dir: %v", entry.FullPath, err) + } + } + + return nil +} + +func (store *UniversalRedis3Store) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *UniversalRedis3Store) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) { + + data, err := store.Client.Get(ctx, string(fullpath)).Result() + if err == redis.Nil { + return nil, filer_pb.ErrNotFound + } + + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData([]byte(data))) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *UniversalRedis3Store) DeleteEntry(ctx context.Context, fullpath util.FullPath) (err error) { + + _, err = store.Client.Del(ctx, genDirectoryListKey(string(fullpath))).Result() + if err != nil { + return fmt.Errorf("delete dir list %s : %v", fullpath, err) + } + + _, err = store.Client.Del(ctx, string(fullpath)).Result() + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + dir, name := fullpath.DirAndName() + + if name != "" { + if err = removeChild(ctx, store.Client, genDirectoryListKey(dir), name); err != nil { + return fmt.Errorf("DeleteEntry %s in parent dir: %v", fullpath, err) + } + } + + return nil +} + +func (store *UniversalRedis3Store) DeleteFolderChildren(ctx context.Context, fullpath 
util.FullPath) (err error) { + + return removeChildren(ctx, store.Client, genDirectoryListKey(string(fullpath)), func(name string) error { + path := util.NewFullPath(string(fullpath), name) + _, err = store.Client.Del(ctx, string(path)).Result() + if err != nil { + return fmt.Errorf("DeleteFolderChildren %s in parent dir: %v", fullpath, err) + } + return nil + }) + +} + +func (store *UniversalRedis3Store) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *UniversalRedis3Store) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + dirListKey := genDirectoryListKey(string(dirPath)) + start := int64(0) + if startFileName != "" { + start, _ = store.Client.ZRank(ctx, dirListKey, startFileName).Result() + if !includeStartFile { + start++ + } + } + members, err := store.Client.ZRange(ctx, dirListKey, start, start+int64(limit)-1).Result() + if err != nil { + return lastFileName, fmt.Errorf("list %s : %v", dirPath, err) + } + + // fetch entry meta + for _, fileName := range members { + path := util.NewFullPath(string(dirPath), fileName) + entry, err := store.FindEntry(ctx, path) + lastFileName = fileName + if err != nil { + glog.V(0).Infof("list %s : %v", path, err) + if err == filer_pb.ErrNotFound { + continue + } + } else { + if entry.TtlSec > 0 { + if entry.Attr.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + store.Client.Del(ctx, string(path)).Result() + store.Client.ZRem(ctx, dirListKey, fileName).Result() + continue + } + } + if !eachEntryFunc(entry) { + break + } + } + } + + return lastFileName, err +} + +func genDirectoryListKey(dir string) 
(dirList string) { + return dir + DIR_LIST_MARKER +} + +func (store *UniversalRedis3Store) Shutdown() { + store.Client.Close() +} diff --git a/weed/filer/redis3/universal_redis_store_kv.go b/weed/filer/redis3/universal_redis_store_kv.go new file mode 100644 index 000000000..a9c440a37 --- /dev/null +++ b/weed/filer/redis3/universal_redis_store_kv.go @@ -0,0 +1,42 @@ +package redis3 + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/go-redis/redis/v8" +) + +func (store *UniversalRedis3Store) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + _, err = store.Client.Set(ctx, string(key), value, 0).Result() + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *UniversalRedis3Store) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + data, err := store.Client.Get(ctx, string(key)).Result() + + if err == redis.Nil { + return nil, filer.ErrKvNotFound + } + + return []byte(data), err +} + +func (store *UniversalRedis3Store) KvDelete(ctx context.Context, key []byte) (err error) { + + _, err = store.Client.Del(ctx, string(key)).Result() + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go index 6ed4abca8..82dcbbf55 100644 --- a/weed/util/bptree/bptree_store_test.go +++ b/weed/util/bptree/bptree_store_test.go @@ -15,11 +15,21 @@ func TestAddRemove(t *testing.T) { println("delete", node.protoNodeId) return nil } - for i:=0;i<1024;i++{ + for i:=0;i<32;i++{ println("++++++++++", i) - tree.Add(String(fmt.Sprintf("%02d", i)), String(fmt.Sprintf("%02d", i))) + tree.Add(String(fmt.Sprintf("%02d", i)), nil) printTree(tree.root, "") } + + if !tree.Has(String("30")) { + t.Errorf("lookup error") + } + tree.RemoveWhere(String("30"), func(value ItemValue) bool { + return true + }) + if tree.Has(String("30")) { + t.Errorf("remove error") + } } func 
printTree(node *BpNode, prefix string) { diff --git a/weed/util/bptree/serde.go b/weed/util/bptree/serde.go new file mode 100644 index 000000000..2a98a774a --- /dev/null +++ b/weed/util/bptree/serde.go @@ -0,0 +1,10 @@ +package bptree + +func (protoNode *ProtoNode) ToBpTree() *BpTree { + node := protoNode.ToBpNode() + return &BpTree{root: node} +} + +func (protoNode *ProtoNode) ToBpNode() *BpNode { + return nil +} \ No newline at end of file From e066e2642ce8cd99854042773abb34616695c4fc Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 18 Sep 2021 15:32:17 -0700 Subject: [PATCH 15/30] add NodeStore --- weed/util/bptree/bpmap.go | 6 +- weed/util/bptree/bptree.go | 6 +- weed/util/bptree/bptree_node.go | 27 +++---- weed/util/bptree/bptree_store_test.go | 39 ++++++---- weed/util/bptree/bptree_test.go | 106 +++++++++++++------------- weed/util/bptree/getter_setter.go | 8 +- weed/util/bptree/serde_test.go | 46 +++++++++++ 7 files changed, 147 insertions(+), 91 deletions(-) create mode 100644 weed/util/bptree/serde_test.go diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index 399ac7b86..0c13a132f 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -9,9 +9,9 @@ import ( */ type BpMap BpTree -func NewBpMap(node_size int) *BpMap { +func NewBpMap(node_size int, nodeStore NodeStore) *BpMap { return &BpMap{ - root: NewLeaf(node_size), + root: NewLeaf(node_size, nodeStore), } } @@ -47,7 +47,7 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { return nil, err } if new_root == nil { - new_root = NewLeaf(ns) + new_root = NewLeaf(ns, self.root.nodeStore) err = new_root.persist() self.setRoot(new_root) } else { diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 3ad73ad30..141c595f3 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -12,9 +12,9 @@ type BpTree struct { type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) -func NewBpTree(node_size int) *BpTree { 
+func NewBpTree(node_size int, nodeStore NodeStore) *BpTree { return &BpTree{ - root: NewLeaf(node_size), + root: NewLeaf(node_size, nodeStore), } } @@ -93,7 +93,7 @@ func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) { return err } if new_root == nil { - new_root = NewLeaf(ns) + new_root = NewLeaf(ns, self.root.nodeStore) err = new_root.persist() self.setRoot(new_root) } else { diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 5c3461cfd..507d9d318 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -2,13 +2,11 @@ package bptree type ItemKey Hashable type ItemValue Equatable -type PersistFunc func(node *BpNode) error -type DestroyFunc func(node *BpNode) error -var ( - PersistFn PersistFunc - DestroyFn DestroyFunc -) +type NodeStore interface { + PersistFunc(node *BpNode) error + DestroyFunc(node *BpNode) error +} type BpNode struct { keys []ItemKey @@ -18,9 +16,10 @@ type BpNode struct { prev *BpNode protoNodeId int64 protoNode *ProtoNode + nodeStore NodeStore } -func NewInternal(size int) *BpNode { +func NewInternal(size int, nodeStore NodeStore) *BpNode { if size < 0 { panic(NegativeSize()) } @@ -28,10 +27,11 @@ func NewInternal(size int) *BpNode { keys: make([]ItemKey, 0, size), pointers: make([]*BpNode, 0, size), protoNodeId: GetProtoNodeId(), + nodeStore: nodeStore, } } -func NewLeaf(size int) *BpNode { +func NewLeaf(size int, nodeStore NodeStore) *BpNode { if size < 0 { panic(NegativeSize()) } @@ -39,6 +39,7 @@ func NewLeaf(size int) *BpNode { keys: make([]ItemKey, 0, size), values: make([]ItemValue, 0, size), protoNodeId: GetProtoNodeId(), + nodeStore: nodeStore, } } @@ -187,7 +188,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error) return a, nil } // else we have root split - root = NewInternal(self.Capacity()) + root = NewInternal(self.Capacity(), self.nodeStore) root.put_kp(a.keys[0], a) root.put_kp(b.keys[0], b) return root, 
root.persist() @@ -256,7 +257,7 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err return nil, nil, BpTreeError("Tried to split an internal block on duplicate key") } a = self - b = NewInternal(self.Capacity()) + b = NewInternal(self.Capacity(), self.nodeStore) balance_nodes(a, b, key) if b.Len() > 0 && key.Less(b.keys[0]) { if err := a.put_kp(key, ptr); err != nil { @@ -310,7 +311,7 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err return self.pure_leaf_split(key, value) } a = self - b = NewLeaf(self.Capacity()) + b = NewLeaf(self.Capacity(), self.nodeStore) insert_linked_list_node(b, a, a.getNext()) balance_nodes(a, b, key) if b.Len() > 0 && key.Less(b.keys[0]) { @@ -342,7 +343,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, return nil, nil, BpTreeError("Expected a pure leaf node") } if key.Less(self.keys[0]) { - a = NewLeaf(self.Capacity()) + a = NewLeaf(self.Capacity(), self.nodeStore) b = self if err := a.put_kv(key, value); err != nil { return nil, nil, err @@ -358,7 +359,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, } return a, nil, a.persist() } else { - b = NewLeaf(self.Capacity()) + b = NewLeaf(self.Capacity(), self.nodeStore) if err := b.put_kv(key, value); err != nil { return nil, nil, err } diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go index 82dcbbf55..2e034171c 100644 --- a/weed/util/bptree/bptree_store_test.go +++ b/weed/util/bptree/bptree_store_test.go @@ -5,29 +5,38 @@ import ( "testing" ) +type nodeStorePrintlnImpl struct { +} + +func (n *nodeStorePrintlnImpl) PersistFunc(node *BpNode) error { + println("saving node", node.protoNodeId) + return nil +} +func (n *nodeStorePrintlnImpl) DestroyFunc(node *BpNode) error { + println("delete node", node.protoNodeId) + return nil +} + func TestAddRemove(t *testing.T) { - tree := NewBpTree(5) - PersistFn = func(node 
*BpNode) error { - println("saving", node.protoNodeId) - return nil - } - DestroyFn = func(node *BpNode) error { - println("delete", node.protoNodeId) - return nil - } - for i:=0;i<32;i++{ + + tree := NewBpTree(3, &nodeStorePrintlnImpl{}) + for i:=0;i<9;i++{ println("++++++++++", i) tree.Add(String(fmt.Sprintf("%02d", i)), nil) printTree(tree.root, "") } - if !tree.Has(String("30")) { + if !tree.Has(String("08")) { t.Errorf("lookup error") } - tree.RemoveWhere(String("30"), func(value ItemValue) bool { - return true - }) - if tree.Has(String("30")) { + for i:=5;i<9;i++{ + println("----------", i) + tree.RemoveWhere(String(fmt.Sprintf("%02d", i)), func(value ItemValue) bool { + return true + }) + printTree(tree.root, "") + } + if tree.Has(String("08")) { t.Errorf("remove error") } } diff --git a/weed/util/bptree/bptree_test.go b/weed/util/bptree/bptree_test.go index fc5b6f900..1fd6d1122 100644 --- a/weed/util/bptree/bptree_test.go +++ b/weed/util/bptree/bptree_test.go @@ -79,7 +79,7 @@ func BenchmarkBpTree(b *testing.B) { b.StartTimer() for i := 0; i < b.N; i++ { - t := NewBpTree(23) + t := NewBpTree(23, nil) for _, r := range recs { t.Add(r.key, r.value) } @@ -207,7 +207,7 @@ func TestAddHasCountFindIterateRemove(t *testing.T) { } } for i := 2; i < 64; i++ { - test(NewBpTree(i)) + test(NewBpTree(i, nil)) } } @@ -271,11 +271,11 @@ func TestBpMap(t *testing.T) { } } - test(NewBpMap(23)) + test(NewBpMap(23, nil)) } func Test_get_start(t *testing.T) { - root := NewLeaf(2) + root := NewLeaf(2, nil) root, err := root.put(Int(1), Int(1)) if err != nil { t.Error(err) @@ -344,7 +344,7 @@ func Test_get_start(t *testing.T) { } func Test_get_end(t *testing.T) { - root := NewLeaf(3) + root := NewLeaf(3, nil) root, err := root.put(Int(1), Int(1)) if err != nil { t.Fatal(err) @@ -388,7 +388,7 @@ func Test_get_end(t *testing.T) { } func Test_put_no_root_split(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } @@ 
-423,7 +423,7 @@ func Test_put_no_root_split(t *testing.T) { } func Test_put_root_split(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) p, err := a.put(Int(1), Int(1)) if err != nil { t.Error(err) @@ -472,8 +472,8 @@ func Test_put_root_split(t *testing.T) { } func Test_internal_insert_no_split(t *testing.T) { - a := NewInternal(3) - leaf := NewLeaf(1) + a := NewInternal(3, nil) + leaf := NewLeaf(1, nil) if err := leaf.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } @@ -500,8 +500,8 @@ func Test_internal_insert_no_split(t *testing.T) { } func Test_internal_insert_split_less(t *testing.T) { - a := NewInternal(3) - leaf := NewLeaf(1) + a := NewInternal(3, nil) + leaf := NewLeaf(1, nil) if err := leaf.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } @@ -534,7 +534,7 @@ func Test_internal_insert_split_less(t *testing.T) { } func Test_internal_split_less(t *testing.T) { - a := NewInternal(3) + a := NewInternal(3, nil) if err := a.put_kp(Int(1), nil); err != nil { t.Error(err) } @@ -564,7 +564,7 @@ func Test_internal_split_less(t *testing.T) { } func Test_internal_split_equal(t *testing.T) { - a := NewInternal(3) + a := NewInternal(3, nil) if err := a.put_kp(Int(1), nil); err != nil { t.Error(err) } @@ -581,7 +581,7 @@ func Test_internal_split_equal(t *testing.T) { } func Test_internal_split_greater(t *testing.T) { - a := NewInternal(3) + a := NewInternal(3, nil) if err := a.put_kp(Int(1), nil); err != nil { t.Error(err) } @@ -611,7 +611,7 @@ func Test_internal_split_greater(t *testing.T) { } func Test_leaf_insert_no_split(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, nil) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) @@ -637,7 +637,7 @@ func Test_leaf_insert_no_split(t *testing.T) { // tests the defer to split logic func Test_leaf_insert_split_less(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, nil) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { 
t.Error(err) @@ -668,7 +668,7 @@ func Test_leaf_insert_split_less(t *testing.T) { } func Test_leaf_split_less(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, nil) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) @@ -699,7 +699,7 @@ func Test_leaf_split_less(t *testing.T) { } func Test_leaf_split_equal(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, nil) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) @@ -730,7 +730,7 @@ func Test_leaf_split_equal(t *testing.T) { } func Test_leaf_split_greater(t *testing.T) { - a := NewLeaf(3) + a := NewLeaf(3, nil) insert_linked_list_node(a, nil, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) @@ -762,13 +762,13 @@ func Test_leaf_split_greater(t *testing.T) { // tests the defer logic func Test_pure_leaf_insert_split_less(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, nil) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, nil) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) @@ -835,13 +835,13 @@ func Test_pure_leaf_insert_split_less(t *testing.T) { } func Test_pure_leaf_split_less(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, nil) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, nil) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) @@ -908,13 +908,13 @@ func Test_pure_leaf_split_less(t *testing.T) { } func Test_pure_leaf_split_equal(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := 
NewLeaf(2, nil) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, nil) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) @@ -972,13 +972,13 @@ func Test_pure_leaf_split_equal(t *testing.T) { } func Test_pure_leaf_split_greater(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, nil) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, nil) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) @@ -1042,13 +1042,13 @@ func Test_pure_leaf_split_greater(t *testing.T) { } func Test_find_end_of_pure_run(t *testing.T) { - a := NewLeaf(2) + a := NewLeaf(2, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, nil) insert_linked_list_node(b, a, nil) - c := NewLeaf(2) + c := NewLeaf(2, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(2) + d := NewLeaf(2, nil) insert_linked_list_node(d, c, nil) if err := a.put_kv(Int(3), Int(1)); err != nil { t.Error(err) @@ -1078,13 +1078,13 @@ func Test_find_end_of_pure_run(t *testing.T) { } func Test_insert_linked_list_node(t *testing.T) { - a := NewLeaf(1) + a := NewLeaf(1, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, nil) insert_linked_list_node(b, a, nil) - c := NewLeaf(3) + c := NewLeaf(3, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(4) + d := NewLeaf(4, nil) insert_linked_list_node(d, a, b) if a.getPrev() != nil { t.Errorf("expected a.prev == nil") @@ -1113,13 +1113,13 @@ func Test_insert_linked_list_node(t *testing.T) { } func Test_remove_linked_list_node(t *testing.T) { - a := NewLeaf(1) + a := NewLeaf(1, nil) insert_linked_list_node(a, nil, nil) - b := NewLeaf(2) + b := NewLeaf(2, nil) 
insert_linked_list_node(b, a, nil) - c := NewLeaf(3) + c := NewLeaf(3, nil) insert_linked_list_node(c, b, nil) - d := NewLeaf(4) + d := NewLeaf(4, nil) insert_linked_list_node(d, a, b) if a.getPrev() != nil { t.Errorf("expected a.prev == nil") @@ -1188,8 +1188,8 @@ func Test_remove_linked_list_node(t *testing.T) { } func Test_balance_leaf_nodes_with_dup(t *testing.T) { - a := NewLeaf(3) - b := NewLeaf(3) + a := NewLeaf(3, nil) + b := NewLeaf(3, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } @@ -1209,8 +1209,8 @@ func Test_balance_leaf_nodes_with_dup(t *testing.T) { } func Test_balance_leaf_nodes(t *testing.T) { - a := NewLeaf(7) - b := NewLeaf(7) + a := NewLeaf(7, nil) + b := NewLeaf(7, nil) if err := a.put_kv(Int(1), Int(1)); err != nil { t.Error(err) } @@ -1258,8 +1258,8 @@ func Test_balance_leaf_nodes(t *testing.T) { } func Test_balance_internal_nodes(t *testing.T) { - a := NewInternal(6) - b := NewInternal(6) + a := NewInternal(6, nil) + b := NewInternal(6, nil) if err := a.put_kp(Int(1), nil); err != nil { t.Error(err) } diff --git a/weed/util/bptree/getter_setter.go b/weed/util/bptree/getter_setter.go index caafc1bbd..dcaa7a0b6 100644 --- a/weed/util/bptree/getter_setter.go +++ b/weed/util/bptree/getter_setter.go @@ -45,14 +45,14 @@ func (self *BpNode) maybePersist(shouldPersist bool) error { return self.persist() } func (self *BpNode) persist() error { - if PersistFn != nil { - return PersistFn(self) + if self.nodeStore != nil { + return self.nodeStore.PersistFunc(self) } return nil } func (self *BpNode) destroy() error { - if DestroyFn != nil { - return DestroyFn(self) + if self.nodeStore != nil { + return self.nodeStore.DestroyFunc(self) } return nil } diff --git a/weed/util/bptree/serde_test.go b/weed/util/bptree/serde_test.go new file mode 100644 index 000000000..27ccccb78 --- /dev/null +++ b/weed/util/bptree/serde_test.go @@ -0,0 +1,46 @@ +package bptree + +import ( + "fmt" + "testing" +) + +type nodeStoreMapImpl struct { + m 
map[int64]*ProtoNode +} + +func (n *nodeStoreMapImpl) PersistFunc(node *BpNode) error { + println("saving node", node.protoNodeId) + n.m[node.protoNodeId] = node.protoNode + return nil +} +func (n *nodeStoreMapImpl) DestroyFunc(node *BpNode) error { + println("delete node", node.protoNodeId) + delete(n.m, node.protoNodeId) + return nil +} + +func TestSerDe(t *testing.T) { + + nodeStore := &nodeStoreMapImpl{ + m: make(map[int64]*ProtoNode), + } + + tree := NewBpTree(3, nodeStore) + + for i:=0;i<32;i++{ + println("add", i) + tree.Add(String(fmt.Sprintf("%02d", i)), nil) + } + + for i:=5;i<9;i++{ + println("----------", i) + tree.RemoveWhere(String(fmt.Sprintf("%02d", i)), func(value ItemValue) bool { + return true + }) + printTree(tree.root, "") + } + + + +} \ No newline at end of file From 4c1741fdbb93cc0f3228b59e3912b8aa896a24c0 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 2 Oct 2021 14:02:56 -0700 Subject: [PATCH 16/30] working skiplist --- weed/util/skiplist/Makefile | 6 + weed/util/skiplist/serde.go | 54 ++++ weed/util/skiplist/skiplist.go | 480 ++++++++++++++++++++++++++++ weed/util/skiplist/skiplist.pb.go | 386 ++++++++++++++++++++++ weed/util/skiplist/skiplist.proto | 27 ++ weed/util/skiplist/skiplist_test.go | 212 ++++++++++++ 6 files changed, 1165 insertions(+) create mode 100644 weed/util/skiplist/Makefile create mode 100644 weed/util/skiplist/serde.go create mode 100644 weed/util/skiplist/skiplist.go create mode 100644 weed/util/skiplist/skiplist.pb.go create mode 100644 weed/util/skiplist/skiplist.proto create mode 100644 weed/util/skiplist/skiplist_test.go diff --git a/weed/util/skiplist/Makefile b/weed/util/skiplist/Makefile new file mode 100644 index 000000000..af4afe639 --- /dev/null +++ b/weed/util/skiplist/Makefile @@ -0,0 +1,6 @@ +all: gen + +.PHONY : gen + +gen: + protoc skiplist.proto --go_out=plugins=grpc:. 
--go_opt=paths=source_relative diff --git a/weed/util/skiplist/serde.go b/weed/util/skiplist/serde.go new file mode 100644 index 000000000..2337b4b19 --- /dev/null +++ b/weed/util/skiplist/serde.go @@ -0,0 +1,54 @@ +package skiplist + +import "bytes" + +func compareElement(a *SkipListElement, key []byte) int { + if len(a.Values) == 0 { + return -1 + } + if bytes.Compare(a.Values[0], key) < 0 { + return -1 + } + if bytes.Compare(a.Values[len(a.Values)-1], key) > 0 { + return 1 + } + return 0 +} + +var ( + memStore = make(map[int64]*SkipListElement) +) + +func (node *SkipListElement) Reference() *SkipListElementReference { + if node == nil { + return nil + } + return &SkipListElementReference{ + ElementPointer: node.Id, + Key: node.Values[0], + } +} +func (node *SkipListElement) Save() { + if node == nil { + return + } + memStore[node.Id] = node + //println("++ node", node.Id, string(node.Values[0])) +} + +func (node *SkipListElement) DeleteSelf() { + if node == nil { + return + } + delete(memStore, node.Id) + //println("++ node", node.Id, string(node.Values[0])) +} + +func (ref *SkipListElementReference) Load() *SkipListElement { + if ref == nil { + return nil + } + //println("~ node", ref.ElementPointer, string(ref.Key)) + return memStore[ref.ElementPointer] +} + diff --git a/weed/util/skiplist/skiplist.go b/weed/util/skiplist/skiplist.go new file mode 100644 index 000000000..a47cf4608 --- /dev/null +++ b/weed/util/skiplist/skiplist.go @@ -0,0 +1,480 @@ +package skiplist + +import ( + "bytes" + "fmt" + "math/bits" + "math/rand" + "time" +) + +const ( + // maxLevel denotes the maximum height of the skiplist. This height will keep the skiplist + // efficient for up to 34m entries. If there is a need for much more, please adjust this constant accordingly. 
+ maxLevel = 25 +) + +type SkipList struct { + startLevels [maxLevel]*SkipListElementReference + endLevels [maxLevel]*SkipListElementReference + maxNewLevel int + maxLevel int + elementCount int +} + +// NewSeedEps returns a new empty, initialized Skiplist. +// Given a seed, a deterministic height/list behaviour can be achieved. +// Eps is used to compare keys given by the ExtractKey() function on equality. +func NewSeed(seed int64) *SkipList { + + // Initialize random number generator. + rand.Seed(seed) + //fmt.Printf("SkipList seed: %v\n", seed) + + list := &SkipList{ + maxNewLevel: maxLevel, + maxLevel: 0, + elementCount: 0, + } + + return list +} + +// New returns a new empty, initialized Skiplist. +func New() *SkipList { + return NewSeed(time.Now().UTC().UnixNano()) +} + +// IsEmpty checks, if the skiplist is empty. +func (t *SkipList) IsEmpty() bool { + return t.startLevels[0] == nil +} + +func (t *SkipList) generateLevel(maxLevel int) int { + level := maxLevel - 1 + // First we apply some mask which makes sure that we don't get a level + // above our desired level. Then we find the first set bit. + var x = rand.Uint64() & ((1 << uint(maxLevel-1)) - 1) + zeroes := bits.TrailingZeros64(x) + if zeroes <= maxLevel { + level = zeroes + } + + return level +} + +func (t *SkipList) findEntryIndex(key []byte, level int) int { + // Find good entry point so we don't accidentally skip half the list. + for i := t.maxLevel; i >= 0; i-- { + if t.startLevels[i] != nil && bytes.Compare(t.startLevels[i].Key, key) < 0 || i <= level { + return i + } + } + return 0 +} + +func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem *SkipListElement, ok bool) { + + foundElem = nil + ok = false + + if t.IsEmpty() { + return + } + + index := t.findEntryIndex(key, 0) + var currentNode *SkipListElement + + currentNode = t.startLevels[index].Load() + + // In case, that our first element is already greater-or-equal! 
+ if findGreaterOrEqual && compareElement(currentNode, key) > 0 { + foundElem = currentNode + ok = true + return + } + + for { + if compareElement(currentNode, key) == 0 { + foundElem = currentNode + ok = true + return + } + + // Which direction are we continuing next time? + if currentNode.Next[index] != nil && bytes.Compare(currentNode.Next[index].Key, key) <= 0 { + // Go right + currentNode = currentNode.Next[index].Load() + } else { + if index > 0 { + + // Early exit + if currentNode.Next[0] != nil && bytes.Compare(currentNode.Next[0].Key, key) == 0 { + currentNodeNext := currentNode.Next[0].Load() + foundElem = currentNodeNext + ok = true + return + } + // Go down + index-- + } else { + // Element is not found and we reached the bottom. + if findGreaterOrEqual { + foundElem = currentNode.Next[index].Load() + ok = foundElem != nil + } + + return + } + } + } +} + +// Find tries to find an element in the skiplist based on the key from the given ListElement. +// elem can be used, if ok is true. +// Find runs in approx. O(log(n)) +func (t *SkipList) Find(key []byte) (elem *SkipListElement, ok bool) { + + if t == nil || key == nil { + return + } + + elem, ok = t.findExtended(key, false) + return +} + +// FindGreaterOrEqual finds the first element, that is greater or equal to the given ListElement e. +// The comparison is done on the keys (So on ExtractKey()). +// FindGreaterOrEqual runs in approx. O(log(n)) +func (t *SkipList) FindGreaterOrEqual(key []byte) (elem *SkipListElement, ok bool) { + + if t == nil || key == nil { + return + } + + elem, ok = t.findExtended(key, true) + return +} + +// Delete removes an element equal to e from the skiplist, if there is one. +// If there are multiple entries with the same value, Delete will remove one of them +// (Which one will change based on the actual skiplist layout) +// Delete runs in approx. 
O(log(n)) +func (t *SkipList) Delete(key []byte) { + + if t == nil || t.IsEmpty() || key == nil { + return + } + + index := t.findEntryIndex(key, t.maxLevel) + + var currentNode *SkipListElement + var nextNode *SkipListElement + + for { + + if currentNode == nil { + nextNode = t.startLevels[index].Load() + } else { + nextNode = currentNode.Next[index].Load() + } + + // Found and remove! + if nextNode != nil && compareElement(nextNode, key) == 0 { + + if currentNode != nil { + currentNode.Next[index] = nextNode.Next[index] + currentNode.Save() + } + + if index == 0 { + if nextNode.Next[index] != nil { + nextNextNode := nextNode.Next[index].Load() + nextNextNode.Prev = currentNode.Reference() + nextNextNode.Save() + } + t.elementCount-- + nextNode.DeleteSelf() + } + + // Link from start needs readjustments. + startNextKey := t.startLevels[index].Key + if compareElement(nextNode, startNextKey) == 0 { + t.startLevels[index] = nextNode.Next[index] + // This was our currently highest node! + if t.startLevels[index] == nil { + t.maxLevel = index - 1 + } + } + + // Link from end needs readjustments. + if nextNode.Next[index] == nil { + t.endLevels[index] = currentNode.Reference() + } + nextNode.Next[index] = nil + } + + if nextNode != nil && compareElement(nextNode, key) < 0 { + // Go right + currentNode = nextNode + } else { + // Go down + index-- + if index < 0 { + break + } + } + } + +} + +// Insert inserts the given ListElement into the skiplist. +// Insert runs in approx. O(log(n)) +func (t *SkipList) Insert(key []byte) { + + if t == nil || key == nil { + return + } + + level := t.generateLevel(t.maxNewLevel) + + // Only grow the height of the skiplist by one at a time! 
+ if level > t.maxLevel { + level = t.maxLevel + 1 + t.maxLevel = level + } + + elem := &SkipListElement{ + Id: rand.Int63(), + Next: make([]*SkipListElementReference, t.maxNewLevel, t.maxNewLevel), + Level: int32(level), + Values: [][]byte{key}, + } + + t.elementCount++ + + newFirst := true + newLast := true + if !t.IsEmpty() { + newFirst = compareElement(elem, t.startLevels[0].Key) < 0 + newLast = compareElement(elem, t.endLevels[0].Key) > 0 + } + + normallyInserted := false + if !newFirst && !newLast { + + normallyInserted = true + + index := t.findEntryIndex(key, level) + + var currentNode *SkipListElement + var nextNodeRef *SkipListElementReference + + for { + + if currentNode == nil { + nextNodeRef = t.startLevels[index] + } else { + nextNodeRef = currentNode.Next[index] + } + + var nextNode *SkipListElement + + // Connect node to next + if index <= level && (nextNodeRef == nil || bytes.Compare(nextNodeRef.Key, key) > 0) { + elem.Next[index] = nextNodeRef + if currentNode != nil { + currentNode.Next[index] = elem.Reference() + currentNode.Save() + } + if index == 0 { + elem.Prev = currentNode.Reference() + if nextNodeRef != nil { + nextNode = nextNodeRef.Load() + nextNode.Prev = elem.Reference() + nextNode.Save() + } + } + } + + if nextNodeRef != nil && bytes.Compare(nextNodeRef.Key, key) <= 0 { + // Go right + if nextNode == nil { + // reuse nextNode when index == 0 + nextNode = nextNodeRef.Load() + } + currentNode = nextNode + } else { + // Go down + index-- + if index < 0 { + break + } + } + } + } + + // Where we have a left-most position that needs to be referenced! 
+ for i := level; i >= 0; i-- { + + didSomething := false + + if newFirst || normallyInserted { + + if t.startLevels[i] == nil || bytes.Compare(t.startLevels[i].Key, key) > 0 { + if i == 0 && t.startLevels[i] != nil { + startLevelElement := t.startLevels[i].Load() + startLevelElement.Prev = elem.Reference() + startLevelElement.Save() + } + elem.Next[i] = t.startLevels[i] + t.startLevels[i] = elem.Reference() + } + + // link the endLevels to this element! + if elem.Next[i] == nil { + t.endLevels[i] = elem.Reference() + } + + didSomething = true + } + + if newLast { + // Places the element after the very last element on this level! + // This is very important, so we are not linking the very first element (newFirst AND newLast) to itself! + if !newFirst { + if t.endLevels[i] != nil { + endLevelElement := t.endLevels[i].Load() + endLevelElement.Next[i] = elem.Reference() + endLevelElement.Save() + } + if i == 0 { + elem.Prev = t.endLevels[i] + } + t.endLevels[i] = elem.Reference() + } + + // Link the startLevels to this element! + if t.startLevels[i] == nil || bytes.Compare(t.startLevels[i].Key, key) > 0 { + t.startLevels[i] = elem.Reference() + } + + didSomething = true + } + + if !didSomething { + break + } + } + + elem.Save() + +} + +// GetValue extracts the ListElement value from a skiplist node. +func (e *SkipListElement) GetValue() []byte { + return e.Values[0] +} + +// GetSmallestNode returns the very first/smallest node in the skiplist. +// GetSmallestNode runs in O(1) +func (t *SkipList) GetSmallestNode() *SkipListElement { + return t.startLevels[0].Load() +} + +// GetLargestNode returns the very last/largest node in the skiplist. +// GetLargestNode runs in O(1) +func (t *SkipList) GetLargestNode() *SkipListElement { + return t.endLevels[0].Load() +} + +// Next returns the next element based on the given node. +// Next will loop around to the first node, if you call it on the last! 
+func (t *SkipList) Next(e *SkipListElement) *SkipListElement { + if e.Next[0] == nil { + return t.startLevels[0].Load() + } + return e.Next[0].Load() +} + +// Prev returns the previous element based on the given node. +// Prev will loop around to the last node, if you call it on the first! +func (t *SkipList) Prev(e *SkipListElement) *SkipListElement { + if e.Prev == nil { + return t.endLevels[0].Load() + } + return e.Prev.Load() +} + +// GetNodeCount returns the number of nodes currently in the skiplist. +func (t *SkipList) GetNodeCount() int { + return t.elementCount +} + +// String returns a string format of the skiplist. Useful to get a graphical overview and/or debugging. +func (t *SkipList) println() { + + print("start --> ") + for i, l := range t.startLevels { + if l == nil { + break + } + if i > 0 { + print(" -> ") + } + next := "---" + if l != nil { + next = string(l.Key) + } + print(fmt.Sprintf("[%v]", next)) + } + println() + + nodeRef := t.startLevels[0] + for nodeRef != nil { + print(fmt.Sprintf("%v: ", string(nodeRef.Key))) + node := nodeRef.Load() + for i := 0; i <= int(node.Level); i++ { + + l := node.Next[i] + + next := "---" + if l != nil { + next = string(l.Key) + } + + if i == 0 { + prev := "---" + + if node.Prev != nil { + prev = string(node.Prev.Key) + } + print(fmt.Sprintf("[%v|%v]", prev, next)) + } else { + print(fmt.Sprintf("[%v]", next)) + } + if i < int(node.Level) { + print(" -> ") + } + + } + println() + nodeRef = node.Next[0] + } + + print("end --> ") + for i, l := range t.endLevels { + if l == nil { + break + } + if i > 0 { + print(" -> ") + } + next := "---" + if l != nil { + next = string(l.Key) + } + print(fmt.Sprintf("[%v]", next)) + } + println() +} diff --git a/weed/util/skiplist/skiplist.pb.go b/weed/util/skiplist/skiplist.pb.go new file mode 100644 index 000000000..63b6c74a3 --- /dev/null +++ b/weed/util/skiplist/skiplist.pb.go @@ -0,0 +1,386 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.25.0 +// protoc v3.12.3 +// source: skiplist.proto + +package skiplist + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +type SkipListProto struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + StartLevels []*SkipListElementReference `protobuf:"bytes,1,rep,name=start_levels,json=startLevels,proto3" json:"start_levels,omitempty"` + EndLevels []*SkipListElementReference `protobuf:"bytes,2,rep,name=end_levels,json=endLevels,proto3" json:"end_levels,omitempty"` + MaxNewLevel int32 `protobuf:"varint,3,opt,name=max_new_level,json=maxNewLevel,proto3" json:"max_new_level,omitempty"` + MaxLevel int32 `protobuf:"varint,4,opt,name=max_level,json=maxLevel,proto3" json:"max_level,omitempty"` + ElementCount int64 `protobuf:"varint,5,opt,name=element_count,json=elementCount,proto3" json:"element_count,omitempty"` + Eps float64 `protobuf:"fixed64,7,opt,name=eps,proto3" json:"eps,omitempty"` +} + +func (x *SkipListProto) Reset() { + *x = SkipListProto{} + if protoimpl.UnsafeEnabled { + mi := &file_skiplist_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SkipListProto) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SkipListProto) ProtoMessage() {} + +func (x *SkipListProto) ProtoReflect() protoreflect.Message { + 
mi := &file_skiplist_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SkipListProto.ProtoReflect.Descriptor instead. +func (*SkipListProto) Descriptor() ([]byte, []int) { + return file_skiplist_proto_rawDescGZIP(), []int{0} +} + +func (x *SkipListProto) GetStartLevels() []*SkipListElementReference { + if x != nil { + return x.StartLevels + } + return nil +} + +func (x *SkipListProto) GetEndLevels() []*SkipListElementReference { + if x != nil { + return x.EndLevels + } + return nil +} + +func (x *SkipListProto) GetMaxNewLevel() int32 { + if x != nil { + return x.MaxNewLevel + } + return 0 +} + +func (x *SkipListProto) GetMaxLevel() int32 { + if x != nil { + return x.MaxLevel + } + return 0 +} + +func (x *SkipListProto) GetElementCount() int64 { + if x != nil { + return x.ElementCount + } + return 0 +} + +func (x *SkipListProto) GetEps() float64 { + if x != nil { + return x.Eps + } + return 0 +} + +type SkipListElementReference struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ElementPointer int64 `protobuf:"varint,1,opt,name=element_pointer,json=elementPointer,proto3" json:"element_pointer,omitempty"` + Key []byte `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"` +} + +func (x *SkipListElementReference) Reset() { + *x = SkipListElementReference{} + if protoimpl.UnsafeEnabled { + mi := &file_skiplist_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SkipListElementReference) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SkipListElementReference) ProtoMessage() {} + +func (x *SkipListElementReference) ProtoReflect() protoreflect.Message { + mi := &file_skiplist_proto_msgTypes[1] + if 
protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SkipListElementReference.ProtoReflect.Descriptor instead. +func (*SkipListElementReference) Descriptor() ([]byte, []int) { + return file_skiplist_proto_rawDescGZIP(), []int{1} +} + +func (x *SkipListElementReference) GetElementPointer() int64 { + if x != nil { + return x.ElementPointer + } + return 0 +} + +func (x *SkipListElementReference) GetKey() []byte { + if x != nil { + return x.Key + } + return nil +} + +type SkipListElement struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Next []*SkipListElementReference `protobuf:"bytes,2,rep,name=next,proto3" json:"next,omitempty"` + Level int32 `protobuf:"varint,3,opt,name=level,proto3" json:"level,omitempty"` + Values [][]byte `protobuf:"bytes,4,rep,name=values,proto3" json:"values,omitempty"` + Prev *SkipListElementReference `protobuf:"bytes,5,opt,name=prev,proto3" json:"prev,omitempty"` +} + +func (x *SkipListElement) Reset() { + *x = SkipListElement{} + if protoimpl.UnsafeEnabled { + mi := &file_skiplist_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SkipListElement) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SkipListElement) ProtoMessage() {} + +func (x *SkipListElement) ProtoReflect() protoreflect.Message { + mi := &file_skiplist_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SkipListElement.ProtoReflect.Descriptor instead. 
+func (*SkipListElement) Descriptor() ([]byte, []int) { + return file_skiplist_proto_rawDescGZIP(), []int{2} +} + +func (x *SkipListElement) GetId() int64 { + if x != nil { + return x.Id + } + return 0 +} + +func (x *SkipListElement) GetNext() []*SkipListElementReference { + if x != nil { + return x.Next + } + return nil +} + +func (x *SkipListElement) GetLevel() int32 { + if x != nil { + return x.Level + } + return 0 +} + +func (x *SkipListElement) GetValues() [][]byte { + if x != nil { + return x.Values + } + return nil +} + +func (x *SkipListElement) GetPrev() *SkipListElementReference { + if x != nil { + return x.Prev + } + return nil +} + +var File_skiplist_proto protoreflect.FileDescriptor + +var file_skiplist_proto_rawDesc = []byte{ + 0x0a, 0x0e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x08, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x22, 0x91, 0x02, 0x0a, 0x0d, 0x53, + 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x45, 0x0a, 0x0c, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x53, 0x6b, + 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x66, + 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x52, 0x0b, 0x73, 0x74, 0x61, 0x72, 0x74, 0x4c, 0x65, 0x76, + 0x65, 0x6c, 0x73, 0x12, 0x41, 0x0a, 0x0a, 0x65, 0x6e, 0x64, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, + 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, + 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x52, 0x09, 0x65, 0x6e, 0x64, + 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x73, 0x12, 0x22, 0x0a, 0x0d, 0x6d, 0x61, 0x78, 0x5f, 0x6e, 0x65, + 0x77, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 
0x05, 0x52, 0x0b, 0x6d, + 0x61, 0x78, 0x4e, 0x65, 0x77, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x6d, 0x61, + 0x78, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x6d, + 0x61, 0x78, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6c, 0x65, 0x6d, 0x65, + 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, + 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x10, 0x0a, 0x03, + 0x65, 0x70, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x01, 0x52, 0x03, 0x65, 0x70, 0x73, 0x22, 0x55, + 0x0a, 0x18, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, + 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x65, 0x6c, + 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x0e, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x50, 0x6f, 0x69, 0x6e, + 0x74, 0x65, 0x72, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x22, 0xbf, 0x01, 0x0a, 0x0f, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, + 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x36, 0x0a, 0x04, 0x6e, 0x65, 0x78, + 0x74, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, + 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, + 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x52, 0x04, 0x6e, 0x65, 0x78, + 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, + 0x36, 0x0a, 
0x04, 0x70, 0x72, 0x65, 0x76, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, + 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, + 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, + 0x65, 0x52, 0x04, 0x70, 0x72, 0x65, 0x76, 0x42, 0x33, 0x5a, 0x31, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, + 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, + 0x74, 0x69, 0x6c, 0x2f, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_skiplist_proto_rawDescOnce sync.Once + file_skiplist_proto_rawDescData = file_skiplist_proto_rawDesc +) + +func file_skiplist_proto_rawDescGZIP() []byte { + file_skiplist_proto_rawDescOnce.Do(func() { + file_skiplist_proto_rawDescData = protoimpl.X.CompressGZIP(file_skiplist_proto_rawDescData) + }) + return file_skiplist_proto_rawDescData +} + +var file_skiplist_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_skiplist_proto_goTypes = []interface{}{ + (*SkipListProto)(nil), // 0: skiplist.SkipListProto + (*SkipListElementReference)(nil), // 1: skiplist.SkipListElementReference + (*SkipListElement)(nil), // 2: skiplist.SkipListElement +} +var file_skiplist_proto_depIdxs = []int32{ + 1, // 0: skiplist.SkipListProto.start_levels:type_name -> skiplist.SkipListElementReference + 1, // 1: skiplist.SkipListProto.end_levels:type_name -> skiplist.SkipListElementReference + 1, // 2: skiplist.SkipListElement.next:type_name -> skiplist.SkipListElementReference + 1, // 3: skiplist.SkipListElement.prev:type_name -> skiplist.SkipListElementReference + 4, // [4:4] is the sub-list for method output_type + 4, // [4:4] is the sub-list for method input_type + 4, // [4:4] is the sub-list for extension type_name + 4, // [4:4] is the sub-list for extension extendee + 0, // 
[0:4] is the sub-list for field type_name +} + +func init() { file_skiplist_proto_init() } +func file_skiplist_proto_init() { + if File_skiplist_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_skiplist_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*SkipListProto); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_skiplist_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*SkipListElementReference); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_skiplist_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*SkipListElement); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_skiplist_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_skiplist_proto_goTypes, + DependencyIndexes: file_skiplist_proto_depIdxs, + MessageInfos: file_skiplist_proto_msgTypes, + }.Build() + File_skiplist_proto = out.File + file_skiplist_proto_rawDesc = nil + file_skiplist_proto_goTypes = nil + file_skiplist_proto_depIdxs = nil +} diff --git a/weed/util/skiplist/skiplist.proto b/weed/util/skiplist/skiplist.proto new file mode 100644 index 000000000..ce84ed996 --- /dev/null +++ b/weed/util/skiplist/skiplist.proto @@ -0,0 +1,27 @@ +syntax = "proto3"; + +package skiplist; + +option go_package = "github.com/chrislusf/seaweedfs/weed/util/skiplist"; + +message SkipListProto { + repeated SkipListElementReference start_levels = 1; + repeated SkipListElementReference end_levels = 2; + int32 max_new_level = 3; 
+ int32 max_level = 4; + int64 element_count = 5; + double eps = 7; +} + +message SkipListElementReference { + int64 element_pointer = 1; + bytes key = 2; +} + +message SkipListElement { + int64 id = 1; + repeated SkipListElementReference next = 2; + int32 level = 3; + repeated bytes values = 4; + SkipListElementReference prev = 5; +} diff --git a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go new file mode 100644 index 000000000..ca41e382a --- /dev/null +++ b/weed/util/skiplist/skiplist_test.go @@ -0,0 +1,212 @@ +package skiplist + +import ( + "bytes" + "math/rand" + "strconv" + "testing" +) + +const ( + maxN = 10000 +) + +func TestInsertAndFind(t *testing.T) { + + k0 := []byte("0") + var list *SkipList + + var listPointer *SkipList + listPointer.Insert(k0) + if _, ok := listPointer.Find(k0); ok { + t.Fail() + } + + list = New() + if _, ok := list.Find(k0); ok { + t.Fail() + } + if !list.IsEmpty() { + t.Fail() + } + + // Test at the beginning of the list. + for i := 0; i < maxN; i++ { + key := []byte(strconv.Itoa(maxN-i)) + list.Insert(key) + } + for i := 0; i < maxN; i++ { + key := []byte(strconv.Itoa(maxN-i)) + if _, ok := list.Find(key); !ok { + t.Fail() + } + } + + + list = New() + // Test at the end of the list. + for i := 0; i < maxN; i++ { + key := []byte(strconv.Itoa(i)) + list.Insert(key) + } + for i := 0; i < maxN; i++ { + key := []byte(strconv.Itoa(i)) + if _, ok := list.Find(key); !ok { + t.Fail() + } + } + + list = New() + // Test at random positions in the list. 
+ rList := rand.Perm(maxN) + for _, e := range rList { + key := []byte(strconv.Itoa(e)) + println("insert", e) + list.Insert(key) + } + for _, e := range rList { + key := []byte(strconv.Itoa(e)) + println("find", e) + if _, ok := list.Find(key); !ok { + t.Fail() + } + } + println("print list") + list.println() + +} + +func Element(x int) []byte { + return []byte(strconv.Itoa(x)) +} + +func TestDelete(t *testing.T) { + + k0 := []byte("0") + + var list *SkipList + + // Delete on empty list + list.Delete(k0) + + list = New() + + list.Delete(k0) + if !list.IsEmpty() { + t.Fail() + } + + list.Insert(k0) + list.Delete(k0) + if !list.IsEmpty() { + t.Fail() + } + + // Delete elements at the beginning of the list. + for i := 0; i < maxN; i++ { + list.Insert(Element(i)) + } + for i := 0; i < maxN; i++ { + list.Delete(Element(i)) + } + if !list.IsEmpty() { + t.Fail() + } + + list = New() + // Delete elements at the end of the list. + for i := 0; i < maxN; i++ { + list.Insert(Element(i)) + } + for i := 0; i < maxN; i++ { + list.Delete(Element(maxN - i - 1)) + } + if !list.IsEmpty() { + t.Fail() + } + + list = New() + // Delete elements at random positions in the list. + rList := rand.Perm(maxN) + for _, e := range rList { + list.Insert(Element(e)) + } + for _, e := range rList { + list.Delete(Element(e)) + } + if !list.IsEmpty() { + t.Fail() + } +} + +func TestNext(t *testing.T) { + list := New() + + for i := 0; i < maxN; i++ { + list.Insert(Element(i)) + } + + smallest := list.GetSmallestNode() + largest := list.GetLargestNode() + + lastNode := smallest + node := lastNode + for node != largest { + node = list.Next(node) + // Must always be incrementing here! + if bytes.Compare(node.Values[0], lastNode.Values[0]) <= 0 { + t.Fail() + } + // Next.Prev must always point to itself! 
+ if list.Next(list.Prev(node)) != node { + t.Fail() + } + lastNode = node + } + + if list.Next(largest) != smallest { + t.Fail() + } +} + +func TestPrev(t *testing.T) { + list := New() + + for i := 0; i < maxN; i++ { + list.Insert(Element(i)) + } + + smallest := list.GetSmallestNode() + largest := list.GetLargestNode() + + lastNode := largest + node := lastNode + for node != smallest { + node = list.Prev(node) + // Must always be incrementing here! + if bytes.Compare(node.Values[0], lastNode.Values[0]) >= 0 { + t.Fail() + } + // Next.Prev must always point to itself! + if list.Prev(list.Next(node)) != node { + t.Fail() + } + lastNode = node + } + + if list.Prev(smallest) != largest { + t.Fail() + } +} + +func TestGetNodeCount(t *testing.T) { + list := New() + + for i := 0; i < maxN; i++ { + list.Insert(Element(i)) + } + + if list.GetNodeCount() != maxN { + t.Fail() + } +} From 57e2fd3f9bb3c094f897daaaed3851f5b5af0ed2 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 2 Oct 2021 14:03:54 -0700 Subject: [PATCH 17/30] remove bptree --- weed/util/bptree/Makefile | 6 - weed/util/bptree/README.md | 60 - weed/util/bptree/bpmap.go | 69 - weed/util/bptree/bptree.go | 158 -- weed/util/bptree/bptree.pb.go | 195 --- weed/util/bptree/bptree.proto | 14 - weed/util/bptree/bptree_node.go | 767 ---------- weed/util/bptree/bptree_store_test.go | 53 - weed/util/bptree/bptree_test.go | 1408 ------------------ weed/util/bptree/getter_setter.go | 72 - weed/util/bptree/int.go | 357 ----- weed/util/bptree/rand.go | 2 - weed/util/bptree/serde.go | 10 - weed/util/bptree/serde_test.go | 46 - weed/util/bptree/string.go | 71 - weed/util/bptree/tree_store/memory_store.go | 29 - weed/util/bptree/tree_store/tree_store.go.go | 6 - weed/util/bptree/types.go | 98 -- 18 files changed, 3421 deletions(-) delete mode 100644 weed/util/bptree/Makefile delete mode 100644 weed/util/bptree/README.md delete mode 100644 weed/util/bptree/bpmap.go delete mode 100644 weed/util/bptree/bptree.go delete mode 100644 
weed/util/bptree/bptree.pb.go delete mode 100644 weed/util/bptree/bptree.proto delete mode 100644 weed/util/bptree/bptree_node.go delete mode 100644 weed/util/bptree/bptree_store_test.go delete mode 100644 weed/util/bptree/bptree_test.go delete mode 100644 weed/util/bptree/getter_setter.go delete mode 100644 weed/util/bptree/int.go delete mode 100644 weed/util/bptree/rand.go delete mode 100644 weed/util/bptree/serde.go delete mode 100644 weed/util/bptree/serde_test.go delete mode 100644 weed/util/bptree/string.go delete mode 100644 weed/util/bptree/tree_store/memory_store.go delete mode 100644 weed/util/bptree/tree_store/tree_store.go.go delete mode 100644 weed/util/bptree/types.go diff --git a/weed/util/bptree/Makefile b/weed/util/bptree/Makefile deleted file mode 100644 index a98f39a08..000000000 --- a/weed/util/bptree/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -all: gen - -.PHONY : gen - -gen: - protoc bptree.proto --go_out=plugins=grpc:. --go_opt=paths=source_relative diff --git a/weed/util/bptree/README.md b/weed/util/bptree/README.md deleted file mode 100644 index 1dddae940..000000000 --- a/weed/util/bptree/README.md +++ /dev/null @@ -1,60 +0,0 @@ -This adapts one b+ tree implementation -https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree -to persist changes to on disk. - -# When a node needs to persist itself? - -* A node changed its key or value - * When an item is added. - * When an item is updated. - * When an item is deleted. - -* When a node is split. - * 2 new nodes are created (they shoud persist themselves). - * Parent node need to point to the new nodes. - -* When a node is merged. - * delete one node - * persist the merged node - - -In general, if one node is returned from a function, the node should have already been persisted. -The parent node may need to delete the old node. 
- -BpTree - Add(key ItemKey, value ItemValue) - new_root = self.getRoot().put(key,value) - a, b, err := self.insert(key, value) - self.internal_insert(key, value) - self.internal_split(q.keys[0], q) - persist(a,b) - self.persist() // child add q node - self.maybePersist(child == p) - self.leaf_insert(key, value) - self.persist() // if dedup - self.leaf_split(key, value) - self.pure_leaf_split(key, value) - persist(a,b) - a.persist() - persist(a,b) - self.put_kv(key, value) - new_root.persist() - self.setRoot(new_root) - oldroot.destroy() - // maybe persist BpTree new root - - Replace(key ItemKey, where WhereFunc, value ItemValue) - leaf.persist() - RemoveWhere(key ItemKey, where WhereFunc) - self.getRoot().remove(key, where) - self.internal_remove(key, nil, where) - child.leaf_remove(key, nil, where) - child.leaf_remove(key, sibling.keys[0], where) - l.destroy() // when the node is empty - a.maybePersist(hasChange) - self.destroy() // when no keys left - self.persist() // when some keys are left - self.leaf_remove(key, self.keys[len(self.keys)-1], where) - new_root.persist() // when new root is added - // maybe persist BpTree new root - \ No newline at end of file diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go deleted file mode 100644 index 0c13a132f..000000000 --- a/weed/util/bptree/bpmap.go +++ /dev/null @@ -1,69 +0,0 @@ -package bptree - -import ( - "fmt" -) - -/* A BpMap is a B+Tree with support for duplicate keys disabled. This makes it - * behave like a regular Map rather than a MultiMap. 
- */ -type BpMap BpTree - -func NewBpMap(node_size int, nodeStore NodeStore) *BpMap { - return &BpMap{ - root: NewLeaf(node_size, nodeStore), - } -} - -func (self *BpMap) Has(key ItemKey) bool { - return (*BpTree)(self).Has(key) -} - -func (self *BpMap) Put(key ItemKey, value ItemValue) (err error) { - new_root, err := self.getRoot().put(key, value) - if err != nil { - return err - } - self.setRoot(new_root) - return nil -} - -func (self *BpMap) Get(key ItemKey) (value ItemValue, err error) { - j, l := self.getRoot().get_start(key) - if l.keys[j].Equals(key) { - return l.values[j], nil - } - return nil, fmt.Errorf("key not found: %s", key) -} - -func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { - value, err = self.Get(key) - if err != nil { - return nil, err - } - ns := self.getRoot().Capacity() - new_root, err := self.getRoot().remove(key, func(value ItemValue) bool { return true }) - if err != nil { - return nil, err - } - if new_root == nil { - new_root = NewLeaf(ns, self.root.nodeStore) - err = new_root.persist() - self.setRoot(new_root) - } else { - self.setRoot(new_root) - } - return value, nil -} - -func (self *BpMap) Keys() (ki KIterator) { - return (*BpTree)(self).Keys() -} - -func (self *BpMap) Values() (vi Iterator) { - return (*BpTree)(self).Values() -} - -func (self *BpMap) Iterate() (kvi KVIterator) { - return (*BpTree)(self).Iterate() -} diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go deleted file mode 100644 index 141c595f3..000000000 --- a/weed/util/bptree/bptree.go +++ /dev/null @@ -1,158 +0,0 @@ -package bptree - -// started by copying from https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree - -/* A BpTree is a B+Tree with support for duplicate keys. This makes it behave as - * a MultiMap. Additionally you can use the Range operator to select k/v in a - * range. If from > to it will iterate backwards. 
- */ -type BpTree struct { - root *BpNode -} - -type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) - -func NewBpTree(node_size int, nodeStore NodeStore) *BpTree { - return &BpTree{ - root: NewLeaf(node_size, nodeStore), - } -} - -func (self *BpTree) Has(key ItemKey) bool { - if len(self.getRoot().keys) == 0 { - return false - } - j, l := self.getRoot().get_start(key) - return l.keys[j].Equals(key) -} - -func (self *BpTree) Count(key ItemKey) int { - if len(self.root.keys) == 0 { - return 0 - } - j, l := self.root.get_start(key) - count := 0 - end := false - for !end && l.keys[j].Equals(key) { - count++ - j, l, end = next_location(j, l) - } - return count -} - -func (self *BpTree) Add(key ItemKey, value ItemValue) (err error) { - new_root, err := self.getRoot().put(key, value) - if err != nil { - return err - } - self.setRoot(new_root) - return nil -} - -func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err error) { - li := self.getRoot().forward(key, key) - for i, leaf, next := li(); next != nil; i, leaf, next = next() { - if where(leaf.values[i]) { - leaf.values[i] = value - if persistErr := leaf.persist(); persistErr != nil && err == nil { - err = persistErr - break - } - } - } - return err -} - -func (self *BpTree) Find(key ItemKey) (kvi KVIterator) { - return self.Range(key, key) -} - -func (self *BpTree) Range(from, to ItemKey) (kvi KVIterator) { - var li loc_iterator - if !to.Less(from) { - li = self.getRoot().forward(from, to) - } else { - li = self.getRoot().backward(from, to) - } - kvi = func() (key ItemKey, value ItemValue, next KVIterator) { - var i int - var leaf *BpNode - i, leaf, li = li() - if li == nil { - return nil, nil, nil - } - return leaf.keys[i], leaf.values[i], kvi - } - return kvi -} - -func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) { - ns := self.getRoot().Capacity() - new_root, err := self.getRoot().remove(key, where) - if err != nil { - return err - } - if new_root == nil 
{ - new_root = NewLeaf(ns, self.root.nodeStore) - err = new_root.persist() - self.setRoot(new_root) - } else { - self.setRoot(new_root) - } - return err -} - -func (self *BpTree) Keys() (ki KIterator) { - li := self.getRoot().all() - var prev Equatable - ki = func() (key ItemKey, next KIterator) { - var i int - var leaf *BpNode - i, leaf, li = li() - if li == nil { - return nil, nil - } - if leaf.keys[i].Equals(prev) { - return ki() - } - prev = leaf.keys[i] - return leaf.keys[i], ki - } - return ki -} - -func (self *BpTree) Values() (vi Iterator) { - return MakeValuesIterator(self) -} - -func (self *BpTree) Items() (vi KIterator) { - return MakeItemsIterator(self) -} - -func (self *BpTree) Iterate() (kvi KVIterator) { - li := self.getRoot().all() - kvi = func() (key ItemKey, value ItemValue, next KVIterator) { - var i int - var leaf *BpNode - i, leaf, li = li() - if li == nil { - return nil, nil, nil - } - return leaf.keys[i], leaf.values[i], kvi - } - return kvi -} - -func (self *BpTree) Backward() (kvi KVIterator) { - li := self.getRoot().all_backward() - kvi = func() (key ItemKey, value ItemValue, next KVIterator) { - var i int - var leaf *BpNode - i, leaf, li = li() - if li == nil { - return nil, nil, nil - } - return leaf.keys[i], leaf.values[i], kvi - } - return kvi -} diff --git a/weed/util/bptree/bptree.pb.go b/weed/util/bptree/bptree.pb.go deleted file mode 100644 index 078a54717..000000000 --- a/weed/util/bptree/bptree.pb.go +++ /dev/null @@ -1,195 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.25.0 -// protoc v3.12.3 -// source: bptree.proto - -package bptree - -import ( - proto "github.com/golang/protobuf/proto" - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. 
- _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -// This is a compile-time assertion that a sufficiently up-to-date version -// of the legacy proto package is being used. -const _ = proto.ProtoPackageIsVersion4 - -type ProtoNode struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Keys [][]byte `protobuf:"bytes,1,rep,name=keys,proto3" json:"keys,omitempty"` - Values [][]byte `protobuf:"bytes,2,rep,name=values,proto3" json:"values,omitempty"` - Pointers []int64 `protobuf:"varint,3,rep,packed,name=pointers,proto3" json:"pointers,omitempty"` - Next int64 `protobuf:"varint,4,opt,name=next,proto3" json:"next,omitempty"` - Prev int64 `protobuf:"varint,5,opt,name=prev,proto3" json:"prev,omitempty"` - Id int64 `protobuf:"varint,6,opt,name=id,proto3" json:"id,omitempty"` -} - -func (x *ProtoNode) Reset() { - *x = ProtoNode{} - if protoimpl.UnsafeEnabled { - mi := &file_bptree_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *ProtoNode) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*ProtoNode) ProtoMessage() {} - -func (x *ProtoNode) ProtoReflect() protoreflect.Message { - mi := &file_bptree_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use ProtoNode.ProtoReflect.Descriptor instead. 
-func (*ProtoNode) Descriptor() ([]byte, []int) { - return file_bptree_proto_rawDescGZIP(), []int{0} -} - -func (x *ProtoNode) GetKeys() [][]byte { - if x != nil { - return x.Keys - } - return nil -} - -func (x *ProtoNode) GetValues() [][]byte { - if x != nil { - return x.Values - } - return nil -} - -func (x *ProtoNode) GetPointers() []int64 { - if x != nil { - return x.Pointers - } - return nil -} - -func (x *ProtoNode) GetNext() int64 { - if x != nil { - return x.Next - } - return 0 -} - -func (x *ProtoNode) GetPrev() int64 { - if x != nil { - return x.Prev - } - return 0 -} - -func (x *ProtoNode) GetId() int64 { - if x != nil { - return x.Id - } - return 0 -} - -var File_bptree_proto protoreflect.FileDescriptor - -var file_bptree_proto_rawDesc = []byte{ - 0x0a, 0x0c, 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x06, - 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x22, 0x8b, 0x01, 0x0a, 0x09, 0x50, 0x72, 0x6f, 0x74, 0x6f, - 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0c, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, - 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x03, 0x52, 0x08, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x12, 0x12, 0x0a, 0x04, - 0x6e, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x6e, 0x65, 0x78, 0x74, - 0x12, 0x12, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x76, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, - 0x70, 0x72, 0x65, 0x76, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x02, 0x69, 0x64, 0x42, 0x31, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, - 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, 0x73, 0x65, 0x61, - 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 
0x65, 0x64, 0x2f, 0x75, 0x74, 0x69, 0x6c, - 0x2f, 0x62, 0x70, 0x74, 0x72, 0x65, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, -} - -var ( - file_bptree_proto_rawDescOnce sync.Once - file_bptree_proto_rawDescData = file_bptree_proto_rawDesc -) - -func file_bptree_proto_rawDescGZIP() []byte { - file_bptree_proto_rawDescOnce.Do(func() { - file_bptree_proto_rawDescData = protoimpl.X.CompressGZIP(file_bptree_proto_rawDescData) - }) - return file_bptree_proto_rawDescData -} - -var file_bptree_proto_msgTypes = make([]protoimpl.MessageInfo, 1) -var file_bptree_proto_goTypes = []interface{}{ - (*ProtoNode)(nil), // 0: bptree.ProtoNode -} -var file_bptree_proto_depIdxs = []int32{ - 0, // [0:0] is the sub-list for method output_type - 0, // [0:0] is the sub-list for method input_type - 0, // [0:0] is the sub-list for extension type_name - 0, // [0:0] is the sub-list for extension extendee - 0, // [0:0] is the sub-list for field type_name -} - -func init() { file_bptree_proto_init() } -func file_bptree_proto_init() { - if File_bptree_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_bptree_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ProtoNode); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_bptree_proto_rawDesc, - NumEnums: 0, - NumMessages: 1, - NumExtensions: 0, - NumServices: 0, - }, - GoTypes: file_bptree_proto_goTypes, - DependencyIndexes: file_bptree_proto_depIdxs, - MessageInfos: file_bptree_proto_msgTypes, - }.Build() - File_bptree_proto = out.File - file_bptree_proto_rawDesc = nil - file_bptree_proto_goTypes = nil - file_bptree_proto_depIdxs = nil -} diff --git a/weed/util/bptree/bptree.proto b/weed/util/bptree/bptree.proto deleted file mode 100644 index 
1d55096a2..000000000 --- a/weed/util/bptree/bptree.proto +++ /dev/null @@ -1,14 +0,0 @@ -syntax = "proto3"; - -package bptree; - -option go_package = "github.com/chrislusf/seaweedfs/weed/util/bptree"; - -message ProtoNode { - repeated bytes keys = 1; - repeated bytes values = 2; - repeated int64 pointers = 3; - int64 next = 4; - int64 prev = 5; - int64 id = 6; -} diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go deleted file mode 100644 index 507d9d318..000000000 --- a/weed/util/bptree/bptree_node.go +++ /dev/null @@ -1,767 +0,0 @@ -package bptree - -type ItemKey Hashable -type ItemValue Equatable - -type NodeStore interface { - PersistFunc(node *BpNode) error - DestroyFunc(node *BpNode) error -} - -type BpNode struct { - keys []ItemKey - values []ItemValue - pointers []*BpNode - next *BpNode - prev *BpNode - protoNodeId int64 - protoNode *ProtoNode - nodeStore NodeStore -} - -func NewInternal(size int, nodeStore NodeStore) *BpNode { - if size < 0 { - panic(NegativeSize()) - } - return &BpNode{ - keys: make([]ItemKey, 0, size), - pointers: make([]*BpNode, 0, size), - protoNodeId: GetProtoNodeId(), - nodeStore: nodeStore, - } -} - -func NewLeaf(size int, nodeStore NodeStore) *BpNode { - if size < 0 { - panic(NegativeSize()) - } - return &BpNode{ - keys: make([]ItemKey, 0, size), - values: make([]ItemValue, 0, size), - protoNodeId: GetProtoNodeId(), - nodeStore: nodeStore, - } -} - -func (self *BpNode) Full() bool { - return len(self.keys) == cap(self.keys) -} - -func (self *BpNode) Pure() bool { - if len(self.keys) == 0 { - return true - } - k0 := self.keys[0] - for _, k := range self.keys { - if !k0.Equals(k) { - return false - } - } - return true -} - -func (self *BpNode) Internal() bool { - return cap(self.pointers) > 0 -} - -func (self *BpNode) Len() int { - return len(self.keys) -} - -func (self *BpNode) Capacity() int { - return cap(self.keys) -} - -func (self *BpNode) Height() int { - if !self.Internal() { - return 1 - } else if 
len(self.pointers) == 0 { - panic(BpTreeError("Internal node has no pointers but asked for height")) - } - return self.pointers[0].Height() + 1 -} - -func (self *BpNode) has(key ItemKey) bool { - _, has := self.find(key) - return has -} - -func (self *BpNode) left_most_leaf() *BpNode { - if self.Internal() { - return self.pointers[0].left_most_leaf() - } - return self -} - -func (self *BpNode) right_most_leaf() *BpNode { - if self.Internal() { - return self.pointers[len(self.pointers)-1].right_most_leaf() - } - return self -} - -/* returns the index and leaf-block of the first key greater than or equal to - * the search key. (unless the search key is greater than all the keys in the - * tree, in that case it will be the last key in the tree) - */ -func (self *BpNode) get_start(key ItemKey) (i int, leaf *BpNode) { - if self.Internal() { - return self.internal_get_start(key) - } else { - return self.leaf_get_start(key) - } -} - -func next_location(i int, leaf *BpNode) (int, *BpNode, bool) { - j := i + 1 - for j >= len(leaf.keys) && leaf.getNext() != nil { - j = 0 - leaf = leaf.getNext() - } - if j >= len(leaf.keys) { - return -1, nil, true - } - return j, leaf, false -} - -func prev_location(i int, leaf *BpNode) (int, *BpNode, bool) { - j := i - 1 - for j < 0 && leaf.getPrev() != nil { - leaf = leaf.getPrev() - j = len(leaf.keys) - 1 - } - if j < 0 { - return -1, nil, true - } - return j, leaf, false -} - -/* returns the index and leaf-block of the last key equal to the search key or - * the first key greater than the search key. 
(unless the search key is greater - * than all the keys in the tree, in that case it will be the last key in the - * tree) - */ -func (self *BpNode) get_end(key ItemKey) (i int, leaf *BpNode) { - end := false - i, leaf = self.get_start(key) - pi, pleaf := i, leaf - for !end && leaf.keys[i].Equals(key) { - pi, pleaf = i, leaf - i, leaf, end = next_location(i, leaf) - } - return pi, pleaf -} - -func (self *BpNode) internal_get_start(key ItemKey) (i int, leaf *BpNode) { - if !self.Internal() { - panic(BpTreeError("Expected a internal node")) - } - i, has := self.find(key) - if !has && i > 0 { - // if it doesn't have it and the index > 0 then we have the next block - // so we have to subtract one from the index. - i-- - } - child := self.pointers[i] - return child.get_start(key) -} - -func (self *BpNode) leaf_get_start(key ItemKey) (i int, leaf *BpNode) { - i, has := self.find(key) - if i >= len(self.keys) && i > 0 { - i = len(self.keys) - 1 - } - if !has && (len(self.keys) == 0 || self.keys[i].Less(key)) && self.getNext() != nil { - return self.getNext().leaf_get_start(key) - } - return i, self -} - -/* This puts the k/v pair into the B+Tree rooted at this node and returns the - * (possibly) new root of the tree. - */ -func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error) { - a, b, err := self.insert(key, value) - if err != nil { - return nil, err - } else if b == nil { - return a, nil - } - // else we have root split - root = NewInternal(self.Capacity(), self.nodeStore) - root.put_kp(a.keys[0], a) - root.put_kp(b.keys[0], b) - return root, root.persist() -} - -// right is only set on split -// left is always set. 
When split is false left is the pointer to block -// When split is true left is the pointer to the new left -// block -func (self *BpNode) insert(key ItemKey, value ItemValue) (a, b *BpNode, err error) { - if self.Internal() { - return self.internal_insert(key, value) - } else { // leaf node - return self.leaf_insert(key, value) - } -} - -/* - first find the child to insert into - * - do the child insert - * - if there was a split: - * - if the block is full, split this block - * - else insert the new key/pointer into this block - */ -func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode, err error) { - if !self.Internal() { - return nil, nil, BpTreeError("Expected a internal node") - } - i, has := self.find(key) - if !has && i > 0 { - // if it doesn't have it and the index > 0 then we have the next block - // so we have to subtract one from the index. - i-- - } - child := self.pointers[i] - p, q, err := child.insert(key, value) - if err != nil { - return nil, nil, err - } - self.keys[i] = p.keys[0] - self.pointers[i] = p - if q != nil { - // we had a split - if self.Full() { - return self.internal_split(q.keys[0], q) - } else { - if err := self.put_kp(q.keys[0], q); err != nil { - return nil, nil, err - } - return self, nil, self.persist() - } - } - return self, nil, self.maybePersist(child != p) -} - -/* On split - * - first assert that the key to be inserted is not already in the block. - * - Make a new block - * - balance the two blocks. 
- * - insert the new key/pointer combo into the correct block - */ -func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err error) { - if !self.Internal() { - return nil, nil, BpTreeError("Expected a internal node") - } - if self.has(key) { - return nil, nil, BpTreeError("Tried to split an internal block on duplicate key") - } - a = self - b = NewInternal(self.Capacity(), self.nodeStore) - balance_nodes(a, b, key) - if b.Len() > 0 && key.Less(b.keys[0]) { - if err := a.put_kp(key, ptr); err != nil { - return nil, nil, err - } - } else { - if err := b.put_kp(key, ptr); err != nil { - return nil, nil, err - } - } - return a, b, persist(a, b) -} - -/* if the leaf is full then it will defer to a leaf_split - * (but in one case that will not actually split in the case of a insert into - * a pure block with a matching key) - * else this leaf will get a new entry. - */ -func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err error) { - if self.Internal() { - return nil, nil, BpTreeError("Expected a leaf node") - } - if true { // no_dup = true - i, has := self.find(key) - if has { - self.values[i] = value - return self, nil, self.persist() - } - } - if self.Full() { - return self.leaf_split(key, value) - } else { - if err := self.put_kv(key, value); err != nil { - return nil, nil, err - } - return self, nil, self.persist() - } -} - -/* on leaf split if the block is pure then it will defer to pure_leaf_split - * else - * - a new block will be made and inserted after this one - * - the two blocks will be balanced with balanced_nodes - * - if the key is less than b.keys[0] it will go in a else b - */ -func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err error) { - if self.Internal() { - return nil, nil, BpTreeError("Expected a leaf node") - } - if self.Pure() { - return self.pure_leaf_split(key, value) - } - a = self - b = NewLeaf(self.Capacity(), self.nodeStore) - insert_linked_list_node(b, a, 
a.getNext()) - balance_nodes(a, b, key) - if b.Len() > 0 && key.Less(b.keys[0]) { - if err := a.put_kv(key, value); err != nil { - return nil, nil, err - } - } else { - if err := b.put_kv(key, value); err != nil { - return nil, nil, err - } - } - return a, b, persist(a, b) -} - -/* a pure leaf split has two cases: - * 1) the inserted key is less than the current pure block. - * - a new block should be created before the current block - * - the key should be put in it - * 2) the inserted key is greater than or equal to the pure block. - * - the end of run of pure blocks should be found - * - if the key is equal to pure block and the last block is not full insert - * the new kv - * - else split by making a new block after the last block in the run - * and putting the new key there. - * - always return the current block as "a" and the new block as "b" - */ -func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err error) { - if self.Internal() || !self.Pure() { - return nil, nil, BpTreeError("Expected a pure leaf node") - } - if key.Less(self.keys[0]) { - a = NewLeaf(self.Capacity(), self.nodeStore) - b = self - if err := a.put_kv(key, value); err != nil { - return nil, nil, err - } - insert_linked_list_node(a, b.getPrev(), b) - return a, b, persist(a, b) - } else { - a = self - e := self.find_end_of_pure_run() - if e.keys[0].Equals(key) && !e.Full() { - if err := e.put_kv(key, value); err != nil { - return nil, nil, err - } - return a, nil, a.persist() - } else { - b = NewLeaf(self.Capacity(), self.nodeStore) - if err := b.put_kv(key, value); err != nil { - return nil, nil, err - } - insert_linked_list_node(b, e, e.getNext()) - if e.keys[0].Equals(key) { - return a, nil, nil - } - return a, b, persist(a, b) - } - } -} - -func (self *BpNode) put_kp(key ItemKey, ptr *BpNode) error { - if self.Full() { - return BpTreeError("Block is full.") - } - if !self.Internal() { - return BpTreeError("Expected a internal node") - } - i, has := 
self.find(key) - if has { - return BpTreeError("Tried to insert a duplicate key into an internal node") - } else if i < 0 { - panic(BpTreeError("find returned a negative int")) - } else if i >= cap(self.keys) { - panic(BpTreeError("find returned a int > than cap(keys)")) - } - if err := self.put_key_at(i, key); err != nil { - return err - } - if err := self.put_pointer_at(i, ptr); err != nil { - return err - } - return nil -} - -func (self *BpNode) put_kv(key ItemKey, value ItemValue) error { - if self.Full() { - return BpTreeError("Block is full.") - } - if self.Internal() { - return BpTreeError("Expected a leaf node") - } - i, _ := self.find(key) - if i < 0 { - panic(BpTreeError("find returned a negative int")) - } else if i >= cap(self.keys) { - panic(BpTreeError("find returned a int > than cap(keys)")) - } - if err := self.put_key_at(i, key); err != nil { - return err - } - if err := self.put_value_at(i, value); err != nil { - return err - } - return nil -} - -func (self *BpNode) put_key_at(i int, key ItemKey) error { - if self.Full() { - return BpTreeError("Block is full.") - } - self.keys = self.keys[:len(self.keys)+1] - for j := len(self.keys) - 1; j > i; j-- { - self.keys[j] = self.keys[j-1] - } - self.keys[i] = key - return nil -} - -func (self *BpNode) put_value_at(i int, value ItemValue) error { - if len(self.values) == cap(self.values) { - return BpTreeError("Block is full.") - } - if self.Internal() { - return BpTreeError("Expected a leaf node") - } - self.values = self.values[:len(self.values)+1] - for j := len(self.values) - 1; j > i; j-- { - self.values[j] = self.values[j-1] - } - self.values[i] = value - return nil -} - -func (self *BpNode) put_pointer_at(i int, pointer *BpNode) error { - if len(self.pointers) == cap(self.pointers) { - return BpTreeError("Block is full.") - } - if !self.Internal() { - return BpTreeError("Expected a internal node") - } - self.pointers = self.pointers[:len(self.pointers)+1] - for j := len(self.pointers) - 1; j > i; 
j-- { - self.pointers[j] = self.pointers[j-1] - } - self.pointers[i] = pointer - return nil -} - -func (self *BpNode) remove(key ItemKey, where WhereFunc) (a *BpNode, err error) { - if self.Internal() { - return self.internal_remove(key, nil, where) - } else { - return self.leaf_remove(key, self.keys[len(self.keys)-1], where) - } -} - -func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFunc) (a *BpNode, err error) { - if !self.Internal() { - panic(BpTreeError("Expected a internal node")) - } - i, has := self.find(key) - if !has && i > 0 { - // if it doesn't have it and the index > 0 then we have the next block - // so we have to subtract one from the index. - i-- - } - if i+1 < len(self.keys) { - sibling = self.pointers[i+1] - } else if sibling != nil { - sibling = sibling.left_most_leaf() - } - child := self.pointers[i] - oldChild := child - if child.Internal() { - child, err = child.internal_remove(key, sibling, where) - } else { - if sibling == nil { - child, err = child.leaf_remove(key, nil, where) - } else { - child, err = child.leaf_remove(key, sibling.keys[0], where) - } - } - if err != nil { - return nil, err - } - if child == nil { - if err := self.remove_key_at(i); err != nil { - return nil, err - } - if err := self.remove_ptr_at(i); err != nil { - return nil, err - } - } else { - self.keys[i] = child.keys[0] - self.pointers[i] = child - } - if len(self.keys) == 0 { - return nil, self.destroy() - } - return self, self.maybePersist(oldChild != child) -} - -func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) { - if self.Internal() { - return nil, BpTreeError("Expected a leaf node") - } - a = self - hasChange := false - for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() { - if where(l.values[j]) { - hasChange = true - if err := l.remove_key_at(j); err != nil { - return nil, err - } - if err := l.remove_value_at(j); err != nil { - return nil, err - } - } - if 
len(l.keys) == 0 { - remove_linked_list_node(l) - if l.getNext() == nil { - a = nil - } else if stop == nil { - a = nil - } else if !l.getNext().keys[0].Equals(stop) { - a = l.getNext() - } else { - a = nil - } - if err := l.destroy(); err != nil { - return nil, err - } - } - } - if a != nil { - return a, a.maybePersist(hasChange) - } - return a, nil -} - -func (self *BpNode) remove_key_at(i int) error { - if i >= len(self.keys) || i < 0 { - return BpTreeError("i, %v, is out of bounds, %v, %v %v.", i, len(self.keys), len(self.values), self) - } - for j := i; j < len(self.keys)-1; j++ { - self.keys[j] = self.keys[j+1] - } - self.keys = self.keys[:len(self.keys)-1] - return nil -} - -func (self *BpNode) remove_value_at(i int) error { - if i >= len(self.values) || i < 0 { - return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.values)) - } - for j := i; j < len(self.values)-1; j++ { - self.values[j] = self.values[j+1] - } - self.values = self.values[:len(self.values)-1] - return nil -} - -func (self *BpNode) remove_ptr_at(i int) error { - if i >= len(self.pointers) || i < 0 { - return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.pointers)) - } - for j := i; j < len(self.pointers)-1; j++ { - self.pointers[j] = self.pointers[j+1] - } - self.pointers = self.pointers[:len(self.pointers)-1] - return nil -} - -func (self *BpNode) find(key ItemKey) (int, bool) { - var l = 0 - var r = len(self.keys) - 1 - var m int - for l <= r { - m = ((r - l) >> 1) + l - if key.Less(self.keys[m]) { - r = m - 1 - } else if key.Equals(self.keys[m]) { - return m, true - } else { - l = m + 1 - } - } - return l, false -} - -func (self *BpNode) find_end_of_pure_run() *BpNode { - k := self.keys[0] - p := self - n := self.getNext() - for n != nil && n.Pure() && k.Equals(n.keys[0]) { - p = n - n = n.getNext() - } - return p -} - -func (self *BpNode) all() (li loc_iterator) { - j := -1 - l := self.left_most_leaf() - end := false - j, l, end = next_location(j, l) - li = func() (i 
int, leaf *BpNode, next loc_iterator) { - if end { - return -1, nil, nil - } - i = j - leaf = l - j, l, end = next_location(j, l) - return i, leaf, li - } - return li -} - -func (self *BpNode) all_backward() (li loc_iterator) { - l := self.right_most_leaf() - j := len(l.keys) - end := false - j, l, end = prev_location(j, l) - li = func() (i int, leaf *BpNode, next loc_iterator) { - if end { - return -1, nil, nil - } - i = j - leaf = l - j, l, end = prev_location(j, l) - return i, leaf, li - } - return li -} - -func (self *BpNode) forward(from, to ItemKey) (li loc_iterator) { - j, l := self.get_start(from) - end := false - j-- - li = func() (i int, leaf *BpNode, next loc_iterator) { - j, l, end = next_location(j, l) - if end || to.Less(l.keys[j]) { - return -1, nil, nil - } - return j, l, li - } - return li -} - -func (self *BpNode) backward(from, to ItemKey) (li loc_iterator) { - j, l := self.get_end(from) - end := false - li = func() (i int, leaf *BpNode, next loc_iterator) { - if end || l.keys[j].Less(to) { - return -1, nil, nil - } - i = j - leaf = l - j, l, end = prev_location(i, l) - return i, leaf, li - } - return li -} - -func insert_linked_list_node(n, prev, next *BpNode) { - if (prev != nil && prev.getNext() != next) || (next != nil && next.getPrev() != prev) { - panic(BpTreeError("prev and next not hooked up")) - } - n.setPrev(prev) - n.setNext(next) - if prev != nil { - prev.setNext(n) - } - if next != nil { - next.setPrev(n) - } -} - -func remove_linked_list_node(n *BpNode) { - if n.getPrev() != nil { - n.getPrev().setNext(n.getNext()) - } - if n.getNext() != nil { - n.getNext().setPrev(n.getPrev()) - } -} - -/** - * a must be full and b must be empty else there will be a panic - * - * Different from common btree implementation, this splits the nodes by the inserted key. - * Items less than the splitKey stays in a, or moved to b if otherwise. - * This should help for monotonically increasing inserts. 
- * - */ -func balance_nodes(a, b *BpNode, splitKey ItemKey) { - if len(b.keys) != 0 { - panic(BpTreeError("b was not empty")) - } - if !a.Full() { - panic(BpTreeError("a was not full", a)) - } - if cap(a.keys) != cap(b.keys) { - panic(BpTreeError("cap(a.keys) != cap(b.keys)")) - } - if cap(a.values) != cap(b.values) { - panic(BpTreeError("cap(a.values) != cap(b.values)")) - } - if cap(a.pointers) != cap(b.pointers) { - panic(BpTreeError("cap(a.pointers) != cap(b.pointers)")) - } - - m := find_split_index(a, b, splitKey) - var lim = len(a.keys) - m - b.keys = b.keys[:lim] - if cap(a.values) > 0 { - if cap(a.values) != cap(a.keys) { - panic(BpTreeError("cap(a.values) != cap(a.keys)")) - } - b.values = b.values[:lim] - } - if cap(a.pointers) > 0 { - if cap(a.pointers) != cap(a.keys) { - panic(BpTreeError("cap(a.pointers) != cap(a.keys)")) - } - b.pointers = b.pointers[:lim] - } - for i := 0; i < lim; i++ { - j := m + i - b.keys[i] = a.keys[j] - if cap(a.values) > 0 { - b.values[i] = a.values[j] - } - if cap(a.pointers) > 0 { - b.pointers[i] = a.pointers[j] - } - } - a.keys = a.keys[:m] - if cap(a.values) > 0 { - a.values = a.values[:m] - } - if cap(a.pointers) > 0 { - a.pointers = a.pointers[:m] - } -} - -func find_split_index(a, b *BpNode, splitKey ItemKey) int { - m := len(a.keys) - for m > 0 && !a.keys[m-1].Less(splitKey) { - m-- - } - return m -} diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go deleted file mode 100644 index 2e034171c..000000000 --- a/weed/util/bptree/bptree_store_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package bptree - -import ( - "fmt" - "testing" -) - -type nodeStorePrintlnImpl struct { -} - -func (n *nodeStorePrintlnImpl) PersistFunc(node *BpNode) error { - println("saving node", node.protoNodeId) - return nil -} -func (n *nodeStorePrintlnImpl) DestroyFunc(node *BpNode) error { - println("delete node", node.protoNodeId) - return nil -} - -func TestAddRemove(t *testing.T) { - - tree := NewBpTree(3, 
&nodeStorePrintlnImpl{}) - for i:=0;i<9;i++{ - println("++++++++++", i) - tree.Add(String(fmt.Sprintf("%02d", i)), nil) - printTree(tree.root, "") - } - - if !tree.Has(String("08")) { - t.Errorf("lookup error") - } - for i:=5;i<9;i++{ - println("----------", i) - tree.RemoveWhere(String(fmt.Sprintf("%02d", i)), func(value ItemValue) bool { - return true - }) - printTree(tree.root, "") - } - if tree.Has(String("08")) { - t.Errorf("remove error") - } -} - -func printTree(node *BpNode, prefix string) { - fmt.Printf("%sNode %d\n", prefix, node.protoNodeId) - prefix += " " - for i:=0;i>= 8 - pos-- - } - *readPos = pos - *readVal = val - return -} - -// copied from https://sourcegraph.com/github.com/timtadh/data-structures@master/-/blob/test/support.go - -type T testing.T - -func (t *T) Assert(ok bool, msg string, vars ...ItemValue) { - if !ok { - t.Log("\n" + string(debug.Stack())) - var objects []interface{} - for _, t := range vars { - objects = append(objects, t) - } - t.Fatalf(msg, objects...) 
- } -} - -func (t *T) AssertNil(errors ...error) { - any := false - for _, err := range errors { - if err != nil { - any = true - t.Log("\n" + string(debug.Stack())) - t.Error(err) - } - } - if any { - t.Fatal("assert failed") - } -} - -func RandSlice(length int) []byte { - slice := make([]byte, length) - if _, err := crand.Read(slice); err != nil { - panic(err) - } - return slice -} - -func RandHex(length int) string { - return hex.EncodeToString(RandSlice(length / 2)) -} - -func RandStr(length int) string { - return string(RandSlice(length)) -} \ No newline at end of file diff --git a/weed/util/bptree/getter_setter.go b/weed/util/bptree/getter_setter.go deleted file mode 100644 index dcaa7a0b6..000000000 --- a/weed/util/bptree/getter_setter.go +++ /dev/null @@ -1,72 +0,0 @@ -package bptree - -var ( - protoNodeId = int64(0) -) -func GetProtoNodeId() int64 { - protoNodeId++ - return protoNodeId -} - -func (self *BpMap) getRoot() *BpNode { - return self.root -} -func (self *BpMap) setRoot(root *BpNode) { - self.root = root -} - -func (self *BpTree) getRoot() *BpNode { - return self.root -} -func (self *BpTree) setRoot(root *BpNode) { - self.root = root -} - -func (self *BpNode) getNext() *BpNode { - return self.next -} -func (self *BpNode) setNext(next *BpNode) { - self.next = next -} -func (self *BpNode) getPrev() *BpNode { - return self.prev -} -func (self *BpNode) setPrev(prev *BpNode) { - self.prev = prev -} -func (self *BpNode) getNode(x int)(*BpNode) { - return self.pointers[x] -} - -func (self *BpNode) maybePersist(shouldPersist bool) error { - if !shouldPersist { - return nil - } - return self.persist() -} -func (self *BpNode) persist() error { - if self.nodeStore != nil { - return self.nodeStore.PersistFunc(self) - } - return nil -} -func (self *BpNode) destroy() error { - if self.nodeStore != nil { - return self.nodeStore.DestroyFunc(self) - } - return nil -} - -func persist(a, b *BpNode) error { - if a != nil { - if err := a.persist(); err != nil { - 
return err - } - } - if b != nil { - if err := b.persist(); err != nil { - return err - } - } - return nil -} \ No newline at end of file diff --git a/weed/util/bptree/int.go b/weed/util/bptree/int.go deleted file mode 100644 index e8fd9511c..000000000 --- a/weed/util/bptree/int.go +++ /dev/null @@ -1,357 +0,0 @@ -package bptree - -import ( - "encoding/binary" - "fmt" -) - -type Int8 int8 -type UInt8 uint8 -type Int16 int16 -type UInt16 uint16 -type Int32 int32 -type UInt32 uint32 -type Int64 int64 -type UInt64 uint64 -type Int int -type UInt uint - -func (self *Int8) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 0) - bytes[0] = uint8(*self) - return bytes, nil -} - -func (self *Int8) UnmarshalBinary(data []byte) error { - if len(data) != 1 { - return fmt.Errorf("data wrong size") - } - *self = Int8(data[0]) - return nil -} - -func (self Int8) Equals(other Equatable) bool { - if o, ok := other.(Int8); ok { - return self == o - } else { - return false - } -} - -func (self Int8) Less(other Sortable) bool { - if o, ok := other.(Int8); ok { - return self < o - } else { - return false - } -} - -func (self Int8) Hash() int { - return int(self) -} - -func (self *UInt8) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 0) - bytes[0] = uint8(*self) - return bytes, nil -} - -func (self *UInt8) UnmarshalBinary(data []byte) error { - if len(data) != 1 { - return fmt.Errorf("data wrong size") - } - *self = UInt8(data[0]) - return nil -} - -func (self UInt8) Equals(other Equatable) bool { - if o, ok := other.(UInt8); ok { - return self == o - } else { - return false - } -} - -func (self UInt8) Less(other Sortable) bool { - if o, ok := other.(UInt8); ok { - return self < o - } else { - return false - } -} - -func (self UInt8) Hash() int { - return int(self) -} - -func (self *Int16) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 2) - binary.BigEndian.PutUint16(bytes, uint16(*self)) - return bytes, nil -} - -func (self *Int16) UnmarshalBinary(data 
[]byte) error { - if len(data) != 2 { - return fmt.Errorf("data wrong size") - } - *self = Int16(binary.BigEndian.Uint16(data)) - return nil -} - -func (self Int16) Equals(other Equatable) bool { - if o, ok := other.(Int16); ok { - return self == o - } else { - return false - } -} - -func (self Int16) Less(other Sortable) bool { - if o, ok := other.(Int16); ok { - return self < o - } else { - return false - } -} - -func (self Int16) Hash() int { - return int(self) -} - -func (self *UInt16) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 2) - binary.BigEndian.PutUint16(bytes, uint16(*self)) - return bytes, nil -} - -func (self *UInt16) UnmarshalBinary(data []byte) error { - if len(data) != 2 { - return fmt.Errorf("data wrong size") - } - *self = UInt16(binary.BigEndian.Uint16(data)) - return nil -} - -func (self UInt16) Equals(other Equatable) bool { - if o, ok := other.(UInt16); ok { - return self == o - } else { - return false - } -} - -func (self UInt16) Less(other Sortable) bool { - if o, ok := other.(UInt16); ok { - return self < o - } else { - return false - } -} - -func (self UInt16) Hash() int { - return int(self) -} - -func (self *Int32) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 4) - binary.BigEndian.PutUint32(bytes, uint32(*self)) - return bytes, nil -} - -func (self *Int32) UnmarshalBinary(data []byte) error { - if len(data) != 4 { - return fmt.Errorf("data wrong size") - } - *self = Int32(binary.BigEndian.Uint32(data)) - return nil -} - -func (self Int32) Equals(other Equatable) bool { - if o, ok := other.(Int32); ok { - return self == o - } else { - return false - } -} - -func (self Int32) Less(other Sortable) bool { - if o, ok := other.(Int32); ok { - return self < o - } else { - return false - } -} - -func (self *UInt32) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 4) - binary.BigEndian.PutUint32(bytes, uint32(*self)) - return bytes, nil -} - -func (self *UInt32) UnmarshalBinary(data []byte) error { - if 
len(data) != 4 { - return fmt.Errorf("data wrong size") - } - *self = UInt32(binary.BigEndian.Uint32(data)) - return nil -} - -func (self Int32) Hash() int { - return int(self) -} - -func (self UInt32) Equals(other Equatable) bool { - if o, ok := other.(UInt32); ok { - return self == o - } else { - return false - } -} - -func (self UInt32) Less(other Sortable) bool { - if o, ok := other.(UInt32); ok { - return self < o - } else { - return false - } -} - -func (self UInt32) Hash() int { - return int(self) -} - -func (self *Int64) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 8) - binary.BigEndian.PutUint64(bytes, uint64(*self)) - return bytes, nil -} - -func (self *Int64) UnmarshalBinary(data []byte) error { - if len(data) != 8 { - return fmt.Errorf("data wrong size") - } - *self = Int64(binary.BigEndian.Uint64(data)) - return nil -} - -func (self Int64) Equals(other Equatable) bool { - if o, ok := other.(Int64); ok { - return self == o - } else { - return false - } -} - -func (self Int64) Less(other Sortable) bool { - if o, ok := other.(Int64); ok { - return self < o - } else { - return false - } -} - -func (self Int64) Hash() int { - return int(self>>32) ^ int(self) -} - -func (self *UInt64) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 8) - binary.BigEndian.PutUint64(bytes, uint64(*self)) - return bytes, nil -} - -func (self *UInt64) UnmarshalBinary(data []byte) error { - if len(data) != 8 { - return fmt.Errorf("data wrong size") - } - *self = UInt64(binary.BigEndian.Uint64(data)) - return nil -} - -func (self UInt64) Equals(other Equatable) bool { - if o, ok := other.(UInt64); ok { - return self == o - } else { - return false - } -} - -func (self UInt64) Less(other Sortable) bool { - if o, ok := other.(UInt64); ok { - return self < o - } else { - return false - } -} - -func (self UInt64) Hash() int { - return int(self>>32) ^ int(self) -} - -func (self *Int) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 4) - 
binary.BigEndian.PutUint32(bytes, uint32(*self)) - return bytes, nil -} - -func (self *Int) UnmarshalBinary(data []byte) error { - if len(data) != 4 { - return fmt.Errorf("data wrong size") - } - *self = Int(binary.BigEndian.Uint32(data)) - return nil -} - -func (self Int) Equals(other Equatable) bool { - if o, ok := other.(Int); ok { - return self == o - } else { - return false - } -} - -func (self Int) Less(other Sortable) bool { - if o, ok := other.(Int); ok { - return self < o - } else { - return false - } -} - -func (self Int) Hash() int { - return int(self) -} - -func (self *UInt) MarshalBinary() ([]byte, error) { - bytes := make([]byte, 4) - binary.BigEndian.PutUint32(bytes, uint32(*self)) - return bytes, nil -} - -func (self *UInt) UnmarshalBinary(data []byte) error { - if len(data) != 4 { - return fmt.Errorf("data wrong size") - } - *self = UInt(binary.BigEndian.Uint32(data)) - return nil -} - -func (self UInt) Equals(other Equatable) bool { - if o, ok := other.(UInt); ok { - return self == o - } else { - return false - } -} - -func (self UInt) Less(other Sortable) bool { - if o, ok := other.(UInt); ok { - return self < o - } else { - return false - } -} - -func (self UInt) Hash() int { - return int(self) -} diff --git a/weed/util/bptree/rand.go b/weed/util/bptree/rand.go deleted file mode 100644 index 08b2e50ab..000000000 --- a/weed/util/bptree/rand.go +++ /dev/null @@ -1,2 +0,0 @@ -package bptree - diff --git a/weed/util/bptree/serde.go b/weed/util/bptree/serde.go deleted file mode 100644 index 2a98a774a..000000000 --- a/weed/util/bptree/serde.go +++ /dev/null @@ -1,10 +0,0 @@ -package bptree - -func (protoNode *ProtoNode) ToBpTree() *BpTree { - node := protoNode.ToBpNode() - return &BpTree{root: node} -} - -func (protoNode *ProtoNode) ToBpNode() *BpNode { - return nil -} \ No newline at end of file diff --git a/weed/util/bptree/serde_test.go b/weed/util/bptree/serde_test.go deleted file mode 100644 index 27ccccb78..000000000 --- 
a/weed/util/bptree/serde_test.go +++ /dev/null @@ -1,46 +0,0 @@ -package bptree - -import ( - "fmt" - "testing" -) - -type nodeStoreMapImpl struct { - m map[int64]*ProtoNode -} - -func (n *nodeStoreMapImpl) PersistFunc(node *BpNode) error { - println("saving node", node.protoNodeId) - n.m[node.protoNodeId] = node.protoNode - return nil -} -func (n *nodeStoreMapImpl) DestroyFunc(node *BpNode) error { - println("delete node", node.protoNodeId) - delete(n.m, node.protoNodeId) - return nil -} - -func TestSerDe(t *testing.T) { - - nodeStore := &nodeStoreMapImpl{ - m: make(map[int64]*ProtoNode), - } - - tree := NewBpTree(3, nodeStore) - - for i:=0;i<32;i++{ - println("add", i) - tree.Add(String(fmt.Sprintf("%02d", i)), nil) - } - - for i:=5;i<9;i++{ - println("----------", i) - tree.RemoveWhere(String(fmt.Sprintf("%02d", i)), func(value ItemValue) bool { - return true - }) - printTree(tree.root, "") - } - - - -} \ No newline at end of file diff --git a/weed/util/bptree/string.go b/weed/util/bptree/string.go deleted file mode 100644 index 262220878..000000000 --- a/weed/util/bptree/string.go +++ /dev/null @@ -1,71 +0,0 @@ -package bptree - -import ( - "bytes" - "hash/fnv" -) - -type String string -type ByteSlice []byte - -func (self *String) MarshalBinary() ([]byte, error) { - return []byte(*self), nil -} - -func (self *String) UnmarshalBinary(data []byte) error { - *self = String(data) - return nil -} - -func (self String) Equals(other Equatable) bool { - if o, ok := other.(String); ok { - return self == o - } else { - return false - } -} - -func (self String) Less(other Sortable) bool { - if o, ok := other.(String); ok { - return self < o - } else { - return false - } -} - -func (self String) Hash() int { - h := fnv.New32a() - h.Write([]byte(string(self))) - return int(h.Sum32()) -} - -func (self *ByteSlice) MarshalBinary() ([]byte, error) { - return []byte(*self), nil -} - -func (self *ByteSlice) UnmarshalBinary(data []byte) error { - *self = ByteSlice(data) - return 
nil -} - -func (self ByteSlice) Equals(other Equatable) bool { - if o, ok := other.(ByteSlice); ok { - return bytes.Equal(self, o) - } else { - return false - } -} - -func (self ByteSlice) Less(other Sortable) bool { - if o, ok := other.(ByteSlice); ok { - return bytes.Compare(self, o) < 0 // -1 if a < b - } else { - return false - } -} - -func (self ByteSlice) Hash() int { - h := fnv.New32a() - h.Write([]byte(self)) - return int(h.Sum32()) -} diff --git a/weed/util/bptree/tree_store/memory_store.go b/weed/util/bptree/tree_store/memory_store.go deleted file mode 100644 index 467455664..000000000 --- a/weed/util/bptree/tree_store/memory_store.go +++ /dev/null @@ -1,29 +0,0 @@ -package tree_store - -import "errors" - -var ( - NotFound = errors.New("not found") -) - -type MemoryTreeStore struct { - m map[int64][]byte -} - -func NewMemoryTreeStore() *MemoryTreeStore{ - return &MemoryTreeStore{ - m: make(map[int64][]byte), - } -} - -func (m *MemoryTreeStore) Put(k int64, v []byte) error { - m.m[k] = v - return nil -} - -func (m *MemoryTreeStore) Get(k int64) ([]byte, error) { - if v, found := m.m[k]; found { - return v, nil - } - return nil, NotFound -} diff --git a/weed/util/bptree/tree_store/tree_store.go.go b/weed/util/bptree/tree_store/tree_store.go.go deleted file mode 100644 index 6a0af6ae6..000000000 --- a/weed/util/bptree/tree_store/tree_store.go.go +++ /dev/null @@ -1,6 +0,0 @@ -package tree_store - -type TreeStore interface { - Put(k int64, v []byte) error - Get(k int64) ([]byte, error) -} diff --git a/weed/util/bptree/types.go b/weed/util/bptree/types.go deleted file mode 100644 index f987e0419..000000000 --- a/weed/util/bptree/types.go +++ /dev/null @@ -1,98 +0,0 @@ -package bptree - -import ( - "errors" - "fmt" -) - -type Equatable interface { - Equals(b Equatable) bool -} - -type Sortable interface { - Equatable - Less(b Sortable) bool -} - -type Hashable interface { - Sortable - Hash() int -} - -var BpTreeError = fmt.Errorf - -func NegativeSize() error { 
- return errors.New("negative size") -} - -type Iterator func() (item ItemValue, next Iterator) -type KIterator func() (key ItemKey, next KIterator) -type KVIterator func() (key ItemKey, value ItemValue, next KVIterator) -type KVIterable interface { - Iterate() KVIterator -} - -type MapOperable interface { - Has(key ItemKey) bool - Put(key ItemKey, value ItemValue) (err error) - Get(key ItemKey) (value ItemValue, err error) - Remove(key ItemKey) (value ItemValue, err error) -} - -type WhereFunc func(value ItemValue) bool - -func MakeValuesIterator(obj KVIterable) Iterator { - kv_iterator := obj.Iterate() - var v_iterator Iterator - v_iterator = func() (value ItemValue, next Iterator) { - _, value, kv_iterator = kv_iterator() - if kv_iterator == nil { - return nil, nil - } - return value, v_iterator - } - return v_iterator -} - -func MakeItemsIterator(obj KVIterable) (kit KIterator) { - kv_iterator := obj.Iterate() - kit = func() (item ItemKey, next KIterator) { - var key ItemKey - var value ItemValue - key, value, kv_iterator = kv_iterator() - if kv_iterator == nil { - return nil, nil - } - return &MapEntry{key, value}, kit - } - return kit -} - -type MapEntry struct { - Key ItemKey - Value ItemValue -} - -func (m *MapEntry) Equals(other Equatable) bool { - if o, ok := other.(*MapEntry); ok { - return m.Key.Equals(o.Key) - } else { - return m.Key.Equals(other) - } -} - -func (m *MapEntry) Less(other Sortable) bool { - if o, ok := other.(*MapEntry); ok { - return m.Key.Less(o.Key) - } else { - return m.Key.Less(other) - } -} - -func (m *MapEntry) Hash() int { - return m.Key.Hash() -} - -func (m *MapEntry) String() string { - return fmt.Sprintf("", m.Key, m.Value) -} From 69b84bb771e816e2914daee8635379b1150e0de7 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 2 Oct 2021 14:15:49 -0700 Subject: [PATCH 18/30] TestFindGreaterOrEqual --- weed/util/skiplist/skiplist_test.go | 45 +++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git 
a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go index ca41e382a..60bd5f923 100644 --- a/weed/util/skiplist/skiplist_test.go +++ b/weed/util/skiplist/skiplist_test.go @@ -2,6 +2,7 @@ package skiplist import ( "bytes" + "fmt" "math/rand" "strconv" "testing" @@ -210,3 +211,47 @@ func TestGetNodeCount(t *testing.T) { t.Fail() } } + +func TestFindGreaterOrEqual(t *testing.T) { + + maxNumber := maxN * 100 + + var list *SkipList + var listPointer *SkipList + + // Test on empty list. + if _, ok := listPointer.FindGreaterOrEqual(Element(0)); ok { + t.Fail() + } + + list = New() + + for i := 0; i < maxN; i++ { + list.Insert(Element(rand.Intn(maxNumber))) + } + + for i := 0; i < maxN; i++ { + key := Element(rand.Intn(maxNumber)) + if v, ok := list.FindGreaterOrEqual(key); ok { + // if f is v should be bigger than the element before + if bytes.Compare(v.Prev.Key, key) >= 0 { + fmt.Printf("PrevV: %s\n key: %s\n\n", string(v.Prev.Key), string(key)) + t.Fail() + } + // v should be bigger or equal to f + // If we compare directly, we get an equal key with a difference on the 10th decimal point, which fails. + if bytes.Compare(v.Values[0], key) < 0 { + fmt.Printf("v: %s\n key: %s\n\n", string(v.Values[0]), string(key)) + t.Fail() + } + } else { + lastV := list.GetLargestNode().GetValue() + // It is OK, to fail, as long as f is bigger than the last element. 
+ if bytes.Compare(key, lastV) <= 0 { + fmt.Printf("lastV: %s\n key: %s\n\n", string(lastV), string(key)) + t.Fail() + } + } + } + +} \ No newline at end of file From 4f50f8c2ca7243a358e7ee9dab40a5a42fc462d1 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 3 Oct 2021 01:07:35 -0700 Subject: [PATCH 19/30] insert key and value --- weed/util/skiplist/serde.go | 12 +--- weed/util/skiplist/skiplist.go | 41 +++++------- weed/util/skiplist/skiplist.pb.go | 97 +++++++++++++---------------- weed/util/skiplist/skiplist.proto | 7 +-- weed/util/skiplist/skiplist_test.go | 50 ++++++--------- 5 files changed, 84 insertions(+), 123 deletions(-) diff --git a/weed/util/skiplist/serde.go b/weed/util/skiplist/serde.go index 2337b4b19..135d3b0b5 100644 --- a/weed/util/skiplist/serde.go +++ b/weed/util/skiplist/serde.go @@ -3,16 +3,10 @@ package skiplist import "bytes" func compareElement(a *SkipListElement, key []byte) int { - if len(a.Values) == 0 { + if len(a.Key) == 0 { return -1 } - if bytes.Compare(a.Values[0], key) < 0 { - return -1 - } - if bytes.Compare(a.Values[len(a.Values)-1], key) > 0 { - return 1 - } - return 0 + return bytes.Compare(a.Key, key) } var ( @@ -25,7 +19,7 @@ func (node *SkipListElement) Reference() *SkipListElementReference { } return &SkipListElementReference{ ElementPointer: node.Id, - Key: node.Values[0], + Key: node.Key, } } func (node *SkipListElement) Save() { diff --git a/weed/util/skiplist/skiplist.go b/weed/util/skiplist/skiplist.go index a47cf4608..19fa556ae 100644 --- a/weed/util/skiplist/skiplist.go +++ b/weed/util/skiplist/skiplist.go @@ -15,11 +15,11 @@ const ( ) type SkipList struct { - startLevels [maxLevel]*SkipListElementReference - endLevels [maxLevel]*SkipListElementReference - maxNewLevel int - maxLevel int - elementCount int + startLevels [maxLevel]*SkipListElementReference + endLevels [maxLevel]*SkipListElementReference + maxNewLevel int + maxLevel int + // elementCount int } // NewSeedEps returns a new empty, initialized Skiplist. 
@@ -32,9 +32,9 @@ func NewSeed(seed int64) *SkipList { //fmt.Printf("SkipList seed: %v\n", seed) list := &SkipList{ - maxNewLevel: maxLevel, - maxLevel: 0, - elementCount: 0, + maxNewLevel: maxLevel, + maxLevel: 0, + // elementCount: 0, } return list @@ -193,7 +193,7 @@ func (t *SkipList) Delete(key []byte) { nextNextNode.Prev = currentNode.Reference() nextNextNode.Save() } - t.elementCount-- + // t.elementCount-- nextNode.DeleteSelf() } @@ -230,7 +230,7 @@ func (t *SkipList) Delete(key []byte) { // Insert inserts the given ListElement into the skiplist. // Insert runs in approx. O(log(n)) -func (t *SkipList) Insert(key []byte) { +func (t *SkipList) Insert(key, value []byte) { if t == nil || key == nil { return @@ -245,13 +245,14 @@ func (t *SkipList) Insert(key []byte) { } elem := &SkipListElement{ - Id: rand.Int63(), - Next: make([]*SkipListElementReference, t.maxNewLevel, t.maxNewLevel), - Level: int32(level), - Values: [][]byte{key}, + Id: rand.Int63(), + Next: make([]*SkipListElementReference, t.maxNewLevel, t.maxNewLevel), + Level: int32(level), + Key: key, + Value: value, } - t.elementCount++ + // t.elementCount++ newFirst := true newLast := true @@ -371,11 +372,6 @@ func (t *SkipList) Insert(key []byte) { } -// GetValue extracts the ListElement value from a skiplist node. -func (e *SkipListElement) GetValue() []byte { - return e.Values[0] -} - // GetSmallestNode returns the very first/smallest node in the skiplist. // GetSmallestNode runs in O(1) func (t *SkipList) GetSmallestNode() *SkipListElement { @@ -406,11 +402,6 @@ func (t *SkipList) Prev(e *SkipListElement) *SkipListElement { return e.Prev.Load() } -// GetNodeCount returns the number of nodes currently in the skiplist. -func (t *SkipList) GetNodeCount() int { - return t.elementCount -} - // String returns a string format of the skiplist. Useful to get a graphical overview and/or debugging. 
func (t *SkipList) println() { diff --git a/weed/util/skiplist/skiplist.pb.go b/weed/util/skiplist/skiplist.pb.go index 63b6c74a3..82afec453 100644 --- a/weed/util/skiplist/skiplist.pb.go +++ b/weed/util/skiplist/skiplist.pb.go @@ -30,12 +30,10 @@ type SkipListProto struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - StartLevels []*SkipListElementReference `protobuf:"bytes,1,rep,name=start_levels,json=startLevels,proto3" json:"start_levels,omitempty"` - EndLevels []*SkipListElementReference `protobuf:"bytes,2,rep,name=end_levels,json=endLevels,proto3" json:"end_levels,omitempty"` - MaxNewLevel int32 `protobuf:"varint,3,opt,name=max_new_level,json=maxNewLevel,proto3" json:"max_new_level,omitempty"` - MaxLevel int32 `protobuf:"varint,4,opt,name=max_level,json=maxLevel,proto3" json:"max_level,omitempty"` - ElementCount int64 `protobuf:"varint,5,opt,name=element_count,json=elementCount,proto3" json:"element_count,omitempty"` - Eps float64 `protobuf:"fixed64,7,opt,name=eps,proto3" json:"eps,omitempty"` + StartLevels []*SkipListElementReference `protobuf:"bytes,1,rep,name=start_levels,json=startLevels,proto3" json:"start_levels,omitempty"` + EndLevels []*SkipListElementReference `protobuf:"bytes,2,rep,name=end_levels,json=endLevels,proto3" json:"end_levels,omitempty"` + MaxNewLevel int32 `protobuf:"varint,3,opt,name=max_new_level,json=maxNewLevel,proto3" json:"max_new_level,omitempty"` + MaxLevel int32 `protobuf:"varint,4,opt,name=max_level,json=maxLevel,proto3" json:"max_level,omitempty"` } func (x *SkipListProto) Reset() { @@ -98,20 +96,6 @@ func (x *SkipListProto) GetMaxLevel() int32 { return 0 } -func (x *SkipListProto) GetElementCount() int64 { - if x != nil { - return x.ElementCount - } - return 0 -} - -func (x *SkipListProto) GetEps() float64 { - if x != nil { - return x.Eps - } - return 0 -} - type SkipListElementReference struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -172,11 +156,12 @@ type SkipListElement 
struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` - Next []*SkipListElementReference `protobuf:"bytes,2,rep,name=next,proto3" json:"next,omitempty"` - Level int32 `protobuf:"varint,3,opt,name=level,proto3" json:"level,omitempty"` - Values [][]byte `protobuf:"bytes,4,rep,name=values,proto3" json:"values,omitempty"` - Prev *SkipListElementReference `protobuf:"bytes,5,opt,name=prev,proto3" json:"prev,omitempty"` + Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Next []*SkipListElementReference `protobuf:"bytes,2,rep,name=next,proto3" json:"next,omitempty"` + Level int32 `protobuf:"varint,3,opt,name=level,proto3" json:"level,omitempty"` + Key []byte `protobuf:"bytes,4,opt,name=key,proto3" json:"key,omitempty"` + Value []byte `protobuf:"bytes,5,opt,name=value,proto3" json:"value,omitempty"` + Prev *SkipListElementReference `protobuf:"bytes,6,opt,name=prev,proto3" json:"prev,omitempty"` } func (x *SkipListElement) Reset() { @@ -232,9 +217,16 @@ func (x *SkipListElement) GetLevel() int32 { return 0 } -func (x *SkipListElement) GetValues() [][]byte { +func (x *SkipListElement) GetKey() []byte { + if x != nil { + return x.Key + } + return nil +} + +func (x *SkipListElement) GetValue() []byte { if x != nil { - return x.Values + return x.Value } return nil } @@ -250,7 +242,7 @@ var File_skiplist_proto protoreflect.FileDescriptor var file_skiplist_proto_rawDesc = []byte{ 0x0a, 0x0e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x12, 0x08, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x22, 0x91, 0x02, 0x0a, 0x0d, 0x53, + 0x12, 0x08, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x22, 0xda, 0x01, 0x0a, 0x0d, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x45, 0x0a, 0x0c, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 
0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x53, 0x6b, @@ -264,32 +256,29 @@ var file_skiplist_proto_rawDesc = []byte{ 0x77, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x6d, 0x61, 0x78, 0x4e, 0x65, 0x77, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x6d, 0x61, 0x78, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x6d, - 0x61, 0x78, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6c, 0x65, 0x6d, 0x65, - 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, - 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x65, 0x70, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x01, 0x52, 0x03, 0x65, 0x70, 0x73, 0x22, 0x55, - 0x0a, 0x18, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, - 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x65, 0x6c, - 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x0e, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x50, 0x6f, 0x69, 0x6e, - 0x74, 0x65, 0x72, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, - 0x52, 0x03, 0x6b, 0x65, 0x79, 0x22, 0xbf, 0x01, 0x0a, 0x0f, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, - 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x36, 0x0a, 0x04, 0x6e, 0x65, 0x78, - 0x74, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, - 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, - 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x52, 0x04, 0x6e, 0x65, 0x78, - 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 
0x28, 0x05, - 0x52, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, - 0x36, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x76, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, - 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, - 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, - 0x65, 0x52, 0x04, 0x70, 0x72, 0x65, 0x76, 0x42, 0x33, 0x5a, 0x31, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, - 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, - 0x74, 0x69, 0x6c, 0x2f, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x78, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x22, 0x55, 0x0a, 0x18, 0x53, 0x6b, 0x69, 0x70, 0x4c, + 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, + 0x6e, 0x63, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x70, + 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x65, 0x6c, + 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x50, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x12, 0x10, 0x0a, 0x03, + 0x6b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x22, 0xcf, + 0x01, 0x0a, 0x0f, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, + 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, + 0x69, 0x64, 0x12, 0x36, 0x0a, 0x04, 0x6e, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, + 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, + 
0x65, 0x6e, 0x63, 0x65, 0x52, 0x04, 0x6e, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x65, + 0x76, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, + 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x03, 0x6b, + 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x0c, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x36, 0x0a, 0x04, 0x70, 0x72, 0x65, 0x76, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, + 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, + 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x52, 0x04, 0x70, 0x72, 0x65, 0x76, + 0x42, 0x33, 0x5a, 0x31, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, + 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, + 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, 0x74, 0x69, 0x6c, 0x2f, 0x73, 0x6b, 0x69, + 0x70, 0x6c, 0x69, 0x73, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/weed/util/skiplist/skiplist.proto b/weed/util/skiplist/skiplist.proto index ce84ed996..bfb190b33 100644 --- a/weed/util/skiplist/skiplist.proto +++ b/weed/util/skiplist/skiplist.proto @@ -9,8 +9,6 @@ message SkipListProto { repeated SkipListElementReference end_levels = 2; int32 max_new_level = 3; int32 max_level = 4; - int64 element_count = 5; - double eps = 7; } message SkipListElementReference { @@ -22,6 +20,7 @@ message SkipListElement { int64 id = 1; repeated SkipListElementReference next = 2; int32 level = 3; - repeated bytes values = 4; - SkipListElementReference prev = 5; + bytes key = 4; + bytes value = 5; + SkipListElementReference prev = 6; } diff --git a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go index 60bd5f923..811fd5be9 100644 --- 
a/weed/util/skiplist/skiplist_test.go +++ b/weed/util/skiplist/skiplist_test.go @@ -18,7 +18,7 @@ func TestInsertAndFind(t *testing.T) { var list *SkipList var listPointer *SkipList - listPointer.Insert(k0) + listPointer.Insert(k0, k0) if _, ok := listPointer.Find(k0); ok { t.Fail() } @@ -34,7 +34,7 @@ func TestInsertAndFind(t *testing.T) { // Test at the beginning of the list. for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(maxN-i)) - list.Insert(key) + list.Insert(key, key) } for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(maxN-i)) @@ -48,7 +48,7 @@ func TestInsertAndFind(t *testing.T) { // Test at the end of the list. for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(i)) - list.Insert(key) + list.Insert(key, key) } for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(i)) @@ -62,17 +62,17 @@ func TestInsertAndFind(t *testing.T) { rList := rand.Perm(maxN) for _, e := range rList { key := []byte(strconv.Itoa(e)) - println("insert", e) - list.Insert(key) + // println("insert", e) + list.Insert(key, key) } for _, e := range rList { key := []byte(strconv.Itoa(e)) - println("find", e) + // println("find", e) if _, ok := list.Find(key); !ok { t.Fail() } } - println("print list") + // println("print list") list.println() } @@ -97,7 +97,7 @@ func TestDelete(t *testing.T) { t.Fail() } - list.Insert(k0) + list.Insert(k0, k0) list.Delete(k0) if !list.IsEmpty() { t.Fail() @@ -105,7 +105,7 @@ func TestDelete(t *testing.T) { // Delete elements at the beginning of the list. for i := 0; i < maxN; i++ { - list.Insert(Element(i)) + list.Insert(Element(i), Element(i)) } for i := 0; i < maxN; i++ { list.Delete(Element(i)) @@ -117,7 +117,7 @@ func TestDelete(t *testing.T) { list = New() // Delete elements at the end of the list. 
for i := 0; i < maxN; i++ { - list.Insert(Element(i)) + list.Insert(Element(i), Element(i)) } for i := 0; i < maxN; i++ { list.Delete(Element(maxN - i - 1)) @@ -130,7 +130,7 @@ func TestDelete(t *testing.T) { // Delete elements at random positions in the list. rList := rand.Perm(maxN) for _, e := range rList { - list.Insert(Element(e)) + list.Insert(Element(e), Element(e)) } for _, e := range rList { list.Delete(Element(e)) @@ -144,7 +144,7 @@ func TestNext(t *testing.T) { list := New() for i := 0; i < maxN; i++ { - list.Insert(Element(i)) + list.Insert(Element(i), Element(i)) } smallest := list.GetSmallestNode() @@ -155,7 +155,7 @@ func TestNext(t *testing.T) { for node != largest { node = list.Next(node) // Must always be incrementing here! - if bytes.Compare(node.Values[0], lastNode.Values[0]) <= 0 { + if bytes.Compare(node.Key, lastNode.Key) <= 0 { t.Fail() } // Next.Prev must always point to itself! @@ -174,7 +174,7 @@ func TestPrev(t *testing.T) { list := New() for i := 0; i < maxN; i++ { - list.Insert(Element(i)) + list.Insert(Element(i), Element(i)) } smallest := list.GetSmallestNode() @@ -185,7 +185,7 @@ func TestPrev(t *testing.T) { for node != smallest { node = list.Prev(node) // Must always be incrementing here! - if bytes.Compare(node.Values[0], lastNode.Values[0]) >= 0 { + if bytes.Compare(node.Key, lastNode.Key) >= 0 { t.Fail() } // Next.Prev must always point to itself! 
@@ -200,18 +200,6 @@ func TestPrev(t *testing.T) { } } -func TestGetNodeCount(t *testing.T) { - list := New() - - for i := 0; i < maxN; i++ { - list.Insert(Element(i)) - } - - if list.GetNodeCount() != maxN { - t.Fail() - } -} - func TestFindGreaterOrEqual(t *testing.T) { maxNumber := maxN * 100 @@ -227,21 +215,21 @@ func TestFindGreaterOrEqual(t *testing.T) { list = New() for i := 0; i < maxN; i++ { - list.Insert(Element(rand.Intn(maxNumber))) + list.Insert(Element(rand.Intn(maxNumber)), Element(i)) } for i := 0; i < maxN; i++ { key := Element(rand.Intn(maxNumber)) if v, ok := list.FindGreaterOrEqual(key); ok { // if f is v should be bigger than the element before - if bytes.Compare(v.Prev.Key, key) >= 0 { + if v.Prev != nil && bytes.Compare(v.Prev.Key, key) >= 0 { fmt.Printf("PrevV: %s\n key: %s\n\n", string(v.Prev.Key), string(key)) t.Fail() } // v should be bigger or equal to f // If we compare directly, we get an equal key with a difference on the 10th decimal point, which fails. - if bytes.Compare(v.Values[0], key) < 0 { - fmt.Printf("v: %s\n key: %s\n\n", string(v.Values[0]), string(key)) + if bytes.Compare(v.Key, key) < 0 { + fmt.Printf("v: %s\n key: %s\n\n", string(v.Key), string(key)) t.Fail() } } else { From d343b0db5744d5516fe6ffa368dda792c47e9ee3 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 3 Oct 2021 01:15:14 -0700 Subject: [PATCH 20/30] update value --- weed/util/skiplist/skiplist.go | 13 +++++++++++++ weed/util/skiplist/skiplist_test.go | 29 ++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/weed/util/skiplist/skiplist.go b/weed/util/skiplist/skiplist.go index 19fa556ae..50ce53525 100644 --- a/weed/util/skiplist/skiplist.go +++ b/weed/util/skiplist/skiplist.go @@ -1,5 +1,7 @@ package skiplist +// adapted from https://github.com/MauriceGit/skiplist/blob/master/skiplist.go + import ( "bytes" "fmt" @@ -402,6 +404,17 @@ func (t *SkipList) Prev(e *SkipListElement) *SkipListElement { return e.Prev.Load() } 
+// ChangeValue can be used to change the actual value of a node in the skiplist +// without the need of Deleting and reinserting the node again. +// Be advised, that ChangeValue only works, if the actual key from ExtractKey() will stay the same! +// ok is an indicator, wether the value is actually changed. +func (t *SkipList) ChangeValue(e *SkipListElement, newValue []byte) (ok bool) { + // The key needs to stay correct, so this is very important! + e.Value = newValue + e.Save() + return true +} + // String returns a string format of the skiplist. Useful to get a graphical overview and/or debugging. func (t *SkipList) println() { diff --git a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go index 811fd5be9..b414c267b 100644 --- a/weed/util/skiplist/skiplist_test.go +++ b/weed/util/skiplist/skiplist_test.go @@ -242,4 +242,31 @@ func TestFindGreaterOrEqual(t *testing.T) { } } -} \ No newline at end of file +} + +func TestChangeValue(t *testing.T) { + list := New() + + for i := 0; i < maxN; i++ { + list.Insert(Element(i), []byte("value")) + } + + for i := 0; i < maxN; i++ { + // The key only looks at the int so the string doesn't matter here! 
+ f1, ok := list.Find(Element(i)) + if !ok { + t.Fail() + } + ok = list.ChangeValue(f1, []byte("different value")) + if !ok { + t.Fail() + } + f2, ok := list.Find(Element(i)) + if !ok { + t.Fail() + } + if bytes.Compare(f2.GetValue(), []byte("different value")) != 0 { + t.Fail() + } + } +} From 22d8684e88bea2a36063568ba95e6d205aac33b0 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 3 Oct 2021 02:19:21 -0700 Subject: [PATCH 21/30] refactor out listStore --- weed/util/skiplist/list_store.go | 32 +++++++ weed/util/skiplist/serde.go | 31 +++---- weed/util/skiplist/skiplist.go | 135 +++++++++++++++++++--------- weed/util/skiplist/skiplist_test.go | 78 ++++++++-------- 4 files changed, 178 insertions(+), 98 deletions(-) create mode 100644 weed/util/skiplist/list_store.go diff --git a/weed/util/skiplist/list_store.go b/weed/util/skiplist/list_store.go new file mode 100644 index 000000000..0eb1106bc --- /dev/null +++ b/weed/util/skiplist/list_store.go @@ -0,0 +1,32 @@ +package skiplist + +type ListStore interface { + SaveElement(id int64, element *SkipListElement) error + DeleteElement(id int64) error + LoadElement(id int64) (*SkipListElement, error) +} + +type MemStore struct { + m map[int64]*SkipListElement +} + +func newMemStore() *MemStore { + return &MemStore{ + m: make(map[int64]*SkipListElement), + } +} + +func (m *MemStore) SaveElement(id int64, element *SkipListElement) error { + m.m[id] = element + return nil +} + +func (m *MemStore) DeleteElement(id int64) error { + delete(m.m, id) + return nil +} + +func (m *MemStore) LoadElement(id int64) (*SkipListElement, error) { + element := m.m[id] + return element, nil +} diff --git a/weed/util/skiplist/serde.go b/weed/util/skiplist/serde.go index 135d3b0b5..5b7089e80 100644 --- a/weed/util/skiplist/serde.go +++ b/weed/util/skiplist/serde.go @@ -9,10 +9,6 @@ func compareElement(a *SkipListElement, key []byte) int { return bytes.Compare(a.Key, key) } -var ( - memStore = make(map[int64]*SkipListElement) -) - func (node 
*SkipListElement) Reference() *SkipListElementReference { if node == nil { return nil @@ -22,27 +18,24 @@ func (node *SkipListElement) Reference() *SkipListElementReference { Key: node.Key, } } -func (node *SkipListElement) Save() { - if node == nil { - return + +func (t *SkipList) saveElement(element *SkipListElement) error { + if element == nil { + return nil } - memStore[node.Id] = node - //println("++ node", node.Id, string(node.Values[0])) + return t.listStore.SaveElement(element.Id, element) } -func (node *SkipListElement) DeleteSelf() { - if node == nil { - return +func (t *SkipList) deleteElement(element *SkipListElement) error { + if element == nil { + return nil } - delete(memStore, node.Id) - //println("++ node", node.Id, string(node.Values[0])) + return t.listStore.DeleteElement(element.Id) } -func (ref *SkipListElementReference) Load() *SkipListElement { +func (t *SkipList) loadElement(ref *SkipListElementReference) (*SkipListElement, error) { if ref == nil { - return nil + return nil, nil } - //println("~ node", ref.ElementPointer, string(ref.Key)) - return memStore[ref.ElementPointer] + return t.listStore.LoadElement(ref.ElementPointer) } - diff --git a/weed/util/skiplist/skiplist.go b/weed/util/skiplist/skiplist.go index 50ce53525..498af085d 100644 --- a/weed/util/skiplist/skiplist.go +++ b/weed/util/skiplist/skiplist.go @@ -21,13 +21,14 @@ type SkipList struct { endLevels [maxLevel]*SkipListElementReference maxNewLevel int maxLevel int + listStore ListStore // elementCount int } // NewSeedEps returns a new empty, initialized Skiplist. // Given a seed, a deterministic height/list behaviour can be achieved. // Eps is used to compare keys given by the ExtractKey() function on equality. -func NewSeed(seed int64) *SkipList { +func NewSeed(seed int64, listStore ListStore) *SkipList { // Initialize random number generator. 
rand.Seed(seed) @@ -36,6 +37,7 @@ func NewSeed(seed int64) *SkipList { list := &SkipList{ maxNewLevel: maxLevel, maxLevel: 0, + listStore: listStore, // elementCount: 0, } @@ -43,8 +45,8 @@ func NewSeed(seed int64) *SkipList { } // New returns a new empty, initialized Skiplist. -func New() *SkipList { - return NewSeed(time.Now().UTC().UnixNano()) +func New(listStore ListStore) *SkipList { + return NewSeed(time.Now().UTC().UnixNano(), listStore) } // IsEmpty checks, if the skiplist is empty. @@ -75,7 +77,7 @@ func (t *SkipList) findEntryIndex(key []byte, level int) int { return 0 } -func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem *SkipListElement, ok bool) { +func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem *SkipListElement, ok bool, err error) { foundElem = nil ok = false @@ -87,7 +89,10 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem index := t.findEntryIndex(key, 0) var currentNode *SkipListElement - currentNode = t.startLevels[index].Load() + currentNode, err = t.loadElement(t.startLevels[index]) + if err != nil { + return + } // In case, that our first element is already greater-or-equal! if findGreaterOrEqual && compareElement(currentNode, key) > 0 { @@ -106,13 +111,20 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem // Which direction are we continuing next time? 
if currentNode.Next[index] != nil && bytes.Compare(currentNode.Next[index].Key, key) <= 0 { // Go right - currentNode = currentNode.Next[index].Load() + currentNode, err = t.loadElement(currentNode.Next[index]) + if err != nil { + return + } } else { if index > 0 { // Early exit if currentNode.Next[0] != nil && bytes.Compare(currentNode.Next[0].Key, key) == 0 { - currentNodeNext := currentNode.Next[0].Load() + var currentNodeNext *SkipListElement + currentNodeNext, err = t.loadElement(currentNode.Next[0]) + if err != nil { + return + } foundElem = currentNodeNext ok = true return @@ -122,7 +134,10 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem } else { // Element is not found and we reached the bottom. if findGreaterOrEqual { - foundElem = currentNode.Next[index].Load() + foundElem, err = t.loadElement(currentNode.Next[index]) + if err != nil { + return + } ok = foundElem != nil } @@ -135,26 +150,26 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem // Find tries to find an element in the skiplist based on the key from the given ListElement. // elem can be used, if ok is true. // Find runs in approx. O(log(n)) -func (t *SkipList) Find(key []byte) (elem *SkipListElement, ok bool) { +func (t *SkipList) Find(key []byte) (elem *SkipListElement, ok bool, err error) { if t == nil || key == nil { return } - elem, ok = t.findExtended(key, false) + elem, ok, err = t.findExtended(key, false) return } // FindGreaterOrEqual finds the first element, that is greater or equal to the given ListElement e. // The comparison is done on the keys (So on ExtractKey()). // FindGreaterOrEqual runs in approx. 
O(log(n)) -func (t *SkipList) FindGreaterOrEqual(key []byte) (elem *SkipListElement, ok bool) { +func (t *SkipList) FindGreaterOrEqual(key []byte) (elem *SkipListElement, ok bool, err error) { if t == nil || key == nil { return } - elem, ok = t.findExtended(key, true) + elem, ok, err = t.findExtended(key, true) return } @@ -162,7 +177,7 @@ func (t *SkipList) FindGreaterOrEqual(key []byte) (elem *SkipListElement, ok boo // If there are multiple entries with the same value, Delete will remove one of them // (Which one will change based on the actual skiplist layout) // Delete runs in approx. O(log(n)) -func (t *SkipList) Delete(key []byte) { +func (t *SkipList) Delete(key []byte) (err error) { if t == nil || t.IsEmpty() || key == nil { return @@ -176,9 +191,12 @@ func (t *SkipList) Delete(key []byte) { for { if currentNode == nil { - nextNode = t.startLevels[index].Load() + nextNode, err = t.loadElement(t.startLevels[index]) } else { - nextNode = currentNode.Next[index].Load() + nextNode, err = t.loadElement(currentNode.Next[index]) + } + if err != nil { + return err } // Found and remove! @@ -186,17 +204,26 @@ func (t *SkipList) Delete(key []byte) { if currentNode != nil { currentNode.Next[index] = nextNode.Next[index] - currentNode.Save() + if err = t.saveElement(currentNode); err != nil { + return err + } } if index == 0 { if nextNode.Next[index] != nil { - nextNextNode := nextNode.Next[index].Load() + nextNextNode, err := t.loadElement(nextNode.Next[index]) + if err != nil { + return err + } nextNextNode.Prev = currentNode.Reference() - nextNextNode.Save() + if err = t.saveElement(nextNextNode); err != nil { + return err + } } // t.elementCount-- - nextNode.DeleteSelf() + if err = t.deleteElement(nextNode); err != nil { + return err + } } // Link from start needs readjustments. @@ -227,12 +254,12 @@ func (t *SkipList) Delete(key []byte) { } } } - + return } // Insert inserts the given ListElement into the skiplist. // Insert runs in approx. 
O(log(n)) -func (t *SkipList) Insert(key, value []byte) { +func (t *SkipList) Insert(key, value []byte) (err error){ if t == nil || key == nil { return @@ -288,14 +315,20 @@ func (t *SkipList) Insert(key, value []byte) { elem.Next[index] = nextNodeRef if currentNode != nil { currentNode.Next[index] = elem.Reference() - currentNode.Save() + if err = t.saveElement(currentNode); err != nil { + return + } } if index == 0 { elem.Prev = currentNode.Reference() if nextNodeRef != nil { - nextNode = nextNodeRef.Load() + if nextNode, err = t.loadElement(nextNodeRef); err != nil { + return + } nextNode.Prev = elem.Reference() - nextNode.Save() + if err = t.saveElement(nextNode); err != nil { + return + } } } } @@ -304,7 +337,9 @@ func (t *SkipList) Insert(key, value []byte) { // Go right if nextNode == nil { // reuse nextNode when index == 0 - nextNode = nextNodeRef.Load() + if nextNode, err = t.loadElement(nextNodeRef); err != nil { + return + } } currentNode = nextNode } else { @@ -326,9 +361,14 @@ func (t *SkipList) Insert(key, value []byte) { if t.startLevels[i] == nil || bytes.Compare(t.startLevels[i].Key, key) > 0 { if i == 0 && t.startLevels[i] != nil { - startLevelElement := t.startLevels[i].Load() + startLevelElement, err := t.loadElement(t.startLevels[i]) + if err != nil { + return err + } startLevelElement.Prev = elem.Reference() - startLevelElement.Save() + if err = t.saveElement(startLevelElement); err != nil { + return err + } } elem.Next[i] = t.startLevels[i] t.startLevels[i] = elem.Reference() @@ -347,9 +387,14 @@ func (t *SkipList) Insert(key, value []byte) { // This is very important, so we are not linking the very first element (newFirst AND newLast) to itself! 
if !newFirst { if t.endLevels[i] != nil { - endLevelElement := t.endLevels[i].Load() + endLevelElement, err := t.loadElement(t.endLevels[i]) + if err != nil { + return err + } endLevelElement.Next[i] = elem.Reference() - endLevelElement.Save() + if err = t.saveElement(endLevelElement); err != nil { + return err + } } if i == 0 { elem.Prev = t.endLevels[i] @@ -370,49 +415,51 @@ func (t *SkipList) Insert(key, value []byte) { } } - elem.Save() + if err = t.saveElement(elem); err != nil { + return err + } + return nil } // GetSmallestNode returns the very first/smallest node in the skiplist. // GetSmallestNode runs in O(1) -func (t *SkipList) GetSmallestNode() *SkipListElement { - return t.startLevels[0].Load() +func (t *SkipList) GetSmallestNode() (*SkipListElement, error) { + return t.loadElement(t.startLevels[0]) } // GetLargestNode returns the very last/largest node in the skiplist. // GetLargestNode runs in O(1) -func (t *SkipList) GetLargestNode() *SkipListElement { - return t.endLevels[0].Load() +func (t *SkipList) GetLargestNode() (*SkipListElement, error) { + return t.loadElement(t.endLevels[0]) } // Next returns the next element based on the given node. // Next will loop around to the first node, if you call it on the last! -func (t *SkipList) Next(e *SkipListElement) *SkipListElement { +func (t *SkipList) Next(e *SkipListElement) (*SkipListElement, error) { if e.Next[0] == nil { - return t.startLevels[0].Load() + return t.loadElement(t.startLevels[0]) } - return e.Next[0].Load() + return t.loadElement(e.Next[0]) } // Prev returns the previous element based on the given node. // Prev will loop around to the last node, if you call it on the first! 
-func (t *SkipList) Prev(e *SkipListElement) *SkipListElement { +func (t *SkipList) Prev(e *SkipListElement) (*SkipListElement, error) { if e.Prev == nil { - return t.endLevels[0].Load() + return t.loadElement(t.endLevels[0]) } - return e.Prev.Load() + return t.loadElement(e.Prev) } // ChangeValue can be used to change the actual value of a node in the skiplist // without the need of Deleting and reinserting the node again. // Be advised, that ChangeValue only works, if the actual key from ExtractKey() will stay the same! // ok is an indicator, wether the value is actually changed. -func (t *SkipList) ChangeValue(e *SkipListElement, newValue []byte) (ok bool) { +func (t *SkipList) ChangeValue(e *SkipListElement, newValue []byte) (err error) { // The key needs to stay correct, so this is very important! e.Value = newValue - e.Save() - return true + return t.saveElement(e) } // String returns a string format of the skiplist. Useful to get a graphical overview and/or debugging. @@ -437,7 +484,7 @@ func (t *SkipList) println() { nodeRef := t.startLevels[0] for nodeRef != nil { print(fmt.Sprintf("%v: ", string(nodeRef.Key))) - node := nodeRef.Load() + node, _ := t.loadElement(nodeRef) for i := 0; i <= int(node.Level); i++ { l := node.Next[i] diff --git a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go index b414c267b..75cbc6cfd 100644 --- a/weed/util/skiplist/skiplist_test.go +++ b/weed/util/skiplist/skiplist_test.go @@ -12,6 +12,10 @@ const ( maxN = 10000 ) +var ( + memStore = newMemStore() +) + func TestInsertAndFind(t *testing.T) { k0 := []byte("0") @@ -19,12 +23,12 @@ func TestInsertAndFind(t *testing.T) { var listPointer *SkipList listPointer.Insert(k0, k0) - if _, ok := listPointer.Find(k0); ok { + if _, ok, _ := listPointer.Find(k0); ok { t.Fail() } - list = New() - if _, ok := list.Find(k0); ok { + list = New(memStore) + if _, ok, _ := list.Find(k0); ok { t.Fail() } if !list.IsEmpty() { @@ -33,18 +37,17 @@ func TestInsertAndFind(t 
*testing.T) { // Test at the beginning of the list. for i := 0; i < maxN; i++ { - key := []byte(strconv.Itoa(maxN-i)) + key := []byte(strconv.Itoa(maxN - i)) list.Insert(key, key) } for i := 0; i < maxN; i++ { - key := []byte(strconv.Itoa(maxN-i)) - if _, ok := list.Find(key); !ok { + key := []byte(strconv.Itoa(maxN - i)) + if _, ok, _ := list.Find(key); !ok { t.Fail() } } - - list = New() + list = New(memStore) // Test at the end of the list. for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(i)) @@ -52,12 +55,12 @@ func TestInsertAndFind(t *testing.T) { } for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(i)) - if _, ok := list.Find(key); !ok { + if _, ok, _ := list.Find(key); !ok { t.Fail() } } - list = New() + list = New(memStore) // Test at random positions in the list. rList := rand.Perm(maxN) for _, e := range rList { @@ -68,7 +71,7 @@ func TestInsertAndFind(t *testing.T) { for _, e := range rList { key := []byte(strconv.Itoa(e)) // println("find", e) - if _, ok := list.Find(key); !ok { + if _, ok, _ := list.Find(key); !ok { t.Fail() } } @@ -90,7 +93,7 @@ func TestDelete(t *testing.T) { // Delete on empty list list.Delete(k0) - list = New() + list = New(memStore) list.Delete(k0) if !list.IsEmpty() { @@ -114,7 +117,7 @@ func TestDelete(t *testing.T) { t.Fail() } - list = New() + list = New(memStore) // Delete elements at the end of the list. for i := 0; i < maxN; i++ { list.Insert(Element(i), Element(i)) @@ -126,7 +129,7 @@ func TestDelete(t *testing.T) { t.Fail() } - list = New() + list = New(memStore) // Delete elements at random positions in the list. 
rList := rand.Perm(maxN) for _, e := range rList { @@ -141,61 +144,65 @@ func TestDelete(t *testing.T) { } func TestNext(t *testing.T) { - list := New() + list := New(memStore) for i := 0; i < maxN; i++ { list.Insert(Element(i), Element(i)) } - smallest := list.GetSmallestNode() - largest := list.GetLargestNode() + smallest, _ := list.GetSmallestNode() + largest, _ := list.GetLargestNode() lastNode := smallest node := lastNode for node != largest { - node = list.Next(node) + node, _ = list.Next(node) // Must always be incrementing here! if bytes.Compare(node.Key, lastNode.Key) <= 0 { t.Fail() } // Next.Prev must always point to itself! - if list.Next(list.Prev(node)) != node { + prevNode, _ := list.Prev(node) + nextNode, _ := list.Next(prevNode) + if nextNode != node { t.Fail() } lastNode = node } - if list.Next(largest) != smallest { + if nextNode, _ := list.Next(largest); nextNode != smallest { t.Fail() } } func TestPrev(t *testing.T) { - list := New() + list := New(memStore) for i := 0; i < maxN; i++ { list.Insert(Element(i), Element(i)) } - smallest := list.GetSmallestNode() - largest := list.GetLargestNode() + smallest, _ := list.GetSmallestNode() + largest, _ := list.GetLargestNode() lastNode := largest node := lastNode for node != smallest { - node = list.Prev(node) + node, _ = list.Prev(node) // Must always be incrementing here! if bytes.Compare(node.Key, lastNode.Key) >= 0 { t.Fail() } // Next.Prev must always point to itself! - if list.Prev(list.Next(node)) != node { + nextNode, _ := list.Next(node) + prevNode, _ := list.Prev(nextNode) + if prevNode != node { t.Fail() } lastNode = node } - if list.Prev(smallest) != largest { + if prevNode, _ := list.Prev(smallest); prevNode != largest { t.Fail() } } @@ -208,11 +215,11 @@ func TestFindGreaterOrEqual(t *testing.T) { var listPointer *SkipList // Test on empty list. 
- if _, ok := listPointer.FindGreaterOrEqual(Element(0)); ok { + if _, ok, _ := listPointer.FindGreaterOrEqual(Element(0)); ok { t.Fail() } - list = New() + list = New(memStore) for i := 0; i < maxN; i++ { list.Insert(Element(rand.Intn(maxNumber)), Element(i)) @@ -220,7 +227,7 @@ func TestFindGreaterOrEqual(t *testing.T) { for i := 0; i < maxN; i++ { key := Element(rand.Intn(maxNumber)) - if v, ok := list.FindGreaterOrEqual(key); ok { + if v, ok, _ := list.FindGreaterOrEqual(key); ok { // if f is v should be bigger than the element before if v.Prev != nil && bytes.Compare(v.Prev.Key, key) >= 0 { fmt.Printf("PrevV: %s\n key: %s\n\n", string(v.Prev.Key), string(key)) @@ -233,7 +240,8 @@ func TestFindGreaterOrEqual(t *testing.T) { t.Fail() } } else { - lastV := list.GetLargestNode().GetValue() + lastNode, _ := list.GetLargestNode() + lastV := lastNode.GetValue() // It is OK, to fail, as long as f is bigger than the last element. if bytes.Compare(key, lastV) <= 0 { fmt.Printf("lastV: %s\n key: %s\n\n", string(lastV), string(key)) @@ -245,7 +253,7 @@ func TestFindGreaterOrEqual(t *testing.T) { } func TestChangeValue(t *testing.T) { - list := New() + list := New(memStore) for i := 0; i < maxN; i++ { list.Insert(Element(i), []byte("value")) @@ -253,15 +261,15 @@ func TestChangeValue(t *testing.T) { for i := 0; i < maxN; i++ { // The key only looks at the int so the string doesn't matter here! 
- f1, ok := list.Find(Element(i)) + f1, ok, _ := list.Find(Element(i)) if !ok { t.Fail() } - ok = list.ChangeValue(f1, []byte("different value")) - if !ok { + err := list.ChangeValue(f1, []byte("different value")) + if err != nil { t.Fail() } - f2, ok := list.Find(Element(i)) + f2, ok, _ := list.Find(Element(i)) if !ok { t.Fail() } From a481c4a45ef60de22d6dacf83010542ce8c6e1bb Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 3 Oct 2021 13:50:52 -0700 Subject: [PATCH 22/30] return previous element if visited --- weed/util/skiplist/skiplist.go | 15 ++++++++------- weed/util/skiplist/skiplist_test.go | 18 +++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/weed/util/skiplist/skiplist.go b/weed/util/skiplist/skiplist.go index 498af085d..b48a05b4a 100644 --- a/weed/util/skiplist/skiplist.go +++ b/weed/util/skiplist/skiplist.go @@ -67,17 +67,17 @@ func (t *SkipList) generateLevel(maxLevel int) int { return level } -func (t *SkipList) findEntryIndex(key []byte, level int) int { +func (t *SkipList) findEntryIndex(key []byte, minLevel int) int { // Find good entry point so we don't accidentally skip half the list. 
for i := t.maxLevel; i >= 0; i-- { - if t.startLevels[i] != nil && bytes.Compare(t.startLevels[i].Key, key) < 0 || i <= level { + if t.startLevels[i] != nil && bytes.Compare(t.startLevels[i].Key, key) < 0 || i <= minLevel { return i } } return 0 } -func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem *SkipListElement, ok bool, err error) { +func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (prevElementIfVisited *SkipListElement, foundElem *SkipListElement, ok bool, err error) { foundElem = nil ok = false @@ -120,6 +120,7 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem // Early exit if currentNode.Next[0] != nil && bytes.Compare(currentNode.Next[0].Key, key) == 0 { + prevElementIfVisited = currentNode var currentNodeNext *SkipListElement currentNodeNext, err = t.loadElement(currentNode.Next[0]) if err != nil { @@ -150,26 +151,26 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (foundElem // Find tries to find an element in the skiplist based on the key from the given ListElement. // elem can be used, if ok is true. // Find runs in approx. O(log(n)) -func (t *SkipList) Find(key []byte) (elem *SkipListElement, ok bool, err error) { +func (t *SkipList) Find(key []byte) (prevIfVisited *SkipListElement, elem *SkipListElement, ok bool, err error) { if t == nil || key == nil { return } - elem, ok, err = t.findExtended(key, false) + prevIfVisited, elem, ok, err = t.findExtended(key, false) return } // FindGreaterOrEqual finds the first element, that is greater or equal to the given ListElement e. // The comparison is done on the keys (So on ExtractKey()). // FindGreaterOrEqual runs in approx. 
O(log(n)) -func (t *SkipList) FindGreaterOrEqual(key []byte) (elem *SkipListElement, ok bool, err error) { +func (t *SkipList) FindGreaterOrEqual(key []byte) (prevIfVisited *SkipListElement, elem *SkipListElement, ok bool, err error) { if t == nil || key == nil { return } - elem, ok, err = t.findExtended(key, true) + prevIfVisited, elem, ok, err = t.findExtended(key, true) return } diff --git a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go index 75cbc6cfd..115656cd9 100644 --- a/weed/util/skiplist/skiplist_test.go +++ b/weed/util/skiplist/skiplist_test.go @@ -23,12 +23,12 @@ func TestInsertAndFind(t *testing.T) { var listPointer *SkipList listPointer.Insert(k0, k0) - if _, ok, _ := listPointer.Find(k0); ok { + if _, _, ok, _ := listPointer.Find(k0); ok { t.Fail() } list = New(memStore) - if _, ok, _ := list.Find(k0); ok { + if _, _, ok, _ := list.Find(k0); ok { t.Fail() } if !list.IsEmpty() { @@ -42,7 +42,7 @@ func TestInsertAndFind(t *testing.T) { } for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(maxN - i)) - if _, ok, _ := list.Find(key); !ok { + if _, _, ok, _ := list.Find(key); !ok { t.Fail() } } @@ -55,7 +55,7 @@ func TestInsertAndFind(t *testing.T) { } for i := 0; i < maxN; i++ { key := []byte(strconv.Itoa(i)) - if _, ok, _ := list.Find(key); !ok { + if _, _, ok, _ := list.Find(key); !ok { t.Fail() } } @@ -71,7 +71,7 @@ func TestInsertAndFind(t *testing.T) { for _, e := range rList { key := []byte(strconv.Itoa(e)) // println("find", e) - if _, ok, _ := list.Find(key); !ok { + if _, _, ok, _ := list.Find(key); !ok { t.Fail() } } @@ -215,7 +215,7 @@ func TestFindGreaterOrEqual(t *testing.T) { var listPointer *SkipList // Test on empty list. 
- if _, ok, _ := listPointer.FindGreaterOrEqual(Element(0)); ok { + if _, _, ok, _ := listPointer.FindGreaterOrEqual(Element(0)); ok { t.Fail() } @@ -227,7 +227,7 @@ func TestFindGreaterOrEqual(t *testing.T) { for i := 0; i < maxN; i++ { key := Element(rand.Intn(maxNumber)) - if v, ok, _ := list.FindGreaterOrEqual(key); ok { + if _, v, ok, _ := list.FindGreaterOrEqual(key); ok { // if f is v should be bigger than the element before if v.Prev != nil && bytes.Compare(v.Prev.Key, key) >= 0 { fmt.Printf("PrevV: %s\n key: %s\n\n", string(v.Prev.Key), string(key)) @@ -261,7 +261,7 @@ func TestChangeValue(t *testing.T) { for i := 0; i < maxN; i++ { // The key only looks at the int so the string doesn't matter here! - f1, ok, _ := list.Find(Element(i)) + _, f1, ok, _ := list.Find(Element(i)) if !ok { t.Fail() } @@ -269,7 +269,7 @@ func TestChangeValue(t *testing.T) { if err != nil { t.Fail() } - f2, ok, _ := list.Find(Element(i)) + _, f2, ok, _ := list.Find(Element(i)) if !ok { t.Fail() } From e6196cdc503dbe135c3ac22f4e13f62968d30036 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 3 Oct 2021 17:54:25 -0700 Subject: [PATCH 23/30] add name list --- weed/util/skiplist/name_batch.go | 102 +++++++++ weed/util/skiplist/name_list.go | 303 +++++++++++++++++++++++++++ weed/util/skiplist/name_list_test.go | 73 +++++++ weed/util/skiplist/skiplist.pb.go | 75 ++++++- weed/util/skiplist/skiplist.proto | 4 + 5 files changed, 551 insertions(+), 6 deletions(-) create mode 100644 weed/util/skiplist/name_batch.go create mode 100644 weed/util/skiplist/name_list.go create mode 100644 weed/util/skiplist/name_list_test.go diff --git a/weed/util/skiplist/name_batch.go b/weed/util/skiplist/name_batch.go new file mode 100644 index 000000000..18427d341 --- /dev/null +++ b/weed/util/skiplist/name_batch.go @@ -0,0 +1,102 @@ +package skiplist + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/golang/protobuf/proto" + "sort" + "strings" +) + +type NameBatch struct { + key string + 
names map[string]struct{} +} + +func (nb *NameBatch) ContainsName(name string) (found bool) { + _, found = nb.names[name] + return +} +func (nb *NameBatch) WriteName(name string) { + if nb.key == "" || strings.Compare(nb.key, name) > 0 { + nb.key = name + } + nb.names[name] = struct{}{} +} +func (nb *NameBatch) DeleteName(name string) { + delete(nb.names, name) + if nb.key == name { + nb.key = "" + for n := range nb.names { + if nb.key == "" || strings.Compare(nb.key, n) > 0 { + nb.key = n + } + } + } +} +func (nb *NameBatch) ListNames(startFrom string, visitNamesFn func(name string) bool) bool { + var names []string + needFilter := startFrom == "" + for n := range nb.names { + if !needFilter || strings.Compare(n, startFrom) >= 0 { + names = append(names, n) + } + } + sort.Slice(names, func(i, j int) bool { + return strings.Compare(names[i], names[j]) < 0 + }) + for _, n := range names { + if !visitNamesFn(n) { + return false + } + } + return true +} + +func NewNameBatch() *NameBatch { + return &NameBatch{ + names: make(map[string]struct{}), + } +} + +func LoadNameBatch(data []byte) *NameBatch { + t := &NameBatchData{} + if len(data) > 0 { + err := proto.Unmarshal(data, t) + if err != nil { + glog.Errorf("unmarshal into NameBatchData{} : %v", err) + return nil + } + } + nb := NewNameBatch() + for _, n := range t.Names { + name := string(n) + if nb.key == "" || strings.Compare(nb.key, name) > 0 { + nb.key = name + } + nb.names[name] = struct{}{} + } + return nb +} + +func (nb *NameBatch) ToBytes() []byte { + t := &NameBatchData{} + for n := range nb.names { + t.Names = append(t.Names, []byte(n)) + } + data, _ := proto.Marshal(t) + return data +} + +func (nb *NameBatch) SplitBy(name string) (x, y *NameBatch) { + x, y = NewNameBatch(), NewNameBatch() + + for n := range nb.names { + // there should be no equal case though + if strings.Compare(n, name) <= 0 { + x.WriteName(n) + } else { + y.WriteName(n) + } + } + return +} diff --git a/weed/util/skiplist/name_list.go 
b/weed/util/skiplist/name_list.go new file mode 100644 index 000000000..db328afba --- /dev/null +++ b/weed/util/skiplist/name_list.go @@ -0,0 +1,303 @@ +package skiplist + +import ( + "bytes" +) + +type NameList struct { + skipList *SkipList + batchSize int +} + +func NewNameList(store ListStore, batchSize int) *NameList { + return &NameList{ + skipList: New(store), + batchSize: batchSize, + } +} + +/* +Be reluctant to create new nodes. Try to fit into either previous node or next node. +Prefer to add to previous node. + +There are multiple cases after finding the name for greater or equal node + 1. found and node.Key == name + The node contains a batch with leading key the same as the name + nothing to do + 2. no such node found or node.Key > name + + if no such node found + prevNode = list.LargestNode + + // case 2.1 + if previousNode contains name + nothing to do + + // prefer to add to previous node + if prevNode != nil { + // case 2.2 + if prevNode has capacity + prevNode.add name, and save + return + // case 2.3 + split prevNode by name + } + + // case 2.4 + // merge into next node. Avoid too many nodes if adding data in reverse order. 
+ if nextNode is not nil and nextNode has capacity + delete nextNode.Key + nextNode.Key = name + nextNode.batch.add name + insert nodeNode.Key + return + + // case 2.5 + if prevNode is nil + insert new node with key = name, value = batch{name} + return + +*/ +func (nl *NameList) WriteName(name string) error { + lookupKey := []byte(name) + prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) + if err != nil { + return err + } + // case 1: the name already exists as one leading key in the batch + if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { + return nil + } + + if !found { + prevNode, err = nl.skipList.GetLargestNode() + if err != nil { + return err + } + } + + if nextNode != nil && prevNode == nil { + prevNode, err = nl.skipList.loadElement(nextNode.Prev) + if err != nil { + return err + } + } + + if prevNode != nil { + prevNameBatch := LoadNameBatch(prevNode.Value) + // case 2.1 + if prevNameBatch.ContainsName(name) { + return nil + } + + // case 2.2 + if len(prevNameBatch.names) < nl.batchSize { + prevNameBatch.WriteName(name) + return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) + } + + // case 2.3 + x, y := prevNameBatch.SplitBy(name) + addToX := len(x.names) <= len(y.names) + if len(x.names) != len(prevNameBatch.names) { + if addToX { + x.WriteName(name) + } + if x.key == prevNameBatch.key { + if err := nl.skipList.ChangeValue(prevNode, x.ToBytes()); err != nil { + return err + } + } else { + if err := nl.skipList.Insert([]byte(x.key), x.ToBytes()); err != nil { + return err + } + } + } + if len(y.names) != len(prevNameBatch.names) { + if !addToX { + y.WriteName(name) + } + if y.key == prevNameBatch.key { + if err := nl.skipList.ChangeValue(prevNode, y.ToBytes()); err != nil { + return err + } + } else { + if err := nl.skipList.Insert([]byte(y.key), y.ToBytes()); err != nil { + return err + } + } + } + return nil + + } + + // case 2.4 + if nextNode != nil { + nextNameBatch := LoadNameBatch(nextNode.Value) + if 
len(nextNameBatch.names) < nl.batchSize { + if err := nl.skipList.Delete(nextNode.Key); err != nil { + return err + } + nextNameBatch.WriteName(name) + if err := nl.skipList.Insert([]byte(nextNameBatch.key), nextNameBatch.ToBytes()); err != nil { + return err + } + return nil + } + } + + // case 2.5 + // now prevNode is nil + newNameBatch := NewNameBatch() + newNameBatch.WriteName(name) + if err := nl.skipList.Insert([]byte(newNameBatch.key), newNameBatch.ToBytes()); err != nil { + return err + } + + return nil +} + +/* +// case 1: exists in nextNode +if nextNode != nil && nextNode.Key == name { + remove from nextNode, update nextNode + // TODO: merge with prevNode if possible? + return +} +if nextNode is nil + prevNode = list.Largestnode +if prevNode == nil and nextNode.Prev != nil + prevNode = load(nextNode.Prev) + +// case 2: does not exist +// case 2.1 +if prevNode == nil { + return +} +// case 2.2 +if prevNameBatch does not contain name { + return +} + +// case 3 +delete from prevNameBatch +if prevNameBatch + nextNode < capacityList + // case 3.1 + merge +else + // case 3.2 + update prevNode + + +*/ +func (nl *NameList) DeleteName(name string) error { + lookupKey := []byte(name) + prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) + if err != nil { + return err + } + + // case 1 + var nextNameBatch *NameBatch + if nextNode != nil { + nextNameBatch = LoadNameBatch(nextNode.Value) + } + if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { + if err := nl.skipList.Delete(nextNode.Key); err != nil { + return err + } + nextNameBatch.DeleteName(name) + if len(nextNameBatch.names) > 0 { + if err := nl.skipList.Insert([]byte(nextNameBatch.key), nextNameBatch.ToBytes()); err != nil { + return err + } + } + return nil + } + + if !found { + prevNode, err = nl.skipList.GetLargestNode() + if err != nil { + return err + } + } + + if nextNode != nil && prevNode == nil { + prevNode, err = nl.skipList.loadElement(nextNode.Prev) + if err != nil { + 
return err + } + } + + // case 2 + if prevNode == nil { + // case 2.1 + return nil + } + prevNameBatch := LoadNameBatch(prevNode.Value) + if !prevNameBatch.ContainsName(name) { + // case 2.2 + return nil + } + + // case 3 + prevNameBatch.DeleteName(name) + if len(prevNameBatch.names) == 0 { + if err := nl.skipList.Delete(prevNode.Key); err != nil { + return err + } + return nil + } + if nextNameBatch != nil && len(nextNameBatch.names) + len(prevNameBatch.names) < nl.batchSize { + // case 3.1 merge nextNode and prevNode + if err := nl.skipList.Delete(nextNode.Key); err != nil { + return err + } + for nextName := range nextNameBatch.names { + prevNameBatch.WriteName(nextName) + } + return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) + } else { + // case 3.2 update prevNode + return nl.skipList.ChangeValue(prevNode, prevNameBatch.ToBytes()) + } + + return nil +} + +func (nl *NameList) ListNames(startFrom string, visitNamesFn func(name string) bool) error { + lookupKey := []byte(startFrom) + prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) + if err != nil { + return err + } + if found && bytes.Compare(nextNode.Key, lookupKey) == 0 { + prevNode = nil + } + if !found { + prevNode, err = nl.skipList.GetLargestNode() + if err != nil { + return err + } + } + + if prevNode != nil { + prevNameBatch := LoadNameBatch(prevNode.Value) + if !prevNameBatch.ListNames(startFrom, visitNamesFn) { + return nil + } + } + + for nextNode != nil { + nextNameBatch := LoadNameBatch(nextNode.Value) + if !nextNameBatch.ListNames(startFrom, visitNamesFn) { + return nil + } + nextNode, err = nl.skipList.loadElement(nextNode.Next[0]) + if err != nil { + return err + } + } + + return nil +} diff --git a/weed/util/skiplist/name_list_test.go b/weed/util/skiplist/name_list_test.go new file mode 100644 index 000000000..811a101f2 --- /dev/null +++ b/weed/util/skiplist/name_list_test.go @@ -0,0 +1,73 @@ +package skiplist + +import ( + "math/rand" + "strconv" + 
"testing" +) + +const ( + maxNameCount = 100 +) + +func String(x int) string { + return strconv.Itoa(x) +} + +func TestNameList(t *testing.T) { + list := NewNameList(memStore, 7) + + for i := 0; i < maxNameCount; i++ { + list.WriteName(String(i)) + } + + counter := 0 + list.ListNames("", func(name string) bool { + counter++ + print(name, " ") + return true + }) + if counter != maxNameCount { + t.Fail() + } + + // list.skipList.println() + + deleteBase := 5 + deleteCount := maxNameCount - 3 * deleteBase + + for i := deleteBase; i < deleteBase+deleteCount; i++ { + list.DeleteName(String(i)) + } + + counter = 0 + list.ListNames("", func(name string) bool { + counter++ + return true + }) + // list.skipList.println() + if counter != maxNameCount-deleteCount { + t.Fail() + } + + // randomized deletion + list = NewNameList(memStore, 7) + // Delete elements at random positions in the list. + rList := rand.Perm(maxN) + for _, i := range rList { + list.WriteName(String(i)) + } + for _, i := range rList { + list.DeleteName(String(i)) + } + counter = 0 + list.ListNames("", func(name string) bool { + counter++ + print(name, " ") + return true + }) + if counter != 0 { + t.Fail() + } + +} diff --git a/weed/util/skiplist/skiplist.pb.go b/weed/util/skiplist/skiplist.pb.go index 82afec453..adb121bfc 100644 --- a/weed/util/skiplist/skiplist.pb.go +++ b/weed/util/skiplist/skiplist.pb.go @@ -238,6 +238,53 @@ func (x *SkipListElement) GetPrev() *SkipListElementReference { return nil } +type NameBatchData struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Names [][]byte `protobuf:"bytes,1,rep,name=names,proto3" json:"names,omitempty"` +} + +func (x *NameBatchData) Reset() { + *x = NameBatchData{} + if protoimpl.UnsafeEnabled { + mi := &file_skiplist_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *NameBatchData) String() string { + return 
protoimpl.X.MessageStringOf(x) +} + +func (*NameBatchData) ProtoMessage() {} + +func (x *NameBatchData) ProtoReflect() protoreflect.Message { + mi := &file_skiplist_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use NameBatchData.ProtoReflect.Descriptor instead. +func (*NameBatchData) Descriptor() ([]byte, []int) { + return file_skiplist_proto_rawDescGZIP(), []int{3} +} + +func (x *NameBatchData) GetNames() [][]byte { + if x != nil { + return x.Names + } + return nil +} + var File_skiplist_proto protoreflect.FileDescriptor var file_skiplist_proto_rawDesc = []byte{ @@ -275,10 +322,13 @@ var file_skiplist_proto_rawDesc = []byte{ 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x2e, 0x53, 0x6b, 0x69, 0x70, 0x4c, 0x69, 0x73, 0x74, 0x45, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x52, 0x04, 0x70, 0x72, 0x65, 0x76, - 0x42, 0x33, 0x5a, 0x31, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, - 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, - 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, 0x74, 0x69, 0x6c, 0x2f, 0x73, 0x6b, 0x69, - 0x70, 0x6c, 0x69, 0x73, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x22, 0x25, 0x0a, 0x0d, 0x4e, 0x61, 0x6d, 0x65, 0x42, 0x61, 0x74, 0x63, 0x68, 0x44, 0x61, 0x74, + 0x61, 0x12, 0x14, 0x0a, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0c, + 0x52, 0x05, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x42, 0x33, 0x5a, 0x31, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x69, 0x73, 0x6c, 0x75, 0x73, 0x66, 0x2f, + 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x77, 0x65, 0x65, 0x64, 0x2f, 0x75, + 0x74, 0x69, 
0x6c, 0x2f, 0x73, 0x6b, 0x69, 0x70, 0x6c, 0x69, 0x73, 0x74, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -293,11 +343,12 @@ func file_skiplist_proto_rawDescGZIP() []byte { return file_skiplist_proto_rawDescData } -var file_skiplist_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_skiplist_proto_msgTypes = make([]protoimpl.MessageInfo, 4) var file_skiplist_proto_goTypes = []interface{}{ (*SkipListProto)(nil), // 0: skiplist.SkipListProto (*SkipListElementReference)(nil), // 1: skiplist.SkipListElementReference (*SkipListElement)(nil), // 2: skiplist.SkipListElement + (*NameBatchData)(nil), // 3: skiplist.NameBatchData } var file_skiplist_proto_depIdxs = []int32{ 1, // 0: skiplist.SkipListProto.start_levels:type_name -> skiplist.SkipListElementReference @@ -353,6 +404,18 @@ func file_skiplist_proto_init() { return nil } } + file_skiplist_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*NameBatchData); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } type x struct{} out := protoimpl.TypeBuilder{ @@ -360,7 +423,7 @@ func file_skiplist_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_skiplist_proto_rawDesc, NumEnums: 0, - NumMessages: 3, + NumMessages: 4, NumExtensions: 0, NumServices: 0, }, diff --git a/weed/util/skiplist/skiplist.proto b/weed/util/skiplist/skiplist.proto index bfb190b33..2991ad830 100644 --- a/weed/util/skiplist/skiplist.proto +++ b/weed/util/skiplist/skiplist.proto @@ -24,3 +24,7 @@ message SkipListElement { bytes value = 5; SkipListElementReference prev = 6; } + +message NameBatchData { + repeated bytes names = 1; +} \ No newline at end of file From ba7fbac07fa315cf3c0082e54093fbe0ba3865c2 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 3 Oct 2021 19:23:34 -0700 Subject: [PATCH 24/30] rename --- weed/util/skiplist/{serde.go => skiplist_serde.go} | 0 1 file changed, 0 
insertions(+), 0 deletions(-) rename weed/util/skiplist/{serde.go => skiplist_serde.go} (100%) diff --git a/weed/util/skiplist/serde.go b/weed/util/skiplist/skiplist_serde.go similarity index 100% rename from weed/util/skiplist/serde.go rename to weed/util/skiplist/skiplist_serde.go From 366f522a2d2adcb31250d3b0967947749b3ab4a2 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 4 Oct 2021 01:01:31 -0700 Subject: [PATCH 25/30] add redis3 --- weed/command/imports.go | 1 + weed/filer.toml | 5 ++ weed/filer/redis3/kv_directory_children.go | 88 ++++++++++++++++++--- weed/filer/redis3/skiplist_element_store.go | 52 ++++++++++++ weed/filer/redis3/universal_redis_store.go | 34 ++++---- weed/server/filer_server.go | 1 + weed/util/skiplist/name_batch.go | 2 +- weed/util/skiplist/name_list.go | 25 +++++- weed/util/skiplist/name_list_serde.go | 71 +++++++++++++++++ weed/util/skiplist/name_list_test.go | 4 +- weed/util/skiplist/skiplist.go | 59 ++++++++++---- 11 files changed, 295 insertions(+), 47 deletions(-) create mode 100644 weed/filer.toml create mode 100644 weed/filer/redis3/skiplist_element_store.go create mode 100644 weed/util/skiplist/name_list_serde.go diff --git a/weed/command/imports.go b/weed/command/imports.go index a2f59189f..48cda5f90 100644 --- a/weed/command/imports.go +++ b/weed/command/imports.go @@ -29,6 +29,7 @@ import ( _ "github.com/chrislusf/seaweedfs/weed/filer/postgres2" _ "github.com/chrislusf/seaweedfs/weed/filer/redis" _ "github.com/chrislusf/seaweedfs/weed/filer/redis2" + _ "github.com/chrislusf/seaweedfs/weed/filer/redis3" _ "github.com/chrislusf/seaweedfs/weed/filer/sqlite" _ "github.com/chrislusf/seaweedfs/weed/filer/tikv" ) diff --git a/weed/filer.toml b/weed/filer.toml new file mode 100644 index 000000000..a0af38d95 --- /dev/null +++ b/weed/filer.toml @@ -0,0 +1,5 @@ +[redis3] +enabled = true +address = "localhost:6379" +password = "" +database = 0 diff --git a/weed/filer/redis3/kv_directory_children.go 
b/weed/filer/redis3/kv_directory_children.go index f3152c970..5465a833d 100644 --- a/weed/filer/redis3/kv_directory_children.go +++ b/weed/filer/redis3/kv_directory_children.go @@ -3,11 +3,13 @@ package redis3 import ( "context" "fmt" - "github.com/chrislusf/seaweedfs/weed/util/bptree" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util/skiplist" "github.com/go-redis/redis/v8" - "github.com/golang/protobuf/proto" ) +const maxNameBatchSizeLimit = 5 + func insertChild(ctx context.Context, client redis.UniversalClient, key string, name string) error { data, err := client.Get(ctx, key).Result() if err != nil { @@ -15,12 +17,22 @@ func insertChild(ctx context.Context, client redis.UniversalClient, key string, return fmt.Errorf("read %s: %v", key, err) } } - rootNode := &bptree.ProtoNode{} - if err := proto.UnmarshalMerge([]byte(data), rootNode); err != nil { - return fmt.Errorf("decoding root for %s: %v", key, err) + store := newSkipListElementStore(key, client) + nameList := skiplist.LoadNameList([]byte(data), store, maxNameBatchSizeLimit) + + // println("add", key, name) + if err := nameList.WriteName(name); err != nil { + glog.Errorf("add %s %s: %v", key, name, err) + return err + } + if !nameList.HasChanges() { + return nil + } + + if err := client.Set(ctx, key, nameList.ToBytes(), 0).Err(); err != nil { + return err } - tree := rootNode.ToBpTree() - tree.Add(bptree.String(name), nil) + return nil } @@ -31,19 +43,69 @@ func removeChild(ctx context.Context, client redis.UniversalClient, key string, return fmt.Errorf("read %s: %v", key, err) } } - rootNode := &bptree.ProtoNode{} - if err := proto.UnmarshalMerge([]byte(data), rootNode); err != nil { - return fmt.Errorf("decoding root for %s: %v", key, err) + store := newSkipListElementStore(key, client) + nameList := skiplist.LoadNameList([]byte(data), store, maxNameBatchSizeLimit) + + if err := nameList.DeleteName(name); err != nil { + return err + } + if !nameList.HasChanges() { + 
return nil + } + + if err := client.Set(ctx, key, nameList.ToBytes(), 0).Err(); err != nil { + return err } - tree := rootNode.ToBpTree() - tree.Add(bptree.String(name), nil) + return nil } func removeChildren(ctx context.Context, client redis.UniversalClient, key string, onDeleteFn func(name string) error) error { + + data, err := client.Get(ctx, key).Result() + if err != nil { + if err != redis.Nil { + return fmt.Errorf("read %s: %v", key, err) + } + } + store := newSkipListElementStore(key, client) + nameList := skiplist.LoadNameList([]byte(data), store, maxNameBatchSizeLimit) + + if err = nameList.ListNames("", func(name string) bool { + if err := onDeleteFn(name); err != nil { + glog.Errorf("delete %s child %s: %v", key, name, err) + return false + } + return true + }); err != nil { + return err + } + + if err = nameList.RemoteAllListElement(); err != nil { + return err + } + return nil + } -func iterateChildren(ctx context.Context, client redis.UniversalClient, key string, eachFn func(name string) error) error { +func listChildren(ctx context.Context, client redis.UniversalClient, key string, startFileName string, eachFn func(name string) bool) error { + + data, err := client.Get(ctx, key).Result() + if err != nil { + if err != redis.Nil { + return fmt.Errorf("read %s: %v", key, err) + } + } + store := newSkipListElementStore(key, client) + nameList := skiplist.LoadNameList([]byte(data), store, maxNameBatchSizeLimit) + + if err = nameList.ListNames(startFileName, func(name string) bool { + return eachFn(name) + }); err != nil { + return err + } + return nil + } diff --git a/weed/filer/redis3/skiplist_element_store.go b/weed/filer/redis3/skiplist_element_store.go new file mode 100644 index 000000000..fa13d35e9 --- /dev/null +++ b/weed/filer/redis3/skiplist_element_store.go @@ -0,0 +1,52 @@ +package redis3 + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util/skiplist" + 
"github.com/go-redis/redis/v8" + "github.com/golang/protobuf/proto" +) + +type SkipListElementStore struct { + prefix string + client redis.UniversalClient +} + +var _ = skiplist.ListStore(&SkipListElementStore{}) + +func newSkipListElementStore(prefix string, client redis.UniversalClient) *SkipListElementStore { + return &SkipListElementStore{ + prefix: prefix, + client: client, + } +} + +func (m *SkipListElementStore) SaveElement(id int64, element *skiplist.SkipListElement) error { + key := fmt.Sprintf("%s%d", m.prefix, id) + data, err := proto.Marshal(element) + if err != nil { + glog.Errorf("marshal %s: %v", key, err) + } + return m.client.Set(context.Background(), key, data, 0).Err() +} + +func (m *SkipListElementStore) DeleteElement(id int64) error { + key := fmt.Sprintf("%s%d", m.prefix, id) + return m.client.Del(context.Background(), key).Err() +} + +func (m *SkipListElementStore) LoadElement(id int64) (*skiplist.SkipListElement, error) { + key := fmt.Sprintf("%s%d", m.prefix, id) + data, err := m.client.Get(context.Background(), key).Result() + if err != nil { + if err == redis.Nil { + return nil, nil + } + return nil, err + } + t := &skiplist.SkipListElement{} + err = proto.Unmarshal([]byte(data), t) + return t, err +} diff --git a/weed/filer/redis3/universal_redis_store.go b/weed/filer/redis3/universal_redis_store.go index 958338afe..8a89e7c48 100644 --- a/weed/filer/redis3/universal_redis_store.go +++ b/weed/filer/redis3/universal_redis_store.go @@ -115,6 +115,8 @@ func (store *UniversalRedis3Store) DeleteFolderChildren(ctx context.Context, ful if err != nil { return fmt.Errorf("DeleteFolderChildren %s in parent dir: %v", fullpath, err) } + // not efficient, but need to remove if it is a directory + store.Client.Del(ctx, genDirectoryListKey(string(path))) return nil }) @@ -127,41 +129,41 @@ func (store *UniversalRedis3Store) ListDirectoryPrefixedEntries(ctx context.Cont func (store *UniversalRedis3Store) ListDirectoryEntries(ctx context.Context, dirPath 
util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { dirListKey := genDirectoryListKey(string(dirPath)) - start := int64(0) - if startFileName != "" { - start, _ = store.Client.ZRank(ctx, dirListKey, startFileName).Result() - if !includeStartFile { - start++ + counter := int64(0) + + err = listChildren(ctx, store.Client, dirListKey, startFileName, func(fileName string) bool { + if startFileName != "" { + if !includeStartFile && startFileName == fileName { + return true + } } - } - members, err := store.Client.ZRange(ctx, dirListKey, start, start+int64(limit)-1).Result() - if err != nil { - return lastFileName, fmt.Errorf("list %s : %v", dirPath, err) - } - // fetch entry meta - for _, fileName := range members { path := util.NewFullPath(string(dirPath), fileName) entry, err := store.FindEntry(ctx, path) lastFileName = fileName if err != nil { glog.V(0).Infof("list %s : %v", path, err) if err == filer_pb.ErrNotFound { - continue + return true } } else { if entry.TtlSec > 0 { if entry.Attr.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { store.Client.Del(ctx, string(path)).Result() store.Client.ZRem(ctx, dirListKey, fileName).Result() - continue + return true } } + counter++ if !eachEntryFunc(entry) { - break + return false + } + if counter >= limit { + return false } } - } + return true + }) return lastFileName, err } diff --git a/weed/server/filer_server.go b/weed/server/filer_server.go index b886bf641..aa66b4187 100644 --- a/weed/server/filer_server.go +++ b/weed/server/filer_server.go @@ -34,6 +34,7 @@ import ( _ "github.com/chrislusf/seaweedfs/weed/filer/postgres2" _ "github.com/chrislusf/seaweedfs/weed/filer/redis" _ "github.com/chrislusf/seaweedfs/weed/filer/redis2" + _ "github.com/chrislusf/seaweedfs/weed/filer/redis3" _ "github.com/chrislusf/seaweedfs/weed/filer/sqlite" "github.com/chrislusf/seaweedfs/weed/glog" 
"github.com/chrislusf/seaweedfs/weed/notification" diff --git a/weed/util/skiplist/name_batch.go b/weed/util/skiplist/name_batch.go index 18427d341..71e5aeeba 100644 --- a/weed/util/skiplist/name_batch.go +++ b/weed/util/skiplist/name_batch.go @@ -35,7 +35,7 @@ func (nb *NameBatch) DeleteName(name string) { } func (nb *NameBatch) ListNames(startFrom string, visitNamesFn func(name string) bool) bool { var names []string - needFilter := startFrom == "" + needFilter := startFrom != "" for n := range nb.names { if !needFilter || strings.Compare(n, startFrom) >= 0 { names = append(names, n) diff --git a/weed/util/skiplist/name_list.go b/weed/util/skiplist/name_list.go index db328afba..4ba26665a 100644 --- a/weed/util/skiplist/name_list.go +++ b/weed/util/skiplist/name_list.go @@ -9,7 +9,7 @@ type NameList struct { batchSize int } -func NewNameList(store ListStore, batchSize int) *NameList { +func newNameList(store ListStore, batchSize int) *NameList { return &NameList{ skipList: New(store), batchSize: batchSize, @@ -59,6 +59,7 @@ There are multiple cases after finding the name for greater or equal node */ func (nl *NameList) WriteName(name string) error { + lookupKey := []byte(name) prevNode, nextNode, found, err := nl.skipList.FindGreaterOrEqual(lookupKey) if err != nil { @@ -301,3 +302,25 @@ func (nl *NameList) ListNames(startFrom string, visitNamesFn func(name string) b return nil } + +func (nl *NameList) RemoteAllListElement() error { + + t := nl.skipList + + nodeRef := t.startLevels[0] + for nodeRef != nil { + node, err := t.loadElement(nodeRef) + if err != nil { + return err + } + if node == nil { + return nil + } + if err := t.deleteElement(node); err != nil { + return err + } + nodeRef = node.Next[0] + } + return nil + +} \ No newline at end of file diff --git a/weed/util/skiplist/name_list_serde.go b/weed/util/skiplist/name_list_serde.go new file mode 100644 index 000000000..be9f06698 --- /dev/null +++ b/weed/util/skiplist/name_list_serde.go @@ -0,0 +1,71 @@ 
+package skiplist + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/golang/protobuf/proto" +) + +func LoadNameList(data []byte, store ListStore, batchSize int) *NameList { + + nl := &NameList{ + skipList: New(store), + batchSize: batchSize, + } + + if len(data) == 0 { + return nl + } + + message := &SkipListProto{} + if err := proto.Unmarshal(data, message); err != nil { + glog.Errorf("loading skiplist: %v", err) + } + nl.skipList.maxNewLevel = int(message.MaxNewLevel) + nl.skipList.maxLevel = int(message.MaxLevel) + for i, ref := range message.StartLevels { + nl.skipList.startLevels[i] = &SkipListElementReference{ + ElementPointer: ref.ElementPointer, + Key: ref.Key, + } + } + for i, ref := range message.EndLevels { + nl.skipList.endLevels[i] = &SkipListElementReference{ + ElementPointer: ref.ElementPointer, + Key: ref.Key, + } + } + return nl +} + +func (nl *NameList) HasChanges() bool { + return nl.skipList.hasChanges +} + +func (nl *NameList) ToBytes() []byte { + message := &SkipListProto{} + message.MaxNewLevel = int32(nl.skipList.maxNewLevel) + message.MaxLevel = int32(nl.skipList.maxLevel) + for _, ref := range nl.skipList.startLevels { + if ref == nil { + break + } + message.StartLevels = append(message.StartLevels, &SkipListElementReference{ + ElementPointer: ref.ElementPointer, + Key: ref.Key, + }) + } + for _, ref := range nl.skipList.endLevels { + if ref == nil { + break + } + message.EndLevels = append(message.EndLevels, &SkipListElementReference{ + ElementPointer: ref.ElementPointer, + Key: ref.Key, + }) + } + data, err := proto.Marshal(message) + if err != nil { + glog.Errorf("marshal skiplist: %v", err) + } + return data +} \ No newline at end of file diff --git a/weed/util/skiplist/name_list_test.go b/weed/util/skiplist/name_list_test.go index 811a101f2..b3a686553 100644 --- a/weed/util/skiplist/name_list_test.go +++ b/weed/util/skiplist/name_list_test.go @@ -15,7 +15,7 @@ func String(x int) string { } func TestNameList(t 
*testing.T) { - list := NewNameList(memStore, 7) + list := newNameList(memStore, 7) for i := 0; i < maxNameCount; i++ { list.WriteName(String(i)) @@ -51,7 +51,7 @@ func TestNameList(t *testing.T) { } // randomized deletion - list = NewNameList(memStore, 7) + list = newNameList(memStore, 7) // Delete elements at random positions in the list. rList := rand.Perm(maxN) for _, i := range rList { diff --git a/weed/util/skiplist/skiplist.go b/weed/util/skiplist/skiplist.go index b48a05b4a..52e6c606a 100644 --- a/weed/util/skiplist/skiplist.go +++ b/weed/util/skiplist/skiplist.go @@ -22,6 +22,7 @@ type SkipList struct { maxNewLevel int maxLevel int listStore ListStore + hasChanges bool // elementCount int } @@ -93,6 +94,9 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (prevElemen if err != nil { return } + if currentNode == nil { + return + } // In case, that our first element is already greater-or-equal! if findGreaterOrEqual && compareElement(currentNode, key) > 0 { @@ -115,6 +119,9 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (prevElemen if err != nil { return } + if currentNode == nil { + return + } } else { if index > 0 { @@ -126,6 +133,9 @@ func (t *SkipList) findExtended(key []byte, findGreaterOrEqual bool) (prevElemen if err != nil { return } + if currentNodeNext == nil { + return + } foundElem = currentNodeNext ok = true return @@ -216,9 +226,11 @@ func (t *SkipList) Delete(key []byte) (err error) { if err != nil { return err } - nextNextNode.Prev = currentNode.Reference() - if err = t.saveElement(nextNextNode); err != nil { - return err + if nextNextNode != nil { + nextNextNode.Prev = currentNode.Reference() + if err = t.saveElement(nextNextNode); err != nil { + return err + } } } // t.elementCount-- @@ -230,6 +242,7 @@ func (t *SkipList) Delete(key []byte) (err error) { // Link from start needs readjustments. 
startNextKey := t.startLevels[index].Key if compareElement(nextNode, startNextKey) == 0 { + t.hasChanges = true t.startLevels[index] = nextNode.Next[index] // This was our currently highest node! if t.startLevels[index] == nil { @@ -240,6 +253,7 @@ func (t *SkipList) Delete(key []byte) (err error) { // Link from end needs readjustments. if nextNode.Next[index] == nil { t.endLevels[index] = currentNode.Reference() + t.hasChanges = true } nextNode.Next[index] = nil } @@ -260,7 +274,7 @@ func (t *SkipList) Delete(key []byte) (err error) { // Insert inserts the given ListElement into the skiplist. // Insert runs in approx. O(log(n)) -func (t *SkipList) Insert(key, value []byte) (err error){ +func (t *SkipList) Insert(key, value []byte) (err error) { if t == nil || key == nil { return @@ -272,6 +286,7 @@ func (t *SkipList) Insert(key, value []byte) (err error){ if level > t.maxLevel { level = t.maxLevel + 1 t.maxLevel = level + t.hasChanges = true } elem := &SkipListElement{ @@ -326,9 +341,11 @@ func (t *SkipList) Insert(key, value []byte) (err error){ if nextNode, err = t.loadElement(nextNodeRef); err != nil { return } - nextNode.Prev = elem.Reference() - if err = t.saveElement(nextNode); err != nil { - return + if nextNode != nil { + nextNode.Prev = elem.Reference() + if err = t.saveElement(nextNode); err != nil { + return + } } } } @@ -343,6 +360,9 @@ func (t *SkipList) Insert(key, value []byte) (err error){ } } currentNode = nextNode + if currentNode == nil { + return + } } else { // Go down index-- @@ -366,18 +386,22 @@ func (t *SkipList) Insert(key, value []byte) (err error){ if err != nil { return err } - startLevelElement.Prev = elem.Reference() - if err = t.saveElement(startLevelElement); err != nil { - return err + if startLevelElement != nil { + startLevelElement.Prev = elem.Reference() + if err = t.saveElement(startLevelElement); err != nil { + return err + } } } elem.Next[i] = t.startLevels[i] t.startLevels[i] = elem.Reference() + t.hasChanges = true } // 
link the endLevels to this element! if elem.Next[i] == nil { t.endLevels[i] = elem.Reference() + t.hasChanges = true } didSomething = true @@ -392,20 +416,24 @@ func (t *SkipList) Insert(key, value []byte) (err error){ if err != nil { return err } - endLevelElement.Next[i] = elem.Reference() - if err = t.saveElement(endLevelElement); err != nil { - return err + if endLevelElement != nil { + endLevelElement.Next[i] = elem.Reference() + if err = t.saveElement(endLevelElement); err != nil { + return err + } } } if i == 0 { elem.Prev = t.endLevels[i] } t.endLevels[i] = elem.Reference() + t.hasChanges = true } // Link the startLevels to this element! if t.startLevels[i] == nil || bytes.Compare(t.startLevels[i].Key, key) > 0 { t.startLevels[i] = elem.Reference() + t.hasChanges = true } didSomething = true @@ -486,6 +514,9 @@ func (t *SkipList) println() { for nodeRef != nil { print(fmt.Sprintf("%v: ", string(nodeRef.Key))) node, _ := t.loadElement(nodeRef) + if node == nil { + break + } for i := 0; i <= int(node.Level); i++ { l := node.Next[i] @@ -510,8 +541,8 @@ func (t *SkipList) println() { } } - println() nodeRef = node.Next[0] + println() } print("end --> ") From 2b9aab344228cfbb0ba3da5f410eb76bd41d18ea Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 4 Oct 2021 01:03:40 -0700 Subject: [PATCH 26/30] use 1000 per batch --- weed/filer/redis3/kv_directory_children.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/filer/redis3/kv_directory_children.go b/weed/filer/redis3/kv_directory_children.go index 5465a833d..16d921d03 100644 --- a/weed/filer/redis3/kv_directory_children.go +++ b/weed/filer/redis3/kv_directory_children.go @@ -8,7 +8,7 @@ import ( "github.com/go-redis/redis/v8" ) -const maxNameBatchSizeLimit = 5 +const maxNameBatchSizeLimit = 1000 func insertChild(ctx context.Context, client redis.UniversalClient, key string, name string) error { data, err := client.Get(ctx, key).Result() From 04662126bb9374b355d4bf85ae8a04548b2e9283 Mon Sep 
17 00:00:00 2001 From: Chris Lu Date: Mon, 4 Oct 2021 01:04:27 -0700 Subject: [PATCH 27/30] add redis3 --- weed/command/scaffold/filer.toml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/weed/command/scaffold/filer.toml b/weed/command/scaffold/filer.toml index caf9d173d..aeb8a5b67 100644 --- a/weed/command/scaffold/filer.toml +++ b/weed/command/scaffold/filer.toml @@ -185,6 +185,28 @@ routeByLatency = false # This changes the data layout. Only add new directories. Removing/Updating will cause data loss. superLargeDirectories = [] +[redis3] # beta +enabled = false +address = "localhost:6379" +password = "" +database = 0 + +[redis_cluster3] # beta +enabled = false +addresses = [ + "localhost:30001", + "localhost:30002", + "localhost:30003", + "localhost:30004", + "localhost:30005", + "localhost:30006", +] +password = "" +# allows reads from slave servers or the master, but all writes still go to the master +readOnly = false +# automatically use the closest Redis server for reads +routeByLatency = false + [etcd] enabled = false servers = "localhost:2379" From 280ab7f95cdbbaf2fee4a49d10d944e2865829fc Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 4 Oct 2021 02:30:24 -0700 Subject: [PATCH 28/30] add test --- weed/util/skiplist/skiplist_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/weed/util/skiplist/skiplist_test.go b/weed/util/skiplist/skiplist_test.go index 115656cd9..a35bef6f3 100644 --- a/weed/util/skiplist/skiplist_test.go +++ b/weed/util/skiplist/skiplist_test.go @@ -16,6 +16,21 @@ var ( memStore = newMemStore() ) +func TestReverseInsert(t *testing.T) { + list := NewSeed(100, memStore) + + list.Insert([]byte("zzz"), []byte("zzz")) + list.Delete([]byte("zzz")) + + list.Insert([]byte("aaa"), []byte("aaa")) + + if list.IsEmpty() { + t.Fail() + } + +} + + func TestInsertAndFind(t *testing.T) { k0 := []byte("0") From 513fed323a86f30996adc8e66f6cf6641b40e77a Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 4 
Oct 2021 02:30:44 -0700 Subject: [PATCH 29/30] SkipListElementReference can be an empty object --- weed/util/skiplist/skiplist_serde.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/weed/util/skiplist/skiplist_serde.go b/weed/util/skiplist/skiplist_serde.go index 5b7089e80..e528b8a3d 100644 --- a/weed/util/skiplist/skiplist_serde.go +++ b/weed/util/skiplist/skiplist_serde.go @@ -34,8 +34,18 @@ func (t *SkipList) deleteElement(element *SkipListElement) error { } func (t *SkipList) loadElement(ref *SkipListElementReference) (*SkipListElement, error) { - if ref == nil { + if ref.IsNil() { return nil, nil } return t.listStore.LoadElement(ref.ElementPointer) } + +func (ref *SkipListElementReference) IsNil() bool { + if ref == nil { + return true + } + if len(ref.Key) == 0 { + return true + } + return false +} \ No newline at end of file From 947add39e6b2bca54ddc04fd33be28868cae8a0f Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 4 Oct 2021 02:31:38 -0700 Subject: [PATCH 30/30] clean up *SkipListElementReference loaded from Redis --- weed/filer/redis3/skiplist_element_store.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/weed/filer/redis3/skiplist_element_store.go b/weed/filer/redis3/skiplist_element_store.go index fa13d35e9..66a5408d6 100644 --- a/weed/filer/redis3/skiplist_element_store.go +++ b/weed/filer/redis3/skiplist_element_store.go @@ -48,5 +48,15 @@ func (m *SkipListElementStore) LoadElement(id int64) (*skiplist.SkipListElement, } t := &skiplist.SkipListElement{} err = proto.Unmarshal([]byte(data), t) + if err == nil { + for i:=0;i