diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index cbf363c95..399ac7b86 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -11,7 +11,7 @@ type BpMap BpTree func NewBpMap(node_size int) *BpMap { return &BpMap{ - root: NewLeaf(node_size, true), + root: NewLeaf(node_size), } } @@ -41,13 +41,13 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { if err != nil { return nil, err } - ns := self.getRoot().NodeSize() + ns := self.getRoot().Capacity() new_root, err := self.getRoot().remove(key, func(value ItemValue) bool { return true }) if err != nil { return nil, err } if new_root == nil { - new_root = NewLeaf(ns, false) + new_root = NewLeaf(ns) err = new_root.persist() self.setRoot(new_root) } else { diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index f9a5cf058..3ad73ad30 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -14,7 +14,7 @@ type loc_iterator func() (i int, leaf *BpNode, li loc_iterator) func NewBpTree(node_size int) *BpTree { return &BpTree{ - root: NewLeaf(node_size, false), + root: NewLeaf(node_size), } } @@ -87,13 +87,13 @@ func (self *BpTree) Range(from, to ItemKey) (kvi KVIterator) { } func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) { - ns := self.getRoot().NodeSize() + ns := self.getRoot().Capacity() new_root, err := self.getRoot().remove(key, where) if err != nil { return err } if new_root == nil { - new_root = NewLeaf(ns, false) + new_root = NewLeaf(ns) err = new_root.persist() self.setRoot(new_root) } else { diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 4e6d63ac6..5c3461cfd 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -16,7 +16,6 @@ type BpNode struct { pointers []*BpNode next *BpNode prev *BpNode - no_dup bool protoNodeId int64 protoNode *ProtoNode } @@ -32,14 +31,13 @@ func NewInternal(size int) *BpNode { } } -func NewLeaf(size int, no_dup bool) *BpNode { +func NewLeaf(size int) *BpNode { if size < 0 { panic(NegativeSize()) } return &BpNode{ keys: make([]ItemKey, 0, size), values: make([]ItemValue, 0, size), - no_dup: no_dup, protoNodeId: GetProtoNodeId(), } } @@ -65,7 +63,11 @@ func (self *BpNode) Internal() bool { return cap(self.pointers) > 0 } -func (self *BpNode) NodeSize() int { +func (self *BpNode) Len() int { + return len(self.keys) +} + +func (self *BpNode) Capacity() int { return cap(self.keys) } @@ -78,19 +80,6 @@ func (self *BpNode) Height() int { return self.pointers[0].Height() + 1 } -func (self *BpNode) count(key ItemKey) int { - i, _ := self.find(key) - count := 0 - for ; i < len(self.keys); i++ { - if self.keys[i].Equals(key) { - count++ - } else { - break - } - } - return count -} - func (self *BpNode) has(key ItemKey) bool { _, has := self.find(key) return has @@ -198,7 +187,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error) return a, nil } // else we have root split - root = NewInternal(self.NodeSize()) + root = NewInternal(self.Capacity()) root.put_kp(a.keys[0], a) root.put_kp(b.keys[0], b) return root, root.persist() @@ -267,9 +256,9 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err return nil, nil, BpTreeError("Tried to split an internal block on duplicate key") } a = self - b = NewInternal(self.NodeSize()) - balance_nodes(a, b) - if key.Less(b.keys[0]) { + b = NewInternal(self.Capacity()) + balance_nodes(a, b, key) + if b.Len() > 0 && key.Less(b.keys[0]) { if err := a.put_kp(key, ptr); err != nil { return nil, nil, err } @@ -290,7 +279,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err if self.Internal() { return nil, nil, BpTreeError("Expected a leaf node") } - if self.no_dup { + if true { // no_dup = true i, has := self.find(key) if has { self.values[i] = value @@ -321,10 +310,10 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err return self.pure_leaf_split(key, value) } a = self - b = NewLeaf(self.NodeSize(), self.no_dup) + b = NewLeaf(self.Capacity()) insert_linked_list_node(b, a, a.getNext()) - balance_nodes(a, b) - if key.Less(b.keys[0]) { + balance_nodes(a, b, key) + if b.Len() > 0 && key.Less(b.keys[0]) { if err := a.put_kv(key, value); err != nil { return nil, nil, err } @@ -353,7 +342,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, return nil, nil, BpTreeError("Expected a pure leaf node") } if key.Less(self.keys[0]) { - a = NewLeaf(self.NodeSize(), self.no_dup) + a = NewLeaf(self.Capacity()) b = self if err := a.put_kv(key, value); err != nil { return nil, nil, err @@ -369,7 +358,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, } return a, nil, a.persist() } else { - b = NewLeaf(self.NodeSize(), self.no_dup) + b = NewLeaf(self.Capacity()) if err := b.put_kv(key, value); err != nil { return nil, nil, err } @@ -604,11 +593,7 @@ func (self *BpNode) find(key ItemKey) (int, bool) { if key.Less(self.keys[m]) { r = m - 1 } else if key.Equals(self.keys[m]) { - for j := m; j >= 0; j-- { - if j == 0 || !key.Equals(self.keys[j-1]) { - return j, true - } - } + return m, true } else { l = m + 1 } @@ -713,9 +698,15 @@ func remove_linked_list_node(n *BpNode) { } } -/* a must be full and b must be empty else there will be a panic +/** + * a must be full and b must be empty else there will be a panic + * + * Different from common btree implementation, this splits the nodes by the inserted key. + * Items less than the splitKey stays in a, or moved to b if otherwise. + * This should help for monotonically increasing inserts. + * */ -func balance_nodes(a, b *BpNode) { +func balance_nodes(a, b *BpNode, splitKey ItemKey) { if len(b.keys) != 0 { panic(BpTreeError("b was not empty")) } @@ -731,16 +722,8 @@ func balance_nodes(a, b *BpNode) { if cap(a.pointers) != cap(b.pointers) { panic(BpTreeError("cap(a.pointers) != cap(b.pointers)")) } - m := len(a.keys) / 2 - for m < len(a.keys) && a.keys[m-1].Equals(a.keys[m]) { - m++ - } - if m == len(a.keys) { - m-- - for m > 0 && a.keys[m-1].Equals(a.keys[m]) { - m-- - } - } + + m := find_split_index(a, b, splitKey) var lim = len(a.keys) - m b.keys = b.keys[:lim] if cap(a.values) > 0 { @@ -773,3 +756,11 @@ func balance_nodes(a, b *BpNode) { a.pointers = a.pointers[:m] } } + +func find_split_index(a, b *BpNode, splitKey ItemKey) int { + m := len(a.keys) + for m > 0 && !a.keys[m-1].Less(splitKey) { + m-- + } + return m +} diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go index a5e330aa9..6ed4abca8 100644 --- a/weed/util/bptree/bptree_store_test.go +++ b/weed/util/bptree/bptree_store_test.go @@ -6,7 +6,7 @@ import ( ) func TestAddRemove(t *testing.T) { - tree := NewBpTree(32) + tree := NewBpTree(5) PersistFn = func(node *BpNode) error { println("saving", node.protoNodeId) return nil @@ -24,11 +24,11 @@ func TestAddRemove(t *testing.T) { func printTree(node *BpNode, prefix string) { fmt.Printf("%sNode %d\n", prefix, node.protoNodeId) - prefix += " " + prefix += " " for i:=0;i