Browse Source

split node based on the last inserted key

pull/2354/head
Chris Lu 3 years ago
parent
commit
b751debd31
  1. 6
      weed/util/bptree/bpmap.go
  2. 6
      weed/util/bptree/bptree.go
  3. 79
      weed/util/bptree/bptree_node.go
  4. 8
      weed/util/bptree/bptree_store_test.go
  5. 203
      weed/util/bptree/bptree_test.go

6
weed/util/bptree/bpmap.go

@ -11,7 +11,7 @@ type BpMap BpTree
func NewBpMap(node_size int) *BpMap {
return &BpMap{
root: NewLeaf(node_size, true),
root: NewLeaf(node_size),
}
}
@ -41,13 +41,13 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) {
if err != nil {
return nil, err
}
ns := self.getRoot().NodeSize()
ns := self.getRoot().Capacity()
new_root, err := self.getRoot().remove(key, func(value ItemValue) bool { return true })
if err != nil {
return nil, err
}
if new_root == nil {
new_root = NewLeaf(ns, false)
new_root = NewLeaf(ns)
err = new_root.persist()
self.setRoot(new_root)
} else {

6
weed/util/bptree/bptree.go

@ -14,7 +14,7 @@ type loc_iterator func() (i int, leaf *BpNode, li loc_iterator)
func NewBpTree(node_size int) *BpTree {
return &BpTree{
root: NewLeaf(node_size, false),
root: NewLeaf(node_size),
}
}
@ -87,13 +87,13 @@ func (self *BpTree) Range(from, to ItemKey) (kvi KVIterator) {
}
func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) {
ns := self.getRoot().NodeSize()
ns := self.getRoot().Capacity()
new_root, err := self.getRoot().remove(key, where)
if err != nil {
return err
}
if new_root == nil {
new_root = NewLeaf(ns, false)
new_root = NewLeaf(ns)
err = new_root.persist()
self.setRoot(new_root)
} else {

79
weed/util/bptree/bptree_node.go

@ -16,7 +16,6 @@ type BpNode struct {
pointers []*BpNode
next *BpNode
prev *BpNode
no_dup bool
protoNodeId int64
protoNode *ProtoNode
}
@ -32,14 +31,13 @@ func NewInternal(size int) *BpNode {
}
}
func NewLeaf(size int, no_dup bool) *BpNode {
func NewLeaf(size int) *BpNode {
if size < 0 {
panic(NegativeSize())
}
return &BpNode{
keys: make([]ItemKey, 0, size),
values: make([]ItemValue, 0, size),
no_dup: no_dup,
protoNodeId: GetProtoNodeId(),
}
}
@ -65,7 +63,11 @@ func (self *BpNode) Internal() bool {
return cap(self.pointers) > 0
}
func (self *BpNode) NodeSize() int {
// Len returns the number of keys currently stored in the node.
func (self *BpNode) Len() int {
return len(self.keys)
}
// Capacity returns the capacity of the node's key slice, i.e. how many keys
// the node can hold without the slice growing. For a leaf created by NewLeaf
// this is the size argument it was constructed with.
func (self *BpNode) Capacity() int {
return cap(self.keys)
}
@ -78,19 +80,6 @@ func (self *BpNode) Height() int {
return self.pointers[0].Height() + 1
}
// count returns how many consecutive keys in this node are equal to key,
// starting at the index reported by find. It only inspects this node's own
// keys and stops at the first key that is not equal.
func (self *BpNode) count(key ItemKey) int {
	start, _ := self.find(key)
	matched := 0
	// Walk forward from the find position while the run of equal keys lasts.
	for start+matched < len(self.keys) && self.keys[start+matched].Equals(key) {
		matched++
	}
	return matched
}
func (self *BpNode) has(key ItemKey) bool {
_, has := self.find(key)
return has
@ -198,7 +187,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error)
return a, nil
}
// else we have root split
root = NewInternal(self.NodeSize())
root = NewInternal(self.Capacity())
root.put_kp(a.keys[0], a)
root.put_kp(b.keys[0], b)
return root, root.persist()
@ -267,9 +256,9 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err
return nil, nil, BpTreeError("Tried to split an internal block on duplicate key")
}
a = self
b = NewInternal(self.NodeSize())
balance_nodes(a, b)
if key.Less(b.keys[0]) {
b = NewInternal(self.Capacity())
balance_nodes(a, b, key)
if b.Len() > 0 && key.Less(b.keys[0]) {
if err := a.put_kp(key, ptr); err != nil {
return nil, nil, err
}
@ -290,7 +279,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
if self.Internal() {
return nil, nil, BpTreeError("Expected a leaf node")
}
if self.no_dup {
if true { // no_dup = true
i, has := self.find(key)
if has {
self.values[i] = value
@ -321,10 +310,10 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err
return self.pure_leaf_split(key, value)
}
a = self
b = NewLeaf(self.NodeSize(), self.no_dup)
b = NewLeaf(self.Capacity())
insert_linked_list_node(b, a, a.getNext())
balance_nodes(a, b)
if key.Less(b.keys[0]) {
balance_nodes(a, b, key)
if b.Len() > 0 && key.Less(b.keys[0]) {
if err := a.put_kv(key, value); err != nil {
return nil, nil, err
}
@ -353,7 +342,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
return nil, nil, BpTreeError("Expected a pure leaf node")
}
if key.Less(self.keys[0]) {
a = NewLeaf(self.NodeSize(), self.no_dup)
a = NewLeaf(self.Capacity())
b = self
if err := a.put_kv(key, value); err != nil {
return nil, nil, err
@ -369,7 +358,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
}
return a, nil, a.persist()
} else {
b = NewLeaf(self.NodeSize(), self.no_dup)
b = NewLeaf(self.Capacity())
if err := b.put_kv(key, value); err != nil {
return nil, nil, err
}
@ -604,11 +593,7 @@ func (self *BpNode) find(key ItemKey) (int, bool) {
if key.Less(self.keys[m]) {
r = m - 1
} else if key.Equals(self.keys[m]) {
for j := m; j >= 0; j-- {
if j == 0 || !key.Equals(self.keys[j-1]) {
return j, true
}
}
return m, true
} else {
l = m + 1
}
@ -713,9 +698,15 @@ func remove_linked_list_node(n *BpNode) {
}
}
/* a must be full and b must be empty else there will be a panic
/**
* a must be full and b must be empty else there will be a panic
*
* Unlike a common B-tree implementation, this splits the node at the inserted key.
* Items less than splitKey stay in a; all other items are moved to b.
* This should help with monotonically increasing inserts.
*
*/
func balance_nodes(a, b *BpNode) {
func balance_nodes(a, b *BpNode, splitKey ItemKey) {
if len(b.keys) != 0 {
panic(BpTreeError("b was not empty"))
}
@ -731,16 +722,8 @@ func balance_nodes(a, b *BpNode) {
if cap(a.pointers) != cap(b.pointers) {
panic(BpTreeError("cap(a.pointers) != cap(b.pointers)"))
}
m := len(a.keys) / 2
for m < len(a.keys) && a.keys[m-1].Equals(a.keys[m]) {
m++
}
if m == len(a.keys) {
m--
for m > 0 && a.keys[m-1].Equals(a.keys[m]) {
m--
}
}
m := find_split_index(a, b, splitKey)
var lim = len(a.keys) - m
b.keys = b.keys[:lim]
if cap(a.values) > 0 {
@ -773,3 +756,11 @@ func balance_nodes(a, b *BpNode) {
a.pointers = a.pointers[:m]
}
}
// find_split_index returns the index at which node a should be split for
// splitKey: it scans a.keys from the end and returns the position just past
// the last key that is less than splitKey, or 0 when no key is less than
// splitKey. Assuming a.keys is sorted, this equals the number of keys that are
// strictly less than splitKey. The b parameter is not used.
func find_split_index(a, b *BpNode, splitKey ItemKey) int {
	for idx := len(a.keys) - 1; idx >= 0; idx-- {
		if a.keys[idx].Less(splitKey) {
			// Everything up to and including idx stays on the left side.
			return idx + 1
		}
	}
	return 0
}

8
weed/util/bptree/bptree_store_test.go

@ -6,7 +6,7 @@ import (
)
func TestAddRemove(t *testing.T) {
tree := NewBpTree(32)
tree := NewBpTree(5)
PersistFn = func(node *BpNode) error {
println("saving", node.protoNodeId)
return nil
@ -24,11 +24,11 @@ func TestAddRemove(t *testing.T) {
func printTree(node *BpNode, prefix string) {
fmt.Printf("%sNode %d\n", prefix, node.protoNodeId)
prefix += " "
prefix += " "
for i:=0;i<len(node.keys);i++{
fmt.Printf("%skey %s\n", prefix, node.keys[i])
fmt.Printf("%skey %v\n", prefix, node.keys[i])
if i < len(node.pointers) && node.pointers[i] != nil {
printTree(node.pointers[i], prefix+" ")
printTree(node.pointers[i], prefix+" ")
}
}
}

203
weed/util/bptree/bptree_test.go

@ -275,7 +275,7 @@ func TestBpMap(t *testing.T) {
}
func Test_get_start(t *testing.T) {
root := NewLeaf(2, false)
root := NewLeaf(2)
root, err := root.put(Int(1), Int(1))
if err != nil {
t.Error(err)
@ -344,99 +344,51 @@ func Test_get_start(t *testing.T) {
}
func Test_get_end(t *testing.T) {
root := NewLeaf(3, false)
root, err := root.put(Int(1), Int(-1))
root := NewLeaf(3)
root, err := root.put(Int(1), Int(1))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(4), Int(-1))
root, err = root.put(Int(4), Int(4))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(3), Int(1))
root, err = root.put(Int(3), Int(3))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(3), Int(2))
root, err = root.put(Int(8), Int(8))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(3), Int(3))
root, err = root.put(Int(9), Int(9))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(10), Int(10))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(3), Int(4))
root, err = root.put(Int(6), Int(6))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(3), Int(5))
root, err = root.put(Int(7), Int(7))
if err != nil {
t.Fatal(err)
}
root, err = root.put(Int(5), Int(5))
if err != nil {
t.Fatal(err)
}
t.Log(root)
t.Log(root.pointers[0])
t.Log(root.pointers[1])
t.Log(root.pointers[2])
i, n := root.get_start(Int(3))
t.Log(n)
if n != root.pointers[1] {
t.Error("wrong node from get_start")
}
if i != 0 {
t.Error("wrong index from get_start")
}
i, n = root.get_end(Int(3))
t.Log(n)
if n != root.pointers[1].getNext() {
t.Error("wrong node from get_end")
}
if i != 1 {
t.Error("wrong index from get_end")
}
i, n = root.get_end(Int(1))
t.Log(n)
if n != root.pointers[0] {
t.Error("wrong node from get_end")
}
if i != 0 {
t.Error("wrong index from get_end")
}
i, n = root.get_end(Int(4))
t.Log(n)
if n != root.pointers[2] {
t.Error("wrong node from get_end")
}
if i != 0 {
t.Error("wrong index from get_end")
}
i, n = root.get_end(Int(0))
t.Log(n)
if n != root.pointers[0] {
t.Error("wrong node from get_end")
}
if i != 0 {
t.Error("wrong index from get_end")
}
i, n = root.get_end(Int(5))
t.Log(n)
if n != root.pointers[2] {
t.Error("wrong node from get_end")
}
if i != 0 {
t.Error("wrong index from get_end")
}
i, n = root.get_end(Int(2))
t.Log(n)
if n != root.pointers[1] {
t.Error("wrong node from get_end")
}
if i != 0 {
t.Error("wrong index from get_end")
}
printTree(root, "")
}
func Test_put_no_root_split(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
}
@ -452,6 +404,10 @@ func Test_put_no_root_split(t *testing.T) {
}
}
p, err = a.put(Int(1), Int(3))
t.Log(a)
printTree(a, "")
if err != nil {
t.Error(err)
} else {
@ -461,16 +417,13 @@ func Test_put_no_root_split(t *testing.T) {
if !p.has(Int(1)) {
t.Error("p didn't have the right keys", p)
}
if p.getNext() == nil {
t.Error("p.next should not be nil")
}
t.Log(p)
t.Log(p.getNext())
}
}
func Test_put_root_split(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
p, err := a.put(Int(1), Int(1))
if err != nil {
t.Error(err)
@ -520,7 +473,7 @@ func Test_put_root_split(t *testing.T) {
func Test_internal_insert_no_split(t *testing.T) {
a := NewInternal(3)
leaf := NewLeaf(1, false)
leaf := NewLeaf(1)
if err := leaf.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
}
@ -548,7 +501,7 @@ func Test_internal_insert_no_split(t *testing.T) {
func Test_internal_insert_split_less(t *testing.T) {
a := NewInternal(3)
leaf := NewLeaf(1, false)
leaf := NewLeaf(1)
if err := leaf.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
}
@ -648,17 +601,17 @@ func Test_internal_split_greater(t *testing.T) {
if q == nil {
t.Errorf("q == nil")
}
if !p.has(Int(1)) {
if !p.has(Int(1)) || !p.has(Int(3)) || !p.has(Int(4)){
t.Error("p didn't have the right keys", p)
}
if !q.has(Int(3)) || !q.has(Int(4)) || !q.has(Int(5)) {
if !q.has(Int(5)) {
t.Error("q didn't have the right keys", q)
}
}
}
func Test_leaf_insert_no_split(t *testing.T) {
a := NewLeaf(3, false)
a := NewLeaf(3)
insert_linked_list_node(a, nil, nil)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
@ -684,7 +637,7 @@ func Test_leaf_insert_no_split(t *testing.T) {
// tests the defer to split logic
func Test_leaf_insert_split_less(t *testing.T) {
a := NewLeaf(3, false)
a := NewLeaf(3)
insert_linked_list_node(a, nil, nil)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
@ -715,7 +668,7 @@ func Test_leaf_insert_split_less(t *testing.T) {
}
func Test_leaf_split_less(t *testing.T) {
a := NewLeaf(3, false)
a := NewLeaf(3)
insert_linked_list_node(a, nil, nil)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
@ -746,7 +699,7 @@ func Test_leaf_split_less(t *testing.T) {
}
func Test_leaf_split_equal(t *testing.T) {
a := NewLeaf(3, false)
a := NewLeaf(3)
insert_linked_list_node(a, nil, nil)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
@ -770,14 +723,14 @@ func Test_leaf_split_equal(t *testing.T) {
if !p.has(Int(1)) {
t.Error("p didn't have the right keys", p)
}
if !q.has(Int(3)) || q.count(Int(3)) != 2 || !q.has(Int(5)) {
t.Error("q didn't have the right keys", q, q.count(Int(3)))
if !q.has(Int(3)) || !q.has(Int(5)) {
t.Error("q didn't have the right keys", q)
}
}
}
func Test_leaf_split_greater(t *testing.T) {
a := NewLeaf(3, false)
a := NewLeaf(3)
insert_linked_list_node(a, nil, nil)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
@ -798,10 +751,10 @@ func Test_leaf_split_greater(t *testing.T) {
if q == nil {
t.Errorf("q == nil")
}
if !p.has(Int(1)) {
if !p.has(Int(1)) || !p.has(Int(3)) || !p.has(Int(4)) {
t.Error("p didn't have the right keys", p)
}
if !q.has(Int(3)) || !q.has(Int(4)) || !q.has(Int(5)) {
if !q.has(Int(5)) {
t.Error("q didn't have the right keys", q)
}
}
@ -809,13 +762,13 @@ func Test_leaf_split_greater(t *testing.T) {
// tests the defer logic
func Test_pure_leaf_insert_split_less(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(2, false)
c := NewLeaf(2)
insert_linked_list_node(c, b, nil)
d := NewLeaf(2, false)
d := NewLeaf(2)
insert_linked_list_node(d, c, nil)
if err := a.put_kv(Int(3), Int(1)); err != nil {
t.Error(err)
@ -882,13 +835,13 @@ func Test_pure_leaf_insert_split_less(t *testing.T) {
}
func Test_pure_leaf_split_less(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(2, false)
c := NewLeaf(2)
insert_linked_list_node(c, b, nil)
d := NewLeaf(2, false)
d := NewLeaf(2)
insert_linked_list_node(d, c, nil)
if err := a.put_kv(Int(3), Int(1)); err != nil {
t.Error(err)
@ -955,13 +908,13 @@ func Test_pure_leaf_split_less(t *testing.T) {
}
func Test_pure_leaf_split_equal(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(2, false)
c := NewLeaf(2)
insert_linked_list_node(c, b, nil)
d := NewLeaf(2, false)
d := NewLeaf(2)
insert_linked_list_node(d, c, nil)
if err := a.put_kv(Int(3), Int(1)); err != nil {
t.Error(err)
@ -1019,13 +972,13 @@ func Test_pure_leaf_split_equal(t *testing.T) {
}
func Test_pure_leaf_split_greater(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(2, false)
c := NewLeaf(2)
insert_linked_list_node(c, b, nil)
d := NewLeaf(2, false)
d := NewLeaf(2)
insert_linked_list_node(d, c, nil)
if err := a.put_kv(Int(3), Int(1)); err != nil {
t.Error(err)
@ -1089,13 +1042,13 @@ func Test_pure_leaf_split_greater(t *testing.T) {
}
func Test_find_end_of_pure_run(t *testing.T) {
a := NewLeaf(2, false)
a := NewLeaf(2)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(2, false)
c := NewLeaf(2)
insert_linked_list_node(c, b, nil)
d := NewLeaf(2, false)
d := NewLeaf(2)
insert_linked_list_node(d, c, nil)
if err := a.put_kv(Int(3), Int(1)); err != nil {
t.Error(err)
@ -1125,13 +1078,13 @@ func Test_find_end_of_pure_run(t *testing.T) {
}
func Test_insert_linked_list_node(t *testing.T) {
a := NewLeaf(1, false)
a := NewLeaf(1)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(3, false)
c := NewLeaf(3)
insert_linked_list_node(c, b, nil)
d := NewLeaf(4, false)
d := NewLeaf(4)
insert_linked_list_node(d, a, b)
if a.getPrev() != nil {
t.Errorf("expected a.prev == nil")
@ -1160,13 +1113,13 @@ func Test_insert_linked_list_node(t *testing.T) {
}
func Test_remove_linked_list_node(t *testing.T) {
a := NewLeaf(1, false)
a := NewLeaf(1)
insert_linked_list_node(a, nil, nil)
b := NewLeaf(2, false)
b := NewLeaf(2)
insert_linked_list_node(b, a, nil)
c := NewLeaf(3, false)
c := NewLeaf(3)
insert_linked_list_node(c, b, nil)
d := NewLeaf(4, false)
d := NewLeaf(4)
insert_linked_list_node(d, a, b)
if a.getPrev() != nil {
t.Errorf("expected a.prev == nil")
@ -1235,8 +1188,8 @@ func Test_remove_linked_list_node(t *testing.T) {
}
func Test_balance_leaf_nodes_with_dup(t *testing.T) {
a := NewLeaf(3, false)
b := NewLeaf(3, false)
a := NewLeaf(3)
b := NewLeaf(3)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
}
@ -1246,18 +1199,18 @@ func Test_balance_leaf_nodes_with_dup(t *testing.T) {
if err := a.put_kv(Int(2), Int(1)); err != nil {
t.Error(err)
}
balance_nodes(a, b)
if !a.has(Int(1)) || a.count(Int(1)) != 2 || a.has(Int(2)) {
balance_nodes(a, b, Int(2))
if !a.has(Int(1)) || a.has(Int(2)) {
t.Error("a had wrong items", a)
}
if !b.has(Int(2)) || b.count(Int(2)) != 1 || b.has(Int(1)) {
if !b.has(Int(2)) || b.has(Int(1)) {
t.Error("a had wrong items", b)
}
}
func Test_balance_leaf_nodes(t *testing.T) {
a := NewLeaf(7, false)
b := NewLeaf(7, false)
a := NewLeaf(7)
b := NewLeaf(7)
if err := a.put_kv(Int(1), Int(1)); err != nil {
t.Error(err)
}
@ -1279,15 +1232,15 @@ func Test_balance_leaf_nodes(t *testing.T) {
if err := a.put_kv(Int(7), Int(7)); err != nil {
t.Error(err)
}
balance_nodes(a, b)
balance_nodes(a, b, Int(5))
for i, k := range a.keys {
if int(k.(Int)) != i+1 {
t.Errorf("k != %d", i+1)
}
}
for i, k := range b.keys {
if int(k.(Int)) != 3+i+1 {
t.Errorf("k != %d", 3+i+1)
if int(k.(Int)) != 5+i {
t.Errorf("k != %d", 5+i)
}
}
for i, v := range a.values {
@ -1296,8 +1249,8 @@ func Test_balance_leaf_nodes(t *testing.T) {
}
}
for i, v := range b.values {
if int(v.(Int)) != 3+i+1 {
t.Errorf("v != %d", 3+i+1)
if int(v.(Int)) != 5+i {
t.Errorf("v != %d", 5+i)
}
}
t.Log(a)
@ -1325,7 +1278,7 @@ func Test_balance_internal_nodes(t *testing.T) {
if err := a.put_kp(Int(6), nil); err != nil {
t.Error(err)
}
balance_nodes(a, b)
balance_nodes(a, b, Int(4))
for i, k := range a.keys {
if int(k.(Int)) != i+1 {
t.Errorf("k != %d", i+1)

Loading…
Cancel
Save