Browse Source

bptree does not work well for auto-increasing keys

pull/2354/head
Chris Lu 3 years ago
parent
commit
df1d6133a8
  1. 60
      weed/util/bptree/README.md
  2. 4
      weed/util/bptree/bpmap.go
  3. 12
      weed/util/bptree/bptree.go
  4. 67
      weed/util/bptree/bptree_node.go
  5. 34
      weed/util/bptree/bptree_store_test.go
  6. 44
      weed/util/bptree/getter_setter.go

60
weed/util/bptree/README.md

@ -0,0 +1,60 @@
This adapts one b+ tree implementation
https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree
to persist changes to disk.
# When does a node need to persist itself?
* A node changed its key or value
* When an item is added.
* When an item is updated.
* When an item is deleted.
* When a node is split.
* 2 new nodes are created (they should persist themselves).
* The parent node needs to point to the new nodes.
* When a node is merged.
* delete one node
* persist the merged node
In general, if one node is returned from a function, the node should have already been persisted.
The parent node may need to delete the old node.
BpTree
Add(key ItemKey, value ItemValue)
new_root = self.getRoot().put(key,value)
a, b, err := self.insert(key, value)
self.internal_insert(key, value)
self.internal_split(q.keys[0], q)
persist(a,b)
self.persist() // child add q node
self.maybePersist(child == p)
self.leaf_insert(key, value)
self.persist() // if dedup
self.leaf_split(key, value)
self.pure_leaf_split(key, value)
persist(a,b)
a.persist()
persist(a,b)
self.put_kv(key, value)
new_root.persist()
self.setRoot(new_root)
oldroot.destroy()
// maybe persist BpTree new root
Replace(key ItemKey, where WhereFunc, value ItemValue)
leaf.persist()
RemoveWhere(key ItemKey, where WhereFunc)
self.getRoot().remove(key, where)
self.internal_remove(key, nil, where)
child.leaf_remove(key, nil, where)
child.leaf_remove(key, sibling.keys[0], where)
l.destroy() // when the node is empty
a.maybePersist(hasChange)
self.destroy() // when no keys left
self.persist() // when some keys are left
self.leaf_remove(key, self.keys[len(self.keys)-1], where)
new_root.persist() // when new root is added
// maybe persist BpTree new root

4
weed/util/bptree/bpmap.go

@ -47,7 +47,9 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) {
return nil, err return nil, err
} }
if new_root == nil { if new_root == nil {
self.setRoot(NewLeaf(ns, true))
new_root = NewLeaf(ns, false)
err = new_root.persist()
self.setRoot(new_root)
} else { } else {
self.setRoot(new_root) self.setRoot(new_root)
} }

12
weed/util/bptree/bptree.go

@ -54,9 +54,13 @@ func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err
for i, leaf, next := li(); next != nil; i, leaf, next = next() { for i, leaf, next := li(); next != nil; i, leaf, next = next() {
if where(leaf.values[i]) { if where(leaf.values[i]) {
leaf.values[i] = value leaf.values[i] = value
if persistErr := leaf.persist(); persistErr != nil && err == nil {
err = persistErr
break
}
} }
} }
return nil
return err
} }
func (self *BpTree) Find(key ItemKey) (kvi KVIterator) { func (self *BpTree) Find(key ItemKey) (kvi KVIterator) {
@ -89,11 +93,13 @@ func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) {
return err return err
} }
if new_root == nil { if new_root == nil {
self.setRoot(NewLeaf(ns, false))
new_root = NewLeaf(ns, false)
err = new_root.persist()
self.setRoot(new_root)
} else { } else {
self.setRoot(new_root) self.setRoot(new_root)
} }
return nil
return err
} }
func (self *BpTree) Keys() (ki KIterator) { func (self *BpTree) Keys() (ki KIterator) {

67
weed/util/bptree/bptree_node.go

@ -2,15 +2,23 @@ package bptree
type ItemKey Hashable type ItemKey Hashable
type ItemValue Equatable type ItemValue Equatable
// PersistFunc is the signature of a callback that receives a node to be
// saved and reports any failure as an error.
type PersistFunc func(node *BpNode) error
// DestroyFunc is the signature of a callback that receives a node to be
// removed and reports any failure as an error.
type DestroyFunc func(node *BpNode) error
// Package-level hooks. Both are nil unless assigned; BpNode.persist and
// BpNode.destroy call them only when they are non-nil.
var (
// PersistFn, when set, is called by BpNode.persist with the node itself.
PersistFn PersistFunc
// DestroyFn, when set, is called by BpNode.destroy with the node itself.
DestroyFn DestroyFunc
)
type BpNode struct { type BpNode struct {
keys []ItemKey
values []ItemValue
pointers []*BpNode
next *BpNode
prev *BpNode
no_dup bool
protoNode *ProtoNode
keys []ItemKey
values []ItemValue
pointers []*BpNode
next *BpNode
prev *BpNode
no_dup bool
protoNodeId int64
protoNode *ProtoNode
} }
func NewInternal(size int) *BpNode { func NewInternal(size int) *BpNode {
@ -18,8 +26,9 @@ func NewInternal(size int) *BpNode {
panic(NegativeSize()) panic(NegativeSize())
} }
return &BpNode{ return &BpNode{
keys: make([]ItemKey, 0, size),
pointers: make([]*BpNode, 0, size),
keys: make([]ItemKey, 0, size),
pointers: make([]*BpNode, 0, size),
protoNodeId: GetProtoNodeId(),
} }
} }
@ -28,9 +37,10 @@ func NewLeaf(size int, no_dup bool) *BpNode {
panic(NegativeSize()) panic(NegativeSize())
} }
return &BpNode{ return &BpNode{
keys: make([]ItemKey, 0, size),
values: make([]ItemValue, 0, size),
no_dup: no_dup,
keys: make([]ItemKey, 0, size),
values: make([]ItemValue, 0, size),
no_dup: no_dup,
protoNodeId: GetProtoNodeId(),
} }
} }
@ -191,7 +201,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root *BpNode, err error)
root = NewInternal(self.NodeSize()) root = NewInternal(self.NodeSize())
root.put_kp(a.keys[0], a) root.put_kp(a.keys[0], a)
root.put_kp(b.keys[0], b) root.put_kp(b.keys[0], b)
return root, nil
return root, root.persist()
} }
// right is only set on split // right is only set on split
@ -237,10 +247,10 @@ func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode,
if err := self.put_kp(q.keys[0], q); err != nil { if err := self.put_kp(q.keys[0], q); err != nil {
return nil, nil, err return nil, nil, err
} }
return self, nil, nil
return self, nil, self.persist()
} }
} }
return self, nil, nil
return self, nil, self.maybePersist(child != p)
} }
/* On split /* On split
@ -268,7 +278,7 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err
return nil, nil, err return nil, nil, err
} }
} }
return a, b, nil
return a, b, persist(a, b)
} }
/* if the leaf is full then it will defer to a leaf_split /* if the leaf is full then it will defer to a leaf_split
@ -284,7 +294,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
i, has := self.find(key) i, has := self.find(key)
if has { if has {
self.values[i] = value self.values[i] = value
return self, nil, nil
return self, nil, self.persist()
} }
} }
if self.Full() { if self.Full() {
@ -293,7 +303,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err
if err := self.put_kv(key, value); err != nil { if err := self.put_kv(key, value); err != nil {
return nil, nil, err return nil, nil, err
} }
return self, nil, nil
return self, nil, self.persist()
} }
} }
@ -323,7 +333,7 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err
return nil, nil, err return nil, nil, err
} }
} }
return a, b, nil
return a, b, persist(a, b)
} }
/* a pure leaf split has two cases: /* a pure leaf split has two cases:
@ -349,7 +359,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
return nil, nil, err return nil, nil, err
} }
insert_linked_list_node(a, b.getPrev(), b) insert_linked_list_node(a, b.getPrev(), b)
return a, b, nil
return a, b, persist(a, b)
} else { } else {
a = self a = self
e := self.find_end_of_pure_run() e := self.find_end_of_pure_run()
@ -357,7 +367,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
if err := e.put_kv(key, value); err != nil { if err := e.put_kv(key, value); err != nil {
return nil, nil, err return nil, nil, err
} }
return a, nil, nil
return a, nil, a.persist()
} else { } else {
b = NewLeaf(self.NodeSize(), self.no_dup) b = NewLeaf(self.NodeSize(), self.no_dup)
if err := b.put_kv(key, value); err != nil { if err := b.put_kv(key, value); err != nil {
@ -367,7 +377,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode,
if e.keys[0].Equals(key) { if e.keys[0].Equals(key) {
return a, nil, nil return a, nil, nil
} }
return a, b, nil
return a, b, persist(a, b)
} }
} }
} }
@ -484,6 +494,7 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun
sibling = sibling.left_most_leaf() sibling = sibling.left_most_leaf()
} }
child := self.pointers[i] child := self.pointers[i]
oldChild := child
if child.Internal() { if child.Internal() {
child, err = child.internal_remove(key, sibling, where) child, err = child.internal_remove(key, sibling, where)
} else { } else {
@ -508,9 +519,9 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun
self.pointers[i] = child self.pointers[i] = child
} }
if len(self.keys) == 0 { if len(self.keys) == 0 {
return nil, nil
return nil, self.destroy()
} }
return self, nil
return self, self.maybePersist(oldChild != child)
} }
func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) { func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) {
@ -518,8 +529,10 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode,
return nil, BpTreeError("Expected a leaf node") return nil, BpTreeError("Expected a leaf node")
} }
a = self a = self
hasChange := false
for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() { for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() {
if where(l.values[j]) { if where(l.values[j]) {
hasChange = true
if err := l.remove_key_at(j); err != nil { if err := l.remove_key_at(j); err != nil {
return nil, err return nil, err
} }
@ -538,8 +551,14 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode,
} else { } else {
a = nil a = nil
} }
if err := l.destroy(); err != nil {
return nil, err
}
} }
} }
if a != nil {
return a, a.maybePersist(hasChange)
}
return a, nil return a, nil
} }

34
weed/util/bptree/bptree_store_test.go

@ -0,0 +1,34 @@
package bptree
import (
"fmt"
"testing"
)
// TestAddRemove adds 1024 sequentially numbered string keys to a tree of node
// size 32, logging every persist/destroy hook invocation and dumping the tree
// after each insertion. Despite its name it currently exercises insertion
// only, and it makes no assertions: it is a visual trace of how the tree
// behaves under auto-increasing keys.
func TestAddRemove(t *testing.T) {
	tree := NewBpTree(32)

	// PersistFn and DestroyFn are package-level hooks. Put the previous values
	// back when the test ends so the logging stubs below do not leak into
	// other tests running in the same package.
	prevPersist, prevDestroy := PersistFn, DestroyFn
	defer func() {
		PersistFn, DestroyFn = prevPersist, prevDestroy
	}()

	PersistFn = func(node *BpNode) error {
		println("saving", node.protoNodeId)
		return nil
	}
	DestroyFn = func(node *BpNode) error {
		println("delete", node.protoNodeId)
		return nil
	}

	for i := 0; i < 1024; i++ {
		println("++++++++++", i)
		// The same formatted string is used as both key and value.
		item := String(fmt.Sprintf("%02d", i))
		// NOTE(review): the result of Add is not inspected here; if Add
		// returns an error, consider failing the test on it.
		tree.Add(item, item)
		printTree(tree.root, "")
	}
}
// printTree writes a textual dump of the subtree rooted at node to stdout.
// Each node is printed as "Node <protoNodeId>" followed by its keys; when a
// non-nil pointer exists at the same index as a key, that child is dumped
// recursively with a longer prefix.
func printTree(node *BpNode, prefix string) {
	fmt.Printf("%sNode %d\n", prefix, node.protoNodeId)

	keyPrefix := prefix + " "
	childPrefix := keyPrefix + " "
	for idx, k := range node.keys {
		fmt.Printf("%skey %s\n", keyPrefix, k)
		// Nodes may hold fewer pointers than keys; skip when there is no slot.
		if idx >= len(node.pointers) {
			continue
		}
		if child := node.pointers[idx]; child != nil {
			printTree(child, childPrefix)
		}
	}
}

44
weed/util/bptree/getter_setter.go

@ -1,5 +1,13 @@
package bptree package bptree
// protoNodeId holds the most recently issued node id; it starts at zero.
var protoNodeId int64

// GetProtoNodeId advances the package-level counter by one and returns the
// new value, so the first call yields 1. No synchronization is performed
// here.
func GetProtoNodeId() int64 {
	next := protoNodeId + 1
	protoNodeId = next
	return next
}
func (self *BpMap) getRoot() *BpNode { func (self *BpMap) getRoot() *BpNode {
return self.root return self.root
} }
@ -26,3 +34,39 @@ func (self *BpNode) getPrev() *BpNode {
func (self *BpNode) setPrev(prev *BpNode) { func (self *BpNode) setPrev(prev *BpNode) {
self.prev = prev self.prev = prev
} }
// getNode returns the child pointer stored at index x. The index is not
// range-checked here, so an out-of-range x panics like any slice access.
func (self *BpNode) getNode(x int) *BpNode {
	child := self.pointers[x]
	return child
}
// maybePersist persists the node only when shouldPersist is true; otherwise
// it does nothing and returns nil.
func (self *BpNode) maybePersist(shouldPersist bool) error {
	if shouldPersist {
		return self.persist()
	}
	return nil
}
// persist passes the node to the package-level PersistFn hook and returns
// the hook's result. With no hook installed it is a no-op returning nil.
func (self *BpNode) persist() error {
	if PersistFn == nil {
		return nil
	}
	return PersistFn(self)
}
// destroy passes the node to the package-level DestroyFn hook and returns
// the hook's result. With no hook installed it is a no-op returning nil.
func (self *BpNode) destroy() error {
	if DestroyFn == nil {
		return nil
	}
	return DestroyFn(self)
}
// persist persists a and then b, skipping whichever is nil. It stops at the
// first failure and returns that error; b is not attempted if a fails.
func persist(a, b *BpNode) error {
	for _, node := range [...]*BpNode{a, b} {
		if node == nil {
			continue
		}
		if err := node.persist(); err != nil {
			return err
		}
	}
	return nil
}
Loading…
Cancel
Save