From df1d6133a82680d3b58c922ad02a14fc7ee017ba Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 22 Aug 2021 18:19:26 -0700 Subject: [PATCH] bptree does not work well for auto-increasing keys --- weed/util/bptree/README.md | 60 ++++++++++++++++++++++++ weed/util/bptree/bpmap.go | 4 +- weed/util/bptree/bptree.go | 12 +++-- weed/util/bptree/bptree_node.go | 67 +++++++++++++++++---------- weed/util/bptree/bptree_store_test.go | 34 ++++++++++++++ weed/util/bptree/getter_setter.go | 44 ++++++++++++++++++ 6 files changed, 193 insertions(+), 28 deletions(-) create mode 100644 weed/util/bptree/README.md create mode 100644 weed/util/bptree/bptree_store_test.go diff --git a/weed/util/bptree/README.md b/weed/util/bptree/README.md new file mode 100644 index 000000000..1dddae940 --- /dev/null +++ b/weed/util/bptree/README.md @@ -0,0 +1,60 @@ +This adapts one b+ tree implementation +https://sourcegraph.com/github.com/timtadh/data-structures@master/-/tree/tree/bptree +to persist changes to disk. + +# When does a node need to persist itself? + +* When a node changes its key or value + * When an item is added. + * When an item is updated. + * When an item is deleted. + +* When a node is split. + * 2 new nodes are created (they should persist themselves). + * The parent node needs to point to the new nodes. + +* When a node is merged. + * delete one node + * persist the merged node + + +In general, if one node is returned from a function, the node should have already been persisted. +The parent node may need to delete the old node.
+ +BpTree + Add(key ItemKey, value ItemValue) + new_root = self.getRoot().put(key,value) + a, b, err := self.insert(key, value) + self.internal_insert(key, value) + self.internal_split(q.keys[0], q) + persist(a,b) + self.persist() // child add q node + self.maybePersist(child == p) + self.leaf_insert(key, value) + self.persist() // if dedup + self.leaf_split(key, value) + self.pure_leaf_split(key, value) + persist(a,b) + a.persist() + persist(a,b) + self.put_kv(key, value) + new_root.persist() + self.setRoot(new_root) + oldroot.destroy() + // maybe persist BpTree new root + + Replace(key ItemKey, where WhereFunc, value ItemValue) + leaf.persist() + RemoveWhere(key ItemKey, where WhereFunc) + self.getRoot().remove(key, where) + self.internal_remove(key, nil, where) + child.leaf_remove(key, nil, where) + child.leaf_remove(key, sibling.keys[0], where) + l.destroy() // when the node is empty + a.maybePersist(hasChange) + self.destroy() // when no keys left + self.persist() // when some keys are left + self.leaf_remove(key, self.keys[len(self.keys)-1], where) + new_root.persist() // when new root is added + // maybe persist BpTree new root + \ No newline at end of file diff --git a/weed/util/bptree/bpmap.go b/weed/util/bptree/bpmap.go index e7509b179..cbf363c95 100644 --- a/weed/util/bptree/bpmap.go +++ b/weed/util/bptree/bpmap.go @@ -47,7 +47,9 @@ func (self *BpMap) Remove(key ItemKey) (value ItemValue, err error) { return nil, err } if new_root == nil { - self.setRoot(NewLeaf(ns, true)) + new_root = NewLeaf(ns, false) + err = new_root.persist() + self.setRoot(new_root) } else { self.setRoot(new_root) } diff --git a/weed/util/bptree/bptree.go b/weed/util/bptree/bptree.go index 2d09026c3..f9a5cf058 100644 --- a/weed/util/bptree/bptree.go +++ b/weed/util/bptree/bptree.go @@ -54,9 +54,13 @@ func (self *BpTree) Replace(key ItemKey, where WhereFunc, value ItemValue) (err for i, leaf, next := li(); next != nil; i, leaf, next = next() { if where(leaf.values[i]) { 
leaf.values[i] = value + if persistErr := leaf.persist(); persistErr != nil && err == nil { + err = persistErr + break + } } } - return nil + return err } func (self *BpTree) Find(key ItemKey) (kvi KVIterator) { @@ -89,11 +93,13 @@ func (self *BpTree) RemoveWhere(key ItemKey, where WhereFunc) (err error) { return err } if new_root == nil { - self.setRoot(NewLeaf(ns, false)) + new_root = NewLeaf(ns, false) + err = new_root.persist() + self.setRoot(new_root) } else { self.setRoot(new_root) } - return nil + return err } func (self *BpTree) Keys() (ki KIterator) { diff --git a/weed/util/bptree/bptree_node.go b/weed/util/bptree/bptree_node.go index 160dfad74..4e6d63ac6 100644 --- a/weed/util/bptree/bptree_node.go +++ b/weed/util/bptree/bptree_node.go @@ -2,15 +2,23 @@ package bptree type ItemKey Hashable type ItemValue Equatable +type PersistFunc func(node *BpNode) error +type DestroyFunc func(node *BpNode) error + +var ( + PersistFn PersistFunc + DestroyFn DestroyFunc +) type BpNode struct { - keys []ItemKey - values []ItemValue - pointers []*BpNode - next *BpNode - prev *BpNode - no_dup bool - protoNode *ProtoNode + keys []ItemKey + values []ItemValue + pointers []*BpNode + next *BpNode + prev *BpNode + no_dup bool + protoNodeId int64 + protoNode *ProtoNode } func NewInternal(size int) *BpNode { @@ -18,8 +26,9 @@ func NewInternal(size int) *BpNode { panic(NegativeSize()) } return &BpNode{ - keys: make([]ItemKey, 0, size), - pointers: make([]*BpNode, 0, size), + keys: make([]ItemKey, 0, size), + pointers: make([]*BpNode, 0, size), + protoNodeId: GetProtoNodeId(), } } @@ -28,9 +37,10 @@ func NewLeaf(size int, no_dup bool) *BpNode { panic(NegativeSize()) } return &BpNode{ - keys: make([]ItemKey, 0, size), - values: make([]ItemValue, 0, size), - no_dup: no_dup, + keys: make([]ItemKey, 0, size), + values: make([]ItemValue, 0, size), + no_dup: no_dup, + protoNodeId: GetProtoNodeId(), } } @@ -191,7 +201,7 @@ func (self *BpNode) put(key ItemKey, value ItemValue) (root 
*BpNode, err error) root = NewInternal(self.NodeSize()) root.put_kp(a.keys[0], a) root.put_kp(b.keys[0], b) - return root, nil + return root, root.persist() } // right is only set on split @@ -237,10 +247,10 @@ func (self *BpNode) internal_insert(key ItemKey, value ItemValue) (a, b *BpNode, if err := self.put_kp(q.keys[0], q); err != nil { return nil, nil, err } - return self, nil, nil + return self, nil, self.persist() } } - return self, nil, nil + return self, nil, self.maybePersist(child != p) } /* On split @@ -268,7 +278,7 @@ func (self *BpNode) internal_split(key ItemKey, ptr *BpNode) (a, b *BpNode, err return nil, nil, err } } - return a, b, nil + return a, b, persist(a, b) } /* if the leaf is full then it will defer to a leaf_split @@ -284,7 +294,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err i, has := self.find(key) if has { self.values[i] = value - return self, nil, nil + return self, nil, self.persist() } } if self.Full() { @@ -293,7 +303,7 @@ func (self *BpNode) leaf_insert(key ItemKey, value ItemValue) (a, b *BpNode, err if err := self.put_kv(key, value); err != nil { return nil, nil, err } - return self, nil, nil + return self, nil, self.persist() } } @@ -323,7 +333,7 @@ func (self *BpNode) leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, err return nil, nil, err } } - return a, b, nil + return a, b, persist(a, b) } /* a pure leaf split has two cases: @@ -349,7 +359,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, return nil, nil, err } insert_linked_list_node(a, b.getPrev(), b) - return a, b, nil + return a, b, persist(a, b) } else { a = self e := self.find_end_of_pure_run() @@ -357,7 +367,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, if err := e.put_kv(key, value); err != nil { return nil, nil, err } - return a, nil, nil + return a, nil, a.persist() } else { b = NewLeaf(self.NodeSize(), self.no_dup) if err := b.put_kv(key, 
value); err != nil { @@ -367,7 +377,7 @@ func (self *BpNode) pure_leaf_split(key ItemKey, value ItemValue) (a, b *BpNode, if e.keys[0].Equals(key) { return a, nil, nil } - return a, b, nil + return a, b, persist(a, b) } } } @@ -484,6 +494,7 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun sibling = sibling.left_most_leaf() } child := self.pointers[i] + oldChild := child if child.Internal() { child, err = child.internal_remove(key, sibling, where) } else { @@ -508,9 +519,9 @@ func (self *BpNode) internal_remove(key ItemKey, sibling *BpNode, where WhereFun self.pointers[i] = child } if len(self.keys) == 0 { - return nil, nil + return nil, self.destroy() } - return self, nil + return self, self.maybePersist(oldChild != child) } func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, err error) { @@ -518,8 +529,10 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, return nil, BpTreeError("Expected a leaf node") } a = self + hasChange := false for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() { if where(l.values[j]) { + hasChange = true if err := l.remove_key_at(j); err != nil { return nil, err } @@ -538,8 +551,14 @@ func (self *BpNode) leaf_remove(key, stop ItemKey, where WhereFunc) (a *BpNode, } else { a = nil } + if err := l.destroy(); err != nil { + return nil, err + } } } + if a != nil { + return a, a.maybePersist(hasChange) + } return a, nil } diff --git a/weed/util/bptree/bptree_store_test.go b/weed/util/bptree/bptree_store_test.go new file mode 100644 index 000000000..a5e330aa9 --- /dev/null +++ b/weed/util/bptree/bptree_store_test.go @@ -0,0 +1,34 @@ +package bptree + +import ( + "fmt" + "testing" +) + +func TestAddRemove(t *testing.T) { + tree := NewBpTree(32) + PersistFn = func(node *BpNode) error { + println("saving", node.protoNodeId) + return nil + } + DestroyFn = func(node *BpNode) error { + println("delete", node.protoNodeId) + return nil 
+ } + for i:=0;i<1024;i++{ + println("++++++++++", i) + tree.Add(String(fmt.Sprintf("%02d", i)), String(fmt.Sprintf("%02d", i))) + printTree(tree.root, "") + } +} + +func printTree(node *BpNode, prefix string) { + fmt.Printf("%sNode %d\n", prefix, node.protoNodeId) + prefix += " " + for i:=0;i