You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

743 lines
18 KiB

4 years ago
4 years ago
4 years ago
4 years ago
  1. package bptree
  2. type BpNode struct {
  3. keys []Hashable
  4. values []interface{}
  5. pointers []*BpNode
  6. next *BpNode
  7. prev *BpNode
  8. }
  9. func NewInternal(size int) *BpNode {
  10. if size < 0 {
  11. panic(NegativeSize())
  12. }
  13. return &BpNode{
  14. keys: make([]Hashable, 0, size),
  15. pointers: make([]*BpNode, 0, size),
  16. }
  17. }
  18. func NewLeaf(size int) *BpNode {
  19. if size < 0 {
  20. panic(NegativeSize())
  21. }
  22. return &BpNode{
  23. keys: make([]Hashable, 0, size),
  24. values: make([]interface{}, 0, size),
  25. }
  26. }
  27. func (self *BpNode) Full() bool {
  28. return len(self.keys) == cap(self.keys)
  29. }
  30. func (self *BpNode) Pure() bool {
  31. if len(self.keys) == 0 {
  32. return true
  33. }
  34. k0 := self.keys[0]
  35. for _, k := range self.keys {
  36. if !k0.Equals(k) {
  37. return false
  38. }
  39. }
  40. return true
  41. }
  42. func (self *BpNode) Internal() bool {
  43. return cap(self.pointers) > 0
  44. }
  45. func (self *BpNode) NodeSize() int {
  46. return cap(self.keys)
  47. }
  48. func (self *BpNode) Height() int {
  49. if !self.Internal() {
  50. return 1
  51. } else if len(self.pointers) == 0 {
  52. panic(BpTreeError("Internal node has no pointers but asked for height"))
  53. }
  54. return self.pointers[0].Height() + 1
  55. }
  56. func (self *BpNode) count(key Hashable) int {
  57. i, _ := self.find(key)
  58. count := 0
  59. for ; i < len(self.keys); i++ {
  60. if self.keys[i].Equals(key) {
  61. count++
  62. } else {
  63. break
  64. }
  65. }
  66. return count
  67. }
  68. func (self *BpNode) has(key Hashable) bool {
  69. _, has := self.find(key)
  70. return has
  71. }
  72. func (self *BpNode) left_most_leaf() *BpNode {
  73. if self.Internal() {
  74. return self.pointers[0].left_most_leaf()
  75. }
  76. return self
  77. }
  78. func (self *BpNode) right_most_leaf() *BpNode {
  79. if self.Internal() {
  80. return self.pointers[len(self.pointers)-1].right_most_leaf()
  81. }
  82. return self
  83. }
  84. /* returns the index and leaf-block of the first key greater than or equal to
  85. * the search key. (unless the search key is greater than all the keys in the
  86. * tree, in that case it will be the last key in the tree)
  87. */
  88. func (self *BpNode) get_start(key Hashable) (i int, leaf *BpNode) {
  89. if self.Internal() {
  90. return self.internal_get_start(key)
  91. } else {
  92. return self.leaf_get_start(key)
  93. }
  94. }
  95. func next_location(i int, leaf *BpNode) (int, *BpNode, bool) {
  96. j := i + 1
  97. for j >= len(leaf.keys) && leaf.getNext() != nil {
  98. j = 0
  99. leaf = leaf.getNext()
  100. }
  101. if j >= len(leaf.keys) {
  102. return -1, nil, true
  103. }
  104. return j, leaf, false
  105. }
  106. func prev_location(i int, leaf *BpNode) (int, *BpNode, bool) {
  107. j := i - 1
  108. for j < 0 && leaf.getPrev() != nil {
  109. leaf = leaf.getPrev()
  110. j = len(leaf.keys) - 1
  111. }
  112. if j < 0 {
  113. return -1, nil, true
  114. }
  115. return j, leaf, false
  116. }
  117. /* returns the index and leaf-block of the last key equal to the search key or
  118. * the first key greater than the search key. (unless the search key is greater
  119. * than all the keys in the tree, in that case it will be the last key in the
  120. * tree)
  121. */
  122. func (self *BpNode) get_end(key Hashable) (i int, leaf *BpNode) {
  123. end := false
  124. i, leaf = self.get_start(key)
  125. pi, pleaf := i, leaf
  126. for !end && leaf.keys[i].Equals(key) {
  127. pi, pleaf = i, leaf
  128. i, leaf, end = next_location(i, leaf)
  129. }
  130. return pi, pleaf
  131. }
  132. func (self *BpNode) internal_get_start(key Hashable) (i int, leaf *BpNode) {
  133. if !self.Internal() {
  134. panic(BpTreeError("Expected a internal node"))
  135. }
  136. i, has := self.find(key)
  137. if !has && i > 0 {
  138. // if it doesn't have it and the index > 0 then we have the next block
  139. // so we have to subtract one from the index.
  140. i--
  141. }
  142. child := self.pointers[i]
  143. return child.get_start(key)
  144. }
  145. func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) {
  146. i, has := self.find(key)
  147. if i >= len(self.keys) && i > 0 {
  148. i = len(self.keys) - 1
  149. }
  150. if !has && (len(self.keys) == 0 || self.keys[i].Less(key)) && self.getNext() != nil {
  151. return self.getNext().leaf_get_start(key)
  152. }
  153. return i, self
  154. }
  155. /* This puts the k/v pair into the B+Tree rooted at this node and returns the
  156. * (possibly) new root of the tree.
  157. */
  158. func (self *BpNode) put(key Hashable, value interface{}) (root *BpNode, err error) {
  159. a, b, err := self.insert(key, value)
  160. if err != nil {
  161. return nil, err
  162. } else if b == nil {
  163. return a, nil
  164. }
  165. // else we have root split
  166. root = NewInternal(self.NodeSize())
  167. root.put_kp(a.keys[0], a)
  168. root.put_kp(b.keys[0], b)
  169. return root, nil
  170. }
  171. // right is only set on split
  172. // left is always set. When split is false left is the pointer to block
  173. // When split is true left is the pointer to the new left
  174. // block
  175. func (self *BpNode) insert(key Hashable, value interface{}) (a, b *BpNode, err error) {
  176. if self.Internal() {
  177. return self.internal_insert(key, value)
  178. } else { // leaf node
  179. return self.leaf_insert(key, value)
  180. }
  181. }
  182. /* - first find the child to insert into
  183. * - do the child insert
  184. * - if there was a split:
  185. * - if the block is full, split this block
  186. * - else insert the new key/pointer into this block
  187. */
  188. func (self *BpNode) internal_insert(key Hashable, value interface{}) (a, b *BpNode, err error) {
  189. if !self.Internal() {
  190. return nil, nil, BpTreeError("Expected a internal node")
  191. }
  192. i, has := self.find(key)
  193. if !has && i > 0 {
  194. // if it doesn't have it and the index > 0 then we have the next block
  195. // so we have to subtract one from the index.
  196. i--
  197. }
  198. child := self.pointers[i]
  199. p, q, err := child.insert(key, value)
  200. if err != nil {
  201. return nil, nil, err
  202. }
  203. self.keys[i] = p.keys[0]
  204. self.pointers[i] = p
  205. if q != nil {
  206. // we had a split
  207. if self.Full() {
  208. return self.internal_split(q.keys[0], q)
  209. } else {
  210. if err := self.put_kp(q.keys[0], q); err != nil {
  211. return nil, nil, err
  212. }
  213. return self, nil, nil
  214. }
  215. }
  216. return self, nil, nil
  217. }
  218. /* On split
  219. * - first assert that the key to be inserted is not already in the block.
  220. * - Make a new block
  221. * - balance the two blocks.
  222. * - insert the new key/pointer combo into the correct block
  223. */
  224. func (self *BpNode) internal_split(key Hashable, ptr *BpNode) (a, b *BpNode, err error) {
  225. if !self.Internal() {
  226. return nil, nil, BpTreeError("Expected a internal node")
  227. }
  228. if self.has(key) {
  229. return nil, nil, BpTreeError("Tried to split an internal block on duplicate key")
  230. }
  231. a = self
  232. b = NewInternal(self.NodeSize())
  233. balance_nodes(a, b)
  234. if key.Less(b.keys[0]) {
  235. if err := a.put_kp(key, ptr); err != nil {
  236. return nil, nil, err
  237. }
  238. } else {
  239. if err := b.put_kp(key, ptr); err != nil {
  240. return nil, nil, err
  241. }
  242. }
  243. return a, b, nil
  244. }
  245. /* if the leaf is full then it will defer to a leaf_split
  246. * (but in one case that will not actually split in the case of a insert into
  247. * a pure block with a matching key)
  248. * else this leaf will get a new entry.
  249. */
  250. func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, err error) {
  251. if self.Internal() {
  252. return nil, nil, BpTreeError("Expected a leaf node")
  253. }
  254. if self.Full() {
  255. return self.leaf_split(key, value)
  256. } else {
  257. if err := self.put_kv(key, value); err != nil {
  258. return nil, nil, err
  259. }
  260. return self, nil, nil
  261. }
  262. }
  263. /* on leaf split if the block is pure then it will defer to pure_leaf_split
  264. * else
  265. * - a new block will be made and inserted after this one
  266. * - the two blocks will be balanced with balanced_nodes
  267. * - if the key is less than b.keys[0] it will go in a else b
  268. */
  269. func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) {
  270. if self.Internal() {
  271. return nil, nil, BpTreeError("Expected a leaf node")
  272. }
  273. if self.Pure() {
  274. return self.pure_leaf_split(key, value)
  275. }
  276. a = self
  277. b = NewLeaf(self.NodeSize())
  278. insert_linked_list_node(b, a, a.getNext())
  279. balance_nodes(a, b)
  280. if key.Less(b.keys[0]) {
  281. if err := a.put_kv(key, value); err != nil {
  282. return nil, nil, err
  283. }
  284. } else {
  285. if err := b.put_kv(key, value); err != nil {
  286. return nil, nil, err
  287. }
  288. }
  289. return a, b, nil
  290. }
  291. /* a pure leaf split has two cases:
  292. * 1) the inserted key is less than the current pure block.
  293. * - a new block should be created before the current block
  294. * - the key should be put in it
  295. * 2) the inserted key is greater than or equal to the pure block.
  296. * - the end of run of pure blocks should be found
  297. * - if the key is equal to pure block and the last block is not full insert
  298. * the new kv
  299. * - else split by making a new block after the last block in the run
  300. * and putting the new key there.
  301. * - always return the current block as "a" and the new block as "b"
  302. */
  303. func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) {
  304. if self.Internal() || !self.Pure() {
  305. return nil, nil, BpTreeError("Expected a pure leaf node")
  306. }
  307. if key.Less(self.keys[0]) {
  308. a = NewLeaf(self.NodeSize())
  309. b = self
  310. if err := a.put_kv(key, value); err != nil {
  311. return nil, nil, err
  312. }
  313. insert_linked_list_node(a, b.getPrev(), b)
  314. return a, b, nil
  315. } else {
  316. a = self
  317. e := self.find_end_of_pure_run()
  318. if e.keys[0].Equals(key) && !e.Full() {
  319. if err := e.put_kv(key, value); err != nil {
  320. return nil, nil, err
  321. }
  322. return a, nil, nil
  323. } else {
  324. b = NewLeaf(self.NodeSize())
  325. if err := b.put_kv(key, value); err != nil {
  326. return nil, nil, err
  327. }
  328. insert_linked_list_node(b, e, e.getNext())
  329. if e.keys[0].Equals(key) {
  330. return a, nil, nil
  331. }
  332. return a, b, nil
  333. }
  334. }
  335. }
  336. func (self *BpNode) put_kp(key Hashable, ptr *BpNode) error {
  337. if self.Full() {
  338. return BpTreeError("Block is full.")
  339. }
  340. if !self.Internal() {
  341. return BpTreeError("Expected a internal node")
  342. }
  343. i, has := self.find(key)
  344. if has {
  345. return BpTreeError("Tried to insert a duplicate key into an internal node")
  346. } else if i < 0 {
  347. panic(BpTreeError("find returned a negative int"))
  348. } else if i >= cap(self.keys) {
  349. panic(BpTreeError("find returned a int > than cap(keys)"))
  350. }
  351. if err := self.put_key_at(i, key); err != nil {
  352. return err
  353. }
  354. if err := self.put_pointer_at(i, ptr); err != nil {
  355. return err
  356. }
  357. return nil
  358. }
  359. func (self *BpNode) put_kv(key Hashable, value interface{}) error {
  360. if self.Full() {
  361. return BpTreeError("Block is full.")
  362. }
  363. if self.Internal() {
  364. return BpTreeError("Expected a leaf node")
  365. }
  366. i, _ := self.find(key)
  367. if i < 0 {
  368. panic(BpTreeError("find returned a negative int"))
  369. } else if i >= cap(self.keys) {
  370. panic(BpTreeError("find returned a int > than cap(keys)"))
  371. }
  372. if err := self.put_key_at(i, key); err != nil {
  373. return err
  374. }
  375. if err := self.put_value_at(i, value); err != nil {
  376. return err
  377. }
  378. return nil
  379. }
  380. func (self *BpNode) put_key_at(i int, key Hashable) error {
  381. if self.Full() {
  382. return BpTreeError("Block is full.")
  383. }
  384. self.keys = self.keys[:len(self.keys)+1]
  385. for j := len(self.keys) - 1; j > i; j-- {
  386. self.keys[j] = self.keys[j-1]
  387. }
  388. self.keys[i] = key
  389. return nil
  390. }
  391. func (self *BpNode) put_value_at(i int, value interface{}) error {
  392. if len(self.values) == cap(self.values) {
  393. return BpTreeError("Block is full.")
  394. }
  395. if self.Internal() {
  396. return BpTreeError("Expected a leaf node")
  397. }
  398. self.values = self.values[:len(self.values)+1]
  399. for j := len(self.values) - 1; j > i; j-- {
  400. self.values[j] = self.values[j-1]
  401. }
  402. self.values[i] = value
  403. return nil
  404. }
  405. func (self *BpNode) put_pointer_at(i int, pointer *BpNode) error {
  406. if len(self.pointers) == cap(self.pointers) {
  407. return BpTreeError("Block is full.")
  408. }
  409. if !self.Internal() {
  410. return BpTreeError("Expected a internal node")
  411. }
  412. self.pointers = self.pointers[:len(self.pointers)+1]
  413. for j := len(self.pointers) - 1; j > i; j-- {
  414. self.pointers[j] = self.pointers[j-1]
  415. }
  416. self.pointers[i] = pointer
  417. return nil
  418. }
  419. func (self *BpNode) remove(key Hashable, where WhereFunc) (a *BpNode, err error) {
  420. if self.Internal() {
  421. return self.internal_remove(key, nil, where)
  422. } else {
  423. return self.leaf_remove(key, self.keys[len(self.keys)-1], where)
  424. }
  425. }
  426. func (self *BpNode) internal_remove(key Hashable, sibling *BpNode, where WhereFunc) (a *BpNode, err error) {
  427. if !self.Internal() {
  428. panic(BpTreeError("Expected a internal node"))
  429. }
  430. i, has := self.find(key)
  431. if !has && i > 0 {
  432. // if it doesn't have it and the index > 0 then we have the next block
  433. // so we have to subtract one from the index.
  434. i--
  435. }
  436. if i+1 < len(self.keys) {
  437. sibling = self.pointers[i+1]
  438. } else if sibling != nil {
  439. sibling = sibling.left_most_leaf()
  440. }
  441. child := self.pointers[i]
  442. if child.Internal() {
  443. child, err = child.internal_remove(key, sibling, where)
  444. } else {
  445. if sibling == nil {
  446. child, err = child.leaf_remove(key, nil, where)
  447. } else {
  448. child, err = child.leaf_remove(key, sibling.keys[0], where)
  449. }
  450. }
  451. if err != nil {
  452. return nil, err
  453. }
  454. if child == nil {
  455. if err := self.remove_key_at(i); err != nil {
  456. return nil, err
  457. }
  458. if err := self.remove_ptr_at(i); err != nil {
  459. return nil, err
  460. }
  461. } else {
  462. self.keys[i] = child.keys[0]
  463. self.pointers[i] = child
  464. }
  465. if len(self.keys) == 0 {
  466. return nil, nil
  467. }
  468. return self, nil
  469. }
  470. func (self *BpNode) leaf_remove(key, stop Hashable, where WhereFunc) (a *BpNode, err error) {
  471. if self.Internal() {
  472. return nil, BpTreeError("Expected a leaf node")
  473. }
  474. a = self
  475. for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() {
  476. if where(l.values[j]) {
  477. if err := l.remove_key_at(j); err != nil {
  478. return nil, err
  479. }
  480. if err := l.remove_value_at(j); err != nil {
  481. return nil, err
  482. }
  483. }
  484. if len(l.keys) == 0 {
  485. remove_linked_list_node(l)
  486. if l.getNext() == nil {
  487. a = nil
  488. } else if stop == nil {
  489. a = nil
  490. } else if !l.getNext().keys[0].Equals(stop) {
  491. a = l.getNext()
  492. } else {
  493. a = nil
  494. }
  495. }
  496. }
  497. return a, nil
  498. }
  499. func (self *BpNode) remove_key_at(i int) error {
  500. if i >= len(self.keys) || i < 0 {
  501. return BpTreeError("i, %v, is out of bounds, %v, %v %v.", i, len(self.keys), len(self.values), self)
  502. }
  503. for j := i; j < len(self.keys)-1; j++ {
  504. self.keys[j] = self.keys[j+1]
  505. }
  506. self.keys = self.keys[:len(self.keys)-1]
  507. return nil
  508. }
  509. func (self *BpNode) remove_value_at(i int) error {
  510. if i >= len(self.values) || i < 0 {
  511. return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.values))
  512. }
  513. for j := i; j < len(self.values)-1; j++ {
  514. self.values[j] = self.values[j+1]
  515. }
  516. self.values = self.values[:len(self.values)-1]
  517. return nil
  518. }
  519. func (self *BpNode) remove_ptr_at(i int) error {
  520. if i >= len(self.pointers) || i < 0 {
  521. return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.pointers))
  522. }
  523. for j := i; j < len(self.pointers)-1; j++ {
  524. self.pointers[j] = self.pointers[j+1]
  525. }
  526. self.pointers = self.pointers[:len(self.pointers)-1]
  527. return nil
  528. }
  529. func (self *BpNode) find(key Hashable) (int, bool) {
  530. var l int = 0
  531. var r int = len(self.keys) - 1
  532. var m int
  533. for l <= r {
  534. m = ((r - l) >> 1) + l
  535. if key.Less(self.keys[m]) {
  536. r = m - 1
  537. } else if key.Equals(self.keys[m]) {
  538. for j := m; j >= 0; j-- {
  539. if j == 0 || !key.Equals(self.keys[j-1]) {
  540. return j, true
  541. }
  542. }
  543. } else {
  544. l = m + 1
  545. }
  546. }
  547. return l, false
  548. }
  549. func (self *BpNode) find_end_of_pure_run() *BpNode {
  550. k := self.keys[0]
  551. p := self
  552. n := self.getNext()
  553. for n != nil && n.Pure() && k.Equals(n.keys[0]) {
  554. p = n
  555. n = n.getNext()
  556. }
  557. return p
  558. }
  559. func (self *BpNode) all() (li loc_iterator) {
  560. j := -1
  561. l := self.left_most_leaf()
  562. end := false
  563. j, l, end = next_location(j, l)
  564. li = func() (i int, leaf *BpNode, next loc_iterator) {
  565. if end {
  566. return -1, nil, nil
  567. }
  568. i = j
  569. leaf = l
  570. j, l, end = next_location(j, l)
  571. return i, leaf, li
  572. }
  573. return li
  574. }
  575. func (self *BpNode) all_backward() (li loc_iterator) {
  576. l := self.right_most_leaf()
  577. j := len(l.keys)
  578. end := false
  579. j, l, end = prev_location(j, l)
  580. li = func() (i int, leaf *BpNode, next loc_iterator) {
  581. if end {
  582. return -1, nil, nil
  583. }
  584. i = j
  585. leaf = l
  586. j, l, end = prev_location(j, l)
  587. return i, leaf, li
  588. }
  589. return li
  590. }
  591. func (self *BpNode) forward(from, to Hashable) (li loc_iterator) {
  592. j, l := self.get_start(from)
  593. end := false
  594. j--
  595. li = func() (i int, leaf *BpNode, next loc_iterator) {
  596. j, l, end = next_location(j, l)
  597. if end || to.Less(l.keys[j]) {
  598. return -1, nil, nil
  599. }
  600. return j, l, li
  601. }
  602. return li
  603. }
  604. func (self *BpNode) backward(from, to Hashable) (li loc_iterator) {
  605. j, l := self.get_end(from)
  606. end := false
  607. li = func() (i int, leaf *BpNode, next loc_iterator) {
  608. if end || l.keys[j].Less(to) {
  609. return -1, nil, nil
  610. }
  611. i = j
  612. leaf = l
  613. j, l, end = prev_location(i, l)
  614. return i, leaf, li
  615. }
  616. return li
  617. }
  618. func insert_linked_list_node(n, prev, next *BpNode) {
  619. if (prev != nil && prev.getNext() != next) || (next != nil && next.getPrev() != prev) {
  620. panic(BpTreeError("prev and next not hooked up"))
  621. }
  622. n.setPrev(prev)
  623. n.setNext(next)
  624. if prev != nil {
  625. prev.setNext(n)
  626. }
  627. if next != nil {
  628. next.setPrev(n)
  629. }
  630. }
  631. func remove_linked_list_node(n *BpNode) {
  632. if n.getPrev() != nil {
  633. n.getPrev().setNext(n.getNext())
  634. }
  635. if n.getNext() != nil {
  636. n.getNext().setPrev(n.getPrev())
  637. }
  638. }
  639. /* a must be full and b must be empty else there will be a panic
  640. */
  641. func balance_nodes(a, b *BpNode) {
  642. if len(b.keys) != 0 {
  643. panic(BpTreeError("b was not empty"))
  644. }
  645. if !a.Full() {
  646. panic(BpTreeError("a was not full", a))
  647. }
  648. if cap(a.keys) != cap(b.keys) {
  649. panic(BpTreeError("cap(a.keys) != cap(b.keys)"))
  650. }
  651. if cap(a.values) != cap(b.values) {
  652. panic(BpTreeError("cap(a.values) != cap(b.values)"))
  653. }
  654. if cap(a.pointers) != cap(b.pointers) {
  655. panic(BpTreeError("cap(a.pointers) != cap(b.pointers)"))
  656. }
  657. m := len(a.keys) / 2
  658. for m < len(a.keys) && a.keys[m-1].Equals(a.keys[m]) {
  659. m++
  660. }
  661. if m == len(a.keys) {
  662. m--
  663. for m > 0 && a.keys[m-1].Equals(a.keys[m]) {
  664. m--
  665. }
  666. }
  667. var lim int = len(a.keys) - m
  668. b.keys = b.keys[:lim]
  669. if cap(a.values) > 0 {
  670. if cap(a.values) != cap(a.keys) {
  671. panic(BpTreeError("cap(a.values) != cap(a.keys)"))
  672. }
  673. b.values = b.values[:lim]
  674. }
  675. if cap(a.pointers) > 0 {
  676. if cap(a.pointers) != cap(a.keys) {
  677. panic(BpTreeError("cap(a.pointers) != cap(a.keys)"))
  678. }
  679. b.pointers = b.pointers[:lim]
  680. }
  681. for i := 0; i < lim; i++ {
  682. j := m + i
  683. b.keys[i] = a.keys[j]
  684. if cap(a.values) > 0 {
  685. b.values[i] = a.values[j]
  686. }
  687. if cap(a.pointers) > 0 {
  688. b.pointers[i] = a.pointers[j]
  689. }
  690. }
  691. a.keys = a.keys[:m]
  692. if cap(a.values) > 0 {
  693. a.values = a.values[:m]
  694. }
  695. if cap(a.pointers) > 0 {
  696. a.pointers = a.pointers[:m]
  697. }
  698. }