You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

752 lines
18 KiB

  1. package bptree
  2. type BpNode struct {
  3. keys []Hashable
  4. values []interface{}
  5. pointers []*BpNode
  6. next *BpNode
  7. prev *BpNode
  8. no_dup bool
  9. }
  10. func NewInternal(size int) *BpNode {
  11. if size < 0 {
  12. panic(NegativeSize())
  13. }
  14. return &BpNode{
  15. keys: make([]Hashable, 0, size),
  16. pointers: make([]*BpNode, 0, size),
  17. }
  18. }
  19. func NewLeaf(size int, no_dup bool) *BpNode {
  20. if size < 0 {
  21. panic(NegativeSize())
  22. }
  23. return &BpNode{
  24. keys: make([]Hashable, 0, size),
  25. values: make([]interface{}, 0, size),
  26. no_dup: no_dup,
  27. }
  28. }
  29. func (self *BpNode) Full() bool {
  30. return len(self.keys) == cap(self.keys)
  31. }
  32. func (self *BpNode) Pure() bool {
  33. if len(self.keys) == 0 {
  34. return true
  35. }
  36. k0 := self.keys[0]
  37. for _, k := range self.keys {
  38. if !k0.Equals(k) {
  39. return false
  40. }
  41. }
  42. return true
  43. }
  44. func (self *BpNode) Internal() bool {
  45. return cap(self.pointers) > 0
  46. }
  47. func (self *BpNode) NodeSize() int {
  48. return cap(self.keys)
  49. }
  50. func (self *BpNode) Height() int {
  51. if !self.Internal() {
  52. return 1
  53. } else if len(self.pointers) == 0 {
  54. panic(BpTreeError("Internal node has no pointers but asked for height"))
  55. }
  56. return self.pointers[0].Height() + 1
  57. }
  58. func (self *BpNode) count(key Hashable) int {
  59. i, _ := self.find(key)
  60. count := 0
  61. for ; i < len(self.keys); i++ {
  62. if self.keys[i].Equals(key) {
  63. count++
  64. } else {
  65. break
  66. }
  67. }
  68. return count
  69. }
  70. func (self *BpNode) has(key Hashable) bool {
  71. _, has := self.find(key)
  72. return has
  73. }
  74. func (self *BpNode) left_most_leaf() *BpNode {
  75. if self.Internal() {
  76. return self.pointers[0].left_most_leaf()
  77. }
  78. return self
  79. }
  80. func (self *BpNode) right_most_leaf() *BpNode {
  81. if self.Internal() {
  82. return self.pointers[len(self.pointers)-1].right_most_leaf()
  83. }
  84. return self
  85. }
  86. /* returns the index and leaf-block of the first key greater than or equal to
  87. * the search key. (unless the search key is greater than all the keys in the
  88. * tree, in that case it will be the last key in the tree)
  89. */
  90. func (self *BpNode) get_start(key Hashable) (i int, leaf *BpNode) {
  91. if self.Internal() {
  92. return self.internal_get_start(key)
  93. } else {
  94. return self.leaf_get_start(key)
  95. }
  96. }
  97. func next_location(i int, leaf *BpNode) (int, *BpNode, bool) {
  98. j := i + 1
  99. for j >= len(leaf.keys) && leaf.next != nil {
  100. j = 0
  101. leaf = leaf.next
  102. }
  103. if j >= len(leaf.keys) {
  104. return -1, nil, true
  105. }
  106. return j, leaf, false
  107. }
  108. func prev_location(i int, leaf *BpNode) (int, *BpNode, bool) {
  109. j := i - 1
  110. for j < 0 && leaf.prev != nil {
  111. leaf = leaf.prev
  112. j = len(leaf.keys) - 1
  113. }
  114. if j < 0 {
  115. return -1, nil, true
  116. }
  117. return j, leaf, false
  118. }
  119. /* returns the index and leaf-block of the last key equal to the search key or
  120. * the first key greater than the search key. (unless the search key is greater
  121. * than all the keys in the tree, in that case it will be the last key in the
  122. * tree)
  123. */
  124. func (self *BpNode) get_end(key Hashable) (i int, leaf *BpNode) {
  125. end := false
  126. i, leaf = self.get_start(key)
  127. pi, pleaf := i, leaf
  128. for !end && leaf.keys[i].Equals(key) {
  129. pi, pleaf = i, leaf
  130. i, leaf, end = next_location(i, leaf)
  131. }
  132. return pi, pleaf
  133. }
  134. func (self *BpNode) internal_get_start(key Hashable) (i int, leaf *BpNode) {
  135. if !self.Internal() {
  136. panic(BpTreeError("Expected a internal node"))
  137. }
  138. i, has := self.find(key)
  139. if !has && i > 0 {
  140. // if it doesn't have it and the index > 0 then we have the next block
  141. // so we have to subtract one from the index.
  142. i--
  143. }
  144. child := self.pointers[i]
  145. return child.get_start(key)
  146. }
  147. func (self *BpNode) leaf_get_start(key Hashable) (i int, leaf *BpNode) {
  148. i, has := self.find(key)
  149. if i >= len(self.keys) && i > 0 {
  150. i = len(self.keys) - 1
  151. }
  152. if !has && (len(self.keys) == 0 || self.keys[i].Less(key)) && self.next != nil {
  153. return self.next.leaf_get_start(key)
  154. }
  155. return i, self
  156. }
  157. /* This puts the k/v pair into the B+Tree rooted at this node and returns the
  158. * (possibly) new root of the tree.
  159. */
  160. func (self *BpNode) put(key Hashable, value interface{}) (root *BpNode, err error) {
  161. a, b, err := self.insert(key, value)
  162. if err != nil {
  163. return nil, err
  164. } else if b == nil {
  165. return a, nil
  166. }
  167. // else we have root split
  168. root = NewInternal(self.NodeSize())
  169. root.put_kp(a.keys[0], a)
  170. root.put_kp(b.keys[0], b)
  171. return root, nil
  172. }
  173. // right is only set on split
  174. // left is always set. When split is false left is the pointer to block
  175. // When split is true left is the pointer to the new left
  176. // block
  177. func (self *BpNode) insert(key Hashable, value interface{}) (a, b *BpNode, err error) {
  178. if self.Internal() {
  179. return self.internal_insert(key, value)
  180. } else { // leaf node
  181. return self.leaf_insert(key, value)
  182. }
  183. }
  184. /* - first find the child to insert into
  185. * - do the child insert
  186. * - if there was a split:
  187. * - if the block is full, split this block
  188. * - else insert the new key/pointer into this block
  189. */
  190. func (self *BpNode) internal_insert(key Hashable, value interface{}) (a, b *BpNode, err error) {
  191. if !self.Internal() {
  192. return nil, nil, BpTreeError("Expected a internal node")
  193. }
  194. i, has := self.find(key)
  195. if !has && i > 0 {
  196. // if it doesn't have it and the index > 0 then we have the next block
  197. // so we have to subtract one from the index.
  198. i--
  199. }
  200. child := self.pointers[i]
  201. p, q, err := child.insert(key, value)
  202. if err != nil {
  203. return nil, nil, err
  204. }
  205. self.keys[i] = p.keys[0]
  206. self.pointers[i] = p
  207. if q != nil {
  208. // we had a split
  209. if self.Full() {
  210. return self.internal_split(q.keys[0], q)
  211. } else {
  212. if err := self.put_kp(q.keys[0], q); err != nil {
  213. return nil, nil, err
  214. }
  215. return self, nil, nil
  216. }
  217. }
  218. return self, nil, nil
  219. }
  220. /* On split
  221. * - first assert that the key to be inserted is not already in the block.
  222. * - Make a new block
  223. * - balance the two blocks.
  224. * - insert the new key/pointer combo into the correct block
  225. */
  226. func (self *BpNode) internal_split(key Hashable, ptr *BpNode) (a, b *BpNode, err error) {
  227. if !self.Internal() {
  228. return nil, nil, BpTreeError("Expected a internal node")
  229. }
  230. if self.has(key) {
  231. return nil, nil, BpTreeError("Tried to split an internal block on duplicate key")
  232. }
  233. a = self
  234. b = NewInternal(self.NodeSize())
  235. balance_nodes(a, b)
  236. if key.Less(b.keys[0]) {
  237. if err := a.put_kp(key, ptr); err != nil {
  238. return nil, nil, err
  239. }
  240. } else {
  241. if err := b.put_kp(key, ptr); err != nil {
  242. return nil, nil, err
  243. }
  244. }
  245. return a, b, nil
  246. }
  247. /* if the leaf is full then it will defer to a leaf_split
  248. * (but in one case that will not actually split in the case of a insert into
  249. * a pure block with a matching key)
  250. * else this leaf will get a new entry.
  251. */
  252. func (self *BpNode) leaf_insert(key Hashable, value interface{}) (a, b *BpNode, err error) {
  253. if self.Internal() {
  254. return nil, nil, BpTreeError("Expected a leaf node")
  255. }
  256. if self.no_dup {
  257. i, has := self.find(key)
  258. if has {
  259. self.values[i] = value
  260. return self, nil, nil
  261. }
  262. }
  263. if self.Full() {
  264. return self.leaf_split(key, value)
  265. } else {
  266. if err := self.put_kv(key, value); err != nil {
  267. return nil, nil, err
  268. }
  269. return self, nil, nil
  270. }
  271. }
  272. /* on leaf split if the block is pure then it will defer to pure_leaf_split
  273. * else
  274. * - a new block will be made and inserted after this one
  275. * - the two blocks will be balanced with balanced_nodes
  276. * - if the key is less than b.keys[0] it will go in a else b
  277. */
  278. func (self *BpNode) leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) {
  279. if self.Internal() {
  280. return nil, nil, BpTreeError("Expected a leaf node")
  281. }
  282. if self.Pure() {
  283. return self.pure_leaf_split(key, value)
  284. }
  285. a = self
  286. b = NewLeaf(self.NodeSize(), self.no_dup)
  287. insert_linked_list_node(b, a, a.next)
  288. balance_nodes(a, b)
  289. if key.Less(b.keys[0]) {
  290. if err := a.put_kv(key, value); err != nil {
  291. return nil, nil, err
  292. }
  293. } else {
  294. if err := b.put_kv(key, value); err != nil {
  295. return nil, nil, err
  296. }
  297. }
  298. return a, b, nil
  299. }
  300. /* a pure leaf split has two cases:
  301. * 1) the inserted key is less than the current pure block.
  302. * - a new block should be created before the current block
  303. * - the key should be put in it
  304. * 2) the inserted key is greater than or equal to the pure block.
  305. * - the end of run of pure blocks should be found
  306. * - if the key is equal to pure block and the last block is not full insert
  307. * the new kv
  308. * - else split by making a new block after the last block in the run
  309. * and putting the new key there.
  310. * - always return the current block as "a" and the new block as "b"
  311. */
  312. func (self *BpNode) pure_leaf_split(key Hashable, value interface{}) (a, b *BpNode, err error) {
  313. if self.Internal() || !self.Pure() {
  314. return nil, nil, BpTreeError("Expected a pure leaf node")
  315. }
  316. if key.Less(self.keys[0]) {
  317. a = NewLeaf(self.NodeSize(), self.no_dup)
  318. b = self
  319. if err := a.put_kv(key, value); err != nil {
  320. return nil, nil, err
  321. }
  322. insert_linked_list_node(a, b.prev, b)
  323. return a, b, nil
  324. } else {
  325. a = self
  326. e := self.find_end_of_pure_run()
  327. if e.keys[0].Equals(key) && !e.Full() {
  328. if err := e.put_kv(key, value); err != nil {
  329. return nil, nil, err
  330. }
  331. return a, nil, nil
  332. } else {
  333. b = NewLeaf(self.NodeSize(), self.no_dup)
  334. if err := b.put_kv(key, value); err != nil {
  335. return nil, nil, err
  336. }
  337. insert_linked_list_node(b, e, e.next)
  338. if e.keys[0].Equals(key) {
  339. return a, nil, nil
  340. }
  341. return a, b, nil
  342. }
  343. }
  344. }
  345. func (self *BpNode) put_kp(key Hashable, ptr *BpNode) error {
  346. if self.Full() {
  347. return BpTreeError("Block is full.")
  348. }
  349. if !self.Internal() {
  350. return BpTreeError("Expected a internal node")
  351. }
  352. i, has := self.find(key)
  353. if has {
  354. return BpTreeError("Tried to insert a duplicate key into an internal node")
  355. } else if i < 0 {
  356. panic(BpTreeError("find returned a negative int"))
  357. } else if i >= cap(self.keys) {
  358. panic(BpTreeError("find returned a int > than cap(keys)"))
  359. }
  360. if err := self.put_key_at(i, key); err != nil {
  361. return err
  362. }
  363. if err := self.put_pointer_at(i, ptr); err != nil {
  364. return err
  365. }
  366. return nil
  367. }
  368. func (self *BpNode) put_kv(key Hashable, value interface{}) error {
  369. if self.Full() {
  370. return BpTreeError("Block is full.")
  371. }
  372. if self.Internal() {
  373. return BpTreeError("Expected a leaf node")
  374. }
  375. i, _ := self.find(key)
  376. if i < 0 {
  377. panic(BpTreeError("find returned a negative int"))
  378. } else if i >= cap(self.keys) {
  379. panic(BpTreeError("find returned a int > than cap(keys)"))
  380. }
  381. if err := self.put_key_at(i, key); err != nil {
  382. return err
  383. }
  384. if err := self.put_value_at(i, value); err != nil {
  385. return err
  386. }
  387. return nil
  388. }
  389. func (self *BpNode) put_key_at(i int, key Hashable) error {
  390. if self.Full() {
  391. return BpTreeError("Block is full.")
  392. }
  393. self.keys = self.keys[:len(self.keys)+1]
  394. for j := len(self.keys) - 1; j > i; j-- {
  395. self.keys[j] = self.keys[j-1]
  396. }
  397. self.keys[i] = key
  398. return nil
  399. }
  400. func (self *BpNode) put_value_at(i int, value interface{}) error {
  401. if len(self.values) == cap(self.values) {
  402. return BpTreeError("Block is full.")
  403. }
  404. if self.Internal() {
  405. return BpTreeError("Expected a leaf node")
  406. }
  407. self.values = self.values[:len(self.values)+1]
  408. for j := len(self.values) - 1; j > i; j-- {
  409. self.values[j] = self.values[j-1]
  410. }
  411. self.values[i] = value
  412. return nil
  413. }
  414. func (self *BpNode) put_pointer_at(i int, pointer *BpNode) error {
  415. if len(self.pointers) == cap(self.pointers) {
  416. return BpTreeError("Block is full.")
  417. }
  418. if !self.Internal() {
  419. return BpTreeError("Expected a internal node")
  420. }
  421. self.pointers = self.pointers[:len(self.pointers)+1]
  422. for j := len(self.pointers) - 1; j > i; j-- {
  423. self.pointers[j] = self.pointers[j-1]
  424. }
  425. self.pointers[i] = pointer
  426. return nil
  427. }
  428. func (self *BpNode) remove(key Hashable, where WhereFunc) (a *BpNode, err error) {
  429. if self.Internal() {
  430. return self.internal_remove(key, nil, where)
  431. } else {
  432. return self.leaf_remove(key, self.keys[len(self.keys)-1], where)
  433. }
  434. }
  435. func (self *BpNode) internal_remove(key Hashable, sibling *BpNode, where WhereFunc) (a *BpNode, err error) {
  436. if !self.Internal() {
  437. panic(BpTreeError("Expected a internal node"))
  438. }
  439. i, has := self.find(key)
  440. if !has && i > 0 {
  441. // if it doesn't have it and the index > 0 then we have the next block
  442. // so we have to subtract one from the index.
  443. i--
  444. }
  445. if i+1 < len(self.keys) {
  446. sibling = self.pointers[i+1]
  447. } else if sibling != nil {
  448. sibling = sibling.left_most_leaf()
  449. }
  450. child := self.pointers[i]
  451. if child.Internal() {
  452. child, err = child.internal_remove(key, sibling, where)
  453. } else {
  454. if sibling == nil {
  455. child, err = child.leaf_remove(key, nil, where)
  456. } else {
  457. child, err = child.leaf_remove(key, sibling.keys[0], where)
  458. }
  459. }
  460. if err != nil {
  461. return nil, err
  462. }
  463. if child == nil {
  464. if err := self.remove_key_at(i); err != nil {
  465. return nil, err
  466. }
  467. if err := self.remove_ptr_at(i); err != nil {
  468. return nil, err
  469. }
  470. } else {
  471. self.keys[i] = child.keys[0]
  472. self.pointers[i] = child
  473. }
  474. if len(self.keys) == 0 {
  475. return nil, nil
  476. }
  477. return self, nil
  478. }
  479. func (self *BpNode) leaf_remove(key, stop Hashable, where WhereFunc) (a *BpNode, err error) {
  480. if self.Internal() {
  481. return nil, BpTreeError("Expected a leaf node")
  482. }
  483. a = self
  484. for j, l, next := self.forward(key, key)(); next != nil; j, l, next = next() {
  485. if where(l.values[j]) {
  486. if err := l.remove_key_at(j); err != nil {
  487. return nil, err
  488. }
  489. if err := l.remove_value_at(j); err != nil {
  490. return nil, err
  491. }
  492. }
  493. if len(l.keys) == 0 {
  494. remove_linked_list_node(l)
  495. if l.next == nil {
  496. a = nil
  497. } else if stop == nil {
  498. a = nil
  499. } else if !l.next.keys[0].Equals(stop) {
  500. a = l.next
  501. } else {
  502. a = nil
  503. }
  504. }
  505. }
  506. return a, nil
  507. }
  508. func (self *BpNode) remove_key_at(i int) error {
  509. if i >= len(self.keys) || i < 0 {
  510. return BpTreeError("i, %v, is out of bounds, %v, %v %v.", i, len(self.keys), len(self.values), self)
  511. }
  512. for j := i; j < len(self.keys)-1; j++ {
  513. self.keys[j] = self.keys[j+1]
  514. }
  515. self.keys = self.keys[:len(self.keys)-1]
  516. return nil
  517. }
  518. func (self *BpNode) remove_value_at(i int) error {
  519. if i >= len(self.values) || i < 0 {
  520. return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.values))
  521. }
  522. for j := i; j < len(self.values)-1; j++ {
  523. self.values[j] = self.values[j+1]
  524. }
  525. self.values = self.values[:len(self.values)-1]
  526. return nil
  527. }
  528. func (self *BpNode) remove_ptr_at(i int) error {
  529. if i >= len(self.pointers) || i < 0 {
  530. return BpTreeError("i, %v, is out of bounds, %v.", i, len(self.pointers))
  531. }
  532. for j := i; j < len(self.pointers)-1; j++ {
  533. self.pointers[j] = self.pointers[j+1]
  534. }
  535. self.pointers = self.pointers[:len(self.pointers)-1]
  536. return nil
  537. }
  538. func (self *BpNode) find(key Hashable) (int, bool) {
  539. var l int = 0
  540. var r int = len(self.keys) - 1
  541. var m int
  542. for l <= r {
  543. m = ((r - l) >> 1) + l
  544. if key.Less(self.keys[m]) {
  545. r = m - 1
  546. } else if key.Equals(self.keys[m]) {
  547. for j := m; j >= 0; j-- {
  548. if j == 0 || !key.Equals(self.keys[j-1]) {
  549. return j, true
  550. }
  551. }
  552. } else {
  553. l = m + 1
  554. }
  555. }
  556. return l, false
  557. }
  558. func (self *BpNode) find_end_of_pure_run() *BpNode {
  559. k := self.keys[0]
  560. p := self
  561. n := self.next
  562. for n != nil && n.Pure() && k.Equals(n.keys[0]) {
  563. p = n
  564. n = n.next
  565. }
  566. return p
  567. }
  568. func (self *BpNode) all() (li loc_iterator) {
  569. j := -1
  570. l := self.left_most_leaf()
  571. end := false
  572. j, l, end = next_location(j, l)
  573. li = func() (i int, leaf *BpNode, next loc_iterator) {
  574. if end {
  575. return -1, nil, nil
  576. }
  577. i = j
  578. leaf = l
  579. j, l, end = next_location(j, l)
  580. return i, leaf, li
  581. }
  582. return li
  583. }
  584. func (self *BpNode) all_backward() (li loc_iterator) {
  585. l := self.right_most_leaf()
  586. j := len(l.keys)
  587. end := false
  588. j, l, end = prev_location(j, l)
  589. li = func() (i int, leaf *BpNode, next loc_iterator) {
  590. if end {
  591. return -1, nil, nil
  592. }
  593. i = j
  594. leaf = l
  595. j, l, end = prev_location(j, l)
  596. return i, leaf, li
  597. }
  598. return li
  599. }
  600. func (self *BpNode) forward(from, to Hashable) (li loc_iterator) {
  601. j, l := self.get_start(from)
  602. end := false
  603. j--
  604. li = func() (i int, leaf *BpNode, next loc_iterator) {
  605. j, l, end = next_location(j, l)
  606. if end || to.Less(l.keys[j]) {
  607. return -1, nil, nil
  608. }
  609. return j, l, li
  610. }
  611. return li
  612. }
  613. func (self *BpNode) backward(from, to Hashable) (li loc_iterator) {
  614. j, l := self.get_end(from)
  615. end := false
  616. li = func() (i int, leaf *BpNode, next loc_iterator) {
  617. if end || l.keys[j].Less(to) {
  618. return -1, nil, nil
  619. }
  620. i = j
  621. leaf = l
  622. j, l, end = prev_location(i, l)
  623. return i, leaf, li
  624. }
  625. return li
  626. }
  627. func insert_linked_list_node(n, prev, next *BpNode) {
  628. if (prev != nil && prev.next != next) || (next != nil && next.prev != prev) {
  629. panic(BpTreeError("prev and next not hooked up"))
  630. }
  631. n.prev = prev
  632. n.next = next
  633. if prev != nil {
  634. prev.next = n
  635. }
  636. if next != nil {
  637. next.prev = n
  638. }
  639. }
  640. func remove_linked_list_node(n *BpNode) {
  641. if n.prev != nil {
  642. n.prev.next = n.next
  643. }
  644. if n.next != nil {
  645. n.next.prev = n.prev
  646. }
  647. }
  648. /* a must be full and b must be empty else there will be a panic
  649. */
  650. func balance_nodes(a, b *BpNode) {
  651. if len(b.keys) != 0 {
  652. panic(BpTreeError("b was not empty"))
  653. }
  654. if !a.Full() {
  655. panic(BpTreeError("a was not full", a))
  656. }
  657. if cap(a.keys) != cap(b.keys) {
  658. panic(BpTreeError("cap(a.keys) != cap(b.keys)"))
  659. }
  660. if cap(a.values) != cap(b.values) {
  661. panic(BpTreeError("cap(a.values) != cap(b.values)"))
  662. }
  663. if cap(a.pointers) != cap(b.pointers) {
  664. panic(BpTreeError("cap(a.pointers) != cap(b.pointers)"))
  665. }
  666. m := len(a.keys) / 2
  667. for m < len(a.keys) && a.keys[m-1].Equals(a.keys[m]) {
  668. m++
  669. }
  670. if m == len(a.keys) {
  671. m--
  672. for m > 0 && a.keys[m-1].Equals(a.keys[m]) {
  673. m--
  674. }
  675. }
  676. var lim int = len(a.keys) - m
  677. b.keys = b.keys[:lim]
  678. if cap(a.values) > 0 {
  679. if cap(a.values) != cap(a.keys) {
  680. panic(BpTreeError("cap(a.values) != cap(a.keys)"))
  681. }
  682. b.values = b.values[:lim]
  683. }
  684. if cap(a.pointers) > 0 {
  685. if cap(a.pointers) != cap(a.keys) {
  686. panic(BpTreeError("cap(a.pointers) != cap(a.keys)"))
  687. }
  688. b.pointers = b.pointers[:lim]
  689. }
  690. for i := 0; i < lim; i++ {
  691. j := m + i
  692. b.keys[i] = a.keys[j]
  693. if cap(a.values) > 0 {
  694. b.values[i] = a.values[j]
  695. }
  696. if cap(a.pointers) > 0 {
  697. b.pointers[i] = a.pointers[j]
  698. }
  699. }
  700. a.keys = a.keys[:m]
  701. if cap(a.values) > 0 {
  702. a.values = a.values[:m]
  703. }
  704. if cap(a.pointers) > 0 {
  705. a.pointers = a.pointers[:m]
  706. }
  707. }