Coverage report: /home/ellis/comp/core/lib/obj/tree/btree.lisp
Kind | Covered | All | % |
expression | 0 | 633 | 0.0 |
branch | 0 | 40 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; btree.lisp --- Lisp B-Trees
3
;; B-Trees and B+-Trees for Lisp.
7
;; Mostly useful in DBMS indexing - is the core data structure for some
8
;; popular K/V stores including RocksDB (B+-tree) and BerkleyDB (B-Tree)
11
;; Originally conceived at Boeing Research Labs in the 70s.
13
;; https://en.wikipedia.org/wiki/B-tree
14
;; https://github.com/danlentz/cl-btree
15
;; https://planetscale.com/blog/btrees-and-database-indexes
18
(in-package :obj/tree/btree)
20
(defgeneric existsp (key tree)
21
(:documentation "Test existence of a key in a tree."))
23
(defgeneric build-btree (self)
25
"Construct a btree of the appropriate type corresponding to the current *STORE*."))
27
(defclass btree (stored-collection) ()
29
"A map-like interface to a BTree object, which stores things in a semi-ordered
32
(defmethod drop-instance ((self btree))
33
"The standard method for reclaiming storage of stored objects"
37
(defgeneric drop-btree (bt)
38
(:documentation "Delete all key-value pairs from the btree and
39
render it an invalid object in the data store"))
41
(defgeneric build-indexed-btree (store)
43
"Construct a btree of the appropriate type corresponding to this store-controller."))
45
(defclass indexed-btree (btree) ()
46
(:documentation "A BTree which supports secondary indices."))
48
(defgeneric add-index (self &key index-name key-form populate)
50
"Add a secondary index. The indices are stored in an eq
51
hash-table, so the index-name should be a symbol. key-form
52
should be a symbol naming a function, a function call form
53
eg \'(create-index 3) or a lambda expression --
54
actual functions aren't supported.
55
Lambda expresssions are converted to functions through compile
56
and function call forms are transformed applying
57
the first element of the list to the rest of the list.
58
The function should take 3 arguments: the secondary DB, primary
59
key and value, and return two values: a boolean indicating
60
whether to index this key / value, and the secondary key if
61
so. If populate = t it will fill in secondary keys for
62
existing primary entries (may be expensive!)"))
64
(defgeneric get-index (self index-name)
65
(:documentation "Get a named index."))
67
(defgeneric remove-index (self index-name)
68
(:documentation "Remove a named index."))
70
(defgeneric map-indices (fn self)
71
(:documentation "Calls a two input function with the name and
72
btree-index object of all secondary indices in the btree"))
74
(defmethod ensure-index ((self indexed-btree) idxname &key key-form populate)
75
(ifret (get-index self idxname)
76
(add-index self :index-name idxname :key-form key-form :populate populate)))
79
(defgeneric build-btree-index (st &key name primary key-form)
81
"Construct a btree of the appropriate type corresponding to this store-controller."))
83
(defclass btree-index (btree)
84
((primary :type indexed-btree :reader primary :initarg :primary)
85
(key-form :reader key-form :initarg :key-form :initform nil)
86
(key-fn :type function :accessor key-fn :transient t))
87
(:metaclass stored-class)
88
(:documentation "Secondary index to an indexed-btree."))
90
(define-condition invalid-keyform (error)
91
((key-form :reader error-key-form :initarg :key-form))
92
(:report (lambda (c s)
93
(format s "~S is an invalid key form for an index."
94
(error-key-form c)))))
96
(defun function<-keyform (key-form)
97
(cond ((and (symbolp key-form) (fboundp key-form))
98
(fdefinition key-form))
99
((and (consp key-form) (eql (first key-form) 'lambda))
100
(compile nil key-form))
102
(apply (first key-form) (rest key-form)))
103
(t (error 'invalid-keyform :key-form key-form))))
105
(defmethod shared-initialize :after ((instance btree-index) slot-names
107
(declare (ignore slot-names rest))
108
(setf (key-fn instance) (function<-keyform (key-form instance))))
110
(defgeneric get-primary-key (key bt)
111
(:documentation "Get the primary key from a secondary key."))
113
;; secondary index defaults
114
(defmethod (setf get-value) (value key (bt btree-index))
115
"Puts are not allowed on secondary indices. Try adding to
117
(declare (ignore value key)
119
(error "Puts are forbidden on secondary indices. Try adding to the primary."))
121
(defmethod delete-key (key (bt btree-index) &key)
122
"Remove a key / value from the PRIMARY by a secondary
123
lookup, updating ALL other secondary indices."
124
(delete-key (get-primary-key key bt) (primary bt)))
128
((oid :accessor cursor-oid :type fixnum :initarg :oid)
129
(initialized-p :accessor cursor-initialized-p
130
:type boolean :initform nil :initarg :initialized-p
131
:documentation "Predicate indicating whether
132
the btree in question is initialized or not. Initialized means
133
that the cursor has a legitimate position, not that any
134
initialization action has been taken. The implementors of this
135
abstract class should make sure that happens under the
136
sheets... Cursors are initialized when you invoke an operation
137
that sets them to something (such as cursor-first), and are
138
uninitialized if you move them in such a way that they no longer
139
have a legimtimate value.")
140
(btree :accessor btree :initarg :btree))
141
(:documentation "A cursor for traversing (primary) BTrees."))
143
(defgeneric make-cursor (bt)
144
(:documentation "Construct a cursor for traversing BTrees."))
146
(defgeneric make-simple-cursor (bt)
147
(:documentation "Allow users to walk secondary indices and only
148
get back primary keys rather than associated
151
(defgeneric cursor-close (cursor)
153
"Close the cursor. Make sure to close cursors before the
154
enclosing transaction is closed!"))
156
(defgeneric cursor-duplicate (cursor)
157
(:documentation "Duplicate a cursor."))
159
(defgeneric cursor-current (cursor)
161
"Get the key / value at the cursor position. Returns
162
has-pair key value, where has-pair is a boolean indicating
165
(defgeneric cursor-first (cursor)
167
"Move the cursor to the beginning of the BTree, returning
168
has-pair key value."))
170
(defgeneric cursor-last (cursor)
172
"Move the cursor to the end of the BTree, returning
173
has-pair key value."))
175
(defgeneric cursor-next (cursor)
177
"Advance the cursor, returning has-pair key value."))
179
(defgeneric cursor-prev (cursor)
181
"Move the cursor back, returning has-pair key value."))
183
(defgeneric cursor-set (cursor key)
185
"Move the cursor to a particular key, returning has-pair
188
(defgeneric cursor-set-range (cursor key)
190
"Move the cursor to the first key-value pair with key
191
greater or equal to the key argument, according to the lisp
192
sorter. Returns has-pair key value."))
194
(defclass secondary-cursor (cursor) ()
195
(:documentation "Cursor for traversing secondary indices."))
197
(defgeneric cursor-get-both (cursor key value)
199
"Moves the cursor to a particular key / value pair,
200
returning has-pair key value.")
201
(:method :before ((cursor secondary-cursor) key value)
202
(declare (ignore key value) (ignorable cursor))
203
(error "Cannot use get-both on secondary cursor; use pget-both")))
205
(defgeneric cursor-get-both-range (cursor key value)
207
"Moves the cursor to the first key / value pair with key
208
equal to the key argument and value greater or equal to the
209
value argument. Not really useful for us since primaries
210
don't have duplicates. Returns has-pair key value.")
211
(:method :before ((cursor secondary-cursor) key value)
212
(declare (ignore key value) (ignorable cursor))
213
(error "Cannot use get-both-range on secondary cursor; use pget-both-range")))
215
(defgeneric cursor-delete (cursor)
217
"Delete by cursor. The cursor is at an invalid position,
218
and uninitialized, after a successful delete."))
220
(defgeneric cursor-put (cursor value &key key)
222
"Overwrite value at current cursor location. Cursor remains
223
at the current location")
224
(:method :before ((cursor secondary-cursor) value &key key)
225
(declare (ignore key value) (ignorable cursor))
226
(error "Cannot use put on a secondary cursor; use (setf get-value) on primary")))
228
(defgeneric cursor-pcurrent (cursor)
230
"Returns has-tuple / secondary key / value / primary key
231
at the current position."))
233
(defgeneric cursor-pfirst (cursor)
235
"Moves the key to the beginning of the secondary index.
236
Returns has-tuple / secondary key / value / primary key."))
238
(defgeneric cursor-plast (cursor)
240
"Moves the key to the end of the secondary index. Returns
241
has-tuple / secondary key / value / primary key."))
243
(defgeneric cursor-pnext (cursor)
245
"Advances the cursor. Returns has-tuple / secondary key /
246
value / primary key."))
248
(defgeneric cursor-pprev (cursor)
250
"Moves the cursor back. Returns has-tuple / secondary key
251
/ value / primary key."))
253
(defgeneric cursor-pset (cursor key)
255
"Moves the cursor to a particular key. Returns has-tuple
256
/ secondary key / value / primary key."))
258
(defgeneric cursor-pset-range (cursor key)
260
"Move the cursor to the first key-value pair with key
261
greater or equal to the key argument, according to the lisp
262
sorter. Returns has-pair secondary key value primary key."))
264
(defgeneric cursor-pget-both (cursor key value)
266
"Moves the cursor to a particular secondary key / primary
267
key pair. Returns has-tuple / secondary key / value /
270
(defgeneric cursor-pget-both-range (cursor key value)
272
"Moves the cursor to a the first secondary key / primary
273
key pair, with secondary key equal to the key argument, and
274
primary key greater or equal to the pkey argument. Returns
275
has-tuple / secondary key / value / primary key."))
277
(defgeneric cursor-next-dup (cursor)
279
"Move to the next duplicate element (with the same key.)
280
Returns has-pair key value."))
282
(defgeneric cursor-next-nodup (cursor)
284
"Move to the next non-duplicate element (with different
285
key.) Returns has-pair key value."))
287
(defgeneric cursor-pnext-dup (cursor)
289
"Move to the next duplicate element (with the same key.)
290
Returns has-tuple / secondary key / value / primary key."))
292
(defgeneric cursor-pnext-nodup (cursor)
294
"Move to the next non-duplicate element (with different
295
key.) Returns has-tuple / secondary key / value / primary
299
(defgeneric cursor-prev-dup (cursor)
301
"Move to the previous duplicate element (with the same key.)
302
Returns has-pair key value."))
304
;; Default implementation.
305
(defmethod cursor-prev-dup ((cur cursor))
306
(when (cursor-initialized-p cur)
307
(multiple-value-bind (exists? skey-cur)
309
(declare (ignore exists?))
310
(multiple-value-bind (exists? skey value)
312
(if (compare-equal skey-cur skey)
313
(values exists? skey value)
314
(setf (cursor-initialized-p cur) nil))))))
316
(defgeneric cursor-prev-nodup (cursor)
318
"Move to the previous non-duplicate element (with
319
different key.) Returns has-pair key value."))
321
(defgeneric cursor-pprev-dup (cursor)
323
"Move to the previous duplicate element (with the same key.)
324
Returns has-tuple / secondary key / value / primary key."))
326
;; Default implementation.
327
(defmethod cursor-pprev-dup ((cur cursor))
328
(when (cursor-initialized-p cur)
329
(multiple-value-bind (exists? skey-cur)
331
(declare (ignore exists?))
332
(multiple-value-bind (exists? skey value pkey)
334
(if (compare-equal skey-cur skey)
335
(values exists? skey value pkey)
336
(setf (cursor-initialized-p cur) nil))))))
338
(defgeneric cursor-pprev-nodup (cursor)
340
"Move to the previous non-duplicate element (with
341
different key.) Returns has-tuple / secondary key / value /
344
(defmacro with-btree-cursor ((var bt) &body body)
345
"Macro which opens a named cursor on a BTree (primary or
346
not), evaluates the forms, then closes the cursor."
347
(declare (inline make-cursor))
349
(declare (dynamic-extent ,var))
350
(sb-sys:without-interrupts
351
(setf ,var (make-cursor ,bt)))
354
(sb-sys:without-interrupts
355
(cursor-close ,var)))))
357
(defmethod drop-btree ((self btree))
358
(with-btree-cursor (cur self)
359
(loop for (exists? key) = (multiple-value-list (cursor-first cur))
360
then (multiple-value-list (cursor-next cur))
362
do (delete-key key self))))
364
(defmethod drop-btree ((bt indexed-btree))
365
(map-indices (lambda (name index)
366
(declare (ignore index))
367
(remove-index bt name))
371
(defmethod drop-btree ((index btree-index))
372
"Btree indices don't need to have values removed, this happens on the primary
373
when remove-kv is called"
376
(defun compare<= (a b)
377
"A comparison function that mirrors the ordering of the data stores for <= on
378
all sortable types. It does not provide ordering on non-sorted values other
379
than by type class (i.e. not serialized lexical values)"
380
(declare (optimize (speed 3) (safety 2) (debug 0)))
384
(character (<= (char-code a) (char-code b)))
385
(string (string-not-greaterp a b))
386
(symbol (string-not-greaterp (symbol-name a) (symbol-name b)))
387
(pathname (string-not-greaterp (namestring a) (namestring b)))
388
(stored (<= (oid a) (oid b)))
389
(cons (or (compare<= (car a) (car b))
390
(compare<= (cdr a) (cdr b))))
395
(defun compare< (a b)
396
"A comparison function that mirrors the ordering of the data stores for < on
397
all sortable types. It does not provide ordering on non-sorted values other
398
than by type class (i.e. not serialized lexical values)"
399
(declare (optimize (speed 3) (safety 2) (debug 0)))
403
(character (< (char-code a) (char-code b)))
404
(string (string-lessp a b))
405
(symbol (string-lessp (symbol-name a) (symbol-name b)))
406
(pathname (string-lessp (namestring a) (namestring b)))
407
(stored (< (oid a) (oid b)))
408
(cons (if (compare-equal (car a) (car b))
409
(compare< (cdr a) (cdr b))
410
(compare< (car a) (car b))))
415
(defun compare-equal (a b)
416
"A lisp compare equal in same spirit as compare<. Case insensitive for strings."
419
(stored (eq (oid a) (oid b)))
424
(defun compare>= (a b)
425
(not (compare< a b)))
427
(defvar *current-cursor* nil
428
"This dynamic variable is referenced only when deleting elements using the
429
following function. This allows mapping functions to delete elements as they
430
map. This is safe as we don't revisit values during maps")
432
(defmacro with-current-cursor ((cur) &body body)
433
`(let ((*current-cursor* ,cur))
434
(declare (special *current-cursor*))
437
(defun remove-current-kv ()
438
(unless *current-cursor*
439
(error "Cannot call remove-current-kv outside of a map-btree or map-index function argument"))
440
(cursor-delete *current-cursor*))
442
;; The primary mapping function
444
(defgeneric map-btree (fn btree &rest args &key start end value from-end collect &allow-other-keys)
445
(:documentation "Map btree maps over a btree from the value start to the value of end. If
446
values are not provided, then it maps over all values. BTrees do not have
447
duplicates, but map-btree can also be used with indices in the case where you
448
don't want access to the primary key so we require a value argument as well
449
for mapping duplicate value sets. The collect keyword will accumulate the
450
results from each call of fn in a fresh list and return that list in the same
451
order the calls were made (first to last)."))
453
(defun validate-map-call (start end)
454
(unless (or (null start) (null end) (compare<= start end))
455
(error "map-index called with start = ~A and end = ~A. Start must be less than or equal to end according to compare<=."
458
(defmacro with-map-collector ((fn collect-p) &body body)
459
"Binds free var results to the collected results of function in symbol-argument
460
fn based on boolean parameter collect-p, otherwise result is nil"
461
(with-gensyms (collector k v)
462
`(let ((results nil))
463
(flet ((,collector (,k ,v)
464
(push (funcall ,fn ,k ,v) results)))
465
(declare (dynamic-extent (function ,collector)))
466
(let ((,fn (if ,collect-p #',collector ,fn)))
469
(defmacro with-map-wrapper ((fn btree collect cur) &body body)
470
"Binds variable st to the store controller, overrieds fn with a collector if
471
dynamic value of collect is true and binds variable named cur to the current
473
`(with-map-collector (,fn ,collect)
474
(with-btree-cursor (,cur ,btree)
475
(with-current-cursor (,cur)
478
(defmacro with-cursor-values (expr &body body)
479
"Binds exists?, skey, val and pkey from expression assuming expression returns
480
a set of cursor operation values or nil"
481
`(multiple-value-bind (exists? skey val pkey)
482
(the (values boolean t t t) ,expr)
483
(declare (ignorable exists? skey val pkey))
486
(defmacro iterate-map-btree (&key start continue step)
487
"In context with bound variables: cur, store, value, start, end, fn
488
Provide a start expression that returns index cursor values
489
Provide a continue expression that uses the
490
bound variables key, start, value or end to determine if
491
the iteration should continue
492
Provide a step expression that returns index cursor values."
493
`(labels ((continue-p (key)
494
(declare (ignorable key))
496
(declare (dynamic-extent (function continue-p)))
498
(with-cursor-values ,start
499
(when (and exists? (continue-p skey))
500
(funcall fn skey val)
503
(with-cursor-values ,step
504
(if (and exists? (continue-p skey))
505
(funcall fn skey val)
506
(return (nreverse results))))
508
(warn "Deserialization error in map: returning nil for element~%")
511
(format t "Deserialization error in map: returning nil for element~%")
514
;; NOTE: the use of nil for the last element in a btree only works because the C comparison
515
;; function orders by type tag and nil is the highest valued type tag so nils are the last
516
;; possible element in a btree ordered by value.
517
(defmethod map-btree (fn (btree btree) &rest args &key start end (value nil value-set-p)
518
from-end collect &allow-other-keys)
519
(declare (ignorable args))
520
(validate-map-call start end)
521
(cond (value-set-p (map-btree-values fn btree value collect))
522
(from-end (map-btree-from-end fn btree start end collect))
523
(t (map-btree-from-start fn btree start end collect))))
525
(defun map-btree-values (fn btree value collect)
526
(with-map-wrapper (fn btree collect cur)
528
:start (cursor-set cur value)
529
:continue (compare-equal key value)
530
:step (cursor-next cur))))
532
(defun map-btree-from-start (fn btree start end collect)
533
(with-map-wrapper (fn btree collect cur)
536
(cursor-set-range cur start)
538
:continue (or (null end) (compare<= key end))
539
:step (cursor-next cur))))
541
(defun map-btree-from-end (fn btree start end collect)
542
(with-map-wrapper (fn btree collect cur)
545
(with-cursor-values (cursor-set-range cur end)
546
(cond ((and exists? (compare-equal skey end))
547
(cursor-next-nodup cur)
549
(t (cursor-prev cur))))
551
:continue (or (null start) (compare>= key start))
552
:step (cursor-prev cur))))
555
;; Special support for mapping indexes of a secondary btree
557
(defgeneric map-index (fn index &rest args &key start end value from-end collect &allow-other-keys)
558
(:documentation "Map-index is like map-btree but for secondary indices, it
559
takes a function of three arguments: key, value and primary
560
key. As with map-btree the keyword arguments start and end
561
determine the starting element and ending element, inclusive.
562
Also, start = nil implies the first element, end = nil implies
563
the last element in the index. If you want to traverse only a
564
set of identical key values, for example all nil values, then
565
use the value keyword which will override any values of start
566
and end. The collect keyword will accumulate the results from
567
each call of fn in a fresh list and return that list in the
568
same order the calls were made (first to last)"))
570
(defmacro with-map-index-collector ((fn collect-p) &body body)
571
"Binds free var results to the collected results of function in
572
symbol-argument fn based on boolean parameter collect-p,
573
otherwise result is nil"
574
(with-gensyms (collector k v pk)
575
`(let ((results nil))
576
(flet ((,collector (,k ,v ,pk)
577
(push (funcall ,fn ,k ,v ,pk) results)))
578
(declare (dynamic-extent (function ,collector)))
579
(let ((,fn (if ,collect-p #',collector ,fn)))
582
(defmacro iterate-map-index (&key start continue step)
583
"In context with bound variables: cur, store, value, start, end, fn
584
Provide a start expression that returns index cursor values
585
Provide a continue expression that uses the
586
bound variables key, start, value or end to determine if
587
the iteration should continue
588
Provide a step expression that returns index cursor values."
589
`(labels ((continue-p (key)
590
(declare (ignorable key))
592
(declare (dynamic-extent (function continue-p)))
593
(with-cursor-values ,start
594
(when (and exists? (continue-p skey))
595
(funcall fn skey val pkey)
597
(with-cursor-values ,step
598
(if (and exists? (continue-p skey))
599
(funcall fn skey val pkey)
600
(return (nreverse results)))))))))
602
(defmacro with-map-index-wrapper ((fn btree collect cur) &body body)
603
"Binds variable store to the store controller, overrieds fn with a collector
604
if dynamic value of collect is true and binds variable named cur to
606
`(with-map-index-collector (,fn ,collect)
607
(with-btree-cursor (,cur ,btree)
608
(with-current-cursor (,cur)
611
(defun pset-range-for-descending (cur end)
612
(if (cursor-pset cur end)
614
(cursor-next-nodup cur)
617
(cursor-pset-range cur end)
618
(cursor-pprev cur))))
620
(defmethod map-index (fn (index btree-index) &rest args
621
&key start end (value nil value-set-p) from-end collect
623
(declare (ignore args))
624
(validate-map-call start end)
625
(cond (value-set-p (map-index-values fn index value collect))
626
(from-end (map-index-from-end fn index start end collect))
627
(t (map-index-from-start fn index start end collect))))
629
(defun map-index-values (fn index value collect)
630
(with-map-index-wrapper (fn index collect cur)
632
:start (cursor-pset cur value)
634
:step (cursor-pnext-dup cur))))
636
(defun map-index-from-start (fn index start end collect)
637
(with-map-index-wrapper (fn index collect cur)
640
(cursor-pset-range cur start)
642
:continue (or (null end) (compare<= key end))
643
:step (cursor-pnext cur))))
645
(defun map-index-from-end (fn index start end collect)
646
(with-map-index-wrapper (fn index collect cur)
649
(pset-range-for-descending cur end)
651
:continue (or (null start) (compare>= key start))
652
:step (cursor-pprev cur))))
654
;; Some generic utility functions
655
(defun print-btree-entry (k v)
656
(format t "key: ~A / value: ~A~%" k v))
658
(defun dump-btree (bt &key (print-fn #'print-btree-entry) (count nil))
659
"Print the contents of a btree for easy inspection & debugging"
660
(format t "DUMP ~A~%" bt)
664
(when (and count (>= (incf i) count))
665
(return-from dump-btree))
666
(funcall print-fn k v))
669
(defun print-btree-key-and-type (k v)
670
(format t "key ~A / value type ~A~%" k (type-of v)))
672
(defun btree-keys (bt &key (print-fn #'print-btree-key-and-type) (count nil))
673
(format t "BTREE keys and types for ~A~%" bt)
674
(dump-btree bt :print-fn print-fn :count count))
676
(defun print-index-entry (k v pk)
677
(format t "key: ~A / value: ~A / primary-key: ~A~%" k v pk))
679
(defun dump-index (idx &key (print-fn #'print-index-entry) (count nil))
680
(format t "DMP INDEX ~A~%" idx)
684
(when (and count (>= (incf i) count))
685
(return-from dump-index))
686
(funcall print-fn k v pk))
689
(defmethod btree-differ-p ((x btree) (y btree))
690
;; (assert (eq (get-store x) (get-store y)))
691
(let ((cx1 (make-cursor x))
692
(cy1 (make-cursor y))
705
(multiple-value-bind (m k v) (cursor-current cx1)
709
(multiple-value-bind (m k v) (cursor-current cy1)
713
(if (not (and (equal mx my)
716
(setf rv (list mx my kx ky vx vy)))
717
(setf done (and (not mx) (not mx)))