Coverage report: /home/ellis/comp/core/lib/q/sql.lisp
Kind | Covered | All | % |
expression | 0 | 1232 | 0.0 |
branch | 0 | 96 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; sql.lisp --- Structured Query Langs
3
;; SQL parser and query specification.
7
;; Parser derived from PARSE/PRATT:PRATT-PARSER
9
;; ref: https://tdop.github.io/
14
(declaim (optimize (speed 3)))
17
(define-condition sql-error (error) ())
19
(deferror simple-sql-error (sql-error simple-error) ())
21
(defun simple-sql-error (ctrl &rest args)
22
(error 'simple-sql-error :format-control ctrl :format-arguments args))
24
(define-condition sql-token-error (sql-error)
25
((token :initarg :token :reader bad-token))
26
(:report (lambda (c s)
27
(format s "Bad Token: ~A" (bad-token c)))))
29
(defun sql-token-error (token)
30
(error 'sql-token-error :token token))
32
(define-condition illegal-sql-state (sql-error)
33
((state :initform nil :initarg :state :reader illegal-state))
34
(:report (lambda (c s)
35
(format s "Illegal SQL State: ~A" (illegal-state c)))))
37
(defun illegal-sql-state (state)
38
(error 'illegal-sql-state :state state))
41
(defclass sql-query (query) ())
43
(defclass sql-data-source (data-source) ()
44
(:documentation "Data source which can be used within SQL expressions."))
46
;; SQL-EXPRESSIONs are the output of a SQL-PARSER. These objects are further
47
;; lowered to LOGICAL-EXPRESSIONs.
48
(defclass sql-expression () ())
50
(deftype sql-expression-vector () '(vector sql-expression))
52
(defclass sql-identifier (id sql-expression) ())
54
(defclass sql-binary-expression (binary-expression sql-expression) ())
56
(defclass sql-math-expression (sql-binary-expression)
57
((op :initarg :op :type symbol :accessor binary-expression-op)))
59
(defclass sql-string (sql-expression literal-expr)
60
((value :type string :initarg :value :accessor literal-value)))
62
(defclass sql-number (sql-expression literal-expr)
63
((value :type number :initarg :value :accessor literal-value)))
65
(defclass sql-function (id sql-expression)
66
((args :type sql-expression-vector :initarg :args)))
68
(defclass sql-alias (sql-expression alias-expression) ())
70
(defclass sql-cast (sql-expression)
71
((expr :type sql-expression :initarg :expr)
72
(type :type sql-identifier :initarg :type)))
74
(defclass sql-sort (sql-expression)
75
((expr :type sql-expression :initarg :expr)
76
(asc :type boolean :initarg :asc)))
78
(defclass sql-relation (sql-expression) ())
80
(defclass sql-select (sql-relation)
81
((projection :type sql-expression-vector :initarg :projection)
82
(selection :type (or sql-expression null) :initarg :selection)
83
(group-by :type sql-expression-vector :initarg :group-by)
84
(order-by :type sql-expression-vector :initarg :order-by)
85
(having :type (or null sql-expression) :initarg :having)
86
(table-name :type string :initarg :table-name)))
90
(defvar *sql-token-types* (list :ident :str :num :kw :op :sym t))
91
(deftype sql-token-type-designator () `(member ,@*sql-token-types*))
92
(defvar *sql-keywords*
294
(defvar *sql-keyword-start-chars*
295
(remove-duplicates (mapcar
297
(declare (simple-string k))
301
(defvar *sql-keyword-table*
302
(let* ((pairs (mapcar (lambda (x) (cons (keywordicate x) x)) *sql-keywords*))
303
(table (make-hash-table :size (length pairs))))
304
(dolist (p pairs table)
305
(setf (gethash (car p) table) (cdr p)))))
307
(defvar *sql-symbol-table*
308
(let* ((pairs '((:LEFT-PAREN . "(")
312
(:LEFT-BRACKET . "[")
313
(:RIGHT-BRACKET . "]")
331
(:DOUBLE-COLON . "::")
348
(table (make-hash-table :size (length pairs))))
349
(dolist (p pairs table)
350
(setf (gethash (car p) table) (cdr p)))))
352
(declaim (ftype (function (keyword) (values string boolean))
355
(defun get-sql-keyword (kw) (gethash kw *sql-keyword-table*))
356
(defun get-sql-symbol (kw) (gethash kw *sql-symbol-table*)))
358
(defvar *sql-symbols* (hash-table-values *sql-symbol-table*))
360
(defvar *sql-symbol-start-chars* (remove-duplicates
362
(declare (simple-string x))
367
(text "" :type string)
368
(type t :type sql-token-type-designator)
369
(end 0 :type fixnum))
371
(defun num-start-p (c) (or (digit-char-p c) (char= #\. c) (char= #\- c)))
372
(defun ident-start-p (c) (alpha-char-p c))
373
(defun ident-part-p (c) (or (alpha-char-p c) (digit-char-p c) (char= #\_ c)))
374
(defun str-start-p (c) (or (char= #\' c) (char= #\" c)))
375
(defun kw-start-p (c) (member c *sql-keyword-start-chars* :test 'char=))
376
(defun sym-start-p (c) (member c *sql-symbol-start-chars* :test 'char=))
378
;; low-level token readers
379
(defmacro def-sql-reader (name (&rest args) &body body)
380
`(defun ,(symbolicate 'read-sql- name) (,@args)
381
(declare (optimize (safety 0)))
384
(defun peek-sql-char (expected stream &optional skip-ws)
385
(char= (peek-char skip-ws stream) expected))
387
(def-sql-reader char (stream expected &optional skip-ws)
388
(when (peek-sql-char expected stream skip-ws)
389
(read-char stream nil nil)))
391
(def-sql-reader num-token (stream)
394
(with-output-to-string (s)
395
(when (read-sql-char stream #\- nil)
397
(loop for x = (peek-char nil stream nil nil)
399
while (or (digit-char-p x) (char= #\. x))
400
do (write-char (read-char stream nil nil) s)
403
:end (file-position stream)))
405
(def-sql-reader str-token (stream)
406
(let ((tok (make-sql-token :type :str))
408
(unless (read-sql-char stream terminator)
409
(setf terminator #\')
410
(unless (read-sql-char stream terminator)
411
(sql-token-error tok)))
412
(setf (sql-token-text tok)
413
(with-output-to-string (s)
414
(loop for x = (peek-char nil stream) ;; must not be EOF before terminator
415
if (not (char= terminator x))
416
do (write-char (read-char stream) s)
417
else if (char= terminator x)
418
do (return (read-char stream)))))
419
(setf (sql-token-end tok) (file-position stream))
422
(def-sql-reader sym-token (stream)
423
(let ((tok (make-sql-token :type :sym)))
424
(setf (sql-token-text tok)
425
(with-output-to-string (s)
426
(write-char (read-char stream nil nil) s))
427
(sql-token-end tok) (file-position stream))
430
(defun ambiguous-ident-p (tok)
431
(let ((text (sql-token-text tok)))
432
(or (string-equal #.(get-sql-keyword :ORDER) text)
433
(string-equal #.(get-sql-keyword :GROUP) text))))
435
(defun proc-ambiguous-ident (stream start)
436
(declare (stream stream) (fixnum start))
438
(read-sequence (make-string 2) stream :start start :end (the fixnum (+ start 2)))
439
#.(get-sql-keyword :BY))
443
(def-sql-reader ident-token (stream)
444
(let ((tok (make-sql-token)))
445
(if (read-sql-char stream #\`)
446
(setf (sql-token-text tok)
447
(with-output-to-string (s)
448
(loop for x = (peek-char nil stream) ;; must not be EOF before terminator
449
if (not (char= #\` x))
450
do (write-char (read-char stream) s)
451
else do (return (read-char stream))))
452
(sql-token-type tok) :ident)
453
;; may not actually be ident - we check for kw after we have a known end position
454
(setf (sql-token-text tok)
455
(with-output-to-string (s)
456
(loop for x = (peek-char nil stream nil nil)
457
while (and x (ident-part-p x))
458
do (write-char (read-char stream) s)))))
459
(setf (sql-token-end tok) (file-position stream))
460
;; resolve sql-token-type
462
((ambiguous-ident-p tok)
463
(setf (sql-token-type tok) (proc-ambiguous-ident stream (sql-token-end tok))))
464
((and (not (eql (sql-token-type tok) :ident)) (member (sql-token-text tok) *sql-keywords* :test 'string-equal))
465
(setf (sql-token-type tok) :kw)))
468
(defun next-sql-token (stream)
469
"Parse the next sql token from input STREAM else return nil."
472
(next (peek-char t stream nil nil)))
474
(return-from :next tok))
476
((num-start-p next) (read-sql-num-token stream))
477
((ident-start-p next) (read-sql-ident-token stream))
478
((str-start-p next) (read-sql-str-token stream))
479
((sym-start-p next) (read-sql-sym-token stream))
480
(t (make-sql-token :end (file-position stream)))))))
482
(defun read-sql-stream (stream)
483
(loop for tok = (next-sql-token stream)
487
(defun read-sql-string (sql)
488
"Convert SQL string into a list of tokens. Tokens are of the form
490
(with-input-from-string (sql sql)
491
(read-sql-stream sql)))
495
;; At this point we have a sequence (list) of tokens
496
(defclass sql-parser (pratt-parser query-parser)
497
((tokens :type list :initarg :tokens :accessor sql-tokens)))
499
(defmethod next-precedence ((self sql-parser))
500
(let ((token (car (sql-tokens self))))
503
(case (sql-token-type token)
504
(:kw (string-case ((sql-token-text token) :default 0)
510
(:sym (string-case ((sql-token-text token) :default 0)
511
(#.(get-sql-symbol :LT) 40)
512
(#.(get-sql-symbol :LT-EQ) 40)
513
(#.(get-sql-symbol :EQ) 40)
514
(#.(get-sql-symbol :BANG-EQ) 40)
515
(#.(get-sql-symbol :GT-EQ) 40)
516
(#.(get-sql-symbol :GT) 40)
517
(#.(get-sql-symbol :PLUS) 50)
518
(#.(get-sql-symbol :SUB) 50)
519
(#.(get-sql-symbol :STAR) 60)
520
(#.(get-sql-symbol :SLASH) 60)
521
(#.(get-sql-symbol :LEFT-PAREN) 70)))
524
(defmethod parse-prefix ((self sql-parser))
525
(let ((token (pop (sql-tokens self))))
527
(case (sql-token-type token)
528
(:kw (string-case ((sql-token-text token))
529
("SELECT" (parse-select self))
530
("CAST" (parse-cast self))
531
("MAX" (make-instance 'sql-identifier :id "MAX"))
532
("INT" (make-instance 'sql-identifier :id "INT"))
533
("DOUBLE" (make-instance 'sql-identifier :id "DOUBLE"))))
534
(:ident (make-instance 'sql-identifier :id (sql-token-text token)))
535
(:str (make-instance 'sql-string :value (sql-token-text token)))
536
(:num (make-instance 'sql-number :value (parse-number (sql-token-text token))))
537
;; unknown identifier
538
(t (make-instance 'sql-identifier :id (sql-token-text token)))))))
540
(defmethod parse-infix ((self sql-parser) (left sql-expression) precedence)
541
(let* ((tokens (sql-tokens self))
542
(token (pop tokens)))
544
(case (sql-token-type token)
546
((member (sql-token-text token) (list #.(get-sql-symbol :PLUS) #.(get-sql-symbol :SUB)
547
#.(get-sql-symbol :STAR) #.(get-sql-symbol :SLASH)
548
#.(get-sql-symbol :EQ) #.(get-sql-symbol :GT)
549
#.(get-sql-symbol :LT))
551
(pop (sql-tokens self)) ;; consume
552
(make-instance 'sql-math-expression
554
:op (sql-token-text token)
555
:rhs (parse self precedence)))
556
((string-equal "(" (sql-token-text token))
558
(let ((args (parse-expression-list self)))
559
(assert (string-equal (sql-token-text (pop tokens)) ")"))
560
(make-instance 'sql-function :id (id left) :args args)))
562
(:kw (string-case ((sql-token-text token))
564
(make-instance 'sql-alias
566
:alias (parse-identifier self)))
568
(make-instance 'sql-binary-expression
571
:rhs (parse self precedence)))
573
(make-instance 'sql-binary-expression
576
:rhs (parse self precedence)))
578
("DESC" (pop tokens))))))))
580
(defmethod parse-order ((self sql-parser))
582
(sort (parse-expression self)))
585
(case (sql-token-type sort)
586
(:ident (setf sort (make-instance 'sql-sort :expr sort :asc t)))
588
(push sort sort-list)
589
(let ((next (car (sql-tokens self))))
590
(when (and (eql (sql-token-type next) :sym) (string-equal (sql-token-text next) ","))
591
(pop (sql-tokens self)))
592
(setf sort (parse-expression self))))
593
finally (return sort-list))))
595
(defmethod parse-cast ((self sql-parser))
596
(let ((tokens (sql-tokens self)))
597
(assert (string-equal (sql-token-text (pop tokens)) "("))
598
(let* ((expr (parse-expression self))
599
(alias (make-instance 'sql-alias :expr expr)))
600
(assert (string-equal (sql-token-text (pop tokens)) ")"))
601
(make-instance 'sql-cast :expr expr :type (slot-value alias 'alias)))))
603
(defmethod parse-select ((self sql-parser))
604
(let ((projection (parse-expression-list self))
605
table filter-expr group-by having-expr order-by
606
(tok (pop (sql-tokens self))))
607
(case (sql-token-type tok)
608
(:kw (string-case ((sql-token-text tok))
610
(setf table (parse-expression self))
612
;; parse optional WHERE
613
(let ((next (car (sql-tokens self))))
615
(when (string-equal "WHERE" (sql-token-text next))
616
(setf filter-expr (parse-expression self)))
618
(string-equal "GROUP" (sql-token-text next))
619
(string-equal "BY" (sql-token-text (cadr (sql-tokens self)))))
620
(setf group-by (parse-expression-list self)))
621
(when (string-equal "HAVING" (sql-token-text next))
622
(setf having-expr (parse-expression self)))
623
(when (and (string-equal "ORDER" (sql-token-text next))
624
(string-equal "BY" (sql-token-text next)))
625
(setf order-by (parse-order self))))))))
626
(t (illegal-sql-state tok)))
627
(make-instance 'sql-select
628
:projection projection
629
:selection filter-expr
633
:table-name (id table))))
635
(defmethod parse-expression-list ((self sql-parser))
636
(log:trace! "> parse-expression-list")
638
(loop for expr = (parse-expression self)
641
if ;; check for comma and repeat, else return
642
(let ((peek (car (sql-tokens self))))
644
(eql :sym (sql-token-type peek))
645
(string-equal (sql-token-text peek) #.(get-sql-symbol :comma))))
646
do (pop (sql-tokens self))
648
finally (return ret))))
650
(defmethod parse-expression ((self sql-parser))
653
(defmethod parse-identifier ((self sql-parser))
654
(let ((expr (parse-expression self)))
655
(if (typep expr 'sql-identifier)
657
(simple-sql-error "Expected identifier, got ~A" expr))))
659
(defmacro with-sql-parser ((sym &optional tokens) &body body)
660
`(let ((,sym (make-instance 'sql-parser :tokens ,tokens)))
663
(defmacro with-sql-string ((sym str) &body body)
664
`(with-sql-parser (,sym (read-sql-string ,str))
667
(defmacro with-sql-stream ((sym stream) &body body)
668
`(with-sql-parser (,sym (read-sql-stream ,stream))
672
(defun make-sql-logical-expression (expr input)
674
(sql-identifier (make-instance 'column-expression :name (id expr)))
675
(sql-string (literal-value expr))
676
(sql-number (literal-value expr))
677
;; TODO 2024-08-04: sql-unary-expression
678
(sql-binary-expression
679
(let ((l (make-sql-logical-expression (lhs expr) input))
680
(r (make-sql-logical-expression (rhs expr) input)))
683
(string-case ((binary-expression-op expr))
685
("=" (make-instance 'eq-expression :lhs l :rhs r))
686
("!=" (make-instance 'neq-expression :lhs l :rhs r))
687
(">" (make-instance 'gt-expression :lhs l :rhs r))
688
(">=" (make-instance 'gteq-expression :lhs l :rhs r))
689
("<" (make-instance 'lt-expression :lhs l :rhs r))
690
("<=" (make-instance 'lteq-expression :lhs l :rhs r))
692
("AND" (make-instance 'and-expression :lhs l :rhs r))
693
("OR" (make-instance 'or-expression :lhs l :rhs r))
695
("+" (make-instance 'add-expression :lhs l :rhs r))
696
("-" (make-instance 'sub-expression :lhs l :rhs r))
697
("*" (make-instance 'mult-expression :lhs l :rhs r))
698
("/" (make-instance 'div-expression :lhs l :rhs r))
699
("%" (make-instance 'mod-expression :lhs l :rhs r)))))))
700
(sql-alias (make-instance 'alias-expression
701
:expr (make-sql-logical-expression (slot-value expr 'expr) input)
702
:alias (id (slot-value expr 'alias))))
703
;; TODO 2024-08-04: requires cast-expression impl in obj/query
704
;; (sql-cast (make-instance 'cast))
707
(string-case ((id expr))
708
("MIN" (make-instance 'min-expression
709
:expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
710
("MAX" (make-instance 'max-expression
711
:expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
712
("SUM" (make-instance 'sum-expression
713
:expr (make-sql-logical-expression (car (slot-value expr 'args)) input)))
714
("AVG" (make-instance 'avg-expression
715
:expr (make-sql-logical-expression (car (slot-value expr 'args)) input))))))))
717
(labels ((visit (expr accum)
720
(column-expression (accumulate accum (name expr)))
721
(alias-expression (visit (slot-value expr 'expr) accum))
723
(visit (lhs expr) accum)
724
(visit (rhs expr) accum))
725
(aggregate-expression (visit (slot-value expr 'expr) accum))))))
726
(defun get-ref-columns (exprs)
728
(loop for expr across exprs
729
collect (visit expr accum))))
730
(defun get-selection-ref-columns (select table)
732
(when (slot-value select 'selection)
733
(let ((filter-expr (make-sql-logical-expression (slot-value select 'selection) table)))
734
(visit filter-expr accum)
735
(let ((valid-cols (map 'list (lambda (x) (field-name x)) (fields (schema table)))))
736
(remove-if (lambda (x) (not (member x valid-cols :test 'string-equal))) accum)))))))
738
(defun plan-non-aggregate-query (select df projection-expr column-names-in-selection column-names-in-projection)
740
(unless (slot-value select 'selection)
741
(return-from plan-non-aggregate-query (df-project plan projection-expr)))
742
(let ((missing (member-if-not
743
(lambda (x) (member x column-names-in-projection :test 'string-equal))
744
column-names-in-selection)))
746
(setq plan (df-filter
748
(make-sql-logical-expression
749
(slot-value select 'selection)
750
(setf plan (df-project plan projection-expr)))))
751
(let ((n (length projection-expr)))
752
(setq plan (df-filter plan
753
(make-sql-logical-expression
754
(slot-value select 'selection)
760
(lambda (x) (make-instance 'column-expression :name x))
762
(lambda (x y) (declare (ignore y)) x)))))))
767
collect (make-instance 'column-expression
768
:name (field-name (field (schema plan) i))))
772
(defun plan-aggregate-query (projection-expr select column-names-in-selection df aggregate-expr)
774
(proj-no-agg (remove-if 'aggregate-expression-p projection-expr)))
775
(when (slot-value select 'selection)
776
(let* ((cols-in-proj-no-agg (get-ref-columns proj-no-agg))
777
(missing (member-if-not
778
(lambda (x) (member x cols-in-proj-no-agg :test 'string-equal))
779
column-names-in-selection)))
781
(setq plan (df-filter
783
(make-sql-logical-expression
784
(slot-value select 'selection)
785
(setf plan (df-project plan proj-no-agg)))))
786
(setq plan (df-filter
788
(make-sql-logical-expression
789
(slot-value select 'selection)
794
(mapcar (lambda (x) (make-instance 'column-expression :name x))
796
(lambda (x y) (declare (ignore y)) x))))))))
798
(map 'vector (lambda (x) (make-sql-logical-expression x plan))
799
(slot-value select 'group-by))
802
(defun make-sql-df (select tables)
803
"Process the given SELECT statement with the provided hash-table of
804
string:data-frame. Returns a data-frame."
806
(gethash (slot-value select 'table-name)
809
(simple-sql-error "No table named ~A" (slot-value select 'table-name))))
811
(lambda (x) (make-sql-logical-expression x table))
812
(slot-value select 'projection)))
813
(cols-in-proj (get-ref-columns proj))
814
(agg-count (count-if 'aggregate-expression-p proj)))
815
(when (and (zerop agg-count) (not (sequence:emptyp (slot-value select 'group-by))))
816
(simple-sql-error "GROUP BY without aggregate expression is not supported"))
817
(let ((cols-in-sel (get-selection-ref-columns select table))
819
(if (zerop agg-count)
820
(plan-non-aggregate-query select plan proj cols-in-sel cols-in-proj)
825
(declare (fixnum n-group-cols group-count))
826
(loop for expr across proj
828
(aggregate-expression
830
(push (+ n-group-cols (length agg)) pro)
834
(push (make-instance 'alias-expression
835
:name (+ n-group-cols (length agg))
836
:expr (slot-value expr 'alias))
838
;; TODO 2024-08-07: does this need to be cast to aggregate-expression?
839
(push (expr expr) agg)))
841
(push group-count pro)
842
(incf group-count)))))
845
(plan-aggregate-query proj select cols-in-sel plan agg)
847
(if-let ((having (slot-value select 'having)))
848
(df-filter plan (make-sql-logical-expression having plan))
852
(defclass sql-optimizer (query-optimizer) ())
854
(defmethod optimize-query ((self sql-optimizer) (expr sql-expression))
855
(declare (ignore self))
859
(defclass sql-engine (query-engine) ()
861
:parser (make-instance 'sql-parser)))
864
(defmacro with-sql ((sym input &key (parse t) optimize execute) &body body)
865
`(with-sql-parser (,sym ,@(etypecase input
866
(stream `((read-sql-stream ,input)))
867
(string `((read-sql-string ,input)))))
869
(optimize `((setq ,sym (optimize-query (make-instance 'sql-optimizer) (parse ,sym)))))
870
(parse `((setq ,sym (parse ,sym)))))
872
`((execute (make-physical-plan ,sym))))