Coverage report: /home/ellis/comp/ext/cl-ppcre/convert.lisp
Kind | Covered | All | % |
expression | 341 | 712 | 47.9 |
branch | 40 | 100 | 40.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.57 2009/09/17 19:17:31 edi Exp $
3
;;; Here the parse tree is converted into its internal representation
4
;;; using REGEX objects. At the same time some optimizations are
7
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
9
;;; Redistribution and use in source and binary forms, with or without
10
;;; modification, are permitted provided that the following conditions
13
;;; * Redistributions of source code must retain the above copyright
14
;;; notice, this list of conditions and the following disclaimer.
16
;;; * Redistributions in binary form must reproduce the above
17
;;; copyright notice, this list of conditions and the following
18
;;; disclaimer in the documentation and/or other materials
19
;;; provided with the distribution.
21
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
22
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
25
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
(in-package :cl-ppcre)
35
;;; The flags that represent the "ism" modifiers are always kept
36
;;; together in a three-element list. We use the following macros to
37
;;; access individual elements.
39
(defmacro case-insensitive-mode-p (flags)
40
"Accessor macro to extract the first flag out of a three-element flag list."
43
(defmacro multi-line-mode-p (flags)
44
"Accessor macro to extract the second flag out of a three-element flag list."
47
(defmacro single-line-mode-p (flags)
48
"Accessor macro to extract the third flag out of a three-element flag list."
51
(defun set-flag (token)
52
"Reads a flag token and sets or unsets the corresponding entry in
53
the special FLAGS list."
54
(declare #.*standard-optimize-settings*)
55
(declare (special flags))
57
((:case-insensitive-p)
58
(setf (case-insensitive-mode-p flags) t))
60
(setf (case-insensitive-mode-p flags) nil))
62
(setf (multi-line-mode-p flags) t))
63
((:not-multi-line-mode-p)
64
(setf (multi-line-mode-p flags) nil))
65
((:single-line-mode-p)
66
(setf (single-line-mode-p flags) t))
67
((:not-single-line-mode-p)
68
(setf (single-line-mode-p flags) nil))
70
(signal-syntax-error "Unknown flag token ~A." token))))
72
(defgeneric resolve-property (property)
73
(:documentation "Resolves PROPERTY to a unary character test
74
function. PROPERTY can either be a function designator or it can be a
75
string which is resolved using *PROPERTY-RESOLVER*.")
76
(:method ((property-name string))
77
(funcall *property-resolver* property-name))
78
(:method ((function-name symbol))
80
(:method ((test-function function))
83
(defun convert-char-class-to-test-function (list invertedp case-insensitive-p)
84
"Combines all items in LIST into test function and returns a
85
logical-OR combination of these functions. Items can be single
86
characters, character ranges like \(:RANGE #\\A #\\E), or special
87
character classes like :DIGIT-CLASS. Does the right thing with
88
respect to case-\(in)sensitivity as specified by the special variable
90
(declare #.*standard-optimize-settings*)
91
(declare (special flags))
93
(loop for item in list
94
collect (cond ((characterp item)
95
;; rebind so closure captures the right one
96
(let ((this-char item))
98
(declare (character char this-char))
99
(char= char this-char))))
102
((:digit-class) #'digit-char-p)
103
((:non-digit-class) (complement* #'digit-char-p))
104
((:whitespace-char-class) #'whitespacep)
105
((:non-whitespace-char-class) (complement* #'whitespacep))
106
((:word-char-class) #'word-char-p)
107
((:non-word-char-class) (complement* #'word-char-p))
109
(signal-syntax-error "Unknown symbol ~A in character class." item))))
111
(eq (first item) :property))
112
(resolve-property (second item)))
114
(eq (first item) :inverted-property))
115
(complement* (resolve-property (second item))))
117
(eq (first item) :range))
118
(let ((from (second item))
120
(when (char> from to)
121
(signal-syntax-error "Invalid range from ~S to ~S in char-class." from to))
123
(declare (character char from to))
124
(char<= from char to))))
125
(t (signal-syntax-error "Unknown item ~A in char-class list." item))))))
126
(unless test-functions
127
(signal-syntax-error "Empty character class."))
128
(cond ((cdr test-functions)
129
(cond ((and invertedp case-insensitive-p)
131
(declare (character char))
132
(loop with both-case-p = (both-case-p char)
133
with char-down = (if both-case-p (char-downcase char) char)
134
with char-up = (if both-case-p (char-upcase char) nil)
135
for test-function in test-functions
136
never (or (funcall test-function char-down)
137
(and char-up (funcall test-function char-up))))))
140
(declare (character char))
141
(loop with both-case-p = (both-case-p char)
142
with char-down = (if both-case-p (char-downcase char) char)
143
with char-up = (if both-case-p (char-upcase char) nil)
144
for test-function in test-functions
145
thereis (or (funcall test-function char-down)
146
(and char-up (funcall test-function char-up))))))
149
(loop for test-function in test-functions
150
never (funcall test-function char))))
153
(loop for test-function in test-functions
154
thereis (funcall test-function char))))))
155
;; there's only one test-function
156
(t (let ((test-function (first test-functions)))
157
(cond ((and invertedp case-insensitive-p)
159
(declare (character char))
160
(not (or (funcall test-function (char-downcase char))
161
(and (both-case-p char)
162
(funcall test-function (char-upcase char)))))))
165
(declare (character char))
166
(or (funcall test-function (char-downcase char))
167
(and (both-case-p char)
168
(funcall test-function (char-upcase char))))))
169
(invertedp (complement* test-function))
170
(t test-function)))))))
172
(defun maybe-split-repetition (regex
179
"Splits a REPETITION object into a constant and a varying part if
180
applicable, i.e. something like
182
The arguments to this function correspond to the REPETITION slots of
184
(declare #.*standard-optimize-settings*)
185
(declare (fixnum minimum)
186
(type (or fixnum null) maximum))
187
;; note the usage of COPY-REGEX here; we can't use the same REGEX
188
;; object in both REPETITIONS because they will have different
191
(when (zerop maximum)
192
;; trivial case: don't repeat at all
193
(return-from maybe-split-repetition
194
(make-instance 'void)))
195
(when (= 1 minimum maximum)
196
;; another trivial case: "repeat" exactly once
197
(return-from maybe-split-repetition
199
;; first set up the constant part of the repetition
200
;; maybe that's all we need
201
(let ((constant-repetition (if (plusp minimum)
202
(make-instance 'repetition
203
:regex (copy-regex regex)
209
:contains-register-p reg-seen)
210
;; don't create garbage if minimum is 0
214
(return-from maybe-split-repetition
215
;; no varying part needed because min = max
216
constant-repetition))
217
;; now construct the varying part
218
(let ((varying-repetition
219
(make-instance 'repetition
223
:maximum (if maximum (- maximum minimum) nil)
226
:contains-register-p reg-seen)))
227
(cond ((zerop minimum)
228
;; min = 0, no constant part needed
231
;; min = 1, constant part needs no REPETITION wrapped around
233
:elements (list (copy-regex regex)
234
varying-repetition)))
238
:elements (list constant-repetition
239
varying-repetition)))))))
241
;; During the conversion of the parse tree we keep track of the start
242
;; of the parse tree in the special variable STARTS-WITH which'll
243
;; either hold a STR object or an EVERYTHING object. The latter is the
244
;; case if the regex starts with ".*" which implicitly anchors the
245
;; regex at the start (perhaps modulo #\Newline).
247
(defun maybe-accumulate (str)
248
"Accumulate STR into the special variable STARTS-WITH if
249
ACCUMULATE-START-P (also special) is true and STARTS-WITH is either
250
NIL or a STR object of the same case mode. Always returns NIL."
251
(declare #.*standard-optimize-settings*)
252
(declare (special accumulate-start-p starts-with))
253
(declare (ftype (function (t) fixnum) len))
254
(when accumulate-start-p
255
(etypecase starts-with
257
;; STARTS-WITH already holds a STR, so we check if we can
259
(cond ((eq (case-insensitive-p starts-with)
260
(case-insensitive-p str))
261
;; we modify STARTS-WITH in place
262
(setf (len starts-with)
263
(+ (len starts-with) (len str)))
264
;; note that we use SLOT-VALUE because the accessor
265
;; STR has a declared FTYPE which doesn't fit here
266
(adjust-array (slot-value starts-with 'str)
269
(setf (subseq (slot-value starts-with 'str)
270
(- (len starts-with) (len str)))
272
;; STR objects that are parts of STARTS-WITH
273
;; always have their SKIP slot set to true
274
;; because the SCAN function will take care of
275
;; them, i.e. the matcher can ignore them
277
(t (setq accumulate-start-p nil))))
279
;; STARTS-WITH is still empty, so we create a new STR object
283
:case-insensitive-p (case-insensitive-p str))
284
;; INITIALIZE-INSTANCE will coerce the STR to a simple
285
;; string, so we have to fill it afterwards
286
(slot-value starts-with 'str)
287
(make-array (len str)
288
:initial-contents (str str)
289
:element-type 'character
294
;; see remark about SKIP above
297
;; STARTS-WITH already holds an EVERYTHING object - we can't
299
(setq accumulate-start-p nil))))
302
(declaim (inline convert-aux))
303
(defun convert-aux (parse-tree)
304
"Converts the parse tree PARSE-TREE into a REGEX object and returns
307
- split and optimize repetitions,
308
- accumulate strings or EVERYTHING objects into the special variable
310
- keep track of all registers seen in the special variable REG-NUM,
311
- keep track of all named registers seen in the special variable REG-NAMES
312
- keep track of the highest backreference seen in the special
313
variable MAX-BACK-REF,
314
- maintain and adher to the currently applicable modifiers in the special
316
- maybe even wash your car..."
317
(declare #.*standard-optimize-settings*)
318
(if (consp parse-tree)
319
(convert-compound-parse-tree (first parse-tree) parse-tree)
320
(convert-simple-parse-tree parse-tree)))
322
(defgeneric convert-compound-parse-tree (token parse-tree &key)
323
(declare #.*standard-optimize-settings*)
324
(:documentation "Helper function for CONVERT-AUX which converts
325
parse trees which are conses and dispatches on TOKEN which is the
326
first element of the parse tree.")
327
(:method ((token t) (parse-tree t) &key)
328
(signal-syntax-error "Unknown token ~A in parse-tree." token)))
330
(defmethod convert-compound-parse-tree ((token (eql :sequence)) parse-tree &key)
331
"The case for parse trees like \(:SEQUENCE {<regex>}*)."
332
(declare #.*standard-optimize-settings*)
333
(cond ((cddr parse-tree)
334
;; this is essentially like
335
;; (MAPCAR 'CONVERT-AUX (REST PARSE-TREE))
336
;; but we don't cons a new list
337
(loop for parse-tree-rest on (rest parse-tree)
338
while parse-tree-rest
339
do (setf (car parse-tree-rest)
340
(convert-aux (car parse-tree-rest))))
341
(make-instance 'seq :elements (rest parse-tree)))
342
(t (convert-aux (second parse-tree)))))
344
(defmethod convert-compound-parse-tree ((token (eql :group)) parse-tree &key)
345
"The case for parse trees like \(:GROUP {<regex>}*).
347
This is a syntactical construct equivalent to :SEQUENCE intended to
348
keep the effect of modifiers local."
349
(declare #.*standard-optimize-settings*)
350
(declare (special flags))
351
;; make a local copy of FLAGS and shadow the global value while we
352
;; descend into the enclosed regexes
353
(let ((flags (copy-list flags)))
354
(declare (special flags))
355
(cond ((cddr parse-tree)
356
(loop for parse-tree-rest on (rest parse-tree)
357
while parse-tree-rest
358
do (setf (car parse-tree-rest)
359
(convert-aux (car parse-tree-rest))))
360
(make-instance 'seq :elements (rest parse-tree)))
361
(t (convert-aux (second parse-tree))))))
363
(defmethod convert-compound-parse-tree ((token (eql :alternation)) parse-tree &key)
364
"The case for \(:ALTERNATION {<regex>}*)."
365
(declare #.*standard-optimize-settings*)
366
(declare (special accumulate-start-p))
367
;; we must stop accumulating objects into STARTS-WITH once we reach
369
(setq accumulate-start-p nil)
370
(loop for parse-tree-rest on (rest parse-tree)
371
while parse-tree-rest
372
do (setf (car parse-tree-rest)
373
(convert-aux (car parse-tree-rest))))
374
(make-instance 'alternation :choices (rest parse-tree)))
376
(defmethod convert-compound-parse-tree ((token (eql :branch)) parse-tree &key)
377
"The case for \(:BRANCH <test> <regex>).
379
Here, <test> must be look-ahead, look-behind or number; if <regex> is
380
an alternation it must have one or two choices."
381
(declare #.*standard-optimize-settings*)
382
(declare (special accumulate-start-p))
383
(setq accumulate-start-p nil)
384
(let* ((test-candidate (second parse-tree))
385
(test (cond ((numberp test-candidate)
386
(when (zerop (the fixnum test-candidate))
387
(signal-syntax-error "Register 0 doesn't exist: ~S." parse-tree))
388
(1- (the fixnum test-candidate)))
389
(t (convert-aux test-candidate))))
390
(alternations (convert-aux (third parse-tree))))
391
(when (and (not (numberp test))
392
(not (typep test 'lookahead))
393
(not (typep test 'lookbehind)))
394
(signal-syntax-error "Branch test must be look-ahead, look-behind or number: ~S." parse-tree))
395
(typecase alternations
397
(case (length (choices alternations))
399
(signal-syntax-error "No choices in branch: ~S." parse-tree))
401
(make-instance 'branch
404
(choices alternations))))
406
(make-instance 'branch
409
(choices alternations))
411
(choices alternations))))
413
(signal-syntax-error "Too much choices in branch: ~S." parse-tree))))
415
(make-instance 'branch
417
:then-regex alternations)))))
419
(defmethod convert-compound-parse-tree ((token (eql :positive-lookahead)) parse-tree &key)
420
"The case for \(:POSITIVE-LOOKAHEAD <regex>)."
421
(declare #.*standard-optimize-settings*)
422
(declare (special flags accumulate-start-p))
423
;; keep the effect of modifiers local to the enclosed regex and stop
424
;; accumulating into STARTS-WITH
425
(setq accumulate-start-p nil)
426
(let ((flags (copy-list flags)))
427
(declare (special flags))
428
(make-instance 'lookahead
429
:regex (convert-aux (second parse-tree))
432
(defmethod convert-compound-parse-tree ((token (eql :negative-lookahead)) parse-tree &key)
433
"The case for \(:NEGATIVE-LOOKAHEAD <regex>)."
434
(declare #.*standard-optimize-settings*)
435
;; do the same as for positive look-aheads and just switch afterwards
436
(let ((regex (convert-compound-parse-tree :positive-lookahead parse-tree)))
437
(setf (slot-value regex 'positivep) nil)
440
(defmethod convert-compound-parse-tree ((token (eql :positive-lookbehind)) parse-tree &key)
441
"The case for \(:POSITIVE-LOOKBEHIND <regex>)."
442
(declare #.*standard-optimize-settings*)
443
(declare (special flags accumulate-start-p))
444
;; keep the effect of modifiers local to the enclosed regex and stop
445
;; accumulating into STARTS-WITH
446
(setq accumulate-start-p nil)
447
(let* ((flags (copy-list flags))
448
(regex (convert-aux (second parse-tree)))
449
(len (regex-length regex)))
450
(declare (special flags))
451
;; lookbehind assertions must be of fixed length
453
(signal-syntax-error "Variable length look-behind not implemented \(yet): ~S." parse-tree))
454
(make-instance 'lookbehind
459
(defmethod convert-compound-parse-tree ((token (eql :negative-lookbehind)) parse-tree &key)
460
"The case for \(:NEGATIVE-LOOKBEHIND <regex>)."
461
(declare #.*standard-optimize-settings*)
462
;; do the same as for positive look-behinds and just switch afterwards
463
(let ((regex (convert-compound-parse-tree :positive-lookbehind parse-tree)))
464
(setf (slot-value regex 'positivep) nil)
467
(defmethod convert-compound-parse-tree ((token (eql :greedy-repetition)) parse-tree &key (greedyp t))
468
"The case for \(:GREEDY-REPETITION|:NON-GREEDY-REPETITION <min> <max> <regex>).
470
This function is also used for the non-greedy case in which case it is
471
called with GREEDYP set to NIL as you would expect."
472
(declare #.*standard-optimize-settings*)
473
(declare (special accumulate-start-p starts-with))
474
;; remember the value of ACCUMULATE-START-P upon entering
475
(let ((local-accumulate-start-p accumulate-start-p))
476
(let ((minimum (second parse-tree))
477
(maximum (third parse-tree)))
478
(declare (fixnum minimum))
479
(declare (type (or null fixnum) maximum))
481
(= 1 minimum maximum))
482
;; set ACCUMULATE-START-P to NIL for the rest of
483
;; the conversion because we can't continue to
484
;; accumulate inside as well as after a proper
486
(setq accumulate-start-p nil))
488
(regex (convert-aux (fourth parse-tree)))
489
(min-len (regex-min-length regex))
490
(length (regex-length regex)))
491
;; note that this declaration already applies to
492
;; the call to CONVERT-AUX above
493
(declare (special reg-seen))
494
(when (and local-accumulate-start-p
498
;; if this repetition is (equivalent to) ".*"
499
;; and if we're at the start of the regex we
500
;; remember it for ADVANCE-FN (see the SCAN
502
(setq starts-with (everythingp regex)))
503
(if (or (not reg-seen)
507
(and maximum (= minimum maximum)))
508
;; the repetition doesn't enclose a register, or
509
;; it's not greedy, or we can't determine it's
510
;; (inner) length, or the length is zero, or the
511
;; number of repetitions is fixed; in all of
512
;; these cases we don't bother to optimize
513
(maybe-split-repetition regex
520
;; otherwise we make a transformation that looks
521
;; roughly like one of
522
;; <regex>* -> (?:<regex'>*<regex>)?
523
;; <regex>+ -> <regex'>*<regex>
524
;; where the trick is that as much as possible
525
;; registers from <regex> are removed in
527
(let* (reg-seen ; new instance for REMOVE-REGISTERS
528
(remove-registers-p t)
529
(inner-regex (remove-registers regex))
531
;; this is the "<regex'>" part
532
(maybe-split-repetition inner-regex
535
;; reduce minimum by 1
536
;; unless it's already 0
540
;; reduce maximum by 1
548
;; this is the "<regex'>*<regex>" part
550
:elements (list inner-repetition
552
;; note that this declaration already applies
553
;; to the call to REMOVE-REGISTERS above
554
(declare (special remove-registers-p reg-seen))
555
;; wrap INNER-SEQ with a greedy
556
;; {0,1}-repetition (i.e. "?") if necessary
559
(maybe-split-repetition inner-seq
567
(defmethod convert-compound-parse-tree ((token (eql :non-greedy-repetition)) parse-tree &key)
568
"The case for \(:NON-GREEDY-REPETITION <min> <max> <regex>)."
569
(declare #.*standard-optimize-settings*)
570
;; just dispatch to the method above with GREEDYP explicitly set to NIL
571
(convert-compound-parse-tree :greedy-repetition parse-tree :greedyp nil))
573
(defmethod convert-compound-parse-tree ((token (eql :register)) parse-tree &key name)
574
"The case for \(:REGISTER <regex>). Also used for named registers
575
when NAME is not NIL."
576
(declare #.*standard-optimize-settings*)
577
(declare (special flags reg-num reg-names))
578
;; keep the effect of modifiers local to the enclosed regex; also,
579
;; assign the current value of REG-NUM to the corresponding slot of
580
;; the REGISTER object and increase this counter afterwards; for
581
;; named register update REG-NAMES and set the corresponding name
582
;; slot of the REGISTER object too
583
(let ((flags (copy-list flags))
584
(stored-reg-num reg-num))
585
(declare (special flags reg-seen named-reg-seen))
587
(when name (setq named-reg-seen t))
588
(incf (the fixnum reg-num))
589
(push name reg-names)
590
(make-instance 'register
591
:regex (convert-aux (if name (third parse-tree) (second parse-tree)))
595
(defmethod convert-compound-parse-tree ((token (eql :named-register)) parse-tree &key)
596
"The case for \(:NAMED-REGISTER <regex>)."
597
(declare #.*standard-optimize-settings*)
598
;; call the method above and use the :NAME keyword argument
599
(convert-compound-parse-tree :register parse-tree :name (copy-seq (second parse-tree))))
601
(defmethod convert-compound-parse-tree ((token (eql :filter)) parse-tree &key)
602
"The case for \(:FILTER <function> &optional <length>)."
603
(declare #.*standard-optimize-settings*)
604
(declare (special accumulate-start-p))
605
;; stop accumulating into STARTS-WITH
606
(setq accumulate-start-p nil)
607
(make-instance 'filter
608
:fn (second parse-tree)
609
:len (third parse-tree)))
611
(defmethod convert-compound-parse-tree ((token (eql :standalone)) parse-tree &key)
612
"The case for \(:STANDALONE <regex>)."
613
(declare #.*standard-optimize-settings*)
614
(declare (special flags accumulate-start-p))
615
;; stop accumulating into STARTS-WITH
616
(setq accumulate-start-p nil)
617
;; keep the effect of modifiers local to the enclosed regex
618
(let ((flags (copy-list flags)))
619
(declare (special flags))
620
(make-instance 'standalone :regex (convert-aux (second parse-tree)))))
622
(defmethod convert-compound-parse-tree ((token (eql :back-reference)) parse-tree &key)
623
"The case for \(:BACK-REFERENCE <number>|<name>)."
624
(declare #.*standard-optimize-settings*)
625
(declare (special flags accumulate-start-p reg-num reg-names max-back-ref))
626
(let* ((backref-name (and (stringp (second parse-tree))
627
(second parse-tree)))
630
;; find which register corresponds to the given name
631
;; we have to deal with case where several registers share
632
;; the same name and collect their respective numbers
633
(loop for name in reg-names
635
when (string= name backref-name)
636
;; NOTE: REG-NAMES stores register names in reversed
637
;; order REG-NUM contains number of (any) registers
638
;; seen so far; 1- will be done later
639
collect (- reg-num reg-index))))
640
;; store the register number for the simple case
641
(backref-number (or (first referred-regs) (second parse-tree))))
642
(declare (type (or fixnum null) backref-number))
643
(when (or (not (typep backref-number 'fixnum))
644
(<= backref-number 0))
645
(signal-syntax-error "Illegal back-reference: ~S." parse-tree))
646
;; stop accumulating into STARTS-WITH and increase MAX-BACK-REF if
648
(setq accumulate-start-p nil
649
max-back-ref (max (the fixnum max-back-ref)
651
(flet ((make-back-ref (backref-number)
652
(make-instance 'back-reference
653
;; we start counting from 0 internally
654
:num (1- backref-number)
655
:case-insensitive-p (case-insensitive-mode-p flags)
656
;; backref-name is NIL or string, safe to copy
657
:name (copy-seq backref-name))))
660
;; several registers share the same name we will try to match
661
;; any of them, starting with the most recent first
662
;; alternation is used to accomplish matching
663
(make-instance 'alternation
665
for reg-index in referred-regs
666
collect (make-back-ref reg-index))))
667
;; simple case - backref corresponds to only one register
669
(make-back-ref backref-number))))))
671
(defmethod convert-compound-parse-tree ((token (eql :regex)) parse-tree &key)
672
"The case for \(:REGEX <string>)."
673
(declare #.*standard-optimize-settings*)
674
(convert-aux (parse-string (second parse-tree))))
676
(defmethod convert-compound-parse-tree ((token (eql :char-class)) parse-tree &key invertedp)
677
"The case for \(:CHAR-CLASS {<item>}*) where item is one of
680
- a character range: \(:RANGE <char1> <char2>), or
681
- a special char class symbol like :DIGIT-CHAR-CLASS.
683
Also used for inverted char classes when INVERTEDP is true."
684
(declare #.*standard-optimize-settings*)
685
(declare (special flags accumulate-start-p))
687
(create-optimized-test-function
688
(convert-char-class-to-test-function (rest parse-tree)
690
(case-insensitive-mode-p flags)))))
691
(setq accumulate-start-p nil)
692
(make-instance 'char-class :test-function test-function)))
694
(defmethod convert-compound-parse-tree ((token (eql :inverted-char-class)) parse-tree &key)
695
"The case for \(:INVERTED-CHAR-CLASS {<item>}*)."
696
(declare #.*standard-optimize-settings*)
697
;; just dispatch to the "real" method
698
(convert-compound-parse-tree :char-class parse-tree :invertedp t))
700
(defmethod convert-compound-parse-tree ((token (eql :property)) parse-tree &key)
701
"The case for \(:PROPERTY <name>) where <name> is a string."
702
(declare #.*standard-optimize-settings*)
703
(declare (special accumulate-start-p))
704
(setq accumulate-start-p nil)
705
(make-instance 'char-class :test-function (resolve-property (second parse-tree))))
707
(defmethod convert-compound-parse-tree ((token (eql :inverted-property)) parse-tree &key)
708
"The case for \(:INVERTED-PROPERTY <name>) where <name> is a string."
709
(declare #.*standard-optimize-settings*)
710
(declare (special accumulate-start-p))
711
(setq accumulate-start-p nil)
712
(make-instance 'char-class :test-function (complement* (resolve-property (second parse-tree)))))
714
(defmethod convert-compound-parse-tree ((token (eql :flags)) parse-tree &key)
715
"The case for \(:FLAGS {<flag>}*) where flag is a modifier symbol
716
like :CASE-INSENSITIVE-P."
717
(declare #.*standard-optimize-settings*)
718
;; set/unset the flags corresponding to the symbols
720
(mapc #'set-flag (rest parse-tree))
721
;; we're only interested in the side effect of
722
;; setting/unsetting the flags and turn this syntactical
723
;; construct into a VOID object which'll be optimized
724
;; away when creating the matcher
725
(make-instance 'void))
727
(defgeneric convert-simple-parse-tree (parse-tree)
728
(declare #.*standard-optimize-settings*)
729
(:documentation "Helper function for CONVERT-AUX which converts
730
parse trees which are atoms.")
731
(:method ((parse-tree (eql :void)))
732
(declare #.*standard-optimize-settings*)
733
(make-instance 'void))
734
(:method ((parse-tree (eql :word-boundary)))
735
(declare #.*standard-optimize-settings*)
736
(make-instance 'word-boundary :negatedp nil))
737
(:method ((parse-tree (eql :non-word-boundary)))
738
(declare #.*standard-optimize-settings*)
739
(make-instance 'word-boundary :negatedp t))
740
(:method ((parse-tree (eql :everything)))
741
(declare #.*standard-optimize-settings*)
742
(declare (special flags accumulate-start-p))
743
(setq accumulate-start-p nil)
744
(make-instance 'everything :single-line-p (single-line-mode-p flags)))
745
(:method ((parse-tree (eql :digit-class)))
746
(declare #.*standard-optimize-settings*)
747
(declare (special accumulate-start-p))
748
(setq accumulate-start-p nil)
749
(make-instance 'char-class :test-function #'digit-char-p))
750
(:method ((parse-tree (eql :word-char-class)))
751
(declare #.*standard-optimize-settings*)
752
(declare (special accumulate-start-p))
753
(setq accumulate-start-p nil)
754
(make-instance 'char-class :test-function #'word-char-p))
755
(:method ((parse-tree (eql :whitespace-char-class)))
756
(declare #.*standard-optimize-settings*)
757
(declare (special accumulate-start-p))
758
(setq accumulate-start-p nil)
759
(make-instance 'char-class :test-function #'whitespacep))
760
(:method ((parse-tree (eql :non-digit-class)))
761
(declare #.*standard-optimize-settings*)
762
(declare (special accumulate-start-p))
763
(setq accumulate-start-p nil)
764
(make-instance 'char-class :test-function (complement* #'digit-char-p)))
765
(:method ((parse-tree (eql :non-word-char-class)))
766
(declare #.*standard-optimize-settings*)
767
(declare (special accumulate-start-p))
768
(setq accumulate-start-p nil)
769
(make-instance 'char-class :test-function (complement* #'word-char-p)))
770
(:method ((parse-tree (eql :non-whitespace-char-class)))
771
(declare #.*standard-optimize-settings*)
772
(declare (special accumulate-start-p))
773
(setq accumulate-start-p nil)
774
(make-instance 'char-class :test-function (complement* #'whitespacep)))
775
(:method ((parse-tree (eql :start-anchor)))
777
(declare #.*standard-optimize-settings*)
778
(declare (special flags))
779
(make-instance 'anchor :startp t :multi-line-p (multi-line-mode-p flags)))
780
(:method ((parse-tree (eql :end-anchor)))
782
(declare #.*standard-optimize-settings*)
783
(declare (special flags))
784
(make-instance 'anchor :startp nil :multi-line-p (multi-line-mode-p flags)))
785
(:method ((parse-tree (eql :modeless-start-anchor)))
787
(declare #.*standard-optimize-settings*)
788
(make-instance 'anchor :startp t))
789
(:method ((parse-tree (eql :modeless-end-anchor)))
791
(declare #.*standard-optimize-settings*)
792
(make-instance 'anchor :startp nil))
793
(:method ((parse-tree (eql :modeless-end-anchor-no-newline)))
795
(declare #.*standard-optimize-settings*)
796
(make-instance 'anchor :startp nil :no-newline-p t))
797
(:method ((parse-tree (eql :case-insensitive-p)))
798
(declare #.*standard-optimize-settings*)
799
(set-flag parse-tree)
800
(make-instance 'void))
801
(:method ((parse-tree (eql :case-sensitive-p)))
802
(declare #.*standard-optimize-settings*)
803
(set-flag parse-tree)
804
(make-instance 'void))
805
(:method ((parse-tree (eql :multi-line-mode-p)))
806
(declare #.*standard-optimize-settings*)
807
(set-flag parse-tree)
808
(make-instance 'void))
809
(:method ((parse-tree (eql :not-multi-line-mode-p)))
810
(declare #.*standard-optimize-settings*)
811
(set-flag parse-tree)
812
(make-instance 'void))
813
(:method ((parse-tree (eql :single-line-mode-p)))
814
(declare #.*standard-optimize-settings*)
815
(set-flag parse-tree)
816
(make-instance 'void))
817
(:method ((parse-tree (eql :not-single-line-mode-p)))
818
(declare #.*standard-optimize-settings*)
819
(set-flag parse-tree)
820
(make-instance 'void)))
822
(defmethod convert-simple-parse-tree ((parse-tree string))
823
(declare #.*standard-optimize-settings*)
824
(declare (special flags))
825
;; turn strings into STR objects and try to accumulate into
827
(let ((str (make-instance 'str
829
:case-insensitive-p (case-insensitive-mode-p flags))))
830
(maybe-accumulate str)
833
(defmethod convert-simple-parse-tree ((parse-tree character))
834
(declare #.*standard-optimize-settings*)
835
;; dispatch to the method for strings
836
(convert-simple-parse-tree (string parse-tree)))
838
(defmethod convert-simple-parse-tree (parse-tree)
839
"The default method - check if there's a translation."
840
(declare #.*standard-optimize-settings*)
841
(let ((translation (and (symbolp parse-tree) (parse-tree-synonym parse-tree))))
843
(convert-aux (copy-tree translation))
844
(signal-syntax-error "Unknown token ~A in parse tree." parse-tree))))
846
(defun convert (parse-tree)
847
"Converts the parse tree PARSE-TREE into an equivalent REGEX object
848
and returns three values: the REGEX object, the number of registers
849
seen and an object the regex starts with which is either a STR object
850
or an EVERYTHING object \(if the regex starts with something like
852
(declare #.*standard-optimize-settings*)
853
;; this function basically just initializes the special variables
854
;; and then calls CONVERT-AUX to do all the work
855
(let* ((flags (list nil nil nil))
859
(accumulate-start-p t)
862
(converted-parse-tree (convert-aux parse-tree)))
863
(declare (special flags reg-num reg-names named-reg-seen
864
accumulate-start-p starts-with max-back-ref))
865
;; make sure we don't reference registers which aren't there
866
(when (> (the fixnum max-back-ref)
867
(the fixnum reg-num))
868
(signal-syntax-error "Backreference to register ~A which has not been defined." max-back-ref))
869
(when (typep starts-with 'str)
870
(setf (slot-value starts-with 'str)
871
(coerce (slot-value starts-with 'str)
873
(values converted-parse-tree reg-num starts-with
874
;; we can't simply use *ALLOW-NAMED-REGISTERS*
875
;; since parse-tree syntax ignores it
877
(nreverse reg-names)))))