Coverage report: /home/ellis/comp/ext/cl-ppcre/repetition-closures.lisp
Kind | Covered | All | % |
expression | 145 | 685 | 21.2 |
branch | 28 | 142 | 19.7 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.34 2009/09/17 19:17:31 edi Exp $
3
;;; This is actually a part of closures.lisp which we put into a
4
;;; separate file because it is rather complex. We only deal with
5
;;; REPETITIONs here. Note that this part of the code contains some
6
;;; rather crazy micro-optimizations which were introduced to be as
7
;;; competitive with Perl as possible in tight loops.
9
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
11
;;; Redistribution and use in source and binary forms, with or without
12
;;; modification, are permitted provided that the following conditions
15
;;; * Redistributions of source code must retain the above copyright
16
;;; notice, this list of conditions and the following disclaimer.
18
;;; * Redistributions in binary form must reproduce the above
19
;;; copyright notice, this list of conditions and the following
20
;;; disclaimer in the documentation and/or other materials
21
;;; provided with the distribution.
23
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
24
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
27
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
(in-package :cl-ppcre)
37
(defmacro incf-after (place &optional (delta 1) &environment env)
38
"Utility macro inspired by C's \"place++\", i.e. first return the
39
value of PLACE and afterwards increment it by DELTA."
40
(with-unique-names (%temp)
41
(multiple-value-bind (vars vals store-vars writer-form reader-form)
42
(get-setf-expansion place env)
43
`(let* (,@(mapcar #'list vars vals)
45
(,(car store-vars) (+ ,%temp ,delta)))
49
;; code for greedy repetitions with minimum zero
51
(defmacro greedy-constant-length-closure (check-curr-pos)
52
"This is the template for simple greedy repetitions (where simple
53
means that the minimum number of repetitions is zero, that the inner
54
regex to be checked is of fixed length LEN, and that it doesn't
55
contain registers, i.e. there's no need for backtracking).
56
CHECK-CURR-POS is a form which checks whether the inner regex of the
57
repetition matches at CURR-POS."
60
(declare (fixnum start-pos maximum))
61
;; because we know LEN we know in advance where to stop at the
62
;; latest; we also take into consideration MIN-REST, i.e. the
63
;; minimal length of the part behind the repetition
64
(let ((target-end-pos (min (1+ (- *end-pos* len min-rest))
65
;; don't go further than MAXIMUM
66
;; repetitions, of course
68
(the fixnum (* len maximum)))))
70
(declare (fixnum target-end-pos curr-pos))
71
(block greedy-constant-length-matcher
72
;; we use an ugly TAGBODY construct because this might be a
73
;; tight loop and this version is a bit faster than our LOOP
74
;; version (at least in CMUCL)
77
;; first go forward as far as possible, i.e. while
78
;; the inner regex matches
79
(when (>= curr-pos target-end-pos)
85
;; now go back LEN steps each until we're able to match
86
;; the rest of the regex
87
(when (< curr-pos start-pos)
88
(return-from greedy-constant-length-matcher nil))
89
(let ((result (funcall next-fn curr-pos)))
91
(return-from greedy-constant-length-matcher result)))
93
(go backward-loop)))))
94
;; basically the same code; it's just a bit easier because we're
95
;; not bounded by MAXIMUM
97
(declare (fixnum start-pos))
98
(let ((target-end-pos (1+ (- *end-pos* len min-rest)))
100
(declare (fixnum target-end-pos curr-pos))
101
(block greedy-constant-length-matcher
104
(when (>= curr-pos target-end-pos)
106
(when ,check-curr-pos
110
(when (< curr-pos start-pos)
111
(return-from greedy-constant-length-matcher nil))
112
(let ((result (funcall next-fn curr-pos)))
114
(return-from greedy-constant-length-matcher result)))
116
(go backward-loop)))))))
118
(defun create-greedy-everything-matcher (maximum min-rest next-fn)
119
"Creates a closure which just matches as far ahead as possible,
120
i.e. a closure for a dot in single-line mode."
121
(declare #.*standard-optimize-settings*)
122
(declare (fixnum min-rest) (function next-fn))
125
(declare (fixnum start-pos maximum))
126
;; because we know LEN we know in advance where to stop at the
127
;; latest; we also take into consideration MIN-REST, i.e. the
128
;; minimal length of the part behind the repetition
129
(let ((target-end-pos (min (+ start-pos maximum)
130
(- *end-pos* min-rest))))
131
(declare (fixnum target-end-pos))
132
;; start from the highest possible position and go backward
133
;; until we're able to match the rest of the regex
134
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
135
thereis (funcall next-fn curr-pos))))
136
;; basically the same code; it's just a bit easier because we're
137
;; not bounded by MAXIMUM
139
(declare (fixnum start-pos))
140
(let ((target-end-pos (- *end-pos* min-rest)))
141
(declare (fixnum target-end-pos))
142
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
143
thereis (funcall next-fn curr-pos))))))
145
(defgeneric create-greedy-constant-length-matcher (repetition next-fn)
146
(declare #.*standard-optimize-settings*)
147
(:documentation "Creates a closure which tries to match REPETITION.
148
It is assumed that REPETITION is greedy and the minimal number of
149
repetitions is zero. It is furthermore assumed that the inner regex
150
of REPETITION is of fixed length and doesn't contain registers."))
152
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
154
(declare #.*standard-optimize-settings*)
155
(let ((len (len repetition))
156
(maximum (maximum repetition))
157
(regex (regex repetition))
158
(min-rest (min-rest repetition)))
159
(declare (fixnum len min-rest)
162
;; inner regex has zero-length, so we can discard it
166
;; now first try to optimize for a couple of common cases
169
(let ((str (str regex)))
171
;; a single character
172
(let ((chr (schar str 0)))
173
(if (case-insensitive-p regex)
174
(greedy-constant-length-closure
175
(char-equal chr (schar *string* curr-pos)))
176
(greedy-constant-length-closure
177
(char= chr (schar *string* curr-pos)))))
179
(if (case-insensitive-p regex)
180
(greedy-constant-length-closure
181
(*string*-equal str curr-pos (+ curr-pos len) 0 len))
182
(greedy-constant-length-closure
183
(*string*= str curr-pos (+ curr-pos len) 0 len))))))
186
(insert-char-class-tester (regex (schar *string* curr-pos))
187
(greedy-constant-length-closure
190
;; an EVERYTHING object, i.e. a dot
191
(if (single-line-p regex)
192
(create-greedy-everything-matcher maximum min-rest next-fn)
193
(greedy-constant-length-closure
194
(char/= #\Newline (schar *string* curr-pos)))))
196
;; the general case - we build an inner matcher which
197
;; just checks for immediate success, i.e. NEXT-FN is
199
(let ((inner-matcher (create-matcher-aux regex #'identity)))
200
(declare (function inner-matcher))
201
(greedy-constant-length-closure
202
(funcall inner-matcher curr-pos)))))))))
204
(defgeneric create-greedy-no-zero-matcher (repetition next-fn)
205
(declare #.*standard-optimize-settings*)
206
(:documentation "Creates a closure which tries to match REPETITION.
207
It is assumed that REPETITION is greedy and the minimal number of
208
repetitions is zero. It is furthermore assumed that the inner regex
209
of REPETITION can never match a zero-length string \(or instead the
210
maximal number of repetitions is 1)."))
212
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
213
(declare #.*standard-optimize-settings*)
214
(let ((maximum (maximum repetition))
215
;; REPEAT-MATCHER is part of the closure's environment but it
216
;; can only be defined after GREEDY-AUX is defined
218
(declare (function next-fn))
221
;; this is essentially like the next case but with a known
222
;; MAXIMUM of 1 we can get away without a counter; note that
223
;; we always arrive here if CONVERT optimizes <regex>* to
224
;; (?:<regex'>*<regex>)?
226
(create-matcher-aux (regex repetition) next-fn))
228
(declare (function repeat-matcher))
229
(or (funcall repeat-matcher start-pos)
230
(funcall next-fn start-pos))))
232
;; we make a reservation for our slot in *REPEAT-COUNTERS*
233
;; because we need to keep track whether we've reached MAXIMUM
235
(let ((rep-num (incf-after *rep-num*)))
236
(flet ((greedy-aux (start-pos)
237
(declare (fixnum start-pos maximum rep-num)
238
(function repeat-matcher))
239
;; the actual matcher which first tries to match the
240
;; inner regex of REPETITION (if we haven't done so
241
;; too often) and on failure calls NEXT-FN
242
(or (and (< (aref *repeat-counters* rep-num) maximum)
243
(incf (aref *repeat-counters* rep-num))
244
;; note that REPEAT-MATCHER will call
245
;; GREEDY-AUX again recursively
247
(funcall repeat-matcher start-pos)
248
(decf (aref *repeat-counters* rep-num))))
249
(funcall next-fn start-pos))))
250
;; create a closure to match the inner regex and to
251
;; implement backtracking via GREEDY-AUX
253
(create-matcher-aux (regex repetition) #'greedy-aux))
254
;; the closure we return is just a thin wrapper around
255
;; GREEDY-AUX to initialize the repetition counter
257
(declare (fixnum start-pos))
258
(setf (aref *repeat-counters* rep-num) 0)
259
(greedy-aux start-pos)))))
261
;; easier code because we're not bounded by MAXIMUM, but
262
;; basically the same
263
(flet ((greedy-aux (start-pos)
264
(declare (fixnum start-pos)
265
(function repeat-matcher))
266
(or (funcall repeat-matcher start-pos)
267
(funcall next-fn start-pos))))
269
(create-matcher-aux (regex repetition) #'greedy-aux))
272
(defgeneric create-greedy-matcher (repetition next-fn)
273
(declare #.*standard-optimize-settings*)
274
(:documentation "Creates a closure which tries to match REPETITION.
275
It is assumed that REPETITION is greedy and the minimal number of
276
repetitions is zero."))
278
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
279
(declare #.*standard-optimize-settings*)
280
(let ((maximum (maximum repetition))
281
;; we make a reservation for our slot in *LAST-POS-STORES* because
282
;; we have to watch out for endless loops as the inner regex might
283
;; match zero-length strings
284
(zero-length-num (incf-after *zero-length-num*))
285
;; REPEAT-MATCHER is part of the closure's environment but it
286
;; can only be defined after GREEDY-AUX is defined
288
(declare (fixnum zero-length-num)
292
;; we make a reservation for our slot in *REPEAT-COUNTERS*
293
;; because we need to keep track whether we've reached MAXIMUM
295
(let ((rep-num (incf-after *rep-num*)))
296
(flet ((greedy-aux (start-pos)
297
;; the actual matcher which first tries to match the
298
;; inner regex of REPETITION (if we haven't done so
299
;; too often) and on failure calls NEXT-FN
300
(declare (fixnum start-pos maximum rep-num)
301
(function repeat-matcher))
303
(svref *last-pos-stores* zero-length-num)))
304
(when (and old-last-pos
305
(= (the fixnum old-last-pos) start-pos))
306
;; stop immediately if we've been here before,
307
;; i.e. if the last attempt matched a zero-length
309
(return-from greedy-aux (funcall next-fn start-pos)))
310
;; otherwise remember this position for the next
312
(setf (svref *last-pos-stores* zero-length-num) start-pos)
313
(or (and (< (aref *repeat-counters* rep-num) maximum)
314
(incf (aref *repeat-counters* rep-num))
315
;; note that REPEAT-MATCHER will call
316
;; GREEDY-AUX again recursively
318
(funcall repeat-matcher start-pos)
319
(decf (aref *repeat-counters* rep-num))
320
(setf (svref *last-pos-stores* zero-length-num)
322
(funcall next-fn start-pos)))))
323
;; create a closure to match the inner regex and to
324
;; implement backtracking via GREEDY-AUX
326
(create-matcher-aux (regex repetition) #'greedy-aux))
327
;; the closure we return is just a thin wrapper around
328
;; GREEDY-AUX to initialize the repetition counter and our
329
;; slot in *LAST-POS-STORES*
331
(declare (fixnum start-pos))
332
(setf (aref *repeat-counters* rep-num) 0
333
(svref *last-pos-stores* zero-length-num) nil)
334
(greedy-aux start-pos)))))
336
;; easier code because we're not bounded by MAXIMUM, but
337
;; basically the same
338
(flet ((greedy-aux (start-pos)
339
(declare (fixnum start-pos)
340
(function repeat-matcher))
342
(svref *last-pos-stores* zero-length-num)))
343
(when (and old-last-pos
344
(= (the fixnum old-last-pos) start-pos))
345
(return-from greedy-aux (funcall next-fn start-pos)))
346
(setf (svref *last-pos-stores* zero-length-num) start-pos)
348
(funcall repeat-matcher start-pos)
349
(setf (svref *last-pos-stores* zero-length-num) old-last-pos))
350
(funcall next-fn start-pos)))))
352
(create-matcher-aux (regex repetition) #'greedy-aux))
354
(declare (fixnum start-pos))
355
(setf (svref *last-pos-stores* zero-length-num) nil)
356
(greedy-aux start-pos)))))))
358
;; code for non-greedy repetitions with minimum zero
360
(defmacro non-greedy-constant-length-closure (check-curr-pos)
361
"This is the template for simple non-greedy repetitions \(where
362
simple means that the minimum number of repetitions is zero, that the
363
inner regex to be checked is of fixed length LEN, and that it doesn't
364
contain registers, i.e. there's no need for backtracking).
365
CHECK-CURR-POS is a form which checks whether the inner regex of the
366
repetition matches at CURR-POS."
369
(declare (fixnum start-pos maximum))
370
;; because we know LEN we know in advance where to stop at the
371
;; latest; we also take into consideration MIN-REST, i.e. the
372
;; minimal length of the part behind the repetition
373
(let ((target-end-pos (min (1+ (- *end-pos* len min-rest))
375
(the fixnum (* len maximum))))))
376
;; move forward by LEN and always try NEXT-FN first, then
378
(loop for curr-pos of-type fixnum from start-pos
381
thereis (funcall next-fn curr-pos)
382
while ,check-curr-pos
383
finally (return (funcall next-fn curr-pos)))))
384
;; basically the same code; it's just a bit easier because we're
385
;; not bounded by MAXIMUM
387
(declare (fixnum start-pos))
388
(let ((target-end-pos (1+ (- *end-pos* len min-rest))))
389
(loop for curr-pos of-type fixnum from start-pos
392
thereis (funcall next-fn curr-pos)
393
while ,check-curr-pos
394
finally (return (funcall next-fn curr-pos)))))))
396
(defgeneric create-non-greedy-constant-length-matcher (repetition next-fn)
397
(declare #.*standard-optimize-settings*)
398
(:documentation "Creates a closure which tries to match REPETITION.
399
It is assumed that REPETITION is non-greedy and the minimal number of
400
repetitions is zero. It is furthermore assumed that the inner regex
401
of REPETITION is of fixed length and doesn't contain registers."))
403
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
404
(declare #.*standard-optimize-settings*)
405
(let ((len (len repetition))
406
(maximum (maximum repetition))
407
(regex (regex repetition))
408
(min-rest (min-rest repetition)))
409
(declare (fixnum len min-rest)
412
;; inner regex has zero-length, so we can discard it
416
;; now first try to optimize for a couple of common cases
419
(let ((str (str regex)))
421
;; a single character
422
(let ((chr (schar str 0)))
423
(if (case-insensitive-p regex)
424
(non-greedy-constant-length-closure
425
(char-equal chr (schar *string* curr-pos)))
426
(non-greedy-constant-length-closure
427
(char= chr (schar *string* curr-pos)))))
429
(if (case-insensitive-p regex)
430
(non-greedy-constant-length-closure
431
(*string*-equal str curr-pos (+ curr-pos len) 0 len))
432
(non-greedy-constant-length-closure
433
(*string*= str curr-pos (+ curr-pos len) 0 len))))))
436
(insert-char-class-tester (regex (schar *string* curr-pos))
437
(non-greedy-constant-length-closure
440
(if (single-line-p regex)
441
;; a dot which really can match everything; we rely
442
;; on the compiler to optimize this away
443
(non-greedy-constant-length-closure
445
;; a dot which has to watch out for #\Newline
446
(non-greedy-constant-length-closure
447
(char/= #\Newline (schar *string* curr-pos)))))
449
;; the general case - we build an inner matcher which
450
;; just checks for immediate success, i.e. NEXT-FN is
452
(let ((inner-matcher (create-matcher-aux regex #'identity)))
453
(declare (function inner-matcher))
454
(non-greedy-constant-length-closure
455
(funcall inner-matcher curr-pos)))))))))
457
(defgeneric create-non-greedy-no-zero-matcher (repetition next-fn)
458
(declare #.*standard-optimize-settings*)
459
(:documentation "Creates a closure which tries to match REPETITION.
460
It is assumed that REPETITION is non-greedy and the minimal number of
461
repetitions is zero. It is furthermore assumed that the inner regex
462
of REPETITION can never match a zero-length string \(or instead the
463
maximal number of repetitions is 1)."))
465
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
466
(declare #.*standard-optimize-settings*)
467
(let ((maximum (maximum repetition))
468
;; REPEAT-MATCHER is part of the closure's environment but it
469
;; can only be defined after NON-GREEDY-AUX is defined
471
(declare (function next-fn))
474
;; this is essentially like the next case but with a known
475
;; MAXIMUM of 1 we can get away without a counter
477
(create-matcher-aux (regex repetition) next-fn))
479
(declare (function repeat-matcher))
480
(or (funcall next-fn start-pos)
481
(funcall repeat-matcher start-pos))))
483
;; we make a reservation for our slot in *REPEAT-COUNTERS*
484
;; because we need to keep track whether we've reached MAXIMUM
486
(let ((rep-num (incf-after *rep-num*)))
487
(flet ((non-greedy-aux (start-pos)
488
;; the actual matcher which first calls NEXT-FN and
489
;; on failure tries to match the inner regex of
490
;; REPETITION (if we haven't done so too often)
491
(declare (fixnum start-pos maximum rep-num)
492
(function repeat-matcher))
493
(or (funcall next-fn start-pos)
494
(and (< (aref *repeat-counters* rep-num) maximum)
495
(incf (aref *repeat-counters* rep-num))
496
;; note that REPEAT-MATCHER will call
497
;; NON-GREEDY-AUX again recursively
499
(funcall repeat-matcher start-pos)
500
(decf (aref *repeat-counters* rep-num)))))))
501
;; create a closure to match the inner regex and to
502
;; implement backtracking via NON-GREEDY-AUX
504
(create-matcher-aux (regex repetition) #'non-greedy-aux))
505
;; the closure we return is just a thin wrapper around
506
;; NON-GREEDY-AUX to initialize the repetition counter
508
(declare (fixnum start-pos))
509
(setf (aref *repeat-counters* rep-num) 0)
510
(non-greedy-aux start-pos)))))
512
;; easier code because we're not bounded by MAXIMUM, but
513
;; basically the same
514
(flet ((non-greedy-aux (start-pos)
515
(declare (fixnum start-pos)
516
(function repeat-matcher))
517
(or (funcall next-fn start-pos)
518
(funcall repeat-matcher start-pos))))
520
(create-matcher-aux (regex repetition) #'non-greedy-aux))
521
#'non-greedy-aux)))))
523
(defgeneric create-non-greedy-matcher (repetition next-fn)
524
(declare #.*standard-optimize-settings*)
525
(:documentation "Creates a closure which tries to match REPETITION.
526
It is assumed that REPETITION is non-greedy and the minimal number of
527
repetitions is zero."))
529
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
530
(declare #.*standard-optimize-settings*)
531
;; we make a reservation for our slot in *LAST-POS-STORES* because
532
;; we have to watch out for endless loops as the inner regex might
533
;; match zero-length strings
534
(let ((zero-length-num (incf-after *zero-length-num*))
535
(maximum (maximum repetition))
536
;; REPEAT-MATCHER is part of the closure's environment but it
537
;; can only be defined after NON-GREEDY-AUX is defined
539
(declare (fixnum zero-length-num)
543
;; we make a reservation for our slot in *REPEAT-COUNTERS*
544
;; because we need to keep track whether we've reached MAXIMUM
546
(let ((rep-num (incf-after *rep-num*)))
547
(flet ((non-greedy-aux (start-pos)
548
;; the actual matcher which first calls NEXT-FN and
549
;; on failure tries to match the inner regex of
550
;; REPETITION (if we haven't done so too often)
551
(declare (fixnum start-pos maximum rep-num)
552
(function repeat-matcher))
554
(svref *last-pos-stores* zero-length-num)))
555
(when (and old-last-pos
556
(= (the fixnum old-last-pos) start-pos))
557
;; stop immediately if we've been here before,
558
;; i.e. if the last attempt matched a zero-length
560
(return-from non-greedy-aux (funcall next-fn start-pos)))
561
;; otherwise remember this position for the next
563
(setf (svref *last-pos-stores* zero-length-num) start-pos)
564
(or (funcall next-fn start-pos)
565
(and (< (aref *repeat-counters* rep-num) maximum)
566
(incf (aref *repeat-counters* rep-num))
567
;; note that REPEAT-MATCHER will call
568
;; NON-GREEDY-AUX again recursively
570
(funcall repeat-matcher start-pos)
571
(decf (aref *repeat-counters* rep-num))
572
(setf (svref *last-pos-stores* zero-length-num)
574
;; create a closure to match the inner regex and to
575
;; implement backtracking via NON-GREEDY-AUX
577
(create-matcher-aux (regex repetition) #'non-greedy-aux))
578
;; the closure we return is just a thin wrapper around
579
;; NON-GREEDY-AUX to initialize the repetition counter and our
580
;; slot in *LAST-POS-STORES*
582
(declare (fixnum start-pos))
583
(setf (aref *repeat-counters* rep-num) 0
584
(svref *last-pos-stores* zero-length-num) nil)
585
(non-greedy-aux start-pos)))))
587
;; easier code because we're not bounded by MAXIMUM, but
588
;; basically the same
589
(flet ((non-greedy-aux (start-pos)
590
(declare (fixnum start-pos)
591
(function repeat-matcher))
593
(svref *last-pos-stores* zero-length-num)))
594
(when (and old-last-pos
595
(= (the fixnum old-last-pos) start-pos))
596
(return-from non-greedy-aux (funcall next-fn start-pos)))
597
(setf (svref *last-pos-stores* zero-length-num) start-pos)
598
(or (funcall next-fn start-pos)
600
(funcall repeat-matcher start-pos)
601
(setf (svref *last-pos-stores* zero-length-num)
604
(create-matcher-aux (regex repetition) #'non-greedy-aux))
606
(declare (fixnum start-pos))
607
(setf (svref *last-pos-stores* zero-length-num) nil)
608
(non-greedy-aux start-pos)))))))
610
;; code for constant repetitions, i.e. those with a fixed number of repetitions
612
(defmacro constant-repetition-constant-length-closure (check-curr-pos)
613
"This is the template for simple constant repetitions (where simple
614
means that the inner regex to be checked is of fixed length LEN, and
615
that it doesn't contain registers, i.e. there's no need for
616
backtracking) and where constant means that MINIMUM is equal to
617
MAXIMUM. CHECK-CURR-POS is a form which checks whether the inner
618
regex of the repetition matches at CURR-POS."
620
(declare (fixnum start-pos))
621
(let ((target-end-pos (+ start-pos
622
(the fixnum (* len repetitions)))))
623
(declare (fixnum target-end-pos))
624
;; first check if we won't go beyond the end of the string
625
(and (>= *end-pos* target-end-pos)
626
;; then loop through all repetitions step by step
627
(loop for curr-pos of-type fixnum from start-pos
630
always ,check-curr-pos)
631
;; finally call NEXT-FN if we made it that far
632
(funcall next-fn target-end-pos)))))
634
(defgeneric create-constant-repetition-constant-length-matcher
636
(declare #.*standard-optimize-settings*)
637
(:documentation "Creates a closure which tries to match REPETITION.
638
It is assumed that REPETITION has a constant number of repetitions.
639
It is furthermore assumed that the inner regex of REPETITION is of
640
fixed length and doesn't contain registers."))
642
(defmethod create-constant-repetition-constant-length-matcher
643
((repetition repetition) next-fn)
644
(declare #.*standard-optimize-settings*)
645
(let ((len (len repetition))
646
(repetitions (minimum repetition))
647
(regex (regex repetition)))
648
(declare (fixnum len repetitions)
651
;; if the length is zero it suffices to try once
652
(create-matcher-aux regex next-fn)
653
;; otherwise try to optimize for a couple of common cases
656
(let ((str (str regex)))
658
;; a single character
659
(let ((chr (schar str 0)))
660
(if (case-insensitive-p regex)
661
(constant-repetition-constant-length-closure
662
(and (char-equal chr (schar *string* curr-pos))
664
(constant-repetition-constant-length-closure
665
(and (char= chr (schar *string* curr-pos))
668
(if (case-insensitive-p regex)
669
(constant-repetition-constant-length-closure
670
(let ((next-pos (+ curr-pos len)))
671
(declare (fixnum next-pos))
672
(and (*string*-equal str curr-pos next-pos 0 len)
674
(constant-repetition-constant-length-closure
675
(let ((next-pos (+ curr-pos len)))
676
(declare (fixnum next-pos))
677
(and (*string*= str curr-pos next-pos 0 len)
681
(insert-char-class-tester (regex (schar *string* curr-pos))
682
(constant-repetition-constant-length-closure
683
(and (char-class-test)
686
(if (single-line-p regex)
687
;; a dot which really matches everything - we just have to
688
;; advance the index into *STRING* accordingly and check
689
;; if we didn't go past the end
691
(declare (fixnum start-pos))
692
(let ((next-pos (+ start-pos repetitions)))
693
(declare (fixnum next-pos))
694
(and (<= next-pos *end-pos*)
695
(funcall next-fn next-pos))))
696
;; a dot which is not in single-line-mode - make sure we
697
;; don't match #\Newline
698
(constant-repetition-constant-length-closure
699
(and (char/= #\Newline (schar *string* curr-pos))
702
;; the general case - we build an inner matcher which just
703
;; checks for immediate success, i.e. NEXT-FN is #'IDENTITY
704
(let ((inner-matcher (create-matcher-aux regex #'identity)))
705
(declare (function inner-matcher))
706
(constant-repetition-constant-length-closure
707
(funcall inner-matcher curr-pos))))))))
709
(defgeneric create-constant-repetition-matcher (repetition next-fn)
710
(declare #.*standard-optimize-settings*)
711
(:documentation "Creates a closure which tries to match REPETITION.
712
It is assumed that REPETITION has a constant number of repetitions."))
714
(defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
715
(declare #.*standard-optimize-settings*)
716
(let ((repetitions (minimum repetition))
717
;; we make a reservation for our slot in *REPEAT-COUNTERS*
718
;; because we need to keep track of the number of repetitions
719
(rep-num (incf-after *rep-num*))
720
;; REPEAT-MATCHER is part of the closure's environment but it
721
;; can only be defined after NON-GREEDY-AUX is defined
723
(declare (fixnum repetitions rep-num)
725
(if (zerop (min-len repetition))
726
;; we make a reservation for our slot in *LAST-POS-STORES*
727
;; because we have to watch out for needless loops as the inner
728
;; regex might match zero-length strings
729
(let ((zero-length-num (incf-after *zero-length-num*)))
730
(declare (fixnum zero-length-num))
731
(flet ((constant-aux (start-pos)
732
;; the actual matcher which first calls NEXT-FN and
733
;; on failure tries to match the inner regex of
734
;; REPETITION (if we haven't done so too often)
735
(declare (fixnum start-pos)
736
(function repeat-matcher))
738
(svref *last-pos-stores* zero-length-num)))
739
(when (and old-last-pos
740
(= (the fixnum old-last-pos) start-pos))
741
;; if we've been here before we matched a
742
;; zero-length string the last time, so we can
743
;; just carry on because we will definitely be
744
;; able to do this again often enough
745
(return-from constant-aux (funcall next-fn start-pos)))
746
;; otherwise remember this position for the next
748
(setf (svref *last-pos-stores* zero-length-num) start-pos)
749
(cond ((< (aref *repeat-counters* rep-num) repetitions)
750
;; not enough repetitions yet, try it again
751
(incf (aref *repeat-counters* rep-num))
752
;; note that REPEAT-MATCHER will call
753
;; CONSTANT-AUX again recursively
755
(funcall repeat-matcher start-pos)
756
(decf (aref *repeat-counters* rep-num))
757
(setf (svref *last-pos-stores* zero-length-num)
760
;; we're done - call NEXT-FN
761
(funcall next-fn start-pos))))))
762
;; create a closure to match the inner regex and to
763
;; implement backtracking via CONSTANT-AUX
765
(create-matcher-aux (regex repetition) #'constant-aux))
766
;; the closure we return is just a thin wrapper around
767
;; CONSTANT-AUX to initialize the repetition counter
769
(declare (fixnum start-pos))
770
(setf (aref *repeat-counters* rep-num) 0
771
(aref *last-pos-stores* zero-length-num) nil)
772
(constant-aux start-pos))))
773
;; easier code because we don't have to care about zero-length
774
;; matches but basically the same
775
(flet ((constant-aux (start-pos)
776
(declare (fixnum start-pos)
777
(function repeat-matcher))
778
(cond ((< (aref *repeat-counters* rep-num) repetitions)
779
(incf (aref *repeat-counters* rep-num))
781
(funcall repeat-matcher start-pos)
782
(decf (aref *repeat-counters* rep-num))))
783
(t (funcall next-fn start-pos)))))
785
(create-matcher-aux (regex repetition) #'constant-aux))
787
(declare (fixnum start-pos))
788
(setf (aref *repeat-counters* rep-num) 0)
789
(constant-aux start-pos))))))
791
;; the actual CREATE-MATCHER-AUX method for REPETITION objects which
792
;; utilizes all the functions and macros defined above
794
(defmethod create-matcher-aux ((repetition repetition) next-fn)
795
(declare #.*standard-optimize-settings*)
796
(with-slots (minimum maximum len min-len greedyp contains-register-p)
800
;; this should have been optimized away by CONVERT but just
802
(error "Got REPETITION with MAXIMUM 0 \(should not happen)"))
804
(= minimum maximum 1))
805
;; this should have been optimized away by CONVERT but just
807
(error "Got REPETITION with MAXIMUM 1 and MINIMUM 1 \(should not happen)"))
808
((and (eql minimum maximum)
810
(not contains-register-p))
811
(create-constant-repetition-constant-length-matcher repetition next-fn))
812
((eql minimum maximum)
813
(create-constant-repetition-matcher repetition next-fn))
816
(not contains-register-p))
817
(create-greedy-constant-length-matcher repetition next-fn))
821
(create-greedy-no-zero-matcher repetition next-fn))
823
(create-greedy-matcher repetition next-fn))
826
(not contains-register-p))
827
(create-non-greedy-constant-length-matcher repetition next-fn))
830
(create-non-greedy-no-zero-matcher repetition next-fn))
832
(create-non-greedy-matcher repetition next-fn)))))