Coverage report: /home/ellis/comp/ext/cl-ppcre/closures.lisp
Kind | Covered | All | % |
expression | 197 | 561 | 35.1 |
branch | 27 | 116 | 23.3 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; $Header: /usr/local/cvsrep/cl-ppcre/closures.lisp,v 1.45 2009/09/17 19:17:30 edi Exp $
3
;;; Here we create the closures which together build the final
6
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
8
;;; Redistribution and use in source and binary forms, with or without
9
;;; modification, are permitted provided that the following conditions
12
;;; * Redistributions of source code must retain the above copyright
13
;;; notice, this list of conditions and the following disclaimer.
15
;;; * Redistributions in binary form must reproduce the above
16
;;; copyright notice, this list of conditions and the following
17
;;; disclaimer in the documentation and/or other materials
18
;;; provided with the distribution.
20
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
21
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
24
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
26
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
(in-package :cl-ppcre)
34
(declaim (inline *string*= *string*-equal))
35
(defun *string*= (string2 start1 end1 start2 end2)
36
"Like STRING=, i.e. compares the special string *STRING* from START1
37
to END1 with STRING2 from START2 to END2. Note that there's no
38
boundary check - this has to be implemented by the caller."
39
(declare #.*standard-optimize-settings*)
40
(declare (fixnum start1 end1 start2 end2))
41
(loop for string1-idx of-type fixnum from start1 below end1
42
for string2-idx of-type fixnum from start2 below end2
43
always (char= (schar *string* string1-idx)
44
(schar string2 string2-idx))))
46
(defun *string*-equal (string2 start1 end1 start2 end2)
47
"Like STRING-EQUAL, i.e. compares the special string *STRING* from
48
START1 to END1 with STRING2 from START2 to END2. Note that there's no
49
boundary check - this has to be implemented by the caller."
50
(declare #.*standard-optimize-settings*)
51
(declare (fixnum start1 end1 start2 end2))
52
(loop for string1-idx of-type fixnum from start1 below end1
53
for string2-idx of-type fixnum from start2 below end2
54
always (char-equal (schar *string* string1-idx)
55
(schar string2 string2-idx))))
57
(defgeneric create-matcher-aux (regex next-fn)
58
(declare #.*standard-optimize-settings*)
59
(:documentation "Creates a closure which takes one parameter,
60
START-POS, and tests whether REGEX can match *STRING* at START-POS
61
such that the call to NEXT-FN after the match would succeed."))
63
(defmethod create-matcher-aux ((seq seq) next-fn)
64
(declare #.*standard-optimize-settings*)
65
;; the closure for a SEQ is a chain of closures for the elements of
66
;; this sequence which call each other in turn; the last closure
68
(loop for element in (reverse (elements seq))
69
for curr-matcher = next-fn then next-matcher
70
for next-matcher = (create-matcher-aux element curr-matcher)
71
finally (return next-matcher)))
73
(defmethod create-matcher-aux ((alternation alternation) next-fn)
74
(declare #.*standard-optimize-settings*)
75
;; first create closures for all alternations of ALTERNATION
76
(let ((all-matchers (mapcar #'(lambda (choice)
77
(create-matcher-aux choice next-fn))
78
(choices alternation))))
79
;; now create a closure which checks if one of the closures
80
;; created above can succeed
82
(declare (fixnum start-pos))
83
(loop for matcher in all-matchers
84
thereis (funcall (the function matcher) start-pos)))))
86
(defmethod create-matcher-aux ((register register) next-fn)
87
(declare #.*standard-optimize-settings*)
88
;; the position of this REGISTER within the whole regex; we start to
90
(let ((num (num register)))
91
(declare (fixnum num))
92
;; STORE-END-OF-REG is a thin wrapper around NEXT-FN which will
93
;; update the corresponding values of *REGS-START* and *REGS-END*
94
;; after the inner matcher has succeeded
95
(flet ((store-end-of-reg (start-pos)
96
(declare (fixnum start-pos)
98
(setf (svref *reg-starts* num) (svref *regs-maybe-start* num)
99
(svref *reg-ends* num) start-pos)
100
(funcall next-fn start-pos)))
101
;; the inner matcher is a closure corresponding to the regex
102
;; wrapped by this REGISTER
103
(let ((inner-matcher (create-matcher-aux (regex register)
104
#'store-end-of-reg)))
105
(declare (function inner-matcher))
106
;; here comes the actual closure for REGISTER
108
(declare (fixnum start-pos))
109
;; remember the old values of *REGS-START* and friends in
110
;; case we cannot match
111
(let ((old-*reg-starts* (svref *reg-starts* num))
112
(old-*regs-maybe-start* (svref *regs-maybe-start* num))
113
(old-*reg-ends* (svref *reg-ends* num)))
114
;; we cannot use *REGS-START* here because Perl allows
115
;; regular expressions like /(a|\1x)*/
116
(setf (svref *regs-maybe-start* num) start-pos)
117
(let ((next-pos (funcall inner-matcher start-pos)))
119
;; restore old values on failure
120
(setf (svref *reg-starts* num) old-*reg-starts*
121
(svref *regs-maybe-start* num) old-*regs-maybe-start*
122
(svref *reg-ends* num) old-*reg-ends*))
125
(defmethod create-matcher-aux ((lookahead lookahead) next-fn)
126
(declare #.*standard-optimize-settings*)
127
;; create a closure which just checks for the inner regex and
128
;; doesn't care about NEXT-FN
129
(let ((test-matcher (create-matcher-aux (regex lookahead) #'identity)))
130
(declare (function next-fn test-matcher))
131
(if (positivep lookahead)
132
;; positive look-ahead: check success of inner regex, then call
135
(and (funcall test-matcher start-pos)
136
(funcall next-fn start-pos)))
137
;; negative look-ahead: check failure of inner regex, then call
140
(and (not (funcall test-matcher start-pos))
141
(funcall next-fn start-pos))))))
143
(defmethod create-matcher-aux ((lookbehind lookbehind) next-fn)
144
(declare #.*standard-optimize-settings*)
145
(let ((len (len lookbehind))
146
;; create a closure which just checks for the inner regex and
147
;; doesn't care about NEXT-FN
148
(test-matcher (create-matcher-aux (regex lookbehind) #'identity)))
149
(declare (function next-fn test-matcher)
151
(if (positivep lookbehind)
152
;; positive look-behind: check success of inner regex (if we're
153
;; far enough from the start of *STRING*), then call NEXT-FN
155
(declare (fixnum start-pos))
156
(and (>= (- start-pos (or *real-start-pos* *start-pos*)) len)
157
(funcall test-matcher (- start-pos len))
158
(funcall next-fn start-pos)))
159
;; negative look-behind: check failure of inner regex (if we're
160
;; far enough from the start of *STRING*), then call NEXT-FN
162
(declare (fixnum start-pos))
163
(and (or (< (- start-pos (or *real-start-pos* *start-pos*)) len)
164
(not (funcall test-matcher (- start-pos len))))
165
(funcall next-fn start-pos))))))
167
(defmacro insert-char-class-tester ((char-class chr-expr) &body body)
168
"Utility macro to replace each occurence of '\(CHAR-CLASS-TEST)
169
within BODY with the correct test (corresponding to CHAR-CLASS)
171
(with-rebinding (char-class)
172
(with-unique-names (test-function)
173
(flet ((substitute-char-class-tester (new)
174
(subst new '(char-class-test) body
176
`(let ((,test-function (test-function ,char-class)))
177
,@(substitute-char-class-tester
178
`(funcall ,test-function ,chr-expr)))))))
180
(defmethod create-matcher-aux ((char-class char-class) next-fn)
181
(declare #.*standard-optimize-settings*)
182
(declare (function next-fn))
183
;; insert a test against the current character within *STRING*
184
(insert-char-class-tester (char-class (schar *string* start-pos))
186
(declare (fixnum start-pos))
187
(and (< start-pos *end-pos*)
189
(funcall next-fn (1+ start-pos))))))
191
(defmethod create-matcher-aux ((str str) next-fn)
192
(declare #.*standard-optimize-settings*)
193
(declare (fixnum *end-string-pos*)
195
;; this special value is set by CREATE-SCANNER when the
196
;; closures are built
197
(special end-string))
198
(let* ((len (len str))
199
(case-insensitive-p (case-insensitive-p str))
200
(start-of-end-string-p (start-of-end-string-p str))
203
(chr (if (zerop len) ;; empty STR is same as VOID
204
(return-from create-matcher-aux next-fn)
206
(end-string (and end-string (str end-string)))
207
(end-string-len (if end-string
210
(declare (fixnum len))
211
(cond ((and start-of-end-string-p case-insensitive-p)
212
;; closure for the first STR which belongs to the constant
213
;; string at the end of the regular expression;
214
;; case-insensitive version
216
(declare (fixnum start-pos end-string-len))
217
(let ((test-end-pos (+ start-pos end-string-len)))
218
(declare (fixnum test-end-pos))
219
;; either we're at *END-STRING-POS* (which means that
220
;; it has already been confirmed that end-string
221
;; starts here) or we really have to test
222
(and (or (= start-pos *end-string-pos*)
223
(and (<= test-end-pos *end-pos*)
224
(*string*-equal end-string start-pos test-end-pos
226
(funcall next-fn (+ start-pos len))))))
227
(start-of-end-string-p
228
;; closure for the first STR which belongs to the constant
229
;; string at the end of the regular expression;
230
;; case-sensitive version
232
(declare (fixnum start-pos end-string-len))
233
(let ((test-end-pos (+ start-pos end-string-len)))
234
(declare (fixnum test-end-pos))
235
;; either we're at *END-STRING-POS* (which means that
236
;; it has already been confirmed that end-string
237
;; starts here) or we really have to test
238
(and (or (= start-pos *end-string-pos*)
239
(and (<= test-end-pos *end-pos*)
240
(*string*= end-string start-pos test-end-pos
242
(funcall next-fn (+ start-pos len))))))
244
;; a STR which can be skipped because some other function
245
;; has already confirmed that it matches
247
(declare (fixnum start-pos))
248
(funcall next-fn (+ start-pos len))))
249
((and (= len 1) case-insensitive-p)
250
;; STR represent exactly one character; case-insensitive
253
(declare (fixnum start-pos))
254
(and (< start-pos *end-pos*)
255
(char-equal (schar *string* start-pos) chr)
256
(funcall next-fn (1+ start-pos)))))
258
;; STR represent exactly one character; case-sensitive
261
(declare (fixnum start-pos))
262
(and (< start-pos *end-pos*)
263
(char= (schar *string* start-pos) chr)
264
(funcall next-fn (1+ start-pos)))))
266
;; general case, case-insensitive version
268
(declare (fixnum start-pos))
269
(let ((next-pos (+ start-pos len)))
270
(declare (fixnum next-pos))
271
(and (<= next-pos *end-pos*)
272
(*string*-equal str start-pos next-pos 0 len)
273
(funcall next-fn next-pos)))))
275
;; general case, case-sensitive version
277
(declare (fixnum start-pos))
278
(let ((next-pos (+ start-pos len)))
279
(declare (fixnum next-pos))
280
(and (<= next-pos *end-pos*)
281
(*string*= str start-pos next-pos 0 len)
282
(funcall next-fn next-pos))))))))
284
(declaim (inline word-boundary-p))
285
(defun word-boundary-p (start-pos)
286
"Check whether START-POS is a word-boundary within *STRING*."
287
(declare #.*standard-optimize-settings*)
288
(declare (fixnum start-pos))
289
(let ((1-start-pos (1- start-pos))
290
(*start-pos* (or *real-start-pos* *start-pos*)))
291
;; either the character before START-POS is a word-constituent and
292
;; the character at START-POS isn't...
293
(or (and (or (= start-pos *end-pos*)
294
(and (< start-pos *end-pos*)
295
(not (word-char-p (schar *string* start-pos)))))
296
(and (< 1-start-pos *end-pos*)
297
(<= *start-pos* 1-start-pos)
298
(word-char-p (schar *string* 1-start-pos))))
300
(and (or (= start-pos *start-pos*)
301
(and (< 1-start-pos *end-pos*)
302
(<= *start-pos* 1-start-pos)
303
(not (word-char-p (schar *string* 1-start-pos)))))
304
(and (< start-pos *end-pos*)
305
(word-char-p (schar *string* start-pos)))))))
307
(defmethod create-matcher-aux ((word-boundary word-boundary) next-fn)
308
(declare #.*standard-optimize-settings*)
309
(declare (function next-fn))
310
(if (negatedp word-boundary)
312
(and (not (word-boundary-p start-pos))
313
(funcall next-fn start-pos)))
315
(and (word-boundary-p start-pos)
316
(funcall next-fn start-pos)))))
318
(defmethod create-matcher-aux ((everything everything) next-fn)
319
(declare #.*standard-optimize-settings*)
320
(declare (function next-fn))
321
(if (single-line-p everything)
322
;; closure for single-line-mode: we really match everything, so we
323
;; just advance the index into *STRING* by one and carry on
325
(declare (fixnum start-pos))
326
(and (< start-pos *end-pos*)
327
(funcall next-fn (1+ start-pos))))
328
;; not single-line-mode, so we have to make sure we don't match
331
(declare (fixnum start-pos))
332
(and (< start-pos *end-pos*)
333
(char/= (schar *string* start-pos) #\Newline)
334
(funcall next-fn (1+ start-pos))))))
336
(defmethod create-matcher-aux ((anchor anchor) next-fn)
337
(declare #.*standard-optimize-settings*)
338
(declare (function next-fn))
339
(let ((startp (startp anchor))
340
(multi-line-p (multi-line-p anchor)))
341
(cond ((no-newline-p anchor)
342
;; this must be an end-anchor and it must be modeless, so
343
;; we just have to check whether START-POS equals
346
(declare (fixnum start-pos))
347
(and (= start-pos *end-pos*)
348
(funcall next-fn start-pos))))
349
((and startp multi-line-p)
350
;; a start-anchor in multi-line-mode: check if we're at
351
;; *START-POS* or if the last character was #\Newline
353
(declare (fixnum start-pos))
354
(let ((*start-pos* (or *real-start-pos* *start-pos*)))
355
(and (or (= start-pos *start-pos*)
356
(and (<= start-pos *end-pos*)
357
(> start-pos *start-pos*)
359
(schar *string* (1- start-pos)))))
360
(funcall next-fn start-pos)))))
362
;; a start-anchor which is not in multi-line-mode, so just
363
;; check whether we're at *START-POS*
365
(declare (fixnum start-pos))
366
(and (= start-pos (or *real-start-pos* *start-pos*))
367
(funcall next-fn start-pos))))
369
;; an end-anchor in multi-line-mode: check if we're at
370
;; *END-POS* or if the character we're looking at is
373
(declare (fixnum start-pos))
374
(and (or (= start-pos *end-pos*)
375
(and (< start-pos *end-pos*)
377
(schar *string* start-pos))))
378
(funcall next-fn start-pos))))
380
;; an end-anchor which is not in multi-line-mode, so just
381
;; check if we're at *END-POS* or if we're looking at
382
;; #\Newline and there's nothing behind it
384
(declare (fixnum start-pos))
385
(and (or (= start-pos *end-pos*)
386
(and (= start-pos (1- *end-pos*))
388
(schar *string* start-pos))))
389
(funcall next-fn start-pos)))))))
391
(defmethod create-matcher-aux ((back-reference back-reference) next-fn)
392
(declare #.*standard-optimize-settings*)
393
(declare (function next-fn))
394
;; the position of the corresponding REGISTER within the whole
395
;; regex; we start to count at 0
396
(let ((num (num back-reference)))
397
(if (case-insensitive-p back-reference)
398
;; the case-insensitive version
400
(declare (fixnum start-pos))
401
(let ((reg-start (svref *reg-starts* num))
402
(reg-end (svref *reg-ends* num)))
403
;; only bother to check if the corresponding REGISTER as
404
;; matched successfully already
406
(let ((next-pos (+ start-pos (- (the fixnum reg-end)
407
(the fixnum reg-start)))))
408
(declare (fixnum next-pos))
410
(<= next-pos *end-pos*)
411
(*string*-equal *string* start-pos next-pos
413
(funcall next-fn next-pos))))))
414
;; the case-sensitive version
416
(declare (fixnum start-pos))
417
(let ((reg-start (svref *reg-starts* num))
418
(reg-end (svref *reg-ends* num)))
419
;; only bother to check if the corresponding REGISTER as
420
;; matched successfully already
422
(let ((next-pos (+ start-pos (- (the fixnum reg-end)
423
(the fixnum reg-start)))))
424
(declare (fixnum next-pos))
426
(<= next-pos *end-pos*)
427
(*string*= *string* start-pos next-pos
429
(funcall next-fn next-pos)))))))))
431
(defmethod create-matcher-aux ((branch branch) next-fn)
432
(declare #.*standard-optimize-settings*)
433
(let* ((test (test branch))
434
(then-matcher (create-matcher-aux (then-regex branch) next-fn))
435
(else-matcher (create-matcher-aux (else-regex branch) next-fn)))
436
(declare (function then-matcher else-matcher))
437
(cond ((numberp test)
439
(declare (fixnum test))
440
(if (and (< test (length *reg-starts*))
441
(svref *reg-starts* test))
442
(funcall then-matcher start-pos)
443
(funcall else-matcher start-pos))))
445
(let ((test-matcher (create-matcher-aux test #'identity)))
446
(declare (function test-matcher))
448
(if (funcall test-matcher start-pos)
449
(funcall then-matcher start-pos)
450
(funcall else-matcher start-pos))))))))
452
(defmethod create-matcher-aux ((standalone standalone) next-fn)
453
(declare #.*standard-optimize-settings*)
454
(let ((inner-matcher (create-matcher-aux (regex standalone) #'identity)))
455
(declare (function next-fn inner-matcher))
457
(let ((next-pos (funcall inner-matcher start-pos)))
459
(funcall next-fn next-pos))))))
461
(defmethod create-matcher-aux ((filter filter) next-fn)
462
(declare #.*standard-optimize-settings*)
463
(let ((fn (fn filter)))
465
(let ((next-pos (funcall fn start-pos)))
467
(funcall next-fn next-pos))))))
469
(defmethod create-matcher-aux ((void void) next-fn)
470
(declare #.*standard-optimize-settings*)
471
;; optimize away VOIDs: don't create a closure, just return NEXT-FN