Coverage report: /home/ellis/comp/ext/cl-ppcre/api.lisp
Kind | Covered | All | % |
expression | 330 | 910 | 36.3 |
branch | 21 | 88 | 23.9 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.85 2009/09/17 19:17:30 edi Exp $
3
;;; The external API for creating and using scanners.
5
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
7
;;; Redistribution and use in source and binary forms, with or without
8
;;; modification, are permitted provided that the following conditions
11
;;; * Redistributions of source code must retain the above copyright
12
;;; notice, this list of conditions and the following disclaimer.
14
;;; * Redistributions in binary form must reproduce the above
15
;;; copyright notice, this list of conditions and the following
16
;;; disclaimer in the documentation and/or other materials
17
;;; provided with the distribution.
19
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
20
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
(in-package :cl-ppcre)
33
(defvar *look-ahead-for-suffix* t
34
"Controls whether scanners will optimistically look ahead for a
35
constant suffix of a regular expression, if there is one.")
37
(defgeneric create-scanner (regex &key case-insensitive-mode
42
(:documentation "Accepts a regular expression - either as a
43
parse-tree or as a string - and returns a scan closure which will scan
44
strings for this regular expression and a list mapping registers to
45
their names \(NIL stands for unnamed ones). The \"mode\" keyword
46
arguments are equivalent to the imsx modifiers in Perl. If
47
DESTRUCTIVE is not NIL, the function is allowed to destructively
48
modify its first argument \(but only if it's a parse tree)."))
50
(defmethod create-scanner ((regex-string string) &key case-insensitive-mode
55
(declare #.*standard-optimize-settings*)
56
(declare (ignore destructive))
57
;; parse the string into a parse-tree and then call CREATE-SCANNER
59
(let* ((*extended-mode-p* extended-mode)
60
(quoted-regex-string (if *allow-quoting*
61
(quote-sections (clean-comments regex-string extended-mode))
63
(*syntax-error-string* (copy-seq quoted-regex-string)))
64
;; wrap the result with :GROUP to avoid infinite loops for
66
(create-scanner (cons :group (list (parse-string quoted-regex-string)))
67
:case-insensitive-mode case-insensitive-mode
68
:multi-line-mode multi-line-mode
69
:single-line-mode single-line-mode
72
(defmethod create-scanner ((scanner function) &key case-insensitive-mode
77
(declare #.*standard-optimize-settings*)
78
(declare (ignore destructive))
79
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
80
(signal-invocation-error "You can't use the keyword arguments to modify an existing scanner."))
83
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
88
(declare #.*standard-optimize-settings*)
90
(signal-invocation-error "Extended mode doesn't make sense in parse trees."))
91
;; convert parse-tree into internal representation REGEX and at the
92
;; same time compute the number of registers and the constant string
93
;; (or anchor) the regex starts with (if any)
95
(setq parse-tree (copy-tree parse-tree)))
98
(push :single-line-mode-p flags))
100
(push :multi-line-mode-p flags))
101
(if case-insensitive-mode
102
(push :case-insensitive-p flags))
104
(setq parse-tree (list :group (cons :flags flags) parse-tree))))
105
(let ((*syntax-error-string* nil))
106
(multiple-value-bind (regex reg-num starts-with reg-names)
108
;; simplify REGEX by flattening nested SEQ and ALTERNATION
109
;; constructs and gathering STR objects
110
(let ((regex (gather-strings (flatten regex))))
111
;; set the MIN-REST slots of the REPETITION objects
112
(compute-min-rest regex 0)
113
;; set the OFFSET slots of the STR objects
114
(compute-offsets regex 0)
115
(let* (end-string-offset
117
;; compute the constant string the regex ends with (if
118
;; any) and at the same time set the special variables
119
;; END-STRING-OFFSET and END-ANCHORED-P
120
(end-string (end-string regex))
121
;; if we found a non-zero-length end-string we create an
122
;; efficient search function for it
123
(end-string-test (and *look-ahead-for-suffix*
125
(plusp (len end-string))
126
(if (= 1 (len end-string))
127
(create-char-searcher
128
(schar (str end-string) 0)
129
(case-insensitive-p end-string))
132
(case-insensitive-p end-string)))))
133
;; initialize the counters for CREATE-MATCHER-AUX
135
(*zero-length-num* 0)
136
;; create the actual matcher function (which does all the
137
;; work of matching the regular expression) corresponding
138
;; to REGEX and at the same time set the special
139
;; variables *REP-NUM* and *ZERO-LENGTH-NUM*
140
(match-fn (create-matcher-aux regex #'identity))
141
;; if the regex starts with a string we create an
142
;; efficient search function for it
143
(start-string-test (and (typep starts-with 'str)
144
(plusp (len starts-with))
145
(if (= 1 (len starts-with))
146
(create-char-searcher
147
(schar (str starts-with) 0)
148
(case-insensitive-p starts-with))
151
(case-insensitive-p starts-with))))))
152
(declare (special end-string-offset end-anchored-p end-string))
153
;; now create the scanner and return it
154
(values (create-scanner-aux match-fn
155
(regex-min-length regex)
156
(or (start-anchored-p regex)
157
;; a dot in single-line-mode also
158
;; implicitly anchors the regex at
159
;; the start, i.e. if we can't match
160
;; from the first position we won't
162
(and (typep starts-with 'everything)
163
(single-line-p starts-with)))
166
;; only mark regex as end-anchored if we
167
;; found a non-zero-length string before
169
(and end-string-test end-anchored-p)
180
(defgeneric scan (regex target-string &key start end real-start-pos)
181
(:documentation "Searches TARGET-STRING from START to END and tries
182
to match REGEX. On success returns four values - the start of the
183
match, the end of the match, and two arrays denoting the beginnings
184
and ends of register matches. On failure returns NIL. REGEX can be a
185
string which will be parsed according to Perl syntax, a parse tree, or
186
a pre-compiled scanner created by CREATE-SCANNER. TARGET-STRING will
187
be coerced to a simple string if it isn't one already. The
188
REAL-START-POS parameter should be ignored - it exists only for
189
internal purposes."))
191
#-:use-acl-regexp2-engine
192
(defmethod scan ((regex-string string) target-string
194
(end (length target-string))
195
((:real-start-pos *real-start-pos*) nil))
196
(declare #.*standard-optimize-settings*)
197
;; note that the scanners are optimized for simple strings so we
198
;; have to coerce TARGET-STRING into one if it isn't already
199
(funcall (create-scanner regex-string)
200
(maybe-coerce-to-simple-string target-string)
203
#-:use-acl-regexp2-engine
204
(defmethod scan ((scanner function) target-string
206
(end (length target-string))
207
((:real-start-pos *real-start-pos*) nil))
208
(declare #.*standard-optimize-settings*)
210
(maybe-coerce-to-simple-string target-string)
213
#-:use-acl-regexp2-engine
214
(defmethod scan ((parse-tree t) target-string
216
(end (length target-string))
217
((:real-start-pos *real-start-pos*) nil))
218
(declare #.*standard-optimize-settings*)
219
(funcall (create-scanner parse-tree)
220
(maybe-coerce-to-simple-string target-string)
223
(define-compiler-macro scan (&whole form regex target-string &rest rest)
224
"Make sure that constant forms are compiled into scanners at compile time."
225
;; Don't pass &environment to CONSTANTP, it may not be digestable by
226
;; LOAD-TIME-VALUE, e.g., MACROLETs.
227
(cond ((constantp regex)
228
`(scan (load-time-value (create-scanner ,regex))
229
,target-string ,@rest))
232
(defun scan-to-strings (regex target-string &key (start 0)
233
(end (length target-string))
235
"Like SCAN but returns substrings of TARGET-STRING instead of
236
positions, i.e. this function returns two values on success: the whole
237
match as a string plus an array of substrings (or NILs) corresponding
238
to the matched registers. If SHAREDP is true, the substrings may
239
share structure with TARGET-STRING."
240
(declare #.*standard-optimize-settings*)
241
(multiple-value-bind (match-start match-end reg-starts reg-ends)
242
(scan regex target-string :start start :end end)
244
(return-from scan-to-strings nil))
245
(let ((substr-fn (if sharedp #'nsubseq #'subseq)))
246
(values (funcall substr-fn
247
target-string match-start match-end)
249
(lambda (reg-start reg-end)
252
target-string reg-start reg-end)
258
(define-compiler-macro scan-to-strings
259
(&whole form regex target-string &rest rest)
260
"Make sure that constant forms are compiled into scanners at compile time."
261
(cond ((constantp regex)
262
`(scan-to-strings (load-time-value (create-scanner ,regex))
263
,target-string ,@rest))
266
(defmacro register-groups-bind (var-list (regex target-string
267
&key start end sharedp)
269
"Executes BODY with the variables in VAR-LIST bound to the
270
corresponding register groups after TARGET-STRING has been matched
271
against REGEX, i.e. each variable is either bound to a string or to
272
NIL. If there is no match, BODY is _not_ executed. For each element
273
of VAR-LIST which is NIL there's no binding to the corresponding
274
register group. The number of variables in VAR-LIST must not be
275
greater than the number of register groups. If SHAREDP is true, the
276
substrings may share structure with TARGET-STRING."
277
(with-rebinding (target-string)
278
(with-unique-names (match-start match-end reg-starts reg-ends
279
start-index substr-fn)
282
(loop for (function var) in (normalize-var-list var-list)
285
collect `(,var (let ((,start-index
286
(aref ,reg-starts ,counter)))
288
,(if (equal function '#'parse-integer)
289
`(parse-integer ,target-string :start ,start-index
290
:end (aref ,reg-ends ,counter))
292
(funcall ,(setf substr-needed substr-fn)
295
(aref ,reg-ends ,counter))))
297
`(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends)
298
(scan ,regex ,target-string :start (or ,start 0)
299
:end (or ,end (length ,target-string)))
300
(declare (ignore ,match-end))
301
,@(unless var-bindings
302
`((declare (ignore ,reg-starts ,reg-ends))))
305
`((let* (,@(and substr-needed
306
`((,substr-fn (if ,sharedp #'nsubseq #'subseq))))
311
(defmacro do-scans ((match-start match-end reg-starts reg-ends regex
313
&optional result-form
317
"Iterates over TARGET-STRING and tries to match REGEX as often as
318
possible evaluating BODY with MATCH-START, MATCH-END, REG-STARTS, and
319
REG-ENDS bound to the four return values of each match in turn. After
320
the last match, returns RESULT-FORM if provided or NIL otherwise. An
321
implicit block named NIL surrounds DO-SCANS; RETURN may be used to
322
terminate the loop immediately. If REGEX matches an empty string the
323
scan is continued one position behind this match. BODY may start with
325
(with-rebinding (target-string)
326
(with-unique-names (%start %end %regex scanner)
327
(declare (ignorable %regex scanner))
328
;; the NIL BLOCK to enable exits via (RETURN ...)
330
(let* ((,%start (or ,start 0))
331
(,%end (or ,end (length ,target-string)))
332
,@(unless (constantp regex env)
333
;; leave constant regular expressions as they are -
334
;; SCAN's compiler macro will take care of them;
335
;; otherwise create a scanner unless the regex is
336
;; already a function (otherwise SCAN will do this
337
;; on each iteration)
339
(,scanner (typecase ,%regex
341
(t (create-scanner ,%regex)))))))
342
;; coerce TARGET-STRING to a simple string unless it is one
343
;; already (otherwise SCAN will do this on each iteration)
345
(maybe-coerce-to-simple-string ,target-string))
347
;; invoke SCAN and bind the returned values to the
348
;; provided variables
350
(,match-start ,match-end ,reg-starts ,reg-ends)
351
(scan ,(cond ((constantp regex env) regex)
353
,target-string :start ,%start :end ,%end
354
:real-start-pos (or ,start 0))
355
;; declare the variables to be IGNORABLE to prevent the
356
;; compiler from issuing warnings
358
(ignorable ,match-start ,match-end ,reg-starts ,reg-ends))
360
;; stop iteration on first failure
361
(return ,result-form))
362
;; execute BODY (wrapped in LOCALLY so it can start with
366
;; advance by one position if we had a zero-length match
367
(setq ,%start (if (= ,match-start ,match-end)
371
(defmacro do-matches ((match-start match-end regex
373
&optional result-form
376
"Iterates over TARGET-STRING and tries to match REGEX as often as
377
possible evaluating BODY with MATCH-START and MATCH-END bound to the
378
start/end positions of each match in turn. After the last match,
379
returns RESULT-FORM if provided or NIL otherwise. An implicit block
380
named NIL surrounds DO-MATCHES; RETURN may be used to terminate the
381
loop immediately. If REGEX matches an empty string the scan is
382
continued one position behind this match. BODY may start with
384
;; this is a simplified form of DO-SCANS - we just provide two dummy
385
;; vars and ignore them
386
(with-unique-names (reg-starts reg-ends)
387
`(do-scans (,match-start ,match-end
388
,reg-starts ,reg-ends
389
,regex ,target-string
391
:start ,start :end ,end)
394
(defmacro do-matches-as-strings ((match-var regex
396
&optional result-form
397
&key start end sharedp)
399
"Iterates over TARGET-STRING and tries to match REGEX as often as
400
possible evaluating BODY with MATCH-VAR bound to the substring of
401
TARGET-STRING corresponding to each match in turn. After the last
402
match, returns RESULT-FORM if provided or NIL otherwise. An implicit
403
block named NIL surrounds DO-MATCHES-AS-STRINGS; RETURN may be used to
404
terminate the loop immediately. If REGEX matches an empty string the
405
scan is continued one position behind this match. If SHAREDP is true,
406
the substrings may share structure with TARGET-STRING. BODY may start
408
(with-rebinding (target-string)
409
(with-unique-names (match-start match-end substr-fn)
410
`(let ((,substr-fn (if ,sharedp #'nsubseq #'subseq)))
411
;; simple use DO-MATCHES to extract the substrings
412
(do-matches (,match-start ,match-end ,regex ,target-string
413
,result-form :start ,start :end ,end)
416
,target-string ,match-start ,match-end)))
419
(defmacro do-register-groups (var-list (regex target-string
420
&optional result-form
421
&key start end sharedp)
423
"Iterates over TARGET-STRING and tries to match REGEX as often as
424
possible evaluating BODY with the variables in VAR-LIST bound to the
425
corresponding register groups for each match in turn, i.e. each
426
variable is either bound to a string or to NIL. For each element of
427
VAR-LIST which is NIL there's no binding to the corresponding register
428
group. The number of variables in VAR-LIST must not be greater than
429
the number of register groups. After the last match, returns
430
RESULT-FORM if provided or NIL otherwise. An implicit block named NIL
431
surrounds DO-REGISTER-GROUPS; RETURN may be used to terminate the loop
432
immediately. If REGEX matches an empty string the scan is continued
433
one position behind this match. If SHAREDP is true, the substrings
434
may share structure with TARGET-STRING. BODY may start with
436
(with-rebinding (target-string)
437
(with-unique-names (substr-fn match-start match-end
438
reg-starts reg-ends start-index)
439
`(let ((,substr-fn (if ,sharedp
442
(do-scans (,match-start ,match-end ,reg-starts ,reg-ends
443
,regex ,target-string
444
,result-form :start ,start :end ,end)
445
(let ,(loop for (function var) in (normalize-var-list var-list)
448
collect `(,var (let ((,start-index
449
(aref ,reg-starts ,counter)))
455
(aref ,reg-ends ,counter)))
459
(defun count-matches (regex target-string
461
(end (length target-string)))
462
"Returns a count of all substrings of TARGET-STRING which match REGEX."
463
(declare #.*standard-optimize-settings*)
465
(do-matches (s e regex target-string count
466
:start start :end end)
470
(define-compiler-macro count-matches (&whole form regex &rest rest)
471
"Make sure that constant forms are compiled into scanners at
473
(cond ((constantp regex)
474
`(count-matches (load-time-value (create-scanner ,regex))
478
(defun all-matches (regex target-string
480
(end (length target-string)))
481
"Returns a list containing the start and end positions of all
482
matches of REGEX against TARGET-STRING, i.e. if there are N matches
483
the list contains (* 2 N) elements. If REGEX matches an empty string
484
the scan is continued one position behind this match."
485
(declare #.*standard-optimize-settings*)
487
(do-matches (match-start match-end
489
(nreverse result-list)
490
:start start :end end)
491
(push match-start result-list)
492
(push match-end result-list))))
495
(define-compiler-macro all-matches (&whole form regex &rest rest)
496
"Make sure that constant forms are compiled into scanners at
498
(cond ((constantp regex)
499
`(all-matches (load-time-value (create-scanner ,regex))
503
(defun all-matches-as-strings (regex target-string
505
(end (length target-string))
507
"Returns a list containing all substrings of TARGET-STRING which
508
match REGEX. If REGEX matches an empty string the scan is continued
509
one position behind this match. If SHAREDP is true, the substrings may
510
share structure with TARGET-STRING."
511
(declare #.*standard-optimize-settings*)
513
(do-matches-as-strings (match regex target-string (nreverse result-list)
514
:start start :end end :sharedp sharedp)
515
(push match result-list))))
518
(define-compiler-macro all-matches-as-strings (&whole form regex &rest rest)
519
"Make sure that constant forms are compiled into scanners at
521
(cond ((constantp regex)
522
`(all-matches-as-strings
523
(load-time-value (create-scanner ,regex))
527
(defun split (regex target-string
529
(end (length target-string))
534
"Matches REGEX against TARGET-STRING as often as possible and
535
returns a list of the substrings between the matches. If
536
WITH-REGISTERS-P is true, substrings corresponding to matched
537
registers are inserted into the list as well. If OMIT-UNMATCHED-P is
538
true, unmatched registers will simply be left out, otherwise they will
539
show up as NIL. LIMIT limits the number of elements returned -
540
registers aren't counted. If LIMIT is NIL \(or 0 which is
541
equivalent), trailing empty strings are removed from the result list.
542
If REGEX matches an empty string the scan is continued one position
543
behind this match. If SHAREDP is true, the substrings may share
544
structure with TARGET-STRING."
545
(declare #.*standard-optimize-settings*)
546
;; initialize list of positions POS-LIST to extract substrings with
547
;; START so that the start of the next match will mark the end of
548
;; the first substring
549
(let ((pos-list (list start))
551
;; how would Larry Wall do it?
554
(do-scans (match-start match-end
556
regex target-string nil
557
:start start :end end)
558
(unless (and (= match-start match-end)
559
(= match-start (car pos-list)))
560
;; push start of match on list unless this would be an empty
561
;; string adjacent to the last element pushed onto the list
564
;; If LIMIT is negative, it is treated as if
565
;; it were instead arbitrarily large;
566
;; as many fields as possible are produced.
568
(>= (incf counter) limit))
570
(push match-start pos-list)
571
(when with-registers-p
572
;; optionally insert matched registers
573
(loop for reg-start across reg-starts
574
for reg-end across reg-ends
576
;; but only if they've matched
577
do (push reg-start pos-list)
578
(push reg-end pos-list)
579
else unless omit-unmatched-p
580
;; or if we're allowed to insert NIL instead
581
do (push nil pos-list)
582
(push nil pos-list)))
584
(push match-end pos-list)))
585
;; end of whole string
587
;; now collect substrings
589
(loop with substr-fn = (if sharedp #'nsubseq #'subseq)
590
with string-seen = nil
591
for (this-end this-start) on pos-list by #'cddr
592
;; skip empty strings from end of list
597
(> this-end this-start)))))
598
collect (if this-start
600
target-string this-start this-end)
604
(define-compiler-macro split (&whole form regex target-string &rest rest)
605
"Make sure that constant forms are compiled into scanners at compile time."
606
(cond ((constantp regex)
607
`(split (load-time-value (create-scanner ,regex))
608
,target-string ,@rest))
611
(defun string-case-modifier (str from to start end)
612
(declare #.*standard-optimize-settings*)
613
(declare (fixnum from to start end))
614
"Checks whether all words in STR between FROM and TO are upcased,
615
downcased or capitalized and returns a function which applies a
616
corresponding case modification to strings. Returns #'IDENTITY
617
otherwise, especially if words in the target area extend beyond FROM
618
or TO. STR is supposed to be bounded by START and END. It is assumed
619
that \(<= START FROM TO END)."
623
(alphanumericp (char str (1- from)))
624
(alphanumericp (char str from)))
626
(alphanumericp (char str to))
627
(alphanumericp (char str (1- to)))))
628
;; if it's a zero-length string or if words extend beyond FROM
629
;; or TO we return NIL, i.e. #'IDENTITY
631
;; otherwise we loop through STR from FROM to TO
632
(loop with last-char-both-case
634
for index of-type fixnum from from below to
635
for chr = (char str index)
636
do (cond ((not #-:cormanlisp (both-case-p chr))
637
;; this character doesn't have a case so we
638
;; consider it as a word boundary (note that
639
;; this differs from how \b works in Perl)
640
(setq last-char-both-case nil))
642
;; an uppercase character
644
(if last-char-both-case
645
;; not the first character in a
647
((:undecided) :upcase)
648
((:downcase :capitalize) (return nil))
649
((:upcase) current-result))
652
((:downcase) (return nil))
653
((:capitalize :upcase) current-result)))
654
last-char-both-case t))
656
;; a lowercase character
660
((:undecided) :capitalize)
661
((:downcase) current-result)
662
((:capitalize) (if last-char-both-case
665
((:upcase) (return nil)))
666
last-char-both-case t)))
667
finally (return current-result)))
669
((:undecided :upcase) #'string-upcase)
670
((:downcase) #'string-downcase)
671
((:capitalize) #'string-capitalize)))
673
;; first create a scanner to identify the special parts of the
674
;; replacement string (eat your own dog food...)
676
(defgeneric build-replacement-template (replacement-string)
677
(declare #.*standard-optimize-settings*)
678
(:documentation "Converts a replacement string for REGEX-REPLACE or
679
REGEX-REPLACE-ALL into a replacement template which is an
682
(let* ((*use-bmh-matchers* nil)
683
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
684
(defmethod build-replacement-template ((replacement-string string))
685
(declare #.*standard-optimize-settings*)
687
;; COLLECTOR will hold the (reversed) template
689
;; scan through all special parts of the replacement string
690
(do-matches (match-start match-end reg-scanner replacement-string)
691
(when (< from match-start)
692
;; strings between matches are copied verbatim
693
(push (subseq replacement-string from match-start) collector))
694
;; PARSE-START is true if the pattern matched a number which
695
;; refers to a register
696
(let* ((parse-start (position-if #'digit-char-p
700
(token (if parse-start
701
(1- (parse-integer replacement-string
704
;; if we didn't match a number we convert the
705
;; character to a symbol
706
(case (char replacement-string (1+ match-start))
708
((#\`) :before-match)
710
((#\\) :backslash)))))
711
(when (and (numberp token) (< token 0))
712
;; make sure we don't accept something like "\\0"
713
(signal-invocation-error "Illegal substring ~S in replacement string."
714
(subseq replacement-string match-start match-end)))
715
(push token collector))
716
;; remember where the match ended
717
(setq from match-end))
718
(when (< from (length replacement-string))
719
;; push the rest of the replacement string onto the list
720
(push (subseq replacement-string from) collector))
721
(nreverse collector))))
724
(defmethod build-replacement-template ((replacement-function function))
725
(declare #.*standard-optimize-settings*)
726
(list replacement-function))
729
(defmethod build-replacement-template ((replacement-function-symbol symbol))
730
(declare #.*standard-optimize-settings*)
731
(list replacement-function-symbol))
734
(defmethod build-replacement-template ((replacement-list list))
735
(declare #.*standard-optimize-settings*)
738
(defun build-replacement (replacement-template
741
match-start match-end
745
(declare #.*standard-optimize-settings*)
746
"Accepts a replacement template and the current values from the
747
matching process in REGEX-REPLACE or REGEX-REPLACE-ALL and returns the
748
corresponding string."
749
;; the upper exclusive bound of the register numbers in the regular
751
(let ((reg-bound (if reg-starts
752
(array-dimension reg-starts 0)
754
(with-output-to-string (s nil :element-type element-type)
755
(loop for token in replacement-template
758
;; transfer string parts verbatim
759
(write-string token s))
761
;; replace numbers with the corresponding registers
762
(when (>= token reg-bound)
763
;; but only if the register was referenced in the
764
;; regular expression
765
(signal-invocation-error "Reference to non-existent register ~A in replacement string."
767
(when (svref reg-starts token)
768
;; and only if it matched, i.e. no match results
769
;; in an empty string
770
(write-string target-string s
771
:start (svref reg-starts token)
772
:end (svref reg-ends token))))
777
(nsubseq target-string match-start match-end)
779
(lambda (reg-start reg-end)
781
(nsubseq target-string reg-start reg-end)))
782
reg-starts reg-ends)))
787
match-start match-end
788
reg-starts reg-ends)))
797
(write-string target-string s
801
;; the part of the target string before the match
802
(write-string target-string s
806
;; the part of the target string after the match
807
(write-string target-string s
814
(nsubseq target-string match-start match-end)
816
(lambda (reg-start reg-end)
818
(nsubseq target-string reg-start reg-end)))
819
reg-starts reg-ends)))
824
match-start match-end
825
reg-starts reg-ends)))
828
(defun replace-aux (target-string replacement pos-list reg-list start end
829
preserve-case simple-calls element-type)
830
"Auxiliary function used by REGEX-REPLACE and REGEX-REPLACE-ALL.
831
POS-LIST contains a list with the start and end positions of all
832
matches while REG-LIST contains a list of arrays representing the
833
corresponding register start and end positions."
834
(declare #.*standard-optimize-settings*)
835
;; build the template once before we start the loop
836
(let ((replacement-template (build-replacement-template replacement)))
837
(with-output-to-string (s nil :element-type element-type)
838
;; loop through all matches and take the start and end of the
839
;; whole string into account
840
(loop for (from to) on (append (list start) pos-list (list end))
841
;; alternate between replacement and no replacement
842
for replace = nil then (and (not replace) to)
843
for reg-starts = (if replace (pop reg-list) nil)
844
for reg-ends = (if replace (pop reg-list) nil)
845
for curr-replacement = (if replace
846
;; build the replacement string
847
(build-replacement replacement-template
857
do (write-string (if preserve-case
858
;; modify the case of the replacement
859
;; string if necessary
860
(funcall (string-case-modifier target-string
868
do (write-string target-string s :start from :end to)))))
870
(defun regex-replace (regex target-string replacement &key
872
(end (length target-string))
875
(element-type 'character))
876
"Try to match TARGET-STRING between START and END against REGEX and
877
replace the first match with REPLACEMENT. Two values are returned;
878
the modified string, and T if REGEX matched or NIL otherwise.
880
REPLACEMENT can be a string which may contain the special substrings
881
\"\\&\" for the whole match, \"\\`\" for the part of TARGET-STRING
882
before the match, \"\\'\" for the part of TARGET-STRING after the
883
match, \"\\N\" or \"\\{N}\" for the Nth register where N is a positive
886
REPLACEMENT can also be a function designator in which case the
887
match will be replaced with the result of calling the function
888
designated by REPLACEMENT with the arguments TARGET-STRING, START,
889
END, MATCH-START, MATCH-END, REG-STARTS, and REG-ENDS. (REG-STARTS and
890
REG-ENDS are arrays holding the start and end positions of matched
891
registers or NIL - the meaning of the other arguments should be
894
Finally, REPLACEMENT can be a list where each element is a string,
895
one of the symbols :MATCH, :BEFORE-MATCH, or :AFTER-MATCH -
896
corresponding to \"\\&\", \"\\`\", and \"\\'\" above -, an integer N -
897
representing register (1+ N) -, or a function designator.
899
If PRESERVE-CASE is true, the replacement will try to preserve the
900
case (all upper case, all lower case, or capitalized) of the
901
match. The result will always be a fresh string, even if REGEX doesn't
904
ELEMENT-TYPE is the element type of the resulting string."
905
(declare #.*standard-optimize-settings*)
906
(multiple-value-bind (match-start match-end reg-starts reg-ends)
907
(scan regex target-string :start start :end end)
909
(values (replace-aux target-string replacement
910
(list match-start match-end)
911
(list reg-starts reg-ends)
912
start end preserve-case
913
simple-calls element-type)
915
(values (subseq target-string start end)
919
(define-compiler-macro regex-replace
920
(&whole form regex target-string replacement &rest rest)
921
"Make sure that constant forms are compiled into scanners at compile time."
922
(cond ((constantp regex)
923
`(regex-replace (load-time-value (create-scanner ,regex))
924
,target-string ,replacement ,@rest))
927
(defun regex-replace-all (regex target-string replacement &key
929
(end (length target-string))
932
(element-type 'character))
933
"Try to match TARGET-STRING between START and END against REGEX and
934
replace all matches with REPLACEMENT. Two values are returned; the
935
modified string, and T if REGEX matched or NIL otherwise.
937
REPLACEMENT can be a string which may contain the special substrings
938
\"\\&\" for the whole match, \"\\`\" for the part of TARGET-STRING
939
before the match, \"\\'\" for the part of TARGET-STRING after the
940
match, \"\\N\" or \"\\{N}\" for the Nth register where N is a positive
943
REPLACEMENT can also be a function designator in which case the
944
match will be replaced with the result of calling the function
945
designated by REPLACEMENT with the arguments TARGET-STRING, START,
946
END, MATCH-START, MATCH-END, REG-STARTS, and REG-ENDS. (REG-STARTS and
947
REG-ENDS are arrays holding the start and end positions of matched
948
registers or NIL - the meaning of the other arguments should be
951
Finally, REPLACEMENT can be a list where each element is a string,
952
one of the symbols :MATCH, :BEFORE-MATCH, or :AFTER-MATCH -
953
corresponding to \"\\&\", \"\\`\", and \"\\'\" above -, an integer N -
954
representing register (1+ N) -, or a function designator.
956
If PRESERVE-CASE is true, the replacement will try to preserve the
957
case (all upper case, all lower case, or capitalized) of the
958
match. The result will always be a fresh string, even if REGEX doesn't
961
ELEMENT-TYPE is the element type of the resulting string."
962
(declare #.*standard-optimize-settings*)
965
(do-scans (match-start match-end reg-starts reg-ends regex target-string
967
:start start :end end)
968
(push match-start pos-list)
969
(push match-end pos-list)
970
(push reg-starts reg-list)
971
(push reg-ends reg-list))
973
(values (replace-aux target-string replacement
976
start end preserve-case
977
simple-calls element-type)
979
(values (subseq target-string start end)
982
(define-compiler-macro regex-replace-all
983
(&whole form regex target-string replacement &rest rest)
984
"Make sure that constant forms are compiled into scanners at compile time."
985
(cond ((constantp regex)
986
`(regex-replace-all (load-time-value (create-scanner ,regex))
987
,target-string ,replacement ,@rest))
990
(defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
992
"Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST. Loops
993
through PACKAGES and executes BODY with SYMBOL bound to each symbol
994
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
995
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
996
scanner, a case-insensitive scanner is used."
997
(with-rebinding (regex)
998
(with-unique-names (scanner %packages next morep hash)
999
`(let* ((,scanner (create-scanner ,regex
1000
:case-insensitive-mode
1001
(and ,case-insensitive
1002
(not (functionp ,regex)))))
1003
(,%packages (or ,packages
1004
(list-all-packages)))
1005
(,hash (make-hash-table :test #'eq)))
1006
(with-package-iterator (,next ,%packages :external :internal :inherited)
1008
(multiple-value-bind (,morep symbol)
1011
(return ,return-form))
1012
(unless (gethash symbol ,hash)
1013
(when (scan ,scanner (symbol-name symbol))
1014
(setf (gethash symbol ,hash) t)
1017
;;; The following two functions were provided by Karsten Poeck
1018
(defun regex-apropos-list (regex &optional packages &key (case-insensitive t))
1019
(declare #.*standard-optimize-settings*)
1020
"Similar to the standard function APROPOS-LIST but returns a list of
1021
all symbols which match the regular expression REGEX. If
1022
CASE-INSENSITIVE is true and REGEX isn't already a scanner, a
1023
case-insensitive scanner is used."
1024
(let ((collector '()))
1025
(regex-apropos-aux (regex packages case-insensitive collector)
1026
(push symbol collector))))
1028
(defun print-symbol-info (symbol)
1029
"Auxiliary function used by REGEX-APROPOS. Tries to print some
1030
meaningful information about a symbol."
1031
(declare #.*standard-optimize-settings*)
1033
(let ((output-list '()))
1034
(cond ((special-operator-p symbol)
1035
(push "[special operator]" output-list))
1036
((macro-function symbol)
1037
(push "[macro]" output-list))
1039
(let* ((function (symbol-function symbol))
1040
(compiledp (compiled-function-p function)))
1041
(multiple-value-bind (lambda-expr closurep)
1042
(function-lambda-expression function)
1045
"[~:[~;compiled ~]~:[function~;closure~]]~:[~; ~A~]"
1046
compiledp closurep lambda-expr (cadr lambda-expr))
1048
(let ((class (find-class symbol nil)))
1050
(push (format nil "[class] ~S" class) output-list)))
1051
(cond ((keywordp symbol)
1052
(push "[keyword]" output-list))
1054
(push (format nil "[constant]~:[~; value: ~S~]"
1055
(boundp symbol) (symbol-value symbol)) output-list))
1057
(push (format nil "[variable] value: ~S"
1058
(symbol-value symbol))
1060
(format t "~&~S ~<~;~^~A~@{~:@_~A~}~;~:>" symbol output-list))
1062
;; this seems to be necessary due to some errors I encountered
1064
(format t "~&~S [an error occurred while trying to print more info]" symbol))))
1066
(defun regex-apropos (regex &optional packages &key (case-insensitive t))
1067
"Similar to the standard function APROPOS but returns a list of all
1068
symbols which match the regular expression REGEX. If CASE-INSENSITIVE
1069
is true and REGEX isn't already a scanner, a case-insensitive scanner
1071
(declare #.*standard-optimize-settings*)
1072
(regex-apropos-aux (regex packages case-insensitive)
1073
(print-symbol-info symbol))
1076
(let* ((*use-bmh-matchers* nil)
1077
(non-word-char-scanner (create-scanner "[^a-zA-Z_0-9]")))
1078
(defun quote-meta-chars (string &key (start 0) (end (length string)))
1079
"Quote, i.e. prefix with #\\\\, all non-word characters in STRING."
1080
(regex-replace-all non-word-char-scanner string "\\\\\\&"
1081
:start start :end end)))
1083
(let* ((*use-bmh-matchers* nil)
1084
(*allow-quoting* nil)
1085
(quote-char-scanner (create-scanner "\\\\Q"))
1086
(section-scanner (create-scanner "\\\\Q((?:[^\\\\]|\\\\(?!Q))*?)(?:\\\\E|$)")))
1087
(defun quote-sections (string)
1088
"Replace sections inside of STRING which are enclosed by \\Q and
1089
\\E with the quoted equivalent of these sections \(see
1090
QUOTE-META-CHARS). Repeat this as long as there are such
1091
sections. These sections may nest."
1092
(flet ((quote-substring (target-string start end match-start
1093
match-end reg-starts reg-ends)
1094
(declare (ignore start end match-start match-end))
1095
(quote-meta-chars target-string
1096
:start (svref reg-starts 0)
1097
:end (svref reg-ends 0))))
1098
(loop for result = string then (regex-replace-all section-scanner
1101
while (scan quote-char-scanner result)
1102
finally (return result)))))
1104
(let* ((*use-bmh-matchers* nil)
1105
(comment-scanner (create-scanner "(?s)\\(\\?#.*?\\)"))
1106
(extended-comment-scanner (create-scanner "(?m:#.*?$)|(?s:\\(\\?#.*?\\))"))
1107
(quote-token-scanner (create-scanner "\\\\[QE]"))
1108
(quote-token-replace-scanner (create-scanner "\\\\([QE])")))
1109
(defun clean-comments (string &optional extended-mode)
1110
"Clean \(?#...) comments within STRING for quoting, i.e. convert
1111
\\Q to Q and \\E to E. If EXTENDED-MODE is true, also clean
1112
end-of-line comments, i.e. those starting with #\\# and ending with
1114
(flet ((remove-tokens (target-string start end match-start
1115
match-end reg-starts reg-ends)
1116
(declare (ignore start end reg-starts reg-ends))
1117
(loop for result = (nsubseq target-string match-start match-end)
1118
then (regex-replace-all quote-token-replace-scanner result "\\1")
1119
;; we must probably repeat this because the comment
1120
;; can contain substrings like \\Q
1121
while (scan quote-token-scanner result)
1122
finally (return result))))
1123
(regex-replace-all (if extended-mode
1124
extended-comment-scanner
1129
(defun parse-tree-synonym (symbol)
1130
"Returns the parse tree the SYMBOL symbol is a synonym for. Returns
1131
NIL is SYMBOL wasn't yet defined to be a synonym."
1132
(get symbol 'parse-tree-synonym))
1134
(defun (setf parse-tree-synonym) (new-parse-tree symbol)
1135
"Defines SYMBOL to be a synonm for the parse tree NEW-PARSE-TREE."
1136
(setf (get symbol 'parse-tree-synonym) new-parse-tree))
1138
(defmacro define-parse-tree-synonym (name parse-tree)
1139
"Defines the symbol NAME to be a synonym for the parse tree
1140
PARSE-TREE. Both arguments are quoted."
1141
`(eval-when (:compile-toplevel :load-toplevel :execute)
1142
(setf (parse-tree-synonym ',name) ',parse-tree)))