Kind | Covered | All | % |
expression | 0 | 347 | 0.0 |
branch | 0 | 60 | 0.0 |
1
;;; parse/bytes.lisp --- Procedural Parser
2
3
;; swiped from Fukamachi's proc-parser.lisp. Will re-implement at a later
4
;; date.
5
6
;;; License:
7
;; Copyright 2015 Eitaro Fukamachi
8
9
;; Redistribution and use in source and binary forms, with or without
10
;; modification, are permitted provided that the following conditions are met:
11
12
;; 1. Redistributions of source code must retain the above copyright notice,
13
;; this list of conditions and the following disclaimer.
14
15
;; 2. Redistributions in binary form must reproduce the above copyright
16
;; notice, this list of conditions and the following disclaimer in the
17
;; documentation and/or other materials provided with the distribution.
18
19
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23
;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
;; POSSIBILITY OF SUCH DAMAGE.
30
31
;;; Code:
32
(in-package :parse/bytes)
33
34
(define-condition match-failed (error)
35
((elem :initarg :elem
36
:initform nil)
37
(expected :initarg :expected
38
:initform nil))
39
(:report (lambda (condition stream)
40
(with-slots (elem expected) condition
41
(format stream
42
"Match failed~:[~;~:*: ~S~]~:[~;~:* (expected: ~{~S~^, ~})~]"
43
(ensure-char-elem elem) expected)))))
44
45
(defun convert-case-conditions (var chars)
46
(cond
47
((consp chars)
48
`(or ,@(loop for ch in chars
49
if (characterp ch)
50
collect `(char= ,var ,ch)
51
else
52
collect `(= ,var ,ch))))
53
((eq chars 'otherwise)
54
t)
55
(t (if (characterp chars)
56
`(char= ,var ,chars)
57
`(= ,var ,chars)))))
58
59
(defun typed-case-tagbodies (var &rest cases)
60
(cond
61
((null cases) nil)
62
((= 1 (length cases))
63
`((when ,(convert-case-conditions var (car (first cases)))
64
,@(cdr (first cases)))))
65
((and (= 2 (length cases))
66
(eq (car (second cases)) 'otherwise))
67
`((unless ,(convert-case-conditions var (car (first cases)))
68
,@(cdr (second cases)))
69
,@(cdr (first cases))))
70
(t
71
(let ((tags (make-array (length cases) :initial-contents (loop repeat (length cases)
72
collect (gensym))))
73
(end (gensym "END")))
74
`(,@(loop for (chars . body) in cases
75
for i from 0
76
collect `(when ,(convert-case-conditions var chars)
77
(go ,(aref tags i))))
78
,@(loop for case in cases
79
for i from 0
80
append `(,(aref tags i)
81
,@(cdr case)
82
(go ,end)))
83
,end)))))
84
85
(defmacro vector-case (elem-var vec-and-options &body cases)
86
(destructuring-bind (vec &key case-insensitive)
87
(ensure-cons vec-and-options)
88
(with-gensyms (otherwise end-tag vector-case-block)
89
(labels ((case-candidates (el)
90
(cond
91
((not case-insensitive) el)
92
((characterp el)
93
(cond
94
((char<= #\a el #\z)
95
`(,el
96
,(code-char
97
(- (char-code el)
98
#.(- (char-code #\a) (char-code #\A))))))
99
((char<= #\A el #\Z)
100
`(,el
101
,(code-char
102
(+ (char-code el)
103
#.(- (char-code #\a) (char-code #\A))))))
104
(t el)))
105
((typep el '(unsigned-byte 8))
106
(cond
107
((<= #.(char-code #\a) el #.(char-code #\z))
108
`(,el
109
,(- el #.(- (char-code #\a) (char-code #\A)))))
110
((<= #.(char-code #\A) el #.(char-code #\Z))
111
`(,el
112
,(+ el #.(- (char-code #\a) (char-code #\A)))))
113
(t el)))
114
(t el)))
115
(build-case (i cases vec)
116
(when cases
117
(let ((map (make-hash-table)))
118
(map nil
119
(lambda (case)
120
(unless (vectorp (car case))
121
(error "The first element of cases must be a constant vector"))
122
(unless (<= (length (car case)) i)
123
(push case (gethash (aref (car case) i) map))))
124
cases)
125
(let (res-cases)
126
(maphash (lambda (el cases)
127
(let ((next-case (build-case (1+ i) cases vec)))
128
(cond
129
(next-case
130
(push
131
`(,(case-candidates el)
132
(unless (advance*)
133
,(if (= (length (caar cases)) (1+ i))
134
`(progn ,@(cdr (car cases))
135
(go ,end-tag))
136
`(go :eof)))
137
,@(apply #'typed-case-tagbodies elem-var
138
(append
139
next-case
140
`((otherwise (go ,otherwise))))))
141
res-cases))
142
(t
143
(push `(,(case-candidates el)
144
(advance*)
145
(return-from ,vector-case-block
146
(progn ,@(cdr (car cases)))))
147
res-cases)))))
148
map)
149
res-cases)))))
150
(let ((otherwise-case nil))
151
(when (eq (caar (last cases)) 'otherwise)
152
(setq otherwise-case (car (last cases))
153
cases (butlast cases)))
154
`(block ,vector-case-block
155
(tagbody
156
,@(apply #'typed-case-tagbodies elem-var
157
(append
158
(build-case 0 cases vec)
159
`((otherwise (go ,otherwise)))))
160
(go ,end-tag)
161
,otherwise
162
,@(when otherwise-case
163
`(unless (eofp)
164
(return-from ,vector-case-block
165
(progn ,@(cdr otherwise-case)))))
166
,end-tag)))))))
167
168
(defun variable-type (var &optional env)
169
(declare (ignorable env))
170
(cond
171
((constantp var) (type-of var))
172
#+(or sbcl openmcl cmu allegro)
173
((and (symbolp var)
174
#+allegro (cadr (assoc 'type (nth-value 2 (variable-information var env))))
175
#-allegro (cdr (assoc 'type (nth-value 2 (variable-information var env))))))
176
((and (listp var)
177
(eq (car var) 'the)
178
(cadr var)))))
179
180
(defun variable-type* (var &optional env)
181
(let ((type (variable-type var env)))
182
(cond
183
((null type) nil)
184
((subtypep type 'string) 'string)
185
((subtypep type 'octet-vector) 'octet-vector))))
186
187
(defun check-skip-elems (elems)
188
(or (every (lambda (elem)
189
(or (characterp elem)
190
(and (consp elem)
191
(null (cddr elem))
192
(eq (first elem) 'not)
193
(characterp (second elem)))))
194
elems)
195
(error "'skip' takes only constant characters, or a cons starts with 'not'.")))
196
197
(defun check-match-cases (cases)
198
(or (every (lambda (case)
199
(and (consp case)
200
(or (eq (car case) 'otherwise)
201
(stringp (car case)))))
202
cases)
203
(error "'match-case' takes only constant strings at the car position.~% ~S" cases)))
204
205
206
(defmacro bind* ((symb &body bind-forms) &body body)
207
(declare (ignore symb bind-forms body)))
208
209
(defmacro subseq* (data start &optional end)
210
`(subseq ,data ,start ,end))
211
(defmacro get-elem (form) form)
212
(defun ensure-char-elem (elem)
213
(if (characterp elem)
214
elem
215
(code-char elem)))
216
217
(defmacro tagbody-with-match-failed (elem &body body)
218
(with-gensyms (block)
219
`(block ,block
220
(tagbody
221
(return-from ,block ,@body)
222
:match-failed
223
(error 'match-failed :elem ,elem)))))
224
225
(defmacro parsing-macrolet ((elem data p end)
226
(&rest macros) &body body)
227
`(macrolet ((advance (&optional (step 1))
228
`(or (advance* ,step)
229
(go :eof)))
230
(advance* (&optional (step 1))
231
`(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
232
(incf ,',p ,step)
233
,@(if (eql step 0)
234
()
235
`((if (<= ,',end ,',p)
236
nil
237
(progn
238
(setq ,',elem
239
(aref ,',data ,',p))
240
t))))))
241
(advance-to (to)
242
`(or (advance-to* ,to)
243
(go :eof)))
244
(advance-to* (to)
245
(once-only (to)
246
`(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
247
(check-type ,to fixnum)
248
(setq ,',p ,to)
249
(if (<= ,',end ,',p)
250
nil
251
(progn
252
(setq ,',elem
253
(aref ,',data ,',p))
254
t)))))
255
(skip (&rest elems)
256
(check-skip-elems elems)
257
`(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
258
(if (skip-conditions ,',elem ,elems)
259
(advance)
260
(error 'match-failed
261
:elem ,',elem
262
:expected ',elems))))
263
(skip* (&rest elems)
264
(check-skip-elems elems)
265
`(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
266
(unless (eofp)
267
(loop
268
(unless (skip-conditions ,',elem ,elems)
269
(return))
270
(or (advance*) (go :eof))))))
271
(skip+ (&rest elems)
272
`(progn
273
(skip ,@elems)
274
(skip* ,@elems)))
275
(skip? (&rest elems)
276
(check-skip-elems elems)
277
`(locally (declare (optimize (speed 3) (safety 0) (debug 0) (compilation-speed 0)))
278
(when (skip-conditions ,',elem ,elems)
279
(or (advance*) (go :eof)))))
280
(skip-until (fn)
281
`(loop until ,(if (symbolp fn)
282
`(,fn (get-elem ,',elem))
283
`(funcall ,fn (get-elem ,',elem)))
284
do (or (advance*) (go :eof))))
285
(skip-while (fn)
286
`(loop while ,(if (symbolp fn)
287
`(,fn (get-elem ,',elem))
288
`(funcall ,fn (get-elem ,',elem)))
289
do (or (advance*) (go :eof))))
290
(bind* ((symb &body bind-forms) &body body)
291
(with-gensyms (start)
292
`(let ((,start ,',p))
293
(tagbody
294
,@bind-forms
295
:eof)
296
(prog1
297
(let ((,symb (subseq* ,',data ,start ,',p)))
298
,@body)
299
(when (eofp)
300
(go :eof))))))
301
(%match (&rest vectors)
302
`(%match-case
303
,@(loop for vec in vectors
304
collect `(,vec))))
305
(match (&rest vectors)
306
`(block match-block
307
(tagbody
308
(return-from match-block (%match ,@vectors))
309
:match-failed
310
(error 'match-failed :elem ,',elem))))
311
(match? (&rest vectors)
312
(with-gensyms (start start-elem)
313
`(let ((,start ,',p)
314
(,start-elem ,',elem))
315
(block match?-block
316
(tagbody
317
(%match ,@vectors)
318
(return-from match?-block t)
319
:match-failed
320
(setq ,',p ,start
321
,',elem ,start-elem))))))
322
(match-i (&rest vectors)
323
`(match-i-case
324
,@(loop for vec in vectors
325
collect `(,vec))))
326
,@macros)
327
#+sbcl (declare (sb-ext:muffle-conditions sb-ext:code-deletion-note))
328
(labels ((eofp ()
329
(declare (optimize (speed 3) (safety 0) (debug 0)))
330
(<= ,end ,p))
331
(current () (get-elem ,elem))
332
(peek (&key eof-value)
333
(declare (optimize (speed 3) (safety 0) (debug 0)))
334
(let ((len (length ,data)))
335
(declare (type fixnum len))
336
(if (or (eofp) (>= ,p (- ,end 1)) (= ,p (- len 1)))
337
eof-value
338
(aref ,data (+ 1 ,p)))))
339
(pos () (the fixnum ,p)))
340
(declare (inline eofp current pos))
341
,@body)))
342
343
(defmacro with-string-parsing ((data &key start end) &body body)
344
(with-gensyms (g-end elem p body-block)
345
(once-only (data)
346
`(let ((,elem #\Nul)
347
(,p ,(if start
348
`(or ,start 0)
349
0))
350
(,g-end ,(if end
351
`(or ,end (length ,data))
352
`(length ,data))))
353
(declare (type simple-string ,data)
354
(type fixnum ,p ,g-end)
355
(type character ,elem))
356
(parsing-macrolet (,elem ,data ,p ,g-end)
357
((skip-conditions (elem-var elems)
358
`(or ,@(loop for el in elems
359
if (and (consp el)
360
(eq (car el) 'not))
361
collect `(not (char= ,(cadr el) ,elem-var))
362
else
363
collect `(char= ,el ,elem-var))))
364
(%match-case (&rest cases)
365
(check-match-cases cases)
366
`(prog1
367
(vector-case ,',elem (,',data)
368
,@(if (find 'otherwise cases :key #'car :test #'eq)
369
cases
370
(append cases
371
'((otherwise (go :match-failed))))))
372
(when (eofp) (go :eof))))
373
(%match-i-case (&rest cases)
374
(check-match-cases cases)
375
`(prog1
376
(vector-case ,',elem (,',data :case-insensitive t)
377
,@(if (find 'otherwise cases :key #'car :test #'eq)
378
cases
379
(append cases
380
'((otherwise (go :match-failed))))))
381
(when (eofp) (go :eof))))
382
(match-case
383
(&rest cases)
384
`(tagbody-with-match-failed ,',elem (%match-case ,@cases)))
385
(match-i-case
386
(&rest cases)
387
`(tagbody-with-match-failed ,',elem (%match-i-case ,@cases))))
388
(block ,body-block
389
(tagbody
390
(when (eofp)
391
(go :eof))
392
(setq ,elem (aref ,data ,p))
393
(return-from ,body-block (progn ,@body))
394
:eof)))))))
395
396
(defmacro with-octets-parsing ((data &key start end) &body body)
397
(with-gensyms (g-end elem p body-block)
398
(once-only (data)
399
`(let ((,elem 0)
400
(,p ,(if start
401
`(or ,start 0)
402
0))
403
(,g-end ,(if end
404
`(or ,end (length ,data))
405
`(length ,data))))
406
(declare (type octet-vector ,data)
407
(type fixnum ,p ,g-end)
408
(type (unsigned-byte 8) ,elem))
409
(parsing-macrolet (,elem ,data ,p ,g-end)
410
((skip-conditions (elem-var elems)
411
`(or ,@(loop for el in elems
412
if (and (consp el)
413
(eq (car el) 'not))
414
collect `(not (= ,(char-code (cadr el)) ,elem-var))
415
else
416
collect `(= ,(char-code el) ,elem-var))))
417
(%match-case (&rest cases)
418
(check-match-cases cases)
419
(setf cases
420
(loop for case in cases
421
if (stringp (car case))
422
collect (cons (string-to-octets (car case))
423
(cdr case))
424
else
425
collect case))
426
`(prog1
427
(vector-case ,',elem (,',data)
428
,@(if (find 'otherwise cases :key #'car :test #'eq)
429
cases
430
(append cases
431
'((otherwise (go :match-failed))))))
432
(when (eofp) (go :eof))))
433
(%match-i-case (&rest cases)
434
(check-match-cases cases)
435
(setf cases
436
(loop for case in cases
437
if (stringp (car case))
438
collect (cons (string-to-octets (car case))
439
(cdr case))
440
else
441
collect case))
442
`(prog1
443
(vector-case ,',elem (,',data :case-insensitive t)
444
,@(if (find 'otherwise cases :key #'car :test #'eq)
445
cases
446
(append cases
447
'((otherwise (go :match-failed))))))
448
(when (eofp) (go :eof))))
449
(match-case
450
(&rest cases)
451
`(tagbody-with-match-failed ,',elem (%match-case ,@cases)))
452
(match-i-case
453
(&rest cases)
454
`(tagbody-with-match-failed ,',elem (%match-i-case ,@cases))))
455
(block ,body-block
456
(tagbody
457
(when (eofp)
458
(go :eof))
459
(setq ,elem (aref ,data ,p))
460
(return-from ,body-block (progn ,@body))
461
:match-failed
462
(error 'match-failed :elem ,elem)
463
:eof)))))))
464
465
(defmacro with-vector-parsing ((data &key (start 0) end) &body body &environment env)
466
(let ((data-type (variable-type* data env)))
467
(case data-type
468
(string `(with-string-parsing (,data :start ,start :end ,end) ,@body))
469
(octet-vector `(macrolet ((get-elem (form) `(code-char ,form))
470
(subseq* (data start &optional end)
471
`(octets-to-string ,data :start ,start :end ,end)))
472
(with-octets-parsing (,data :start ,start :end ,end) ,@body)))
473
(otherwise (once-only (data)
474
`(etypecase ,data
475
(string (with-string-parsing (,data :start ,start :end ,end) ,@body))
476
(octet-vector (macrolet ((get-elem (form) `(code-char ,form))
477
(subseq* (data start &optional end)
478
`(octets-to-string ,data :start ,start :end ,end)))
479
(with-octets-parsing (,data :start ,start :end ,end) ,@body)))))))))