Coverage report: /home/ellis/comp/core/lib/obj/uri/state.lisp
Kind | Covered | All | % |
expression | 917 | 1796 | 51.1 |
branch | 154 | 354 | 43.5 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; obj/uri/state.lisp --- Parser state
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
;; A note about parser naming conventions.
8
;; There are two types of functions, where <name> comes from the LHS
9
;; of the ABNF grammar:
10
;; state-<name> :: scan and return values based on the parse. The
11
;; first value is always the "next" index beyond the parse.
12
;; The subsequent values are rule specific, and documented in
13
;; the functions themselves.
14
;; scan-<name> :: scan for and return either nil or an index. If
15
;; there is match, return the "next" index beyond the match,
18
;; Rules marked `TERMINAL' must check for `at-end-p', since they must
19
;; terminate the parse for the input to be valid.
20
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
23
(defun state-uri (string start end
24
&aux i scheme userinfo host port path query fragment
25
nid nss q-component f-component r-component i2
26
colon urn-scheme file-scheme)
27
;; rule 01: URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
29
;; values: i scheme userinfo host port path query fragment
30
(if* (and (multiple-value-setq (i scheme) (state-scheme string start end))
31
(setq colon (looking-at #\: string i end))
32
(not (setq urn-scheme (looking-at "urn" string start end t)))
33
(not (setq file-scheme (looking-at "file" string start end t)))
34
(multiple-value-setq (i2 userinfo host port path)
35
(state-hier-part string (1+ i) end)))
36
then ;; Have hier-part...
38
(when (at-end-p i end)
39
(return-from state-uri
40
(values i scheme userinfo host port path)))
42
(when (looking-at #\? string i end)
43
(if* (multiple-value-setq (i2 query)
44
(state-query string (incf i) end))
46
else (setq query #.*uri-null-marker*)))
48
(when (looking-at #\# string i end)
49
(if* (multiple-value-setq (i2 fragment)
50
(state-fragment string (incf i) end))
52
else (setq fragment #.*uri-null-marker*)))
54
(when (at-end-p i end)
55
(values i scheme userinfo host port path query fragment))
57
then ;; values: i "urn" nid r-component nil nss q-component f-component
58
(when (multiple-value-setq (i nid nss q-component f-component
60
(state-urn-namestring string i end))
70
elseif (and file-scheme
71
(multiple-value-setq (i path)
72
(state-uri-file string colon end)))
73
then (values i scheme nil nil nil path)
74
elseif (and scheme colon)
75
then ;; Something like "mailto:foo@bar.com". Put the
76
;; the non-scheme part into the path
77
(values end scheme nil nil nil (xsubseq colon end))))
79
;; called by parse-uri-string-rfc3986
80
(defun state-uri-reference (string start end
81
&aux i scheme userinfo host port path query
83
;; rule 02: URI-reference = URI / relative-ref
84
;; values: i scheme host userinfo port path query fragment
85
(if* (multiple-value-setq (i scheme userinfo host port path query
87
(state-uri string start end))
88
then (values i scheme userinfo host port path query fragment)
89
elseif (multiple-value-setq (i userinfo host port path query fragment)
90
(state-relative-ref string start end))
91
then (values i nil userinfo host port path query fragment)))
93
;; called by parse-uri-string-rfc3986
94
(defun state-absolute-uri (string start end
95
&aux i scheme userinfo host port path query i2
96
colon urn-scheme file-scheme)
97
;; rule 03: absolute-URI = scheme ":" hier-part [ "?" query ]
99
;; values: i scheme userinfo host port path query
100
(if* (and (multiple-value-setq (i scheme) (state-scheme string start end))
101
(setq colon (looking-at #\: string i end))
102
(not (setq urn-scheme (looking-at "urn" string start end t)))
103
(not (setq file-scheme (looking-at "file" string start end t)))
104
(multiple-value-setq (i2 userinfo host port path)
105
(state-hier-part string colon end)))
106
then ;; so far: scheme + ":" + hier-part
108
(if* (at-end-p i end)
109
then (values i scheme userinfo host port path)
110
elseif (and (looking-at #\? string i end)
111
(multiple-value-setq (i query)
112
(state-query string (incf i) end))
114
then (values i scheme userinfo host port path query))
116
then ;; values: i "urn" nid r-component nil nss q-component f-component
117
(multiple-value-bind (i3 nid nss q-component f-component r-component)
118
(state-urn-namestring string (incf i) end)
122
r-component ;userinfo
127
f-component ;fragment
129
elseif (and file-scheme
130
(multiple-value-setq (i path)
131
(state-uri-file string colon end)))
132
then (values i scheme nil nil nil path)
133
elseif (and scheme colon)
134
then ;; Something like "mailto:foo@bar.com". Put the
135
;; the non-scheme part into the path
136
(values end scheme nil nil nil (xsubseq colon end))))
138
(defun state-hier-part (string start end &aux i userinfo host port
140
;; rule 04: hier-part = "//" authority path-abempty
141
;; / "//" path-absolute ***NEW***
145
;; values: i userinfo host port path
146
(if* (and (setq i (looking-at "//" string start end))
147
(multiple-value-setq (i userinfo host port)
148
(state-authority string i end)))
149
then (if* (multiple-value-setq (i2 path) (state-path-abempty string i end))
150
then (values i2 userinfo host port path)
151
else (values i userinfo host port))
152
elseif (and (setq i (looking-at "//" string start end))
153
(multiple-value-setq (i path)
154
(state-path-absolute string i end)))
155
then (values i nil nil nil path)
157
(multiple-value-setq (i path) (state-path-absolute string start end))
158
(multiple-value-setq (i path) (state-path-rootless string start end))
159
(multiple-value-setq (i path) (state-path-empty string start end)))
160
then (values i nil nil nil path)))
162
(defun state-relative-ref (string start end &aux i2 query fragment)
163
;; rule 05: relative-ref = relative-part [ "?" query ] [ "#" fragment ]
165
;; values: i userinfo host port path query fragment
166
(multiple-value-bind (i userinfo host port path)
167
(state-relative-part string start end)
169
(if* (at-end-p i end)
170
then (values i userinfo host port path)
171
else (when (looking-at #\? string i end)
172
(if* (multiple-value-setq (i2 query)
173
(state-query string (incf i) end))
175
else (setq query #.*uri-null-marker*)))
177
(when (looking-at #\# string i end)
178
(if* (multiple-value-setq (i2 fragment)
179
(state-fragment string (incf i) end))
181
else (setq fragment #.*uri-null-marker*)))
183
(when (at-end-p i end)
184
(values i userinfo host port path query fragment))))))
186
(defun state-relative-part (string start end
187
&aux (i start) path userinfo host port i2)
188
;; rule 06: relative-part = "//" authority path-abempty
192
;; values: i userinfo host port path
193
(if* (and (setq i (looking-at "//" string i end))
194
(multiple-value-setq (i userinfo host port)
195
(state-authority string i end)))
196
then (if* (multiple-value-setq (i2 path) (state-path-abempty string i end))
197
then (values i2 userinfo host port path)
198
else (values i userinfo host port))
200
(multiple-value-setq (i path) (state-path-absolute string start end))
201
(multiple-value-setq (i path) (state-path-noscheme string start end))
202
(multiple-value-setq (i path) (state-path-empty string start end)))
203
then (values i nil nil nil path)))
205
(defun state-scheme (string start end &aux i scheme)
206
;; rule 07: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
208
(when (looking-at *alpha-bitvector* string start end)
209
(if* (setq i (scan-forward string (1+ start) end *scheme-bitvector*))
210
then (setq scheme (xsubseq start i))
211
else ;; just the one char
212
(setq scheme (xsubseq start (setq i (1+ start)))))
215
(defun state-authority (string start end &aux i i2 userinfo host ipv6 zone-id
217
;; rule 08: authority = [ userinfo "@" ] host [ ":" port ]
218
;; values: i userinfo host port
220
((and (multiple-value-setq (i userinfo) (state-userinfo string start end))
221
(setq i (looking-at #\@ string i end))
222
(multiple-value-setq (i host ipv6 zone-id)
223
(state-host string i end)))
224
;; Somewhat of a hack, but I don't want to change all the functions
225
;; to expect even more multiple values:
226
(when ipv6 (setq host (list host ipv6 zone-id)))
228
;; have: userinfo "@" host
229
(if* (not (setq i2 (looking-at #\: string i end)))
230
then ;; done, return what we have
231
(values i userinfo host)
232
elseif (multiple-value-setq (i port) (state-port string i2 end))
233
then ;; found ":" and port
234
(values i userinfo host port)
235
else ;; found ":" and NO port
236
(values i2 userinfo host)))
238
;; no userinfo, check for host
239
((multiple-value-setq (i host ipv6 zone-id) (state-host string start end))
240
;; Somewhat of a hack, but I don't want to change all the functions
241
;; to expect even more multiple values:
242
(when ipv6 (setq host (list host ipv6 zone-id)))
244
(if* (not (setq i2 (looking-at #\: string i end)))
245
then (values i nil host)
246
elseif (multiple-value-setq (i port) (state-port string i2 end))
247
then (values i nil host port)
248
else ;; found ":" and NO port
249
(values i2 nil host)))))
251
(defun state-userinfo (string start end &aux i)
252
;; rule 09: userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
254
;; This one is more difficult, due to the alternation with
256
;; *( unreserved / pct-encoded / sub-delims / ":" )
257
;; All the others are just characters, but pct-encoded is a
258
;; specific sequence of characters.
259
(when (setq i (scan-forward string start end *userinfo-bitvector*
261
(values i (xsubseq start i))))
263
(defun state-port (string start end &aux i)
264
;; rule 11: port = *DIGIT
265
(when (setq i (scan-forward string start end *digit-bitvector*))
266
(values i (xsubseq start i))))
268
(defun state-host (string start end &aux i host ipv6 zone-id)
269
;; rule 10: host = IP-literal / IPv4address / reg-name
270
;; values: i host ipv6 zone-id
271
(if* (multiple-value-setq (i ipv6 zone-id)
272
(state-ip-literal string start end))
273
then (values i nil ipv6 zone-id)
275
(multiple-value-setq (i host) (state-ipv4address string start end))
276
(multiple-value-setq (i host) (state-reg-name string start end)))
277
then (values i host)))
279
(defun state-ip-literal (string start end &aux ip-start i2 end-ip ip zone-id)
280
;; rule 12a: IP-literal = "[" ( IPv6addrz / IPvFuture ) "]"
281
;; values: i ipaddr zone-id
282
;; NOTE: the [ and ] are not returned as part of the host.
283
(when (and (setq ip-start (looking-at #\[ string start end))
284
(or (multiple-value-setq (end-ip ip zone-id)
285
(state-ipv6addrz string ip-start end))
286
(multiple-value-setq (end-ip ip zone-id)
287
(state-ipvfuture string ip-start end)))
288
(setq i2 (looking-at #\] string end-ip end)))
289
(values i2 ip zone-id)))
291
(defun state-ipv6addrz (string start end &aux ip-end zone-start zone-end)
292
;; rule 12b: IPv6addrz = IPv6address [ "%25" ZoneID ]
293
;; values: i ipaddr zone-id
294
(when (setq ip-end (scan-ipv6address string start end))
295
(if* (and (setq zone-start (looking-at "%25" string ip-end end))
296
(setq zone-end (scan-zone-id string zone-start end)))
297
then (values zone-end
298
(xsubseq start ip-end)
299
(xsubseq zone-start zone-end))
300
else (values ip-end (xsubseq start ip-end)))))
302
(defun scan-zone-id (string start end)
303
;; rule 12c: ZoneID = 1*( unreserved / pct-encoded )
304
(scan-forward string start end *unreserved-bitvector* #'scan-pct-encoded))
306
(defun state-ipvfuture (string start end &aux i)
308
;; IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
309
;; values: i ipvfuture
310
(when (and (setq i (looking-at #\v string start end))
311
(setq i (scan-forward string i end *hexdig-bitvector*))
312
(setq i (looking-at #\. string i end))
313
(setq i (scan-forward string i end *ipvfuture-bitvector*)))
314
(values i (xsubseq start i))))
316
(defun scan-ipv6address (string start end &aux (i start))
318
;; IPv6address = 6( h16 ":" ) ls32 [1]
319
;; / "::" 5( h16 ":" ) ls32 [2]
320
;; / [ h16 ] "::" 4( h16 ":" ) ls32 [3]
321
;; / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 [4]
322
;; / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 [5]
323
;; / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 [6]
324
;; / [ *4( h16 ":" ) h16 ] "::" ls32 [7]
325
;; / [ *5( h16 ":" ) h16 ] "::" h16 [8]
326
;; / [ *6( h16 ":" ) h16 ] "::" [9]
329
(and (setq i (scan-h16-colon-pairs string start end 6 6)) ;; [1]
330
(setq i (scan-ls32 string i end)))
331
(and (setq i (looking-at "::" string start end)) ;; [2]
332
(setq i (scan-h16-colon-pairs string i end 5 5))
333
(setq i (scan-ls32 string i end)))
334
(and (setq i (scan-h16 string start end)) ;; [3]
335
(setq i (looking-at "::" string i end))
336
(setq i (scan-h16-colon-pairs string i end 4 4))
337
(setq i (scan-ls32 string i end)))
338
(setq i (scan-ipv6address-part4 string start end)) ;; [4]
339
(setq i (scan-ipv6address-part5 string start end)) ;; [5]
340
(setq i (scan-ipv6address-part6 string start end)) ;; [6]
341
(setq i (scan-ipv6address-part7 string start end)) ;; [7]
342
(setq i (scan-ipv6address-part8 string start end)) ;; [8]
343
(and (setq i (scan-h16-colon-pairs string start end 0 6)) ;; [9]
344
(setq i (scan-h16 string i end))
345
(setq i (looking-at "::" string i end)))
346
(setq i (looking-at "::" string start end)) ;; [10]
349
(defun scan-ipv6address-part4 (string start end &aux i)
350
;; rule: [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
351
(or (and (setq i (looking-at "::" string start end))
352
(setq i (scan-h16-colon-pairs string i end 3 3))
353
(setq i (scan-ls32 string i end)))
355
(and (setq i (scan-h16-colon-pairs string start end 0 1))
356
(setq i (scan-h16 string i end))
357
(setq i (looking-at "::" string i end))
358
(setq i (scan-h16-colon-pairs string i end 3 3))
359
(setq i (scan-ls32 string i end)))))
361
(defun scan-ipv6address-part5 (string start end &aux i)
362
;; rule: [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
363
(or (and (setq i (looking-at "::" string start end))
364
(setq i (scan-h16-colon-pairs string i end 2 2))
365
(setq i (scan-ls32 string i end)))
367
(and (setq i (scan-h16-colon-pairs string start end 0 2))
368
(setq i (scan-h16 string i end))
369
(setq i (looking-at "::" string i end))
370
(setq i (scan-h16-colon-pairs string i end 2 2))
371
(setq i (scan-ls32 string i end)))))
373
(defun scan-ipv6address-part6 (string start end &aux i)
374
;; rule: [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
375
(or (and (setq i (looking-at "::" string start end))
376
(setq i (scan-h16 string i end))
377
(setq i (looking-at #\: string i end))
378
(setq i (scan-ls32 string i end)))
379
(and (setq i (scan-h16-colon-pairs string start end 0 3))
380
(setq i (scan-h16 string i end))
381
(setq i (looking-at "::" string i end))
382
(setq i (scan-h16 string i end))
383
(setq i (looking-at #\: string i end))
384
(setq i (scan-ls32 string i end)))))
386
(defun scan-ipv6address-part7 (string start end &aux i)
387
;; rule: [ *4( h16 ":" ) h16 ] "::" ls32
388
(or (and (setq i (looking-at "::" string start end))
389
(setq i (scan-ls32 string i end)))
390
(and (setq i (scan-h16-colon-pairs string start end 0 4))
391
(setq i (scan-h16 string i end))
392
(setq i (looking-at "::" string i end))
393
(setq i (scan-ls32 string i end)))))
395
(defun scan-ipv6address-part8 (string start end &aux i)
396
;; rule: [ *5( h16 ":" ) h16 ] "::" h16
397
(or (and (setq i (looking-at "::" string start end))
398
(setq i (scan-h16 string i end)))
399
(and (setq i (scan-h16-colon-pairs string start end 0 5))
400
(setq i (scan-h16 string i end))
401
(setq i (looking-at "::" string i end))
402
(setq i (scan-h16 string i end)))))
404
(defun scan-h16-colon-pairs (string start end min max
408
;; subrule: min*max( h16 ":" )
409
;; Scan from min to max pairs of: h16 + ":"
410
;; NOTE: this function needs to lookahead to make sure there isn't a ::
412
(loop while (and (< nfound max)
413
(setq i2 (scan-h16 string i end))
414
(setq i2 (looking-at #\: string i2 end))
416
(not (looking-at #\: string i2 end)))
420
(when (<= min nfound max)
423
(defun scan-h16 (string start end &aux i)
424
;; rule 15: h16 = 1*4HEXDIG
425
(when (null start) (error "start is null"))
427
(scan-forward string start
429
(min end (+ start 5))
431
(<= 1 (the fixnum (- i start)) 4))
434
(defun scan-ls32 (string start end &aux i)
435
;; rule 16: ls32 = ( h16 ":" h16 ) / IPv4address
436
(if* (and (setq i (scan-h16 string start end))
437
(setq i (looking-at #\: string i end))
438
(setq i (scan-h16 string i end)))
440
else (scan-ipv4address string start end)))
442
(defun scan-ipv4address (string start end &aux i)
444
;; IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
446
(and (setq i (scan-dec-octet string start end))
447
(setq i (looking-at #\. string i end))
448
(setq i (scan-dec-octet string i end))
449
(setq i (looking-at #\. string i end))
450
(setq i (scan-dec-octet string i end))
451
(setq i (looking-at #\. string i end))
452
(scan-dec-octet string i end)))
454
(defun state-ipv4address (string start end &aux i)
456
(when (setq i (scan-ipv4address string start end))
457
(values i (xsubseq start i))))
459
(defun scan-dec-octet (string start end &aux i)
461
;; dec-octet = DIGIT ; 0-9
462
;; / %x31-39 DIGIT ; 10-99
463
;; / "1" 2DIGIT ; 100-199
464
;; / "2" %x30-34 DIGIT ; 200-249
465
;; / "25" %x30-35 ; 250-255
466
;; Honestly, the above makes little sense to me. The truth is,
467
;; "http://256.0.0.1/" is a valid URI because even though it doesn't
468
;; parse as a dec-octet, it does parse as a reg-name (rule 19).
469
(when (and (setq i (scan-forward string start end *digit-bitvector*))
470
(<= 1 (- i start) 3))
473
(defun state-reg-name (string start end &aux i)
474
;; rule 19: reg-name = *( unreserved / pct-encoded / sub-delims )
476
(when (setq i (scan-forward string start end *reg-name-bitvector*
478
(values i (xsubseq start i))))
480
(defun state-path-abempty (string start end &aux i i2)
481
;; rule 21: path-abempty = *( "/" *pchar )
483
;; NOTE: if *strict-parse* is nil, we allow the leading "/" to be "//",
484
;; because it is a common typo in HTML and sometimes fixing it is
485
;; not under our control. Browsers work fine with this
487
(when (and (not *strict-parse*)
488
(looking-at "//" string start end))
489
;; double leading slash is changed to a single leading slash.
494
(if* (looking-at #\/ string i end)
495
then (if* (setq i2 (scan-pchar string (1+ i) end))
497
else (incf i) ;; advance for the / we found
501
(values i (xsubseq start i))))
503
(defun state-path-absolute (string start end &aux (i start) i2 have-slash)
504
;; rule 22: path-absolute = "/" [ 1*pchar *( "/" *pchar ) ]
505
;; remember: [ foo ] means 0*1( foo )
507
(when (setq i (looking-at #\/ string i end))
508
(when (setq i2 (scan-pchar string i end))
509
;; parse is good to here
512
;; Now, look for *( "/" *pchar )
513
(loop while (and (setq have-slash (looking-at #\/ string i end))
514
(setq i2 (scan-pchar string have-slash end)))
516
;; If it ends with a /:
517
(when (and have-slash (not i2)) (incf i)))
518
(values i (xsubseq start i))))
520
(defun state-path-noscheme (string start end &aux (i start) i2 have-slash)
521
;; rule 23: path-noscheme = segment-nz-nc *( "/" *pchar )
523
(when (setq i (scan-segment-nz-nc string i end))
524
(loop while (and (setq have-slash (looking-at #\/ string i end))
525
(setq i2 (scan-pchar string (1+ i) end)))
527
(when (and have-slash (not i2))
528
;; for the slash we did see:
530
(values i (xsubseq start i))))
532
(defun state-path-rootless (string start end &aux (i start) i2)
533
;; rule 24: path-rootless = 1*pchar *( "/" *pchar )
535
(when (setq i (scan-pchar string i end))
536
(loop while (and (looking-at #\/ string i end)
537
;; The pchar after the slash is optional
538
(setq i2 (or (scan-pchar string (1+ i) end)
541
(values i (xsubseq start i))))
543
(defun state-path-empty (string start end)
544
;; rule 25: path-empty = 0<pchar>
546
;; NOTE: the RHS was updated in RFC 3986 errata to be "", but that is
547
;; bogus. "" is very different the 0<pchar>.
548
;; Return nil when looking at a `pchar' and the null marker otherwise.
549
(declare (optimize (safety 0)))
550
(if* (looking-at *pchar-bitvector* string start end)
552
else (values start #.*uri-null-marker*)))
554
(defun scan-segment-nz-nc (string start end)
555
;; rule 28: 1*( unreserved / pct-encoded / sub-delims / "@" )
556
;; In english: pchar without #\:
557
(declare (optimize (safety 0)))
558
(scan-forward string start end *segment-nz-nc-bitvector* #'scan-pct-encoded))
560
(defun scan-pchar (string start end)
561
;; rule 29: pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
562
(declare (optimize (safety 0)))
563
(scan-forward string start end *pchar-bitvector* #'scan-pct-encoded))
565
(defun state-query (string start end &aux i)
566
;; rule 30: *( pchar / "/" / "?" )
569
(scan-forward string start end
571
then *query-bitvector-strict*
572
else *query-bitvector-non-strict*)
574
(values i (xsubseq start i))))
576
(defun state-fragment (string start end &aux i)
577
;; rule 31: *( pchar / "/" / "?" / "#" )
578
;; NOTE: Allegro CL added "#" in non-strict mode
579
;; values: i fragment
581
(scan-forward string start end
583
then *fragment-bitvector-strict*
584
else *fragment-bitvector-non-strict*)
586
(values i (xsubseq start i))))
588
(defvar .pct-encoded. nil)
590
(defun scan-pct-encoded (string start end)
591
;; This scans a single percent encoded sequence. It does no conversion.
592
;; It also sets .pct-encoded., which is a boolean that says "this string
593
;; has some percent encoded characters in it."
595
;; rule 32: pct-encoded = "%" HEXDIG HEXDIG
596
(declare (fixnum start end))
597
(and (> (the fixnum (- end start)) 2) ;; ... at least 3 chars remaining
598
(looking-at #\% string start end)
599
(looking-at *hexdig-bitvector* string (incf start) end)
600
(looking-at *hexdig-bitvector* string (incf start) end)
601
(setq .pct-encoded. start)))
603
(defun state-uri-file (string start end &aux i)
604
;; rule: uri-file = "//" <anything>
607
;; It's not the job of the URI parser to validate file:// URIs.
608
(when (setq i (looking-at "//" string start end))
609
(values i (xsubseq i end))))
611
(defun state-urn-namestring (string start end
612
&aux (i start) i2 nid nss q-component f-component
614
;; rule 50: namestring = assigned-name
616
;; [ "#" f-component ]
617
;; rule 58: f-component = fragment
618
;; START is just after "urn:".
619
;; values: i nid nss q-component f-component r-component
620
(when (multiple-value-setq (i2 nid nss)
621
(state-urn-assigned-name string start end))
622
(when (at-end-p i2 end)
623
(return-from state-urn-namestring (values i2 nid nss)))
626
(when (multiple-value-setq (i2 r-component q-component)
627
(state-urn-rq-components string i end))
628
(when (at-end-p i2 end)
629
(return-from state-urn-namestring
630
(values i2 nid nss q-component nil r-component)))
632
;; more STRING to process...
634
(when (looking-at #\# string i end)
635
(if* (multiple-value-setq (i2 f-component)
636
;; Yes, the same fragment (RFC 8141 defines f-component in
637
;; terms of RFC 3986's fragment).
638
(state-fragment string (incf i) end))
640
else (setq f-component #.*uri-null-marker*)))
642
(when (at-end-p i end)
643
(values i2 nid nss q-component f-component r-component)))))
645
(defun state-urn-assigned-name (string start end &aux i i2 nid nss)
646
;; rule 51: assigned-name = "urn" ":" NID ":" NSS
647
;; START is just after "urn:".
649
(when (and (multiple-value-setq (i2 nid) (state-urn-nid string start end))
650
(looking-at #\: string i2 end)
652
(multiple-value-setq (i2 nss) (state-urn-nss string i end)))
653
(values i2 nid nss)))
655
(defun state-urn-nid (string start end &aux (i start))
656
;; rule 52: NID = (alphanum) 0*30(ldh) (alphanum)
657
;; rule 53: ldh = alphanum / "-"
659
(declare (fixnum start end i))
660
(when (and (looking-at *alphanum-bitvector* string i end)
661
(setq i (scan-forward string (1+ i) end *alphanum+-bitvector*))
662
;; Check for <= 32 chars, thus far
663
(<= (the fixnum (- i start))
665
;; If the last one was alphanum, then we're done.
666
;; If the last one was NOT alphanum, then:
667
;; 1. make sure we had 30 chars (not 31)
668
;; 2. look for another, single alphanum
669
(or (looking-at *alphanum-bitvector* string (1- i) end)
670
(and (<= (the fixnum (- i start))
672
(not (at-end-p i end))
674
(scan-forward string i end *alphanum-bitvector*)))))
675
(values i (xsubseq start i))))
677
(defun state-urn-nss (string start end &aux i i2)
678
;; rule 54: NSS = pchar *(pchar / "/")
680
(when (setq i (scan-pchar string start (1+ start)))
681
(if* (setq i2 (scan-forward
683
;; See the definition of *urn-nss-chars* for
684
;; why we don't use *pchar/-bitvector* here.
687
then (values i2 (xsubseq start i2))
688
else (values i (xsubseq start i)))))
690
(defun state-urn-rq-components (string start end
691
&aux i ri qi r-component q-component)
692
;; rule 55: rq-components = [ "?+" r-component ]
693
;; [ "?=" q-component ]
694
;; values: i r-component q-component
695
(when (and (setq i (looking-at #\? string start end))
696
(not (at-end-p i end))
697
(or (setq ri (looking-at #\+ string i end))
698
(setq qi (looking-at #\= string i end)))
699
(not (at-end-p (or ri qi) end)))
700
(when (and ri (multiple-value-setq (i r-component)
701
(state-urn-r-component string ri end)))
702
(when (at-end-p i end)
703
(return-from state-urn-rq-components
704
(values i r-component)))
706
(if* (setq qi (looking-at #\? string i end))
707
then (when (and (not (at-end-p qi end))
708
(setq qi (looking-at #\= string qi end))
709
(not (at-end-p qi end)))
710
(when (multiple-value-setq (i q-component)
711
(state-urn-q-component string qi end))
712
(return-from state-urn-rq-components
713
(values i r-component q-component))))
714
else (return-from state-urn-rq-components (values i r-component))))
715
;; The r-component branch didn't yield anything, check for q-component
717
(when (and qi (multiple-value-setq (i q-component)
718
(state-urn-q-component string qi end)))
719
(return-from state-urn-rq-components
720
(values i nil q-component)))))
722
(defun scan-q-component-or-pct-encoded (string i end &aux i2)
723
;; Do what scan-pct-encoded does, BUT STOP scanning if we see "?=",
724
;; because that is the start of the q-component.
726
;; This function is called by SCAN-FORWARD at each character position in
729
(when (setq i2 (scan-pct-encoded string i end))
730
(return-from scan-q-component-or-pct-encoded i2))
732
(when (setq i2 (looking-at #\? string i end))
734
;; at least 2 chars remaining (for 1 char after ?=)
736
(looking-at #\= string i2 end))
737
then ;; stop scanning
738
(return-from scan-q-component-or-pct-encoded nil)
739
else ;; return the index after the ?
740
(return-from scan-q-component-or-pct-encoded i2))))
742
(defun state-urn-r-component (string start end &aux i i2)
743
;; rule 56: r-component = pchar *( pchar / "/" / "?" )
744
;; values: i r-component
745
(when (setq i (scan-pchar string start end))
746
(when (at-end-p i end)
747
(return-from state-urn-r-component
748
(values i (xsubseq start i))))
753
;; NOTE: we don't use *query-bitvector-strict* because we need
754
;; to handle #\? specially (see the next argument).
755
*urn-query-bitvector*
756
;; NOTE: Because r-component can contain "?" without percent
757
;; encoding, when processing the r-component we need to
758
;; look ahead to make sure there is no #\= after each
759
;; #\? (since that means we have a q-component).
760
#'scan-q-component-or-pct-encoded))
761
(values i2 (xsubseq start i2)))
763
;; We immediately ran into ?=, so return what we found so far:
764
(t (values i (xsubseq start i))))))
766
(defun state-urn-q-component (string start end &aux i)
767
;; rule 57: q-component = pchar *( pchar / "/" / "?" )
768
;; values: i q-component
769
(when (setq i (looking-at *pchar-bitvector* string start end))
770
(when (at-end-p i end)
771
(return-from state-urn-q-component
772
(values i (xsubseq start i))))
774
(scan-forward string i end *query-bitvector-strict*
776
(values i (xsubseq start i)))))