Kind | Covered | All | % |
expression | 168 | 442 | 38.0 |
branch | 12 | 48 | 25.0 |
1
;;; obj/url.lisp --- Universal Resource Locators
2
3
;; Some conveniences for URLs.
4
5
;;; Commentary:
6
7
;; This package mostly just implements the bare-minimum provided by QURI:
8
;; URL-ENCODE and URL-DECODE
9
10
;;; Code:
11
(in-package :obj/url)
12
13
(define-condition uri-unexpected-end (uri-error simple-error)
14
((state :initarg :state :initform nil))
15
(:report (lambda (c s)
16
(format s "Parsing ended unexpectedly~:[~;~:* at ~A~]"
17
(slot-value c 'state)))))
18
19
(define-condition no-next-state (uri-error simple-error) ())
20
21
(define-condition url-decoding-error (uri-error) ())
22
23
;;; String Utils
24
(defun starts-with-scheme-p (string)
25
"Check whether the string STRING represents a URL which starts with
26
a scheme, i.e. something like 'https://' or 'mailto:'."
27
(loop with scheme-char-seen-p = nil
28
for c across string
29
when (or (char-not-greaterp #\a c #\z)
30
(digit-char-p c)
31
(member c '(#\+ #\- #\.) :test #'char=))
32
do (setq scheme-char-seen-p t)
33
else return (and scheme-char-seen-p
34
(char= c #\:))))
35
36
;;; Array Utils
37
(defmacro with-array-parsing ((elem p seq &optional (start 0) end key) &body body)
38
`(let (,elem)
39
(%with-array-parsing (,elem ,p ,seq ,start ,end ,key) ,@body)))
40
41
(defmacro %with-array-parsing ((elem p seq &optional (start 0) end key) &body body)
42
(with-gensyms (g-end no-next-state last key-fn)
43
(let ((eof-exists nil))
44
`(let (,@(and key `((,key-fn ,key)))
45
(,p ,start)
46
(,g-end (locally (declare #+sbcl (muffle-conditions compiler-note))
47
(or ,end (length ,seq)))))
48
(declare (ignorable ,p ,g-end))
49
,@(loop for (exp . rest) on body
50
while (and (listp exp) (eq (car exp) 'declare))
51
collect exp
52
do (setq body rest))
53
(macrolet ((goto (tag &optional (amount 1))
54
`(locally (declare (optimize (speed 3) (safety 0)))
55
(incf ,',p ,amount)
56
,@(if (eql amount 0)
57
()
58
`((when (= ,',p ,',g-end)
59
(go :eof))
60
(setq ,',elem
61
,',(if key
62
`(if ,key-fn
63
(funcall ,key-fn (aref ,seq ,p))
64
(aref ,seq ,p))
65
`(aref ,seq ,p)))))
66
(go ,tag))))
67
(tagbody
68
(when (= ,p ,g-end)
69
(go :eof))
70
(locally (declare (optimize (speed 3) (safety 0)))
71
(setq ,elem ,@(if key
72
`((if ,key-fn
73
(funcall ,key-fn (aref ,seq ,p))
74
(aref ,seq ,p)))
75
`((aref ,seq ,p)))))
76
,@(loop for (tagpart . rest) on body
77
for (tag . part) = tagpart
78
if (eq tag :eof)
79
append (progn
80
(setf eof-exists t)
81
`(,@tagpart
82
(go ,last)))
83
else
84
append
85
(list tag
86
`(macrolet ((redo (&optional (amount 1))
87
`(goto ,',tag ,amount))
88
(gonext (&optional (amount 1))
89
`(goto ,',(or (caar rest) no-next-state)
90
,amount)))
91
,@part
92
(error 'uri-unexpected-end :state ',tag))))
93
94
,no-next-state
95
(error 'no-next-state)
96
97
,@(if eof-exists
98
()
99
'(:eof))
100
101
,last))))))
102
103
;;; Encode
104
(definline url-encode-params (params &key (external-format *default-external-format*)
105
space-to-plus
106
(percent-encode t))
107
(declare (optimize (speed 3)))
108
(check-type params list)
109
(flet ((maybe-encode (string)
110
(if percent-encode
111
(url-encode string
112
:external-format external-format
113
:space-to-plus space-to-plus)
114
string)))
115
(with-output-to-string (s)
116
(loop for ((field . value) . rest) on params do
117
(write-string (maybe-encode field) s)
118
(when value
119
(write-char #\= s)
120
(check-type value (or string number octet-vector))
121
(write-string (maybe-encode
122
(if (numberp value)
123
(with-standard-io-syntax
124
(write-to-string value))
125
value))
126
s))
127
(when rest
128
(write-char #\& s))))))
129
130
131
(declaim ((simple-array character (16)) *hexdigit-char*))
132
(defvar *hexdigit-char*
133
(let ((ary (make-array 16 :element-type 'character)))
134
(loop for char across "0123456789ABCDEF"
135
for i from 0
136
do (setf (aref ary i) char))
137
ary))
138
139
(defun int-to-hexchar (byte)
140
(declare ((unsigned-byte 8) byte)
141
(optimize (speed 3) (safety 0)))
142
(let ((res (make-string 2)))
143
(multiple-value-bind (quotient remainder)
144
(floor byte 16)
145
(setf (aref res 0) (aref *hexdigit-char* quotient)
146
(aref res 1) (aref *hexdigit-char* remainder)))
147
res))
148
149
(defun unreservedp (byte)
150
(declare ((unsigned-byte 8) byte)
151
(optimize (speed 3) (safety 0)))
152
(or (<= (char-code #\A) byte (char-code #\Z))
153
(<= (char-code #\a) byte (char-code #\z))
154
(<= (char-code #\0) byte (char-code #\9))
155
#.`(or ,@(loop for char across "-._~"
156
collect `(= byte ,(char-code char))))))
157
158
(declaim ((simple-array string (97)) %byte-to-string))
159
(defvar %byte-to-string
160
(let ((ary (make-array 97 :element-type 'string :initial-element "")))
161
(loop for i from 0 to 96
162
unless (unreservedp i)
163
do (setf (aref ary i) (int-to-hexchar i)))
164
ary))
165
166
(defun url-encode (data &key (external-format *default-external-format*)
167
(start 0)
168
end
169
space-to-plus)
170
(declare ((or string octet-vector) data)
171
(integer start)
172
(optimize (speed 3) (safety 2)))
173
(let* ((octets (if (stringp data)
174
(string-to-octets data :external-format external-format :start start :end end)
175
data))
176
(res (make-array (* (length octets) 3) :element-type 'character :fill-pointer t))
177
(i 0))
178
(declare (octet-vector octets)
179
(string res)
180
(integer i))
181
(loop for byte of-type (unsigned-byte 8) across octets do
182
(cond
183
((and space-to-plus
184
(= byte #.(char-code #\Space)))
185
(setf (aref res i) #\+)
186
(incf i))
187
((< byte #.(char-code #\a))
188
(locally (declare (optimize (speed 3) (safety 0)))
189
(let ((converted (aref %byte-to-string byte)))
190
(if (zerop (length converted))
191
(progn
192
(setf (aref res i) (code-char byte))
193
(incf i))
194
(progn
195
(setf (aref res i) #\%)
196
(incf i)
197
(replace res converted :start1 i)
198
(incf i 2))))))
199
((unreservedp byte)
200
(setf (aref res i) (code-char byte))
201
(incf i))
202
(t
203
(setf (aref res i) #\%)
204
(incf i)
205
(replace res (int-to-hexchar byte) :start1 i)
206
(incf i 2))))
207
(setf (fill-pointer res) i)
208
res))
209
210
;;; Decode
211
(definline url-decode-params (data &key (delimiter #\&)
212
(external-format *default-external-format*)
213
(start 0)
214
end
215
lenient
216
(percent-decode t))
217
(declare ((or string octet-vector) data)
218
(integer start)
219
(character delimiter)
220
(optimize (speed 3) (safety 2)))
221
(let ((end (or end (length data)))
222
(start-mark nil)
223
(=-mark nil))
224
(declare (integer end))
225
(std/macs:collecting
226
(labels ((maybe-decode (string external-format start end)
227
(if percent-decode
228
(url-decode string
229
:external-format external-format
230
:start start
231
:end end
232
:lenient lenient)
233
(subseq string start end)))
234
(collect-pair (p)
235
(tagbody
236
(handler-bind ((url-decoding-error
237
(lambda (error)
238
(declare (ignore error))
239
(when lenient
240
(go continue)))))
241
(std/macs::collect
242
(cons (maybe-decode data external-format start-mark =-mark)
243
(maybe-decode data external-format (1+ =-mark) p))))
244
continue)
245
(setq start-mark nil
246
=-mark nil))
247
(collect-field (p)
248
(tagbody
249
(handler-bind ((url-decoding-error
250
(lambda (error)
251
(declare (ignore error))
252
(when lenient
253
(go continue)))))
254
(std/macs::collect
255
(cons (maybe-decode data external-format start-mark p)
256
nil)))
257
continue)
258
(setq start-mark nil)))
259
(with-array-parsing (char p data start end (and (not (stringp data)) #'code-char))
260
(start
261
(setq start-mark p)
262
(if lenient
263
(cond
264
((char= char #\=)
265
(setq =-mark p)
266
(goto parsing-value))
267
((char= char delimiter)
268
(redo)))
269
(when (or (char= char #\=)
270
(char= char delimiter))
271
(error 'uri-malformed-urlencoded-string)))
272
(gonext))
273
(parsing-field
274
(cond
275
((char= char #\=)
276
(setq =-mark p)
277
(gonext))
278
((char= char delimiter)
279
;; field only
280
(collect-field p)
281
(goto start)))
282
(redo))
283
(parsing-value
284
(cond
285
((char= char #\=)
286
(unless lenient
287
(error 'uri-malformed-urlencoded-string)))
288
((char= char delimiter)
289
(collect-pair p)
290
(goto start)))
291
(redo))
292
(:eof
293
(cond
294
(=-mark (collect-pair p))
295
(start-mark (collect-field p)))))))))
296
297
(defun url-decode (data &key (external-format *default-external-format*)
298
(start 0)
299
end
300
lenient)
301
(declare ((or string octet-vector) data)
302
(integer start)
303
(optimize (speed 3) (safety 2)))
304
(let* ((end (or end (length data)))
305
(buffer (make-array (- end start)
306
:element-type '(unsigned-byte 8)))
307
(i 0)
308
parsing-encoded-part)
309
(declare (integer end i)
310
(octet-vector buffer))
311
(flet ((write-to-buffer (byte)
312
(declare (optimize (speed 3) (safety 0)))
313
(setf (aref buffer i) byte)
314
(incf i)))
315
(with-array-parsing (char p data start end (and (not (stringp data)) #'code-char))
316
(parsing
317
(cond
318
((char= char #\%)
319
(gonext))
320
((char= char #\+)
321
(write-to-buffer #.(char-code #\Space))
322
(redo))
323
(t
324
(write-to-buffer (char-code char))
325
(redo))))
326
(parsing-encoded-part
327
(setq parsing-encoded-part char)
328
(gonext))
329
(parsing-encoded-part-second
330
(handler-bind ((url-decoding-error
331
(lambda (error)
332
(declare (ignore error))
333
(when lenient
334
(write-to-buffer #.(char-code #\%))
335
(write-to-buffer (char-code parsing-encoded-part))
336
(write-to-buffer (char-code char))
337
(setq parsing-encoded-part nil)
338
(goto parsing)))))
339
(write-to-buffer
340
(+ (* 16 (hexchar-to-int parsing-encoded-part))
341
(hexchar-to-int char))))
342
(setq parsing-encoded-part nil)
343
(goto parsing))
344
(:eof
345
(when parsing-encoded-part
346
(error 'url-decoding-error)))))
347
;; TODO 2025-06-13: handle leniency
348
(octets-to-string buffer :end i :external-format external-format)))
349