Coverage report: /home/ellis/comp/core/lib/obj/uri/punycode.lisp
Kind | Covered | All | % |
expression | 80 | 407 | 19.7 |
branch | 9 | 50 | 18.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; obj/uri/punycode.lisp --- RFC 3492 Punycode strings
3
;; This library was written by Shinmera
4
;; https://github.com/Shinmera/punycode.git
8
;; ref: https://datatracker.ietf.org/doc/html/rfc3492
10
;; A Bootstring encoding of Unicode for Internationalized Domain Names in
11
;; Applications (IDNA)
14
(in-package :obj/uri/punycode)
16
(defconstant INITIAL-N #x80)
17
(defconstant INITIAL-BIAS 72)
22
(defconstant DAMP 700)
24
(defun encode-digit (code)
25
(code-char (+ code (if (< code 26) 97 22))))
27
(defun adapt (delta num-points first-time-p)
28
(setf delta (if first-time-p
31
(incf delta (truncate delta num-points))
32
(loop for k from 0 by BASE
33
while (< (ash (* TMAX (- BASE TMIN)) -1) delta)
34
do (setf delta (truncate delta (- BASE TMIN)))
35
finally (return (+ k (truncate (* delta (+ 1 (- BASE TMIN))) (+ delta SKEW))))))
37
(defmacro %with-stream (stream-ish &body body)
38
(std:with-gensyms (thunk)
39
`(flet ((,thunk (,stream-ish)
43
(with-output-to-string (,stream-ish)
44
(,thunk ,stream-ish)))
46
(,thunk *standard-output*))
48
(,thunk ,stream-ish))))))
50
(defun encode-punycode (string &optional out)
56
(loop for i from 0 below (length string)
57
for char = (char string i)
58
for code = (char-code char)
64
(loop with handled = basic
65
for m = most-positive-fixnum
66
for handled+1 = (1+ handled)
67
while (< handled (length string))
68
do (loop for char across string
69
for code = (char-code char)
70
do (when (<= n code (1- m))
72
(incf delta (* (- m n) handled+1))
74
(loop for char across string
75
for code = (char-code char)
80
(loop for k from BASE by BASE
81
for tt = (cond ((<= k bias) TMIN)
82
((<= (+ bias TMAX) k) TMAX)
84
do (when (< q tt) (return))
85
(write-char (encode-digit (+ tt (mod (- q tt) (- BASE tt)))) out)
86
(setf q (truncate (- q tt) (- BASE tt))))
87
(write-char (encode-digit q) out)
88
(setf bias (adapt delta handled+1 (= handled basic)))
94
(defun decode-digit (char)
95
(let ((code (char-code char)))
96
(cond ((<= #x30 code #x39) (+ 26 (- code #x30)))
97
((<= #x41 code #x5A) (- code #x41))
98
((<= #x61 code #x7A) (- code #x61))
101
(defun decode-punycode (string &optional out)
105
(basic (or (position #\- string :from-end T) 0))
107
(uni (make-array (length string) :element-type 'character)))
108
;; This is gross, I know. But we can't stream things out nicely because
109
;; later mixed codepoints can have the same target index, causing earlier
110
;; codepoints to be shifted downwards, which we obviously cannot do if we
111
;; already emitted the codepoint to stream. So we instead copy to a string
113
(loop for i from 0 below basic
114
do (setf (char uni i) (char string i)))
115
(flet ((insert (pos char)
116
(loop for i downfrom written above pos
117
do (setf (char uni i) (char uni (1- i))))
118
(setf (char uni pos) char)))
119
(loop with in = (if (< 0 basic) (1+ basic) 0)
121
while (< in (length string))
123
for k from BASE by BASE
124
for digit = (decode-digit (char string in))
127
(let ((tt (cond ((<= k bias) TMIN)
128
((<= (+ bias TMAX) k) TMAX)
130
(when (< digit tt) (return))
131
(setf w (* w (- base tt)))))
133
(setf bias (adapt (- i old-i) written (= old-i 0)))
134
(incf n (truncate i written))
135
(setf i (mod i written))
136
(insert i (code-char n))
139
(write-string uni out :end written))))
141
(defun encode-domain (string &optional out)
143
(loop for start = 0 then (1+ end)
144
for end = (or (position #\. string :start start) (length string))
145
do (cond ((loop for i from start below end
146
thereis (< 127 (char-code (char string i))))
147
(write-string "xn--" out)
148
(encode-punycode (subseq string start end) out))
150
(write-string string out :start start :end end)))
151
(if (< end (length string))
155
(defun decode-domain (string &optional out)
157
(loop for start = 0 then (1+ end)
158
for end = (or (position #\. string :start start) (length string))
159
do (cond ((and (< (length "xn--") (- end start))
160
(string= "xn--" string :start2 start :end2 (+ start (length "xn--"))))
161
(decode-punycode (subseq string (+ start (length "xn--")) end) out))
163
(write-string string out :start start :end end)))
164
(if (< end (length string))
168
(eval-when (:load-toplevel)
169
(pushnew :rfc3492 *features*))