Coverage report: /home/ellis/.stash/quicklisp/dists/ultralisp/software/cl-babel-babel-20240610131823/src/enc-jpn.lisp
Kind | Covered | All | % |
expression | 0 | 160 | 0.0 |
branch | 0 | 18 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;;; -*- Mode: lisp; indent-tabs-mode: nil -*-
3
;;; enc-jpn.lisp --- Japanese encodings.
6
(in-package #:babel-encodings)
9
(defvar *eucjp-to-ucs-hash* (make-hash-table))
10
(defvar *ucs-to-eucjp-hash* (make-hash-table))
11
(defvar *cp932-to-ucs-hash* (make-hash-table))
12
(defvar *ucs-to-cp932-hash* (make-hash-table))
14
(dolist (i `((,*cp932-only*
22
,*ucs-to-eucjp-hash*)))
24
(setf (gethash (car j) (second i)) (cadr j))
25
(setf (gethash (cadr j) (third i)) (car j))))
28
(let ((high (ash x -16))
29
(mid (logand (ash x -8) 255))
31
(cond ((not (zerop high))
39
(incf low (if (zerop (logand mid 1)) #x1f #x7e))
40
(incf low (if (<= #x7f low #x9d) 1 0))
41
(setq mid (ash mid -1))
42
(incf mid (if (<= mid #x1e) #x81 #xc1))
43
(+ (ash mid 8) low))))))
45
(let ((cp932 (euc-cp932 (first i))))
47
(setf (gethash cp932 *cp932-to-ucs-hash*) (second i))
48
(setf (gethash (second i) *ucs-to-cp932-hash*) cp932)))))
51
(loop for i from #x00 to #x7f do
52
(setf (gethash i *cp932-to-ucs-hash*) i)
53
(setf (gethash i *eucjp-to-ucs-hash*) i)
54
(setf (gethash i *ucs-to-eucjp-hash*) i)
55
(setf (gethash i *ucs-to-cp932-hash*) i))
58
(loop for i from #xa1 to #xdf do
59
(setf (gethash i *cp932-to-ucs-hash*) (+ #xff61 #x-a1 i))
60
(setf (gethash (+ #xff61 #x-a1 i) *ucs-to-cp932-hash*) i)
61
(setf (gethash (+ #x8e00 i) *eucjp-to-ucs-hash*) (+ #xff61 #x-a1 i))
62
(setf (gethash (+ #xff61 #x-a1 i) *ucs-to-eucjp-hash*) (+ #x8e00 i)))
64
;; This is quoted from https://support.microsoft.com/en-us/kb/170559/en-us
65
(let ((kb170559 "0x8790 -> U+2252 -> 0x81e0 Approximately Equal To Or The Image Of
66
0x8791 -> U+2261 -> 0x81df Identical To
67
0x8792 -> U+222b -> 0x81e7 Integral
68
0x8795 -> U+221a -> 0x81e3 Square Root
69
0x8796 -> U+22a5 -> 0x81db Up Tack
70
0x8797 -> U+2220 -> 0x81da Angle
71
0x879a -> U+2235 -> 0x81e6 Because
72
0x879b -> U+2229 -> 0x81bf Intersection
73
0x879c -> U+222a -> 0x81be Union
74
0xed40 -> U+7e8a -> 0xfa5c CJK Unified Ideograph
75
0xed41 -> U+891c -> 0xfa5d CJK Unified Ideograph
76
0xed42 -> U+9348 -> 0xfa5e CJK Unified Ideograph
77
0xed43 -> U+9288 -> 0xfa5f CJK Unified Ideograph
78
0xed44 -> U+84dc -> 0xfa60 CJK Unified Ideograph
79
0xed45 -> U+4fc9 -> 0xfa61 CJK Unified Ideograph
80
0xed46 -> U+70bb -> 0xfa62 CJK Unified Ideograph
81
0xed47 -> U+6631 -> 0xfa63 CJK Unified Ideograph
82
0xed48 -> U+68c8 -> 0xfa64 CJK Unified Ideograph
83
0xed49 -> U+92f9 -> 0xfa65 CJK Unified Ideograph
84
0xed4a -> U+66fb -> 0xfa66 CJK Unified Ideograph
85
0xed4b -> U+5f45 -> 0xfa67 CJK Unified Ideograph
86
0xed4c -> U+4e28 -> 0xfa68 CJK Unified Ideograph
87
0xed4d -> U+4ee1 -> 0xfa69 CJK Unified Ideograph
88
0xed4e -> U+4efc -> 0xfa6a CJK Unified Ideograph
89
0xed4f -> U+4f00 -> 0xfa6b CJK Unified Ideograph
90
0xed50 -> U+4f03 -> 0xfa6c CJK Unified Ideograph
91
0xed51 -> U+4f39 -> 0xfa6d CJK Unified Ideograph
92
0xed52 -> U+4f56 -> 0xfa6e CJK Unified Ideograph
93
0xed53 -> U+4f92 -> 0xfa6f CJK Unified Ideograph
94
0xed54 -> U+4f8a -> 0xfa70 CJK Unified Ideograph
95
0xed55 -> U+4f9a -> 0xfa71 CJK Unified Ideograph
96
0xed56 -> U+4f94 -> 0xfa72 CJK Unified Ideograph
97
0xed57 -> U+4fcd -> 0xfa73 CJK Unified Ideograph
98
0xed58 -> U+5040 -> 0xfa74 CJK Unified Ideograph
99
0xed59 -> U+5022 -> 0xfa75 CJK Unified Ideograph
100
0xed5a -> U+4fff -> 0xfa76 CJK Unified Ideograph
101
0xed5b -> U+501e -> 0xfa77 CJK Unified Ideograph
102
0xed5c -> U+5046 -> 0xfa78 CJK Unified Ideograph
103
0xed5d -> U+5070 -> 0xfa79 CJK Unified Ideograph
104
0xed5e -> U+5042 -> 0xfa7a CJK Unified Ideograph
105
0xed5f -> U+5094 -> 0xfa7b CJK Unified Ideograph
106
0xed60 -> U+50f4 -> 0xfa7c CJK Unified Ideograph
107
0xed61 -> U+50d8 -> 0xfa7d CJK Unified Ideograph
108
0xed62 -> U+514a -> 0xfa7e CJK Unified Ideograph
109
0xed63 -> U+5164 -> 0xfa80 CJK Unified Ideograph
110
0xed64 -> U+519d -> 0xfa81 CJK Unified Ideograph
111
0xed65 -> U+51be -> 0xfa82 CJK Unified Ideograph
112
0xed66 -> U+51ec -> 0xfa83 CJK Unified Ideograph
113
0xed67 -> U+5215 -> 0xfa84 CJK Unified Ideograph
114
0xed68 -> U+529c -> 0xfa85 CJK Unified Ideograph
115
0xed69 -> U+52a6 -> 0xfa86 CJK Unified Ideograph
116
0xed6a -> U+52c0 -> 0xfa87 CJK Unified Ideograph
117
0xed6b -> U+52db -> 0xfa88 CJK Unified Ideograph
118
0xed6c -> U+5300 -> 0xfa89 CJK Unified Ideograph
119
0xed6d -> U+5307 -> 0xfa8a CJK Unified Ideograph
120
0xed6e -> U+5324 -> 0xfa8b CJK Unified Ideograph
121
0xed6f -> U+5372 -> 0xfa8c CJK Unified Ideograph
122
0xed70 -> U+5393 -> 0xfa8d CJK Unified Ideograph
123
0xed71 -> U+53b2 -> 0xfa8e CJK Unified Ideograph
124
0xed72 -> U+53dd -> 0xfa8f CJK Unified Ideograph
125
0xed73 -> U+fa0e -> 0xfa90 CJK compatibility Ideograph
126
0xed74 -> U+549c -> 0xfa91 CJK Unified Ideograph
127
0xed75 -> U+548a -> 0xfa92 CJK Unified Ideograph
128
0xed76 -> U+54a9 -> 0xfa93 CJK Unified Ideograph
129
0xed77 -> U+54ff -> 0xfa94 CJK Unified Ideograph
130
0xed78 -> U+5586 -> 0xfa95 CJK Unified Ideograph
131
0xed79 -> U+5759 -> 0xfa96 CJK Unified Ideograph
132
0xed7a -> U+5765 -> 0xfa97 CJK Unified Ideograph
133
0xed7b -> U+57ac -> 0xfa98 CJK Unified Ideograph
134
0xed7c -> U+57c8 -> 0xfa99 CJK Unified Ideograph
135
0xed7d -> U+57c7 -> 0xfa9a CJK Unified Ideograph
136
0xed7e -> U+fa0f -> 0xfa9b CJK compatibility Ideograph
137
0xed80 -> U+fa10 -> 0xfa9c CJK compatibility Ideograph
138
0xed81 -> U+589e -> 0xfa9d CJK Unified Ideograph
139
0xed82 -> U+58b2 -> 0xfa9e CJK Unified Ideograph
140
0xed83 -> U+590b -> 0xfa9f CJK Unified Ideograph
141
0xed84 -> U+5953 -> 0xfaa0 CJK Unified Ideograph
142
0xed85 -> U+595b -> 0xfaa1 CJK Unified Ideograph
143
0xed86 -> U+595d -> 0xfaa2 CJK Unified Ideograph
144
0xed87 -> U+5963 -> 0xfaa3 CJK Unified Ideograph
145
0xed88 -> U+59a4 -> 0xfaa4 CJK Unified Ideograph
146
0xed89 -> U+59ba -> 0xfaa5 CJK Unified Ideograph
147
0xed8a -> U+5b56 -> 0xfaa6 CJK Unified Ideograph
148
0xed8b -> U+5bc0 -> 0xfaa7 CJK Unified Ideograph
149
0xed8c -> U+752f -> 0xfaa8 CJK Unified Ideograph
150
0xed8d -> U+5bd8 -> 0xfaa9 CJK Unified Ideograph
151
0xed8e -> U+5bec -> 0xfaaa CJK Unified Ideograph
152
0xed8f -> U+5c1e -> 0xfaab CJK Unified Ideograph
153
0xed90 -> U+5ca6 -> 0xfaac CJK Unified Ideograph
154
0xed91 -> U+5cba -> 0xfaad CJK Unified Ideograph
155
0xed92 -> U+5cf5 -> 0xfaae CJK Unified Ideograph
156
0xed93 -> U+5d27 -> 0xfaaf CJK Unified Ideograph
157
0xed94 -> U+5d53 -> 0xfab0 CJK Unified Ideograph
158
0xed95 -> U+fa11 -> 0xfab1 CJK compatibility Ideograph
159
0xed96 -> U+5d42 -> 0xfab2 CJK Unified Ideograph
160
0xed97 -> U+5d6d -> 0xfab3 CJK Unified Ideograph
161
0xed98 -> U+5db8 -> 0xfab4 CJK Unified Ideograph
162
0xed99 -> U+5db9 -> 0xfab5 CJK Unified Ideograph
163
0xed9a -> U+5dd0 -> 0xfab6 CJK Unified Ideograph
164
0xed9b -> U+5f21 -> 0xfab7 CJK Unified Ideograph
165
0xed9c -> U+5f34 -> 0xfab8 CJK Unified Ideograph
166
0xed9d -> U+5f67 -> 0xfab9 CJK Unified Ideograph
167
0xed9e -> U+5fb7 -> 0xfaba CJK Unified Ideograph
168
0xed9f -> U+5fde -> 0xfabb CJK Unified Ideograph
169
0xeda0 -> U+605d -> 0xfabc CJK Unified Ideograph
170
0xeda1 -> U+6085 -> 0xfabd CJK Unified Ideograph
171
0xeda2 -> U+608a -> 0xfabe CJK Unified Ideograph
172
0xeda3 -> U+60de -> 0xfabf CJK Unified Ideograph
173
0xeda4 -> U+60d5 -> 0xfac0 CJK Unified Ideograph
174
0xeda5 -> U+6120 -> 0xfac1 CJK Unified Ideograph
175
0xeda6 -> U+60f2 -> 0xfac2 CJK Unified Ideograph
176
0xeda7 -> U+6111 -> 0xfac3 CJK Unified Ideograph
177
0xeda8 -> U+6137 -> 0xfac4 CJK Unified Ideograph
178
0xeda9 -> U+6130 -> 0xfac5 CJK Unified Ideograph
179
0xedaa -> U+6198 -> 0xfac6 CJK Unified Ideograph
180
0xedab -> U+6213 -> 0xfac7 CJK Unified Ideograph
181
0xedac -> U+62a6 -> 0xfac8 CJK Unified Ideograph
182
0xedad -> U+63f5 -> 0xfac9 CJK Unified Ideograph
183
0xedae -> U+6460 -> 0xfaca CJK Unified Ideograph
184
0xedaf -> U+649d -> 0xfacb CJK Unified Ideograph
185
0xedb0 -> U+64ce -> 0xfacc CJK Unified Ideograph
186
0xedb1 -> U+654e -> 0xfacd CJK Unified Ideograph
187
0xedb2 -> U+6600 -> 0xface CJK Unified Ideograph
188
0xedb3 -> U+6615 -> 0xfacf CJK Unified Ideograph
189
0xedb4 -> U+663b -> 0xfad0 CJK Unified Ideograph
190
0xedb5 -> U+6609 -> 0xfad1 CJK Unified Ideograph
191
0xedb6 -> U+662e -> 0xfad2 CJK Unified Ideograph
192
0xedb7 -> U+661e -> 0xfad3 CJK Unified Ideograph
193
0xedb8 -> U+6624 -> 0xfad4 CJK Unified Ideograph
194
0xedb9 -> U+6665 -> 0xfad5 CJK Unified Ideograph
195
0xedba -> U+6657 -> 0xfad6 CJK Unified Ideograph
196
0xedbb -> U+6659 -> 0xfad7 CJK Unified Ideograph
197
0xedbc -> U+fa12 -> 0xfad8 CJK compatibility Ideograph
198
0xedbd -> U+6673 -> 0xfad9 CJK Unified Ideograph
199
0xedbe -> U+6699 -> 0xfada CJK Unified Ideograph
200
0xedbf -> U+66a0 -> 0xfadb CJK Unified Ideograph
201
0xedc0 -> U+66b2 -> 0xfadc CJK Unified Ideograph
202
0xedc1 -> U+66bf -> 0xfadd CJK Unified Ideograph
203
0xedc2 -> U+66fa -> 0xfade CJK Unified Ideograph
204
0xedc3 -> U+670e -> 0xfadf CJK Unified Ideograph
205
0xedc4 -> U+f929 -> 0xfae0 CJK compatibility Ideograph
206
0xedc5 -> U+6766 -> 0xfae1 CJK Unified Ideograph
207
0xedc6 -> U+67bb -> 0xfae2 CJK Unified Ideograph
208
0xedc7 -> U+6852 -> 0xfae3 CJK Unified Ideograph
209
0xedc8 -> U+67c0 -> 0xfae4 CJK Unified Ideograph
210
0xedc9 -> U+6801 -> 0xfae5 CJK Unified Ideograph
211
0xedca -> U+6844 -> 0xfae6 CJK Unified Ideograph
212
0xedcb -> U+68cf -> 0xfae7 CJK Unified Ideograph
213
0xedcc -> U+fa13 -> 0xfae8 CJK compatibility Ideograph
214
0xedcd -> U+6968 -> 0xfae9 CJK Unified Ideograph
215
0xedce -> U+fa14 -> 0xfaea CJK compatibility Ideograph
216
0xedcf -> U+6998 -> 0xfaeb CJK Unified Ideograph
217
0xedd0 -> U+69e2 -> 0xfaec CJK Unified Ideograph
218
0xedd1 -> U+6a30 -> 0xfaed CJK Unified Ideograph
219
0xedd2 -> U+6a6b -> 0xfaee CJK Unified Ideograph
220
0xedd3 -> U+6a46 -> 0xfaef CJK Unified Ideograph
221
0xedd4 -> U+6a73 -> 0xfaf0 CJK Unified Ideograph
222
0xedd5 -> U+6a7e -> 0xfaf1 CJK Unified Ideograph
223
0xedd6 -> U+6ae2 -> 0xfaf2 CJK Unified Ideograph
224
0xedd7 -> U+6ae4 -> 0xfaf3 CJK Unified Ideograph
225
0xedd8 -> U+6bd6 -> 0xfaf4 CJK Unified Ideograph
226
0xedd9 -> U+6c3f -> 0xfaf5 CJK Unified Ideograph
227
0xedda -> U+6c5c -> 0xfaf6 CJK Unified Ideograph
228
0xeddb -> U+6c86 -> 0xfaf7 CJK Unified Ideograph
229
0xeddc -> U+6c6f -> 0xfaf8 CJK Unified Ideograph
230
0xeddd -> U+6cda -> 0xfaf9 CJK Unified Ideograph
231
0xedde -> U+6d04 -> 0xfafa CJK Unified Ideograph
232
0xeddf -> U+6d87 -> 0xfafb CJK Unified Ideograph
233
0xede0 -> U+6d6f -> 0xfafc CJK Unified Ideograph
234
0xede1 -> U+6d96 -> 0xfb40 CJK Unified Ideograph
235
0xede2 -> U+6dac -> 0xfb41 CJK Unified Ideograph
236
0xede3 -> U+6dcf -> 0xfb42 CJK Unified Ideograph
237
0xede4 -> U+6df8 -> 0xfb43 CJK Unified Ideograph
238
0xede5 -> U+6df2 -> 0xfb44 CJK Unified Ideograph
239
0xede6 -> U+6dfc -> 0xfb45 CJK Unified Ideograph
240
0xede7 -> U+6e39 -> 0xfb46 CJK Unified Ideograph
241
0xede8 -> U+6e5c -> 0xfb47 CJK Unified Ideograph
242
0xede9 -> U+6e27 -> 0xfb48 CJK Unified Ideograph
243
0xedea -> U+6e3c -> 0xfb49 CJK Unified Ideograph
244
0xedeb -> U+6ebf -> 0xfb4a CJK Unified Ideograph
245
0xedec -> U+6f88 -> 0xfb4b CJK Unified Ideograph
246
0xeded -> U+6fb5 -> 0xfb4c CJK Unified Ideograph
247
0xedee -> U+6ff5 -> 0xfb4d CJK Unified Ideograph
248
0xedef -> U+7005 -> 0xfb4e CJK Unified Ideograph
249
0xedf0 -> U+7007 -> 0xfb4f CJK Unified Ideograph
250
0xedf1 -> U+7028 -> 0xfb50 CJK Unified Ideograph
251
0xedf2 -> U+7085 -> 0xfb51 CJK Unified Ideograph
252
0xedf3 -> U+70ab -> 0xfb52 CJK Unified Ideograph
253
0xedf4 -> U+710f -> 0xfb53 CJK Unified Ideograph
254
0xedf5 -> U+7104 -> 0xfb54 CJK Unified Ideograph
255
0xedf6 -> U+715c -> 0xfb55 CJK Unified Ideograph
256
0xedf7 -> U+7146 -> 0xfb56 CJK Unified Ideograph
257
0xedf8 -> U+7147 -> 0xfb57 CJK Unified Ideograph
258
0xedf9 -> U+fa15 -> 0xfb58 CJK compatibility Ideograph
259
0xedfa -> U+71c1 -> 0xfb59 CJK Unified Ideograph
260
0xedfb -> U+71fe -> 0xfb5a CJK Unified Ideograph
261
0xedfc -> U+72b1 -> 0xfb5b CJK Unified Ideograph
262
0xee40 -> U+72be -> 0xfb5c CJK Unified Ideograph
263
0xee41 -> U+7324 -> 0xfb5d CJK Unified Ideograph
264
0xee42 -> U+fa16 -> 0xfb5e CJK compatibility Ideograph
265
0xee43 -> U+7377 -> 0xfb5f CJK Unified Ideograph
266
0xee44 -> U+73bd -> 0xfb60 CJK Unified Ideograph
267
0xee45 -> U+73c9 -> 0xfb61 CJK Unified Ideograph
268
0xee46 -> U+73d6 -> 0xfb62 CJK Unified Ideograph
269
0xee47 -> U+73e3 -> 0xfb63 CJK Unified Ideograph
270
0xee48 -> U+73d2 -> 0xfb64 CJK Unified Ideograph
271
0xee49 -> U+7407 -> 0xfb65 CJK Unified Ideograph
272
0xee4a -> U+73f5 -> 0xfb66 CJK Unified Ideograph
273
0xee4b -> U+7426 -> 0xfb67 CJK Unified Ideograph
274
0xee4c -> U+742a -> 0xfb68 CJK Unified Ideograph
275
0xee4d -> U+7429 -> 0xfb69 CJK Unified Ideograph
276
0xee4e -> U+742e -> 0xfb6a CJK Unified Ideograph
277
0xee4f -> U+7462 -> 0xfb6b CJK Unified Ideograph
278
0xee50 -> U+7489 -> 0xfb6c CJK Unified Ideograph
279
0xee51 -> U+749f -> 0xfb6d CJK Unified Ideograph
280
0xee52 -> U+7501 -> 0xfb6e CJK Unified Ideograph
281
0xee53 -> U+756f -> 0xfb6f CJK Unified Ideograph
282
0xee54 -> U+7682 -> 0xfb70 CJK Unified Ideograph
283
0xee55 -> U+769c -> 0xfb71 CJK Unified Ideograph
284
0xee56 -> U+769e -> 0xfb72 CJK Unified Ideograph
285
0xee57 -> U+769b -> 0xfb73 CJK Unified Ideograph
286
0xee58 -> U+76a6 -> 0xfb74 CJK Unified Ideograph
287
0xee59 -> U+fa17 -> 0xfb75 CJK compatibility Ideograph
288
0xee5a -> U+7746 -> 0xfb76 CJK Unified Ideograph
289
0xee5b -> U+52af -> 0xfb77 CJK Unified Ideograph
290
0xee5c -> U+7821 -> 0xfb78 CJK Unified Ideograph
291
0xee5d -> U+784e -> 0xfb79 CJK Unified Ideograph
292
0xee5e -> U+7864 -> 0xfb7a CJK Unified Ideograph
293
0xee5f -> U+787a -> 0xfb7b CJK Unified Ideograph
294
0xee60 -> U+7930 -> 0xfb7c CJK Unified Ideograph
295
0xee61 -> U+fa18 -> 0xfb7d CJK compatibility Ideograph
296
0xee62 -> U+fa19 -> 0xfb7e CJK compatibility Ideograph
297
0xee63 -> U+fa1a -> 0xfb80 CJK compatibility Ideograph
298
0xee64 -> U+7994 -> 0xfb81 CJK Unified Ideograph
299
0xee65 -> U+fa1b -> 0xfb82 CJK compatibility Ideograph
300
0xee66 -> U+799b -> 0xfb83 CJK Unified Ideograph
301
0xee67 -> U+7ad1 -> 0xfb84 CJK Unified Ideograph
302
0xee68 -> U+7ae7 -> 0xfb85 CJK Unified Ideograph
303
0xee69 -> U+fa1c -> 0xfb86 CJK compatibility Ideograph
304
0xee6a -> U+7aeb -> 0xfb87 CJK Unified Ideograph
305
0xee6b -> U+7b9e -> 0xfb88 CJK Unified Ideograph
306
0xee6c -> U+fa1d -> 0xfb89 CJK compatibility Ideograph
307
0xee6d -> U+7d48 -> 0xfb8a CJK Unified Ideograph
308
0xee6e -> U+7d5c -> 0xfb8b CJK Unified Ideograph
309
0xee6f -> U+7db7 -> 0xfb8c CJK Unified Ideograph
310
0xee70 -> U+7da0 -> 0xfb8d CJK Unified Ideograph
311
0xee71 -> U+7dd6 -> 0xfb8e CJK Unified Ideograph
312
0xee72 -> U+7e52 -> 0xfb8f CJK Unified Ideograph
313
0xee73 -> U+7f47 -> 0xfb90 CJK Unified Ideograph
314
0xee74 -> U+7fa1 -> 0xfb91 CJK Unified Ideograph
315
0xee75 -> U+fa1e -> 0xfb92 CJK compatibility Ideograph
316
0xee76 -> U+8301 -> 0xfb93 CJK Unified Ideograph
317
0xee77 -> U+8362 -> 0xfb94 CJK Unified Ideograph
318
0xee78 -> U+837f -> 0xfb95 CJK Unified Ideograph
319
0xee79 -> U+83c7 -> 0xfb96 CJK Unified Ideograph
320
0xee7a -> U+83f6 -> 0xfb97 CJK Unified Ideograph
321
0xee7b -> U+8448 -> 0xfb98 CJK Unified Ideograph
322
0xee7c -> U+84b4 -> 0xfb99 CJK Unified Ideograph
323
0xee7d -> U+8553 -> 0xfb9a CJK Unified Ideograph
324
0xee7e -> U+8559 -> 0xfb9b CJK Unified Ideograph
325
0xee80 -> U+856b -> 0xfb9c CJK Unified Ideograph
326
0xee81 -> U+fa1f -> 0xfb9d CJK compatibility Ideograph
327
0xee82 -> U+85b0 -> 0xfb9e CJK Unified Ideograph
328
0xee83 -> U+fa20 -> 0xfb9f CJK compatibility Ideograph
329
0xee84 -> U+fa21 -> 0xfba0 CJK compatibility Ideograph
330
0xee85 -> U+8807 -> 0xfba1 CJK Unified Ideograph
331
0xee86 -> U+88f5 -> 0xfba2 CJK Unified Ideograph
332
0xee87 -> U+8a12 -> 0xfba3 CJK Unified Ideograph
333
0xee88 -> U+8a37 -> 0xfba4 CJK Unified Ideograph
334
0xee89 -> U+8a79 -> 0xfba5 CJK Unified Ideograph
335
0xee8a -> U+8aa7 -> 0xfba6 CJK Unified Ideograph
336
0xee8b -> U+8abe -> 0xfba7 CJK Unified Ideograph
337
0xee8c -> U+8adf -> 0xfba8 CJK Unified Ideograph
338
0xee8d -> U+fa22 -> 0xfba9 CJK compatibility Ideograph
339
0xee8e -> U+8af6 -> 0xfbaa CJK Unified Ideograph
340
0xee8f -> U+8b53 -> 0xfbab CJK Unified Ideograph
341
0xee90 -> U+8b7f -> 0xfbac CJK Unified Ideograph
342
0xee91 -> U+8cf0 -> 0xfbad CJK Unified Ideograph
343
0xee92 -> U+8cf4 -> 0xfbae CJK Unified Ideograph
344
0xee93 -> U+8d12 -> 0xfbaf CJK Unified Ideograph
345
0xee94 -> U+8d76 -> 0xfbb0 CJK Unified Ideograph
346
0xee95 -> U+fa23 -> 0xfbb1 CJK compatibility Ideograph
347
0xee96 -> U+8ecf -> 0xfbb2 CJK Unified Ideograph
348
0xee97 -> U+fa24 -> 0xfbb3 CJK compatibility Ideograph
349
0xee98 -> U+fa25 -> 0xfbb4 CJK compatibility Ideograph
350
0xee99 -> U+9067 -> 0xfbb5 CJK Unified Ideograph
351
0xee9a -> U+90de -> 0xfbb6 CJK Unified Ideograph
352
0xee9b -> U+fa26 -> 0xfbb7 CJK compatibility Ideograph
353
0xee9c -> U+9115 -> 0xfbb8 CJK Unified Ideograph
354
0xee9d -> U+9127 -> 0xfbb9 CJK Unified Ideograph
355
0xee9e -> U+91da -> 0xfbba CJK Unified Ideograph
356
0xee9f -> U+91d7 -> 0xfbbb CJK Unified Ideograph
357
0xeea0 -> U+91de -> 0xfbbc CJK Unified Ideograph
358
0xeea1 -> U+91ed -> 0xfbbd CJK Unified Ideograph
359
0xeea2 -> U+91ee -> 0xfbbe CJK Unified Ideograph
360
0xeea3 -> U+91e4 -> 0xfbbf CJK Unified Ideograph
361
0xeea4 -> U+91e5 -> 0xfbc0 CJK Unified Ideograph
362
0xeea5 -> U+9206 -> 0xfbc1 CJK Unified Ideograph
363
0xeea6 -> U+9210 -> 0xfbc2 CJK Unified Ideograph
364
0xeea7 -> U+920a -> 0xfbc3 CJK Unified Ideograph
365
0xeea8 -> U+923a -> 0xfbc4 CJK Unified Ideograph
366
0xeea9 -> U+9240 -> 0xfbc5 CJK Unified Ideograph
367
0xeeaa -> U+923c -> 0xfbc6 CJK Unified Ideograph
368
0xeeab -> U+924e -> 0xfbc7 CJK Unified Ideograph
369
0xeeac -> U+9259 -> 0xfbc8 CJK Unified Ideograph
370
0xeead -> U+9251 -> 0xfbc9 CJK Unified Ideograph
371
0xeeae -> U+9239 -> 0xfbca CJK Unified Ideograph
372
0xeeaf -> U+9267 -> 0xfbcb CJK Unified Ideograph
373
0xeeb0 -> U+92a7 -> 0xfbcc CJK Unified Ideograph
374
0xeeb1 -> U+9277 -> 0xfbcd CJK Unified Ideograph
375
0xeeb2 -> U+9278 -> 0xfbce CJK Unified Ideograph
376
0xeeb3 -> U+92e7 -> 0xfbcf CJK Unified Ideograph
377
0xeeb4 -> U+92d7 -> 0xfbd0 CJK Unified Ideograph
378
0xeeb5 -> U+92d9 -> 0xfbd1 CJK Unified Ideograph
379
0xeeb6 -> U+92d0 -> 0xfbd2 CJK Unified Ideograph
380
0xeeb7 -> U+fa27 -> 0xfbd3 CJK compatibility Ideograph
381
0xeeb8 -> U+92d5 -> 0xfbd4 CJK Unified Ideograph
382
0xeeb9 -> U+92e0 -> 0xfbd5 CJK Unified Ideograph
383
0xeeba -> U+92d3 -> 0xfbd6 CJK Unified Ideograph
384
0xeebb -> U+9325 -> 0xfbd7 CJK Unified Ideograph
385
0xeebc -> U+9321 -> 0xfbd8 CJK Unified Ideograph
386
0xeebd -> U+92fb -> 0xfbd9 CJK Unified Ideograph
387
0xeebe -> U+fa28 -> 0xfbda CJK compatibility Ideograph
388
0xeebf -> U+931e -> 0xfbdb CJK Unified Ideograph
389
0xeec0 -> U+92ff -> 0xfbdc CJK Unified Ideograph
390
0xeec1 -> U+931d -> 0xfbdd CJK Unified Ideograph
391
0xeec2 -> U+9302 -> 0xfbde CJK Unified Ideograph
392
0xeec3 -> U+9370 -> 0xfbdf CJK Unified Ideograph
393
0xeec4 -> U+9357 -> 0xfbe0 CJK Unified Ideograph
394
0xeec5 -> U+93a4 -> 0xfbe1 CJK Unified Ideograph
395
0xeec6 -> U+93c6 -> 0xfbe2 CJK Unified Ideograph
396
0xeec7 -> U+93de -> 0xfbe3 CJK Unified Ideograph
397
0xeec8 -> U+93f8 -> 0xfbe4 CJK Unified Ideograph
398
0xeec9 -> U+9431 -> 0xfbe5 CJK Unified Ideograph
399
0xeeca -> U+9445 -> 0xfbe6 CJK Unified Ideograph
400
0xeecb -> U+9448 -> 0xfbe7 CJK Unified Ideograph
401
0xeecc -> U+9592 -> 0xfbe8 CJK Unified Ideograph
402
0xeecd -> U+f9dc -> 0xfbe9 CJK compatibility Ideograph
403
0xeece -> U+fa29 -> 0xfbea CJK compatibility Ideograph
404
0xeecf -> U+969d -> 0xfbeb CJK Unified Ideograph
405
0xeed0 -> U+96af -> 0xfbec CJK Unified Ideograph
406
0xeed1 -> U+9733 -> 0xfbed CJK Unified Ideograph
407
0xeed2 -> U+973b -> 0xfbee CJK Unified Ideograph
408
0xeed3 -> U+9743 -> 0xfbef CJK Unified Ideograph
409
0xeed4 -> U+974d -> 0xfbf0 CJK Unified Ideograph
410
0xeed5 -> U+974f -> 0xfbf1 CJK Unified Ideograph
411
0xeed6 -> U+9751 -> 0xfbf2 CJK Unified Ideograph
412
0xeed7 -> U+9755 -> 0xfbf3 CJK Unified Ideograph
413
0xeed8 -> U+9857 -> 0xfbf4 CJK Unified Ideograph
414
0xeed9 -> U+9865 -> 0xfbf5 CJK Unified Ideograph
415
0xeeda -> U+fa2a -> 0xfbf6 CJK compatibility Ideograph
416
0xeedb -> U+fa2b -> 0xfbf7 CJK compatibility Ideograph
417
0xeedc -> U+9927 -> 0xfbf8 CJK Unified Ideograph
418
0xeedd -> U+fa2c -> 0xfbf9 CJK compatibility Ideograph
419
0xeede -> U+999e -> 0xfbfa CJK Unified Ideograph
420
0xeedf -> U+9a4e -> 0xfbfb CJK Unified Ideograph
421
0xeee0 -> U+9ad9 -> 0xfbfc CJK Unified Ideograph
422
0xeee1 -> U+9adc -> 0xfc40 CJK Unified Ideograph
423
0xeee2 -> U+9b75 -> 0xfc41 CJK Unified Ideograph
424
0xeee3 -> U+9b72 -> 0xfc42 CJK Unified Ideograph
425
0xeee4 -> U+9b8f -> 0xfc43 CJK Unified Ideograph
426
0xeee5 -> U+9bb1 -> 0xfc44 CJK Unified Ideograph
427
0xeee6 -> U+9bbb -> 0xfc45 CJK Unified Ideograph
428
0xeee7 -> U+9c00 -> 0xfc46 CJK Unified Ideograph
429
0xeee8 -> U+9d70 -> 0xfc47 CJK Unified Ideograph
430
0xeee9 -> U+9d6b -> 0xfc48 CJK Unified Ideograph
431
0xeeea -> U+fa2d -> 0xfc49 CJK compatibility Ideograph
432
0xeeeb -> U+9e19 -> 0xfc4a CJK Unified Ideograph
433
0xeeec -> U+9ed1 -> 0xfc4b CJK Unified Ideograph
434
0xeeef -> U+2170 -> 0xfa40 Small Roman Numeral One
435
0xeef0 -> U+2171 -> 0xfa41 Small Roman Numeral Two
436
0xeef1 -> U+2172 -> 0xfa42 Small Roman Numeral Three
437
0xeef2 -> U+2173 -> 0xfa43 Small Roman Numeral Four
438
0xeef3 -> U+2174 -> 0xfa44 Small Roman Numeral Five
439
0xeef4 -> U+2175 -> 0xfa45 Small Roman Numeral Six
440
0xeef5 -> U+2176 -> 0xfa46 Small Roman Numeral Seven
441
0xeef6 -> U+2177 -> 0xfa47 Small Roman Numeral Eight
442
0xeef7 -> U+2178 -> 0xfa48 Small Roman Numeral Nine
443
0xeef8 -> U+2179 -> 0xfa49 Small Roman Numeral Ten
444
0xeef9 -> U+ffe2 -> 0x81ca Fullwidth Not Sign
445
0xeefa -> U+ffe4 -> 0xfa55 Fullwidth Broken Bar
446
0xeefb -> U+ff07 -> 0xfa56 Fullwidth Apostrophe
447
0xeefc -> U+ff02 -> 0xfa57 Fullwidth Quotation Mark
448
0xfa4a -> U+2160 -> 0x8754 Roman Numeral One
449
0xfa4b -> U+2161 -> 0x8755 Roman Numeral Two
450
0xfa4c -> U+2162 -> 0x8756 Roman Numeral Three
451
0xfa4d -> U+2163 -> 0x8757 Roman Numeral Four
452
0xfa4e -> U+2164 -> 0x8758 Roman Numeral Five
453
0xfa4f -> U+2165 -> 0x8759 Roman Numeral Six
454
0xfa50 -> U+2166 -> 0x875a Roman Numeral Seven
455
0xfa51 -> U+2167 -> 0x875b Roman Numeral Eight
456
0xfa52 -> U+2168 -> 0x875c Roman Numeral Nine
457
0xfa53 -> U+2169 -> 0x875d Roman Numeral Ten
458
0xfa54 -> U+ffe2 -> 0x81ca Fullwidth Not Sign
459
0xfa58 -> U+3231 -> 0x878a Parenthesized Ideograph Stock
460
0xfa59 -> U+2116 -> 0x8782 Numero Sign
461
0xfa5a -> U+2121 -> 0x8784 Telephone Sign
462
0xfa5b -> U+2235 -> 0x81e6 Because"))
463
(with-input-from-string (s kb170559)
464
(loop for line = (read-line s nil) until (null line)
465
do (let ((ucs (parse-integer (subseq line 14 18) :radix 16))
466
(cp932 (parse-integer (subseq line 26 30) :radix 16)))
467
(setf (gethash ucs *ucs-to-cp932-hash*) cp932)))))
469
(defun eucjp-to-ucs (code)
470
(values (gethash code *eucjp-to-ucs-hash*)))
472
(defun ucs-to-eucjp (code)
473
(values (gethash code *ucs-to-eucjp-hash*)))
475
(defun cp932-to-ucs (code)
476
(values (gethash code *cp932-to-ucs-hash*)))
478
(defun ucs-to-cp932 (code)
479
(values (gethash code *ucs-to-cp932-hash*)))
483
(define-character-encoding :eucjp
484
"An 8-bit, variable-length character encoding in which
485
character code points in the range #x00-#x7f can be encoded in a
486
single octet; characters with larger code values can be encoded
488
:max-units-per-char 3
489
:literal-char-code-limit #x80)
492
(define-octet-counter :eucjp (getter type)
493
`(named-lambda eucjp-octet-counter (seq start end max)
494
(declare (type ,type seq) (fixnum start end max))
495
(loop with noctets fixnum = 0
496
for i fixnum from start below end
497
for code of-type code-point = (,getter seq i)
498
do (let* ((c (ucs-to-eucjp code))
499
(new (+ (cond ((< #xffff c) 3)
503
(if (and (plusp max) (> new max))
506
finally (return (values noctets i)))))
508
(define-code-point-counter :eucjp (getter type)
509
`(named-lambda eucjp-code-point-counter (seq start end max)
510
(declare (type ,type seq) (fixnum start end max))
511
(loop with nchars fixnum = 0
512
with i fixnum = start
514
(let* ((octet (,getter seq i))
515
(next-i (+ i (cond ((= #x8f octet) 3)
516
((or (< #xa0 octet #xff)
519
(declare (type ub8 octet) (fixnum next-i))
520
(cond ((> next-i end)
521
;; Should we add restarts to this error, we'll have
522
;; to figure out a way to communicate with the
523
;; decoder since we probably want to do something
524
;; about it right here when we have a chance to
525
;; change the count or something. (Like an
526
;; alternative replacement character or perhaps the
527
;; existence of this error so that the decoder
528
;; doesn't have to check for it on every iteration
531
;; FIXME: The data for this error is not right.
532
(decoding-error (vector octet) :eucjp seq i
533
nil 'end-of-input-in-character)
534
(return (values (1+ nchars) end)))
536
(setq nchars (1+ nchars)
538
(when (and (plusp max) (= nchars max))
539
(return (values nchars i))))))
540
finally (progn (assert (= i end))
541
(return (values nchars i))))))
543
(define-encoder :eucjp (getter src-type setter dest-type)
544
`(named-lambda eucjp-encoder (src start end dest d-start)
545
(declare (type ,src-type src)
546
(type ,dest-type dest)
547
(fixnum start end d-start))
548
(loop with di fixnum = d-start
549
for i fixnum from start below end
550
for code of-type code-point = (,getter src i)
551
for eucjp of-type code-point
552
= (ucs-to-eucjp code) do
553
(macrolet ((set-octet (offset value)
554
`(,',setter ,value dest (the fixnum (+ di ,offset)))))
562
(set-octet 0 (f-logand #xff (f-ash eucjp -8)))
563
(set-octet 1 (logand eucjp #xff))
567
(set-octet 0 (f-logand #xff (f-ash eucjp -16)))
568
(set-octet 1 (f-logand #xff (f-ash eucjp -8)))
569
(set-octet 2 (logand eucjp #xff))
572
finally (return (the fixnum (- di d-start))))))
575
(define-decoder :eucjp (getter src-type setter dest-type)
576
`(named-lambda eucjp-decoder (src start end dest d-start)
577
(declare (type ,src-type src)
578
(type ,dest-type dest)
579
(fixnum start end d-start))
581
(declare (type ub8 u2))
582
(loop for di fixnum from d-start
583
for i fixnum from start below end
584
for u1 of-type ub8 = (,getter src i) do
585
;; Note: CONSUME-OCTET doesn't check if I is being
586
;; incremented past END. We're assuming that END has
587
;; been calculated with the CODE-POINT-POINTER above that
591
`(let ((next-i (incf i)))
593
;; FIXME: data for this error is incomplete.
594
;; and signalling this error twice
595
(return-from setter-block
596
(decoding-error nil :eucjp src i +repl+
597
'end-of-input-in-character))
598
(,',getter src next-i))))
599
(handle-error (n &optional (c 'character-decoding-error))
601
(vector ,@(subseq '(u1 u2) 0 n))
602
:eucjp src (1+ (- i ,n)) +repl+ ',c))
603
(handle-error-if-icb (var n)
604
`(when (not (< #x7f ,var #xc0))
606
(return-from setter-block
607
(handle-error ,n invalid-utf8-continuation-byte)))))
613
(setq u2 (consume-octet))
614
(eucjp-to-ucs (logior #x8f0000
620
(eucjp-to-ucs (logior (f-ash u1 8)
626
finally (return (the fixnum (- di d-start)))))))
630
(define-character-encoding :cp932
631
"An 8-bit, variable-length character encoding in which
632
character code points in the range #x00-#x7f can be encoded in a
633
single octet; characters with larger code values can be encoded
635
:max-units-per-char 2
636
:literal-char-code-limit #x80)
639
(define-octet-counter :cp932 (getter type)
640
`(named-lambda cp932-octet-counter (seq start end max)
641
(declare (type ,type seq) (fixnum start end max))
642
(loop with noctets fixnum = 0
643
for i fixnum from start below end
644
for code of-type code-point = (,getter seq i)
645
do (let* ((c (ucs-to-cp932 code))
646
(new (+ (cond ((< #xff c) 2)
649
(if (and (plusp max) (> new max))
652
finally (return (values noctets i)))))
654
(define-code-point-counter :cp932 (getter type)
655
`(named-lambda cp932-code-point-counter (seq start end max)
656
(declare (type ,type seq) (fixnum start end max))
657
(loop with nchars fixnum = 0
658
with i fixnum = start
660
(let* ((octet (,getter seq i))
661
(next-i (+ i (cond ((or (<= #x81 octet #x9f)
662
(<= #xe0 octet #xfc))
665
(declare (type ub8 octet) (fixnum next-i))
666
(cond ((> next-i end)
667
;; Should we add restarts to this error, we'll have
668
;; to figure out a way to communicate with the
669
;; decoder since we probably want to do something
670
;; about it right here when we have a chance to
671
;; change the count or something. (Like an
672
;; alternative replacement character or perhaps the
673
;; existence of this error so that the decoder
674
;; doesn't have to check for it on every iteration
677
;; FIXME: The data for this error is not right.
678
(decoding-error (vector octet) :cp932 seq i
679
nil 'end-of-input-in-character)
680
(return (values (1+ nchars) end)))
682
(setq nchars (1+ nchars)
684
(when (and (plusp max) (= nchars max))
685
(return (values nchars i))))))
686
finally (progn (assert (= i end))
687
(return (values nchars i))))))
689
(define-encoder :cp932 (getter src-type setter dest-type)
690
`(named-lambda cp932-encoder (src start end dest d-start)
691
(declare (type ,src-type src)
692
(type ,dest-type dest)
693
(fixnum start end d-start))
694
(loop with di fixnum = d-start
695
for i fixnum from start below end
696
for code of-type code-point = (,getter src i)
697
for cp932 of-type code-point
698
= (ucs-to-cp932 code) do
699
(macrolet ((set-octet (offset value)
700
`(,',setter ,value dest (the fixnum (+ di ,offset)))))
708
(set-octet 0 (f-logand #xff (f-ash cp932 -8)))
709
(set-octet 1 (logand cp932 #xff))
713
(set-octet 0 (f-logand #xff (f-ash cp932 -16)))
714
(set-octet 1 (f-logand #xff (f-ash cp932 -8)))
715
(set-octet 2 (logand cp932 #xff))
718
finally (return (the fixnum (- di d-start))))))
721
(define-decoder :cp932 (getter src-type setter dest-type)
722
`(named-lambda cp932-decoder (src start end dest d-start)
723
(declare (type ,src-type src)
724
(type ,dest-type dest)
725
(fixnum start end d-start))
727
(declare (type ub8 u2))
728
(loop for di fixnum from d-start
729
for i fixnum from start below end
730
for u1 of-type ub8 = (,getter src i) do
731
;; Note: CONSUME-OCTET doesn't check if I is being
732
;; incremented past END. We're assuming that END has
733
;; been calculated with the CODE-POINT-POINTER above that
737
`(let ((next-i (incf i)))
739
;; FIXME: data for this error is incomplete.
740
;; and signalling this error twice
741
(return-from setter-block
742
(decoding-error nil :cp932 src i +repl+
743
'end-of-input-in-character))
744
(,',getter src next-i))))
745
(handle-error (n &optional (c 'character-decoding-error))
747
(vector ,@(subseq '(u1 u2) 0 n))
748
:cp932 src (1+ (- i ,n)) +repl+ ',c))
749
(handle-error-if-icb (var n)
750
`(when (not (< #x7f ,var #xc0))
752
(return-from setter-block
753
(handle-error ,n invalid-utf8-continuation-byte)))))
758
((or (<= #x81 u1 #x9f)
760
(setq u2 (consume-octet))
761
(cp932-to-ucs (logior (f-ash u1 8)
767
finally (return (the fixnum (- di d-start)))))))