Coverage report: /home/ellis/.stash/quicklisp/dists/ultralisp/software/cl-babel-babel-20240610131823/src/enc-gbk.lisp
Kind | Covered | All | % |
expression | 0 | 24 | 0.0 |
branch | 0 | 0 | nil |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;;; -*- Mode: lisp; indent-tabs-mode: nil -*-
3
;;; enc-gbk.lisp --- GBK encodings.
5
;;; Copyright (C) 2011, Li Wenpeng <levin108@gmail.com>
7
;;; Permission is hereby granted, free of charge, to any person
8
;;; obtaining a copy of this software and associated documentation
9
;;; files (the "Software"), to deal in the Software without
10
;;; restriction, including without limitation the rights to use, copy,
11
;;; modify, merge, publish, distribute, sublicense, and/or sell copies
12
;;; of the Software, and to permit persons to whom the Software is
13
;;; furnished to do so, subject to the following conditions:
15
;;; The above copyright notice and this permission notice shall be
16
;;; included in all copies or substantial portions of the Software.
18
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25
;;; DEALINGS IN THE SOFTWARE.
27
(in-package #:babel-encodings)
29
;; populated in gbk-map.lisp
30
(defvar *gbk-unicode-mapping*)
32
(defconstant +gbk2-offset+ 0)
33
(defconstant +gbk3-offset+ 6763)
34
(defconstant +gbk4-offset+ (+ 6763 6080))
35
(defconstant +gbk1-offset+ 20902)
36
(defconstant +gbk5-offset+ (+ 20902 846))
38
(define-character-encoding :gbk
39
"GBK is an extension of the GB2312 character set for simplified
40
Chinese characters, used in the People's Republic of China."
42
:literal-char-code-limit #x80)
44
(define-condition invalid-gbk-byte (character-decoding-error)
46
(:documentation "Signalled when an invalid GBK byte is found."))
48
(define-condition invalid-gbk-character (character-encoding-error)
50
(:documentation "Signalled when an invalid GBK character is found."))
52
(define-octet-counter :gbk (getter type)
53
`(lambda (seq start end max)
54
(declare (type ,type seq) (fixnum start end max))
56
(loop for i from start below end
57
for u1 of-type code-point = (,getter seq i)
58
do (cond ((< u1 #x80) (incf noctets))
60
(when (and (plusp max) (= noctets max))
61
(return (values noctets i)))
62
finally (return (values noctets i))))))
64
(define-code-point-counter :gbk (getter type)
65
`(lambda (seq start end max)
66
(declare (type ,type seq))
70
do (setf u1 (,getter seq i))
72
((eq 0 (logand u1 #x80)) (incf i))
75
(when (and (plusp max) (= noctets max))
76
(return (values noctets i)))
77
finally (return (values noctets i))))))
79
(define-encoder :gbk (getter src-type setter dest-type)
80
`(lambda (src start end dest d-start)
81
(declare (type ,src-type src)
82
(type ,dest-type dest)
83
(fixnum start end d-start))
88
((<= +gbk2-offset+ ,index (- +gbk3-offset+ 1)) ; gbk/2
89
(setf u1 (+ #xB0 (truncate (/ ,index 94))))
90
(setf u2 (+ #xA1 (mod ,index 94))))
91
((<= +gbk3-offset+ ,index (- +gbk4-offset+ 1)) ; gbk/3
92
(setf index (- ,index +gbk3-offset+))
93
(setf u1 (+ #x81 (truncate (/ ,index 190))))
94
(setf u2 (+ #x40 (mod ,index 190)))
95
(if (>= u2 #x7F) (incf u2)))
96
((<= +gbk4-offset+ ,index (- +gbk1-offset+ 1)) ; gbk/4
97
(setf index (- ,index +gbk4-offset+))
98
(setf u1 (+ #xAA (truncate (/ ,index 96))))
99
(setf u2 (+ #x40 (mod ,index 96)))
100
(if (>= u2 #x7F) (incf u2)))
101
((<= +gbk1-offset+ ,index (- +gbk5-offset+ 1)) ; gbk/1
102
(setf index (- ,index +gbk1-offset+))
103
(setf u1 (+ #xA1 (truncate (/ ,index 94))))
104
(setf u2 (+ #xA1 (mod ,index 94))))
105
((<= +gbk5-offset+ ,index (length *gbk-unicode-mapping*)) ; gbk/5
106
(setf index (- ,index +gbk5-offset+))
107
(setf u1 (+ #xA8 (truncate (/ ,index 96))))
108
(setf u2 (+ #x40 (mod ,index 96)))
109
(if (>= u2 #x7F) (incf u2))))
111
(let ((c 0) index (noctets 0))
112
(loop for i from start below end
113
for code of-type code-point = (,getter src i)
115
((handle-error (&optional (c 'character-encoding-error))
116
`(encoding-error code :gbk src i +repl+ ',c)))
117
(setf c (code-char code))
119
((< code #x80) ; ascii
120
(,setter code dest noctets)
124
(position c *gbk-unicode-mapping*))
127
(handle-error invalid-gbk-character))
128
(multiple-value-bind (uh ul) (do-encoding index)
129
(,setter uh dest noctets)
130
(,setter ul dest (+ 1 noctets))
132
finally (return (the fixnum (- noctets d-start))))))))
134
(define-decoder :gbk (getter src-type setter dest-type)
135
`(lambda (src start end dest d-start)
136
(declare (type ,src-type src)
137
(type ,dest-type dest))
138
(let ((u1 0) (u2 0) (index 0) (tmp 0) (noctets 0))
142
((handle-error (&optional (c 'character-decoding-error))
143
`(decoding-error #(u1 u2) :gbk src i +repl+ ',c)))
144
(setf u1 (,getter src i))
147
((eq 0 (logand u1 #x80))
148
(,setter u1 dest noctets))
150
(setf u2 (,getter src i))
155
((and (<= #xB0 u1 #xF7) (<= #xA1 u2 #xFE))
156
(+ +gbk2-offset+ (+ (* 94 (- u1 #xB0)) (- u2 #xA1))))
158
((and (<= #x81 u1 #xA0) (<= #x40 u2 #xFE))
159
(cond ((> u2 #x7F) (setf tmp 1))
161
(+ +gbk3-offset+ (* 190 (- u1 #x81)) (- u2 #x40 tmp)))
163
((and (<= #xAA u1 #xFE) (<= #x40 #xA0))
164
(cond ((> u2 #x7F) (setf tmp 1))
166
(+ +gbk4-offset+ (* 96 (- u1 #xAA)) (- u2 #x40 tmp)))
168
((and (<= #xA1 u1 #xA9) (<= #xA1 u2 #xFE))
169
(+ +gbk1-offset+ (* 94 (- u1 #xA1)) (- u2 #xA1)))
171
((and (<= #xA8 u1 #xA9) (<= #x40 #xA0))
172
(cond ((> u2 #x7F) (setf tmp 1))
174
(+ +gbk5-offset+ (* 96 (- u1 #xA8)) (- u2 #x40 tmp)))
176
(handle-error invalid-gbk-byte)))))
178
(when (>= index (length *gbk-unicode-mapping*))
179
(handle-error invalid-gbk-byte))
181
(elt *gbk-unicode-mapping* index))
184
finally (return (the fixnum (- noctets d-start)))))))