Coverage report: /home/ellis/.stash/quicklisp/dists/ultralisp/software/edicl-flexi-streams-20240429143708/encode.lisp
Kind | Covered | All | % |
expression | 0 | 250 | 0.0 |
branch | 0 | 26 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
2
;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.26 2008/05/26 10:55:08 edi Exp $
4
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
6
;;; Redistribution and use in source and binary forms, with or without
7
;;; modification, are permitted provided that the following conditions
10
;;; * Redistributions of source code must retain the above copyright
11
;;; notice, this list of conditions and the following disclaimer.
13
;;; * Redistributions in binary form must reproduce the above
14
;;; copyright notice, this list of conditions and the following
15
;;; disclaimer in the documentation and/or other materials
16
;;; provided with the distribution.
18
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
(in-package :flexi-streams)
32
(defgeneric char-to-octets (format char writer)
33
(declare #.*standard-optimize-settings*)
34
(:documentation "Converts the character CHAR to a sequence of octets
35
using the external format FORMAT. The conversion is performed by
36
calling the unary function \(which must be a functional object) WRITER
37
repeatedly each octet. The return value of this function is
40
(defgeneric write-sequence* (format stream sequence start end)
41
(declare #.*standard-optimize-settings*)
42
(:documentation "A generic function which dispatches on the external
43
format and does the real work for STREAM-WRITE-SEQUENCE."))
45
(defgeneric string-to-octets* (format string start end)
46
(declare #.*standard-optimize-settings*)
47
(:documentation "A generic function which dispatches on the external
48
format and does the real work for STRING-TO-OCTETS."))
50
(defmethod string-to-octets* :around (format (list list) start end)
51
(declare #.*standard-optimize-settings*)
52
(string-to-octets* format (coerce list 'string*) start end))
54
(defmacro define-sequence-writers ((format-class) &body body)
55
"Non-hygienic utility macro which defines methods for
56
WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For
57
BODY see the docstring of DEFINE-CHAR-ENCODERS."
58
(let ((body `((locally
59
(declare #.*fixnum-optimize-settings*)
62
(defmethod string-to-octets* ((format ,format-class) string start end)
63
(declare #.*standard-optimize-settings*)
64
(declare (fixnum start end) (string string))
65
(let ((octets (make-array (compute-number-of-octets format string start end)
66
:element-type 'octet))
69
(loop for i of-type fixnum from start below end do
70
(macrolet ((octet-writer (form)
72
(setf (aref (the (array octet *) octets) j) ,form)
74
(symbol-macrolet ((char-getter (char string i)))
77
(defmethod write-sequence* ((format ,format-class) stream sequence start end)
78
(declare #.*standard-optimize-settings*)
79
(declare (fixnum start end))
80
(with-accessors ((column flexi-stream-column))
82
(let* ((octet-seen-p nil)
84
;; estimate should be good enough...
85
(factor (encoding-factor format))
86
;; we don't want arbitrarily large buffer, do we?
87
(buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
88
(buffer (make-octet-buffer buffer-size))
89
(underlying-stream (flexi-stream-stream stream)))
90
(declare (fixnum buffer-pos buffer-size)
91
(boolean octet-seen-p)
92
(type (array octet *) buffer))
93
(macrolet ((octet-writer (form)
94
`(write-octet ,form)))
95
(labels ((flush-buffer ()
96
"Sends all octets in BUFFER to the underlying stream."
97
(write-sequence buffer underlying-stream :end buffer-pos)
100
"Adds one octet to the buffer and flushes it if necessary."
101
(declare (type octet octet))
102
(when (>= buffer-pos buffer-size)
104
(setf (aref buffer buffer-pos) octet)
106
(write-object (object)
107
"Dispatches to WRITE-OCTET or WRITE-CHARACTER
108
depending on the type of OBJECT."
110
(octet (setq octet-seen-p t)
111
(write-octet object))
112
(character (symbol-macrolet ((char-getter object))
114
(macrolet ((iterate (&body output-forms)
115
"An unhygienic macro to implement the actual
116
iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
117
sequence element and put its octet representation into the buffer."
118
`(loop for index of-type fixnum from start below end
119
do (progn ,@output-forms)
120
finally (when (plusp buffer-pos)
124
(symbol-macrolet ((char-getter (char sequence index)))
127
(symbol-macrolet ((char-getter (aref sequence index)))
129
(list (iterate (write-object (nth index sequence))))))
130
;; update the column slot, setting it to NIL if we sent
133
(cond (octet-seen-p nil)
134
(t (let ((last-newline-pos (position #\Newline sequence
139
(cond (last-newline-pos (- end last-newline-pos 1))
140
(column (+ column (- end start))))))))))))))))
142
(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
143
"Non-hygienic utility macro which defines several encoding-related
144
methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
145
CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
146
encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
147
similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
148
BODY is a code template for the code to convert one character to
149
octets. BODY must contain a symbol CHAR-GETTER representing the form
150
which is used to obtain the character and a forms like \(OCTET-WRITE
151
<thing>) to write the octet <thing>. The CHAR-GETTER form might be
152
called more than once."
154
(defmethod char-to-octets ((format ,lf-format-class) char writer)
155
(declare #.*fixnum-optimize-settings*)
156
(declare (character char) (function writer))
157
(symbol-macrolet ((char-getter char))
158
(macrolet ((octet-writer (form)
159
`(funcall writer ,form)))
161
(define-sequence-writers (,lf-format-class) ,@body)
162
(define-sequence-writers (,cr-format-class)
163
;; modify the body so that the getter replaces a #\Newline
165
,@(sublis `((char-getter . ,(with-unique-names (char)
166
`(let ((,char char-getter))
167
(declare (character ,char))
168
(if (char= ,char #\Newline)
172
(define-sequence-writers (,crlf-format-class)
173
;; modify the body so that we potentially write octets for
174
;; two characters (#\Return and #\Linefeed) - the original
175
;; body is wrapped with the WRITE-CHAR local function
176
,(with-unique-names (char write-char)
177
`(flet ((,write-char (,char)
178
,@(sublis `((char-getter . ,char)) body)))
179
(let ((,char char-getter))
180
(declare (character ,char))
181
(cond ((char= ,char #\Newline)
182
(,write-char #\Return)
183
(,write-char #\Linefeed))
184
(t (,write-char ,char)))))))))
186
(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
187
(let ((octet (char-code char-getter)))
189
(signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
190
(octet-writer octet)))
192
(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
193
(let ((octet (char-code char-getter)))
195
(signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
196
(octet-writer octet)))
198
(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
199
(with-accessors ((encoding-hash external-format-encoding-hash))
201
(let ((octet (gethash (char-code char-getter) encoding-hash)))
203
(signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
204
(octet-writer octet))))
206
(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
207
;; the old version using LDB was more elegant, but some Lisps had
208
;; trouble optimizing it
209
(let ((char-code (char-code char-getter)))
211
(cond ((< char-code #x80)
212
(octet-writer char-code)
215
(octet-writer (logior* #b11000000 (ash* char-code -6)))
217
((< char-code #x10000)
218
(octet-writer (logior* #b11100000 (ash* char-code -12)))
221
(octet-writer (logior* #b11110000 (ash* char-code -18)))))
222
(octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12))))
224
(octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6))))
226
(octet-writer (logior* #b10000000 (logand* #b00111111 char-code)))
229
(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
230
(flet ((write-word (word)
231
(octet-writer (logand* #x00ff word))
232
(octet-writer (ash* (logand* #xff00 word) -8))))
233
(declare (inline write-word))
234
(let ((char-code (char-code char-getter)))
235
(declare (type char-code-integer char-code))
236
(cond ((< char-code #x10000)
237
(write-word char-code))
238
(t (decf char-code #x10000)
239
(write-word (logior* #xd800 (ash* char-code -10)))
240
(write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
242
(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
243
(flet ((write-word (word)
244
(octet-writer (ash* (logand* #xff00 word) -8))
245
(octet-writer (logand* #x00ff word))))
246
(declare (inline write-word))
247
(let ((char-code (char-code char-getter)))
248
(declare (type char-code-integer char-code))
249
(cond ((< char-code #x10000)
250
(write-word char-code))
251
(t (decf char-code #x10000)
252
(write-word (logior* #xd800 (ash* char-code -10)))
253
(write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
255
(define-char-encoders (flexi-gbk-format flexi-cr-gbk-format flexi-crlf-gbk-format)
256
(let ((octet (char-code char-getter)))
259
(flet ((write-word (word)
260
(octet-writer (ash* (logand* #xff00 word) -8))
261
(octet-writer (logand* #x00ff word))))
262
(declare (inline write-word))
263
(declare (type char-code-integer octet))
264
(cond ((= octet #x20ac)
269
(let ((code (get-multibyte-mapper (if (< octet #x100)
270
*ucs-to-gbk-special-table*
275
(signal-encoding-error format "~S (code ~A) is not in this encoding."
276
char-getter octet)))))))))
277
(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
278
(let ((char-code (char-code char-getter)))
279
(octet-writer (logand* #x00ff char-code))
280
(octet-writer (logand* #x00ff (ash* char-code -8)))
281
(octet-writer (logand* #x00ff (ash* char-code -16)))
282
(octet-writer (logand* #x00ff (ash* char-code -24)))))
284
(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
285
(let ((char-code (char-code char-getter)))
286
(octet-writer (logand* #x00ff (ash* char-code -24)))
287
(octet-writer (logand* #x00ff (ash* char-code -16)))
288
(octet-writer (logand* #x00ff (ash* char-code -8)))
289
(octet-writer (logand* #x00ff char-code))))
291
(defmethod char-to-octets ((format flexi-cr-mixin) char writer)
292
(declare #.*fixnum-optimize-settings*)
293
(declare (character char))
294
(if (char= char #\Newline)
295
(call-next-method format #\Return writer)
298
(defmethod char-to-octets ((format flexi-crlf-mixin) char writer)
299
(declare #.*fixnum-optimize-settings*)
300
(declare (character char))
301
(cond ((char= char #\Newline)
302
(call-next-method format #\Return writer)
303
(call-next-method format #\Linefeed writer))
304
(t (call-next-method))))