Coverage report: /home/ellis/.stash/quicklisp/dists/ultralisp/software/edicl-flexi-streams-20240429143708/length.lisp
Kind | Covered | All | % |
expression | 4 | 754 | 0.5 |
branch | 0 | 140 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
2
;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.6 2008/05/29 10:25:14 edi Exp $
4
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
6
;;; Redistribution and use in source and binary forms, with or without
7
;;; modification, are permitted provided that the following conditions
10
;;; * Redistributions of source code must retain the above copyright
11
;;; notice, this list of conditions and the following disclaimer.
13
;;; * Redistributions in binary form must reproduce the above
14
;;; copyright notice, this list of conditions and the following
15
;;; disclaimer in the documentation and/or other materials
16
;;; provided with the distribution.
18
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
(in-package :flexi-streams)
32
(defgeneric encoding-factor (format)
33
(:documentation "Given an external format FORMAT, returns a factor
34
which denotes the octets to characters ratio to expect when
35
encoding/decoding. If the returned value is an integer, the factor is
36
assumed to be exact. If it is a \(double) float, the factor is
37
supposed to be based on heuristics and usually not exact.
39
This factor is used in string.lisp.")
40
(declare #.*standard-optimize-settings*))
42
(defmethod encoding-factor ((format flexi-8-bit-format))
43
(declare #.*standard-optimize-settings*)
44
;; 8-bit encodings map octets to characters in an exact one-to-one
48
(defmethod encoding-factor ((format flexi-utf-8-format))
49
(declare #.*standard-optimize-settings*)
50
;; UTF-8 characters can be anything from one to six octets, but we
51
;; assume that the "overhead" is only about 5 percent - this
52
;; estimate is obviously very much dependant on the content
55
(defmethod encoding-factor ((format flexi-utf-16-format))
56
(declare #.*standard-optimize-settings*)
57
;; usually one character maps to two octets, but characters with
58
;; code points above #x10000 map to four octets - we assume that we
59
;; usually don't see these characters but of course have to return a
63
(defmethod encoding-factor ((format flexi-gbk-format))
64
(declare #.*standard-optimize-settings*)
65
;; usually one character maps to two octets, but characters with
66
;; code points below #x80 map to one octets - we assume that
67
;; the "overhead" is about 50 percent - this estimate is
68
;; obviously very much dependant on the content
70
(defmethod encoding-factor ((format flexi-utf-32-format))
71
(declare #.*standard-optimize-settings*)
72
;; UTF-32 always matches every character to four octets
75
(defmethod encoding-factor ((format flexi-crlf-mixin))
76
(declare #.*standard-optimize-settings*)
77
;; if the sequence #\Return #\Linefeed is the line-end marker, this
78
;; obviously makes encodings potentially longer and definitely makes
79
;; the estimate unexact
80
(* 1.02d0 (call-next-method)))
82
(defgeneric check-end (format start end i)
83
(declare #.*fixnum-optimize-settings*)
84
(:documentation "Helper function used below to determine if we tried
85
to read past the end of the sequence.")
86
(:method (format start end i)
87
(declare #.*fixnum-optimize-settings*)
88
(declare (ignore start))
89
(declare (fixnum end i))
91
(signal-encoding-error format "This sequence can't be decoded ~
92
using ~A as it is too short. ~A octet~:P missing at the end."
93
(external-format-name format)
95
(:method ((format flexi-utf-16-format) start end i)
96
(declare #.*fixnum-optimize-settings*)
97
(declare (fixnum start end i))
100
(when (evenp (- end start))
101
(call-next-method))))
103
(defgeneric compute-number-of-chars (format sequence start end)
104
(declare #.*standard-optimize-settings*)
105
(:documentation "Computes the exact number of characters required to
106
decode the sequence of octets in SEQUENCE from START to END using the
107
external format FORMAT."))
109
(defmethod compute-number-of-chars :around (format (list list) start end)
110
(declare #.*standard-optimize-settings*)
111
(call-next-method format (coerce list 'vector) start end))
113
(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
114
(declare #.*fixnum-optimize-settings*)
115
(declare (fixnum start end))
116
(declare (ignore sequence))
119
(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end)
120
;; this method only applies to the 8-bit formats as all other
121
;; formats with CRLF line endings have their own specialized methods
123
(declare #.*fixnum-optimize-settings*)
124
(declare (fixnum start end) (vector sequence))
126
(length (- end start)))
127
(declare (fixnum i length))
131
(let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
134
(setq i (1+ position))
138
(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
139
(declare #.*fixnum-optimize-settings*)
140
(declare (fixnum start end) (vector sequence))
143
(declare (fixnum i sum))
147
(let* ((octet (aref sequence i))
148
;; note that there are no validity checks here
149
(length (cond ((not (logbitp 7 octet)) 1)
150
((= #b11000000 (logand* octet #b11100000)) 2)
151
((= #b11100000 (logand* octet #b11110000)) 3)
153
(declare (fixnum length) (type octet octet))
156
(check-end format start end i)
159
(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
160
(declare #.*fixnum-optimize-settings*)
161
(declare (fixnum start end) (vector sequence))
165
(declare (fixnum i sum) (type octet last-octet))
169
(let* ((octet (aref sequence i))
170
;; note that there are no validity checks here
171
(length (cond ((not (logbitp 7 octet)) 1)
172
((= #b11000000 (logand* octet #b11100000)) 2)
173
((= #b11100000 (logand* octet #b11110000)) 3)
175
(declare (fixnum length) (type octet octet))
176
(unless (and (= octet +lf+) (= last-octet +cr+))
179
(setq last-octet octet)))
180
(check-end format start end i)
183
(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
184
(declare #.*fixnum-optimize-settings*)
185
(declare (fixnum start end) (vector sequence))
186
(declare (ignore sequence))
187
(when (oddp (- end start))
188
(signal-encoding-error format "~A octet~:P cannot be decoded ~
189
using UTF-16 as ~:*~A is not even."
192
(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
193
(declare #.*fixnum-optimize-settings*)
194
(declare (fixnum start end))
197
(declare (fixnum i sum))
202
(let* ((high-octet (aref sequence (1+ i)))
203
(length (cond ((<= #xd8 high-octet #xdf) 4)
205
(declare (fixnum length) (type octet high-octet))
208
(check-end format start (+ end 2) i)
211
(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
212
(declare #.*fixnum-optimize-settings*)
213
(declare (fixnum start end) (vector sequence))
216
(declare (fixnum i sum))
221
(let* ((high-octet (aref sequence i))
222
(length (cond ((<= #xd8 high-octet #xdf) 4)
224
(declare (fixnum length) (type octet high-octet))
227
(check-end format start (+ end 2) i)
230
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
231
(declare #.*fixnum-optimize-settings*)
232
(declare (fixnum start end) (vector sequence))
236
(declare (fixnum i sum) (type octet last-octet))
241
(let* ((high-octet (aref sequence (1+ i)))
242
(length (cond ((<= #xd8 high-octet #xdf) 4)
244
(declare (fixnum length) (type octet high-octet))
245
(unless (and (zerop high-octet)
246
(= (the octet (aref sequence i)) +lf+)
249
(setq last-octet (if (zerop high-octet)
253
(check-end format start (+ end 2) i)
256
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
257
(declare #.*fixnum-optimize-settings*)
258
(declare (fixnum start end) (vector sequence))
262
(declare (fixnum i sum) (type octet last-octet))
267
(let* ((high-octet (aref sequence i))
268
(length (cond ((<= #xd8 high-octet #xdf) 4)
270
(declare (fixnum length) (type octet high-octet))
271
(unless (and (zerop high-octet)
272
(= (the octet (aref sequence (1+ i))) +lf+)
275
(setq last-octet (if (zerop high-octet)
276
(aref sequence (1+ i))
279
(check-end format start (+ end 2) i)
282
(defmethod compute-number-of-chars ((format flexi-gbk-format) sequence start end)
283
(declare #.*fixnum-optimize-settings*)
284
(declare (fixnum start end) (vector sequence))
287
(declare (fixnum i sum))
291
(let* ((octet (aref sequence i))
292
;; note that there are no validity checks here
293
(length (cond ((or (<= octet #x7f)
298
(declare (fixnum length) (type octet octet))
301
(check-end format start end i)
303
(defmethod compute-number-of-chars ((format flexi-crlf-gbk-format) sequence start end)
304
(declare #.*fixnum-optimize-settings*)
305
(declare (fixnum start end) (vector sequence))
309
(declare (fixnum i sum) (type octet last-octet))
313
(let* ((octet (aref sequence i))
314
;; note that there are no validity checks here
315
(length (cond ((or (<= octet #x7f)
320
(declare (fixnum length) (type octet octet))
321
(unless (and (= octet +lf+) (= last-octet +cr+))
324
(setq last-octet octet)))
325
(check-end format start end i)
328
(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
329
(declare #.*fixnum-optimize-settings*)
330
(declare (fixnum start end))
331
(declare (ignore sequence))
332
(let ((length (- end start)))
333
(when (plusp (mod length 4))
334
(signal-encoding-error format "~A octet~:P cannot be decoded ~
335
using UTF-32 as ~:*~A is not a multiple-value of four."
338
(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
339
(declare #.*fixnum-optimize-settings*)
340
(declare (fixnum start end))
341
(declare (ignore sequence))
342
(ceiling (- end start) 4))
344
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
345
(declare #.*fixnum-optimize-settings*)
346
(declare (fixnum start end) (vector sequence))
348
(length (ceiling (- end start) 4)))
353
(cond ((loop for j of-type fixnum from i
354
for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
355
always (= octet (aref sequence j)))
361
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
362
(declare #.*fixnum-optimize-settings*)
363
(declare (fixnum start end) (vector sequence))
365
(length (ceiling (- end start) 4)))
370
(cond ((loop for j of-type fixnum from i
371
for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
372
always (= octet (aref sequence j)))
378
(defgeneric compute-number-of-octets (format sequence start end)
379
(declare #.*standard-optimize-settings*)
380
(:documentation "Computes the exact number of octets required to
381
encode the sequence of characters in SEQUENCE from START to END using
382
the external format FORMAT."))
384
(defmethod compute-number-of-octets :around (format (list list) start end)
385
(declare #.*standard-optimize-settings*)
386
(call-next-method format (coerce list 'string*) start end))
388
(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end)
389
(declare #.*fixnum-optimize-settings*)
390
(declare (fixnum start end))
391
(declare (ignore string))
393
(defmethod compute-number-of-octets ((format flexi-gbk-format) string start end)
394
(declare #.*fixnum-optimize-settings*)
395
(declare (fixnum start end) (string string))
398
(declare (fixnum i sum))
402
(let* ((char-code (char-code (char string i)))
403
(char-length (cond ((<= char-code #x7f) 1)
404
((or (= char-code #x20ac)
405
(= char-code #xf8f5))
407
((get-multibyte-mapper *gbk-to-ucs-special-table* char-code)
410
(declare (fixnum char-length) (type char-code-integer char-code))
411
(incf sum char-length)
415
(defmethod compute-number-of-octets ((format flexi-crlf-gbk-format) string start end)
416
(declare #.*fixnum-optimize-settings*)
417
(declare (fixnum start end) (string string))
420
(declare (fixnum i sum))
424
(let* ((char-code (char-code (char string i)))
425
(char-length (cond ((= char-code #.(char-code #\Newline)) 2)
426
((<= char-code #x7f) 1)
427
((or (= char-code #x20ac)
428
(= char-code #xf8f5))
430
((get-multibyte-mapper *gbk-to-ucs-special-table* char-code)
433
(declare (fixnum char-length) (type char-code-integer char-code))
434
(incf sum char-length)
438
(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end)
439
(declare #.*fixnum-optimize-settings*)
440
(declare (fixnum start end) (string string))
443
(declare (fixnum i sum))
447
(let* ((char-code (char-code (char string i)))
448
(char-length (cond ((< char-code #x80) 1)
449
((< char-code #x800) 2)
450
((< char-code #x10000) 3)
452
(declare (fixnum char-length) (type char-code-integer char-code))
453
(incf sum char-length)
457
(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end)
458
(declare #.*fixnum-optimize-settings*)
459
(declare (fixnum start end) (string string))
462
(declare (fixnum i sum))
466
(let* ((char-code (char-code (char string i)))
467
(char-length (cond ((= char-code #.(char-code #\Newline)) 2)
468
((< char-code #x80) 1)
469
((< char-code #x800) 2)
470
((< char-code #x10000) 3)
472
(declare (fixnum char-length) (type char-code-integer char-code))
473
(incf sum char-length)
477
(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end)
478
(declare #.*fixnum-optimize-settings*)
479
(declare (fixnum start end) (string string))
482
(declare (fixnum i sum))
486
(let* ((char-code (char-code (char string i)))
487
(char-length (cond ((< char-code #x10000) 2)
489
(declare (fixnum char-length) (type char-code-integer char-code))
490
(incf sum char-length)
494
(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end)
495
(declare #.*fixnum-optimize-settings*)
496
(declare (fixnum start end) (string string))
499
(declare (fixnum i sum))
503
(let* ((char-code (char-code (char string i)))
504
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
505
((< char-code #x10000) 2)
507
(declare (fixnum char-length) (type char-code-integer char-code))
508
(incf sum char-length)
512
(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end)
513
(declare #.*fixnum-optimize-settings*)
514
(declare (fixnum start end) (string string))
517
(declare (fixnum i sum))
521
(let* ((char-code (char-code (char string i)))
522
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
523
((< char-code #x10000) 2)
525
(declare (fixnum char-length) (type char-code-integer char-code))
526
(incf sum char-length)
530
(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end)
531
(declare #.*fixnum-optimize-settings*)
532
(declare (fixnum start end))
533
(declare (ignore string))
536
(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end)
537
(declare #.*fixnum-optimize-settings*)
538
(declare (fixnum start end) (string string))
539
(+ (call-next-method)
540
(* (case (external-format-name format)
543
(count #\Newline string :start start :end end :test #'char=))))
545
(defgeneric character-length (format char)
546
(declare #.*fixnum-optimize-settings*)
547
(:documentation "Returns the number of octets needed to encode the
548
single character CHAR.")
549
(:method (format char)
550
(compute-number-of-octets format (string char) 0 1)))
552
(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline)))
553
(declare #.*fixnum-optimize-settings*)
554
(+ (call-next-method format +cr+)
555
(call-next-method format +lf+)))
557
(defmethod character-length ((format flexi-8-bit-format) char)
558
(declare #.*fixnum-optimize-settings*)
559
(declare (ignore char))
562
(defmethod character-length ((format flexi-utf-32-format) char)
563
(declare #.*fixnum-optimize-settings*)
564
(declare (ignore char))