Coverage report: /home/ellis/comp/ext/ironclad/src/common.lisp
Kind | Covered | All | % |
expression | 25 | 437 | 5.7 |
branch | 0 | 42 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;;; common.lisp -- efficient implementations of mod32 arithmetic and macros
3
;; Functions in this file are intended to be fast
8
(defmacro defconst (name value)
14
;;; CMUCL and SBCL both have an internal type for this, but we'd like to
15
;;; be portable, so we define our own.
17
(deftype index () '(mod #.array-dimension-limit))
18
(deftype index+1 () `(mod ,(1+ array-dimension-limit)))
20
;;; We write something like this all over the place.
22
(deftype simple-octet-vector (&optional length)
23
(let ((length (or length '*)))
24
`(simple-array (unsigned-byte 8) (,length))))
26
;;; a global specification of optimization settings
28
(eval-when (:compile-toplevel :load-toplevel :execute)
29
(defun burn-baby-burn ()
30
'(optimize (speed 3) (safety 0) (space 0)
31
(debug 0) (compilation-speed 0)))
33
(defun hold-me-back ()
34
'(declare (optimize (speed 3) (space 0) (compilation-speed 0)
35
(safety 1) #-cmu (debug 1))))) ; EVAL-WHEN
37
;;; extracting individual bytes from integers
39
;;; We used to declare these functions with much stricter types (e.g.
40
;;; (UNSIGNED-BYTE 32) as the lone argument), but we need to access
41
;;; bytes of both 32-bit and 64-bit words and the types would just get
42
;;; in our way. We declare these functions as inline; a good Common
43
;;; Lisp compiler should be able to generate efficient code from the
44
;;; declarations at the point of the call.
46
;;; These functions are named according to big-endian conventions. The
47
;;; comment is here because I always forget and need to be reminded.
48
#.(loop for i from 1 to 8
49
collect (let ((name (intern (format nil "~:@(~:R~)-~A" i (string '#:byte)))))
51
(declaim (inline ,name))
52
(declaim (ftype (function (unsigned-byte) (unsigned-byte 8)) ,name))
54
(declare (type unsigned-byte ub))
55
(ldb (byte 8 ,(* 8 (1- i))) ub)))) into forms
56
finally (return `(progn ,@forms)))
58
;;; fetching/storing appropriately-sized integers from octet vectors
59
(eval-when (:compile-toplevel :load-toplevel :execute)
60
(defun ubref-fun-name (bitsize big-endian-p)
61
(symbolicate '#:ub bitsize (if big-endian-p '#:ref/be '#:ref/le))))
63
(declaim (inline ub16ref/le (setf ub16ref/le)
64
ub16ref/be (setf ub16ref/be)
65
ub32ref/le (setf ub32ref/le)
66
ub32ref/be (setf ub32ref/be)
67
ub64ref/le (setf ub64ref/le)
68
ub64ref/be (setf ub64ref/be)))
70
(defun ub16ref/le (vector offset)
71
(declare (type simple-octet-vector vector)
74
(sb-sys:sap-ref-16 (sb-sys:vector-sap vector) offset)
76
(dpb (aref vector (1+ offset))
78
(aref vector offset)))
80
(defun (setf ub16ref/le) (value vector offset)
81
(declare (type (unsigned-byte 16) value)
82
(type simple-octet-vector vector)
85
(setf (sb-sys:sap-ref-16 (sb-sys:vector-sap vector) offset) value)
87
(setf (aref vector offset) (logand value #xff)
88
(aref vector (1+ offset)) (ldb (byte 8 8) value))
91
(defun ub16ref/be (vector offset)
92
(declare (type simple-octet-vector vector)
95
(sb-sys:sap-ref-16 (sb-sys:vector-sap vector) offset)
97
(dpb (aref vector offset)
99
(aref vector (1+ offset))))
101
(defun (setf ub16ref/be) (value vector offset)
102
(declare (type (unsigned-byte 16) value)
103
(type simple-octet-vector vector)
106
(setf (sb-sys:sap-ref-16 (sb-sys:vector-sap vector) offset) value)
108
(setf (aref vector (1+ offset)) (logand value #xff)
109
(aref vector offset) (ldb (byte 8 8) value))
112
(defun ub32ref/le (vector offset)
113
(declare (type simple-octet-vector vector)
116
(sb-sys:sap-ref-32 (sb-sys:vector-sap vector) offset)
118
(dpb (ub16ref/le vector (+ offset 2))
120
(ub16ref/le vector offset)))
122
(defun (setf ub32ref/le) (value vector offset)
123
(declare (type (unsigned-byte 32) value)
124
(type simple-octet-vector vector)
127
(setf (sb-sys:sap-ref-32 (sb-sys:vector-sap vector) offset) value)
129
(setf (ub16ref/le vector offset) (logand value #xffff)
130
(ub16ref/le vector (+ offset 2)) (ldb (byte 16 16) value))
133
(defun ub32ref/be (vector offset)
134
(declare (type simple-octet-vector vector)
137
(sb-sys:sap-ref-32 (sb-sys:vector-sap vector) offset)
138
#+(and ironclad-assembly (or x86 x86-64))
139
(swap32 (sb-sys:sap-ref-32 (sb-sys:vector-sap vector) offset))
141
(and ironclad-assembly (or x86 x86-64)))
142
(dpb (ub16ref/be vector offset)
144
(ub16ref/be vector (+ offset 2))))
146
(defun (setf ub32ref/be) (value vector offset)
147
(declare (type (unsigned-byte 32) value)
148
(type simple-octet-vector vector)
151
(setf (sb-sys:sap-ref-32 (sb-sys:vector-sap vector) offset) value)
152
#+(and ironclad-assembly (or x86 x86-64))
153
(setf (sb-sys:sap-ref-32 (sb-sys:vector-sap vector) offset) (swap32 value))
155
(and ironclad-assembly (or x86 x86-64)))
156
(setf (ub16ref/be vector (+ offset 2)) (logand value #xffff)
157
(ub16ref/be vector offset) (ldb (byte 16 16) value))
160
(defun ub64ref/le (vector offset)
161
(declare (type simple-octet-vector vector)
163
#+(and little-endian 64-bit)
164
(sb-sys:sap-ref-64 (sb-sys:vector-sap vector) offset)
165
#-(and little-endian 64-bit)
166
(dpb (ub32ref/le vector (+ offset 4))
168
(ub32ref/le vector offset)))
170
(defun (setf ub64ref/le) (value vector offset)
171
(declare (type (unsigned-byte 64) value)
172
(type simple-octet-vector vector)
174
#+(and little-endian 64-bit)
175
(setf (sb-sys:sap-ref-64 (sb-sys:vector-sap vector) offset) value)
176
#-(and little-endian 64-bit)
177
(setf (ub32ref/le vector offset) (logand value #xffffffff)
178
(ub32ref/le vector (+ offset 4)) (ldb (byte 32 32) value))
182
(defun ub64ref/be (vector offset)
183
(declare (type simple-octet-vector vector)
185
#+(and big-endian 64-bit)
186
(sb-sys:sap-ref-64 (sb-sys:vector-sap vector) offset)
187
#+(and ironclad-assembly x86-64)
188
(swap64 (sb-sys:sap-ref-64 (sb-sys:vector-sap vector) offset))
189
#-(or (and big-endian 64-bit)
190
(and ironclad-assembly x86-64))
191
(dpb (ub32ref/be vector offset)
193
(ub32ref/be vector (+ offset 4))))
195
(defun (setf ub64ref/be) (value vector offset)
196
(declare (type (unsigned-byte 64) value)
197
(type simple-octet-vector vector)
199
#+(and big-endian 64-bit)
200
(setf (sb-sys:sap-ref-64 (sb-sys:vector-sap vector) offset) value)
201
#+(and ironclad-assembly x86-64)
202
(setf (sb-sys:sap-ref-64 (sb-sys:vector-sap vector) offset) (swap64 value))
204
#-(or (and big-endian 64-bit)
205
(and ironclad-assembly x86-64))
206
(setf (ub32ref/be vector (+ offset 4)) (logand value #xffffffff)
207
(ub32ref/be vector offset) (ldb (byte 32 32) value))
210
;;; efficient 32-bit arithmetic, which a lot of algorithms require
212
(declaim #+ironclad-fast-mod32-arithmetic (inline mod32+)
213
(ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32+))
216
(declare (type (unsigned-byte 32) a b))
217
(ldb (byte 32 0) (+ a b)))
219
(define-compiler-macro mod32+ (a b)
220
`(ldb (byte 32 0) (+ ,a ,b)))
222
;;; mostly needed for CAST*
223
(declaim #+ironclad-fast-mod32-arithmetic (inline mod32-)
224
(ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32-))
227
(declare (type (unsigned-byte 32) a b))
228
(ldb (byte 32 0) (- a b)))
230
(define-compiler-macro mod32- (a b)
231
`(ldb (byte 32 0) (- ,a ,b)))
233
;;; mostly needed for RC6
234
(declaim #+ironclad-fast-mod32-arithmetic (inline mod32*)
235
(ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32*))
238
(declare (type (unsigned-byte 32) a b))
239
(ldb (byte 32 0) (* a b)))
241
(define-compiler-macro mod32* (a b)
242
`(ldb (byte 32 0) (* ,a ,b)))
244
(declaim #+ironclad-fast-mod32-arithmetic (inline mod32ash)
245
(ftype (function ((unsigned-byte 32) (integer -31 31)) (unsigned-byte 32)) mod32ash))
247
(defun mod32ash (num count)
248
(declare (type (unsigned-byte 32) num)
249
(type (integer -31 31) count))
250
(ldb (byte 32 0) (ash num count)))
252
(define-compiler-macro mod32ash (num count)
253
;; work around SBCL optimizing bug as described by APD:
254
;; http://www.caddr.com/macho/archives/sbcl-devel/2004-8/3877.html
255
`(logand #xffffffff (ash ,num ,count)))
257
(declaim #+ironclad-fast-mod32-arithmetic (inline mod32lognot)
258
(ftype (function ((unsigned-byte 32)) (unsigned-byte 32)) mod32lognot))
260
(defun mod32lognot (num)
261
(declare (type (unsigned-byte 32) num))
262
(ldb (byte 32 0) (lognot num)))
264
(define-compiler-macro mod32lognot (num)
265
`(ldb (byte 32 0) (lognot ,num)))
267
(declaim #+ironclad-fast-mod32-arithmetic (inline rol32 ror32)
268
(ftype (function ((unsigned-byte 32) (unsigned-byte 5)) (unsigned-byte 32)) rol32 ror32))
271
(declare (type (unsigned-byte 32) a)
272
(type (integer 0 32) s))
273
(sb-rotate-byte:rotate-byte s (byte 32 0) a))
276
(declare (type (unsigned-byte 32) a)
277
(type (integer 0 32) s))
278
(sb-rotate-byte:rotate-byte (- s) (byte 32 0) a))
280
(declaim #+ironclad-fast-mod64-arithmetic (inline mod64+ mod64- mod64*)
281
(ftype (function ((unsigned-byte 64) (unsigned-byte 64)) (unsigned-byte 64)) mod64+))
284
(declare (type (unsigned-byte 64) a b))
285
(ldb (byte 64 0) (+ a b)))
287
(define-compiler-macro mod64+ (a b)
288
`(ldb (byte 64 0) (+ ,a ,b)))
291
(declare (type (unsigned-byte 64) a b))
292
(ldb (byte 64 0) (- a b)))
294
(define-compiler-macro mod64- (a b)
295
`(ldb (byte 64 0) (- ,a ,b)))
298
(declare (type (unsigned-byte 64) a b))
299
(ldb (byte 64 0) (* a b)))
301
(define-compiler-macro mod64* (a b)
302
`(ldb (byte 64 0) (* ,a ,b)))
304
(declaim #+ironclad-fast-mod64-arithmetic (inline mod64ash)
305
(ftype (function ((unsigned-byte 64) (integer -63 63)) (unsigned-byte 64)) mod64ash))
307
(defun mod64ash (num count)
308
(declare (type (unsigned-byte 64) num)
309
(type (integer -63 63) count))
310
(ldb (byte 64 0) (ash num count)))
312
(define-compiler-macro mod64ash (num count)
313
;; work around SBCL optimizing bug as described by APD:
314
;; http://www.caddr.com/macho/archives/sbcl-devel/2004-8/3877.html
315
`(logand #xffffffffffffffff (ash ,num ,count)))
317
(declaim #+ironclad-fast-mod64-arithmetic (inline mod64lognot)
318
(ftype (function ((unsigned-byte 64)) (unsigned-byte 64)) mod64lognot))
320
(defun mod64lognot (num)
321
(declare (type (unsigned-byte 64) num))
322
(ldb (byte 64 0) (lognot num)))
324
(define-compiler-macro mod64lognot (num)
325
`(ldb (byte 64 0) (lognot ,num)))
327
(declaim #+ironclad-fast-mod64-arithmetic (inline rol64 ror64)
328
(ftype (function ((unsigned-byte 64) (unsigned-byte 6)) (unsigned-byte 64)) rol64 ror64))
331
(declare (type (unsigned-byte 64) a)
332
(type (integer 0 64) s))
333
(sb-rotate-byte:rotate-byte s (byte 64 0) a))
336
(declare (type (unsigned-byte 64) a)
337
(type (integer 0 64) s))
338
(sb-rotate-byte:rotate-byte (- s) (byte 64 0) a))
341
(declaim #+ironclad-fast-mod32-arithmetic
342
(inline %add-with-carry %subtract-with-borrow))
344
;;; The names are taken from sbcl and cmucl's bignum routines.
345
;;; Naturally, they work the same way (which means %SUBTRACT-WITH-BORROW
346
;;; is a little weird).
347
(defun %add-with-carry (x y carry)
348
(declare (type (unsigned-byte 32) x y)
349
(type (mod 2) carry))
350
(let* ((temp (mod32+ x y))
351
(temp-carry (if (< temp x) 1 0))
352
(result (mod32+ temp carry)))
353
(values result (logior temp-carry (if (< result temp) 1 0)))))
355
(defun %subtract-with-borrow (x y borrow)
356
(declare (type (unsigned-byte 32) x y)
357
(type (mod 2) borrow))
358
(let ((temp (mod32- x y)))
361
(values (mod32- temp 1) (if (< y x) 1 0)))
363
(values temp (logxor (if (< x y) 1 0) 1))))))
365
;;; efficient 8-byte -> 32-byte buffer copy routines, mostly used by
366
;;; the hash functions. we provide big-endian and little-endian
369
(declaim (inline fill-block-le-ub8 fill-block-be-ub8))
371
(declaim (inline copy-to-buffer))
372
(defun copy-to-buffer (from from-offset count buffer buffer-offset)
373
"Copy a partial segment from input vector from starting at
374
from-offset and copying count elements into the 64 byte buffer
375
starting at buffer-offset."
376
(declare (type index from-offset)
377
(type (integer 0 127) count buffer-offset)
378
(type simple-octet-vector from)
379
(type simple-octet-vector buffer)
381
(sb-kernel:ub8-bash-copy from from-offset buffer buffer-offset count))
383
(defun fill-block-ub8-le (block buffer offset)
384
"Convert a complete 64 (UNSIGNED-BYTE 8) input BUFFER starting from
385
OFFSET into the given (UNSIGNED-BYTE 32) BLOCK."
386
(declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
387
(type (simple-array (unsigned-byte 32) (16)) block)
388
(type simple-octet-vector buffer))
390
(sb-kernel:ub8-bash-copy buffer offset block 0 64)
391
#-(and sbcl little-endian)
392
(loop for i of-type (integer 0 16) from 0
393
for j of-type (integer 0 #.array-dimension-limit)
394
from offset to (+ offset 63) by 4
396
(setf (aref block i) (ub32ref/le buffer j)))
399
(defun fill-block-ub8-be (block buffer offset)
400
"Convert a complete 64 (unsigned-byte 8) input vector segment
401
starting from offset into the given 16 word SHA1 block. Calling this function
402
without subsequently calling EXPAND-BLOCK results in undefined behavior."
403
(declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
404
(type (simple-array (unsigned-byte 32) (*)) block)
405
(type simple-octet-vector buffer))
406
;; convert to 32-bit words
408
(sb-kernel:ub8-bash-copy buffer offset block 0 64)
409
#-(and sbcl big-endian)
410
(loop for i of-type (integer 0 16) from 0
411
for j of-type (integer 0 #.array-dimension-limit)
412
from offset to (+ offset 63) by 4
413
do (setf (aref block i) (ub32ref/be buffer j)))
416
(defun fill-block-ub8-le/64 (block buffer offset)
417
"Convert a complete 128 (unsigned-byte 8) input vector segment
418
starting from offset into the given 16 qword SHA1 block. Calling this
419
function without subsequently calling EXPAND-BLOCK results in undefined
421
(declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
422
(type (simple-array (unsigned-byte 64) (*)) block)
423
(type simple-octet-vector buffer)
425
#+(and little-endian 64-bit)
426
(sb-kernel:ub8-bash-copy buffer offset block 0 64)
427
#-(and little-endian 64-bit)
428
(loop for i of-type (integer 0 8) from 0
429
for j of-type (integer 0 #.array-dimension-limit)
430
from offset to (+ offset 63) by 8
431
do (setf (aref block i) (ub64ref/le buffer j)))
434
(defun fill-block-ub8-be/64 (block buffer offset)
435
"Convert a complete 128 (unsigned-byte 8) input vector segment
436
starting from offset into the given 16 qword SHA1 block. Calling this
437
function without subsequently calling EXPAND-BLOCK results in undefined
439
(declare (type (integer 0 #.(- array-dimension-limit 128)) offset)
440
(type (simple-array (unsigned-byte 64) (*)) block)
441
(type simple-octet-vector buffer)
443
;; convert to 64-bit words
444
#+(and big-endian 64-bit)
445
(sb-kernel:ub8-bash-copy buffer offset block 0 128)
447
(loop for i of-type (integer 0 16) from 0
448
for j of-type (integer 0 #.array-dimension-limit)
449
from offset to (+ offset 127) by 8
450
do (setf (aref block i) (ub64ref/be buffer j)))
453
(defun xor-block (block-length input-block1 input-block1-start input-block2 input-block2-start output-block output-block-start)
454
(declare (type (simple-array (unsigned-byte 8) (*)) input-block1 input-block2 output-block)
455
(type index block-length input-block1-start input-block2-start output-block-start)
457
(macrolet ((xor-bytes (size xor-form)
458
`(loop until (< block-length ,size) do
460
(incf output-block-start ,size)
461
(incf input-block1-start ,size)
462
(incf input-block2-start ,size)
463
(decf block-length ,size))))
464
#+(and x86-64 ironclad-assembly)
465
(xor-bytes 16 (xor128 input-block1 input-block1-start
466
input-block2 input-block2-start
467
output-block output-block-start))
469
(xor-bytes 8 (setf (ub64ref/le output-block output-block-start)
470
(logxor (ub64ref/le input-block1 input-block1-start)
471
(ub64ref/le input-block2 input-block2-start))))
473
(xor-bytes 4 (setf (ub32ref/le output-block output-block-start)
474
(logxor (ub32ref/le input-block1 input-block1-start)
475
(ub32ref/le input-block2 input-block2-start))))
476
(xor-bytes 1 (setf (aref output-block output-block-start)
477
(logxor (aref input-block1 input-block1-start)
478
(aref input-block2 input-block2-start))))))
480
(define-compiler-macro xor-block (&whole form &environment env block-length input-block1 input-block1-start input-block2 input-block2-start output-block output-block-start)
481
(declare (ignorable env block-length input-block1 input-block1-start input-block2 input-block2-start output-block output-block-start))
483
#+(and x86-64 ironclad-assembly)
484
((and (constantp block-length env)
486
`(xor128 ,input-block1 ,input-block1-start
487
,input-block2 ,input-block2-start
488
,output-block ,output-block-start))
489
#+(and x86-64 ironclad-assembly)
490
((and (constantp block-length env)
491
(zerop (mod block-length 16)))
493
`(loop for ,i from 0 below ,block-length by 16 do
494
(xor128 ,input-block1 (+ ,input-block1-start ,i)
495
,input-block2 (+ ,input-block2-start ,i)
496
,output-block (+ ,output-block-start ,i)))))
498
((and (constantp block-length env)
500
`(setf (ub64ref/le ,output-block ,output-block-start)
501
(logxor (ub64ref/le ,input-block1 ,input-block1-start)
502
(ub64ref/le ,input-block2 ,input-block2-start))))
504
((and (constantp block-length env)
506
`(setf (ub32ref/le ,output-block ,output-block-start)
507
(logxor (ub32ref/le ,input-block1 ,input-block1-start)
508
(ub32ref/le ,input-block2 ,input-block2-start))))
510
((and (constantp block-length env)
511
(zerop (mod block-length 4)))
513
`(loop for ,i from 0 below ,block-length by 4 do
514
(setf (ub32ref/le ,output-block (+ ,output-block-start ,i))
515
(logxor (ub32ref/le ,input-block1 (+ ,input-block1-start ,i))
516
(ub32ref/le ,input-block2 (+ ,input-block2-start ,i)))))))
520
(defun copy-block (block-length input-block input-block-start output-block output-block-start)
521
(declare (type (simple-array (unsigned-byte 8) (*)) input-block output-block)
522
(type index block-length input-block-start output-block-start)
524
(macrolet ((copy-bytes (size copy-form)
525
`(loop until (< block-length ,size) do
527
(incf input-block-start ,size)
528
(incf output-block-start ,size)
529
(decf block-length ,size))))
530
#+(and x86-64 ironclad-assembly)
531
(copy-bytes 16 (mov128 input-block input-block-start
532
output-block output-block-start))
534
(copy-bytes 8 (setf (ub64ref/le output-block output-block-start)
535
(ub64ref/le input-block input-block-start)))
537
(copy-bytes 4 (setf (ub32ref/le output-block output-block-start)
538
(ub32ref/le input-block input-block-start)))
539
(replace output-block input-block
540
:start1 output-block-start :end1 (+ output-block-start block-length)
541
:start2 input-block-start :end2 (+ input-block-start block-length))))
543
(define-compiler-macro copy-block (&whole form &environment env
545
input-block input-block-start
546
output-block output-block-start)
547
(declare (ignorable env block-length input-block input-block-start output-block output-block-start))
549
#+(and x86-64 ironclad-assembly)
550
((and (constantp block-length env)
552
`(mov128 ,input-block ,input-block-start
553
,output-block ,output-block-start))
554
#+(and x86-64 ironclad-assembly)
555
((and (constantp block-length env)
556
(zerop (mod block-length 16)))
558
`(loop for ,i from 0 below ,block-length by 16 do
559
(mov128 ,input-block (+ ,input-block-start ,i)
560
,output-block (+ ,output-block-start ,i)))))
562
((and (constantp block-length env)
564
`(setf (ub64ref/le ,output-block ,output-block-start)
565
(ub64ref/le ,input-block ,input-block-start)))
567
((and (constantp block-length env)
569
`(setf (ub32ref/le ,output-block ,output-block-start)
570
(ub32ref/le ,input-block ,input-block-start)))
572
((and (constantp block-length env)
573
(zerop (mod block-length 4)))
575
`(loop for ,i from 0 below ,block-length by 4 do
576
(setf (ub32ref/le ,output-block (+ ,output-block-start ,i))
577
(ub32ref/le ,input-block (+ ,input-block-start ,i))))))