Coverage report: /home/ellis/comp/ext/ironclad/src/opt/sbcl/x86oid-vm.lisp
Kind | Covered | All | % |
expression | 0 | 2452 | 0.0 |
branch | 0 | 2 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
2
#+(and (or x86 x86-64) ironclad-assembly)
3
(defpackage :ironclad-vm
4
;; more recent SBCL exports various symbols making this package
5
;; definition more concise. This is the backward-compatible way.
7
#:sb-c ; for DEFINE-VOP and SC-IS
8
#:sb-assem ; for INST, GEN-LABEL
9
#:sb-vm) ; for primtype names, SCs, constants
10
(:shadow #:ea) ; in case SB-VM exports it
12
#:positive-fixnum #:unsigned-num
13
#:descriptor-reg #:unsigned-reg #:double-reg #:immediate
14
#:simple-array-unsigned-byte-8
15
#:simple-array-unsigned-byte-32
16
#+x86-64 #:simple-array-unsigned-byte-64
17
#+x86-64 #:rax-offset #+x86-64 #:rcx-offset))
19
#+(and (or x86 x86-64) ironclad-assembly)
20
(in-package :ironclad-vm)
22
#+(and x86 ironclad-assembly)
23
(eval-when (:compile-toplevel :load-toplevel :execute)
24
(defun ea (displacement &optional base index (scale 1))
25
(sb-vm::make-ea :dword
29
:disp (or displacement 0)))
30
(setf (fdefinition 'dword-ea) (fdefinition 'ea))
31
(defmacro dword-inst (name &rest operands)
32
`(inst ,name ,@operands)))
34
#+(and x86-64 ironclad-assembly)
35
(eval-when (:compile-toplevel :load-toplevel :execute)
37
(progn ; Newer SBCL (>= 1.4.11)
38
(setf (fdefinition 'ea) (fdefinition 'sb-vm::ea))
39
(setf (fdefinition 'dword-ea) (fdefinition 'ea))
40
(defmacro dword-inst (name &rest operands)
43
;; The '(bswap :dword r)' notation is only supported
45
(if (ignore-errors (sb-ext:assert-version->= 1 5 9 17) t)
46
`(inst bswap :dword ,@operands)
47
`(inst bswap (sb-vm::reg-in-size ,@operands :dword))))
49
`(inst ,name :dword ,@operands)))))
51
(progn ; Older SBCL (< 1.4.11)
52
(defun ea (displacement &optional base index (scale 1))
53
(sb-vm::make-ea :qword
57
:disp (or displacement 0)))
58
(defun dword-ea (displacement &optional base index (scale 1))
59
(sb-vm::make-ea :dword
63
:disp (or displacement 0)))
64
(defmacro dword-inst (name &rest operands)
65
`(inst ,name ,@(mapcar (lambda (operand)
67
(sb-vm::reg-in-size ,operand :dword)
71
#+(and (or x86 x86-64) ironclad-assembly)
73
(define-vop (fill-block-ub8)
75
(:args (block :scs (descriptor-reg))
76
(buffer :scs (descriptor-reg))
77
(offset :scs (unsigned-reg immediate) :target buffer-index))
78
(:variant-vars big-endian-p bytes-to-copy 64-bit-p)
79
(:temporary (:sc unsigned-reg) temp buffer-index block-index)
81
(let* ((data-offset (- (* n-word-bytes vector-data-offset)
82
other-pointer-lowtag))
83
(block-disp (+ data-offset bytes-to-copy))
84
(immediate-offset (sc-is offset immediate))
85
(unroll (if immediate-offset 2 1))
87
(flet ((ea-for-buffer (&optional (offset 0))
89
(ea (+ block-disp offset) buffer block-index n-word-bytes)
90
(ea data-offset buffer buffer-index)))
91
(ea-for-block (&optional (offset 0))
92
(ea (+ block-disp offset) block block-index n-word-bytes))
93
(handle-endianness (x)
99
(unless immediate-offset
100
(move buffer-index offset))
101
(inst mov block-index (- (truncate bytes-to-copy n-word-bytes)))
103
(inst mov temp (ea-for-buffer 0))
104
(when immediate-offset
105
(inst mov buffer-index (ea-for-buffer n-word-bytes)))
106
(handle-endianness temp)
107
(when immediate-offset
108
(handle-endianness buffer-index))
109
(inst mov (ea-for-block) temp)
110
(when immediate-offset
111
(inst mov (ea-for-block n-word-bytes) buffer-index))
112
(unless immediate-offset
113
(inst add buffer-index n-word-bytes))
114
(inst add block-index unroll)
115
(inst jmp :nz loop)))))
117
(define-vop (fill-block-ub8-le fill-block-ub8)
118
(:translate ironclad::fill-block-ub8-le)
119
(:arg-types simple-array-unsigned-byte-32
120
simple-array-unsigned-byte-8
122
(:variant nil 64 nil))
124
(define-vop (fill-block-ub8-be fill-block-ub8)
125
(:translate ironclad::fill-block-ub8-be)
126
(:arg-types simple-array-unsigned-byte-32
127
simple-array-unsigned-byte-8
132
(define-vop (fill-block-ub8-le/64 fill-block-ub8)
133
(:translate ironclad::fill-block-ub8-le/64)
134
(:arg-types simple-array-unsigned-byte-64
135
simple-array-unsigned-byte-8
137
;; Yes. Really. Inconsistent naming FTW.
141
(define-vop (fill-block-ub8-be/64 fill-block-ub8)
142
(:translate ironclad::fill-block-ub8-be/64)
143
(:arg-types simple-array-unsigned-byte-64
144
simple-array-unsigned-byte-8
148
(define-vop (expand-block)
149
(:translate ironclad::expand-block)
151
(:args (block :scs (descriptor-reg)))
152
(:arg-types simple-array-unsigned-byte-32)
153
(:temporary (:sc unsigned-reg) temp count)
155
(flet ((block-word (elem-offset)
156
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
157
other-pointer-lowtag)
159
(dword-ea disp block count 4))))
160
(let ((loop (gen-label)))
163
(dword-inst mov temp (block-word -3))
164
(dword-inst xor temp (block-word -8))
165
(dword-inst xor temp (block-word -14))
166
(dword-inst xor temp (block-word -16))
167
(dword-inst rol temp 1)
168
(dword-inst mov (block-word 0) temp)
171
(inst jmp :le loop)))))
173
(define-vop (sha256-expand-block)
174
(:translate ironclad::sha256-expand-block)
176
(:args (block :scs (descriptor-reg)))
177
(:arg-types simple-array-unsigned-byte-32)
178
(:temporary (:sc unsigned-reg) t1 t2 t3 t4 count)
180
(flet ((block-word (elem-offset)
181
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
182
other-pointer-lowtag)
184
(dword-ea disp block count 4))))
185
(let ((loop (gen-label)))
186
;; The code could definitely be improved for x86-64 by using
187
;; more temporaries, but this version works on both 32- and
188
;; 64-bit and eliminates many of the stupidities in the modular
189
;; arithmetic version (mostly on 64-bit, but some lameness in
190
;; the 32-bit version as well).
193
(dword-inst mov t1 (block-word -2))
195
(dword-inst mov t2 t1)
196
(dword-inst rol t2 15)
197
(dword-inst mov t3 t1)
198
(dword-inst rol t3 13)
199
(dword-inst xor t2 t3)
200
(dword-inst shr t1 10)
201
(dword-inst xor t1 t2)
202
(dword-inst mov t2 (block-word -15))
204
(dword-inst mov t3 t2)
205
(dword-inst rol t3 25)
206
(dword-inst mov t4 t2)
207
(dword-inst rol t4 14)
208
(dword-inst xor t3 t4)
209
(dword-inst shr t2 3)
210
(dword-inst xor t2 t3)
211
(dword-inst add t1 (block-word -7))
212
(dword-inst add t2 (block-word -16))
213
(dword-inst add t1 t2)
214
(dword-inst mov (block-word 0) t1)
217
(inst jmp :le loop)))))
219
;;; Implementing this for x86 would require nasty hacks with
220
;;; pseudo-atomic. Might just be worth it for the speed increase,
221
;;; though. The code is also probably not scheduled optimally.
223
(define-vop (update-sha1-block)
224
(:translate ironclad::%update-sha1-block)
226
(:args (regs :scs (descriptor-reg) :target result)
227
(block :scs (descriptor-reg)))
228
(:arg-types simple-array-unsigned-byte-32 simple-array-unsigned-byte-32)
229
(:results (result :scs (descriptor-reg)))
230
(:result-types simple-array-unsigned-byte-32)
231
(:temporary (:sc unsigned-reg) a b c d e t1 t2)
233
(let ((k1 #x5a827999)
237
(labels ((block/reg-ea (base index)
238
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
239
other-pointer-lowtag)
241
(dword-ea disp base)))
243
(dword-inst mov t2 a)
244
(dword-inst mov t1 c)
245
(dword-inst rol t2 5)
246
(dword-inst xor t1 d)
247
(dword-inst add t2 (block/reg-ea block n))
248
(dword-inst and t1 b)
249
(dword-inst xor t1 d)
250
(dword-inst lea e (ea k1 t1 e))
251
(dword-inst rol b 30)
252
(dword-inst add e t2))
253
(f2/4 (a b c d e n k)
254
(dword-inst mov t2 a)
255
(dword-inst mov t1 d)
256
(dword-inst rol t2 5)
257
(dword-inst xor t1 c)
258
(dword-inst add t2 (block/reg-ea block n))
259
(dword-inst xor t1 b)
260
(dword-inst lea e (ea k t1 e))
261
(dword-inst rol b 30)
262
(dword-inst add e t2))
264
(f2/4 a b c d e n k2))
266
(f2/4 a b c d e n k4))
268
(dword-inst mov t2 c)
269
(dword-inst mov t1 c)
270
(dword-inst and t2 b)
272
(dword-inst and t1 d)
273
(dword-inst or t1 t2)
274
(dword-inst mov t2 a)
275
(dword-inst rol t2 5)
276
(dword-inst add t2 (block/reg-ea block n))
277
(dword-inst rol b 30)
278
(dword-inst lea e (ea k3 t1 e))
279
(dword-inst add e t2))
280
(sha1-rounds (start end f)
281
(let ((xvars (ironclad::make-circular-list a b c d e)))
282
(loop for i from start upto end
283
for vars on xvars by #'cddddr
284
do (multiple-value-bind (a b c d e)
285
(apply #'values (ironclad::circular-list-subseq vars 0 5))
286
(funcall f a b c d e i))))))
287
(dword-inst mov a (block/reg-ea regs 0))
288
(dword-inst mov b (block/reg-ea regs 1))
289
(dword-inst mov c (block/reg-ea regs 2))
290
(dword-inst mov d (block/reg-ea regs 3))
291
(dword-inst mov e (block/reg-ea regs 4))
292
(sha1-rounds 0 19 #'f1)
293
(sha1-rounds 20 39 #'f2)
294
(sha1-rounds 40 59 #'f3)
295
(sha1-rounds 60 79 #'f4)
296
(dword-inst add (block/reg-ea regs 0) a)
297
(dword-inst add (block/reg-ea regs 1) b)
298
(dword-inst add (block/reg-ea regs 2) c)
299
(dword-inst add (block/reg-ea regs 3) d)
300
(dword-inst add (block/reg-ea regs 4) e)
301
(move result regs)))))
304
(define-vop (salsa-core-fast)
305
(:translate ironclad::x-salsa-core)
307
(:args (buffer :scs (descriptor-reg))
308
(state :scs (descriptor-reg)))
310
(:arg-types (:constant (signed-byte 61))
311
simple-array-unsigned-byte-8
312
simple-array-unsigned-byte-32)
313
(:temporary (:sc double-reg) x0 x1 x2 x3)
314
(:temporary (:sc unsigned-reg) r0 r1 r2 r3 temp count)
316
(labels ((nth-xmm-mem (base i)
317
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
318
other-pointer-lowtag)
321
(nth-buffer-dword (i)
322
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
323
other-pointer-lowtag)
325
(dword-ea disp buffer)))
326
(quarter-round (y0 y1 y2 y3)
327
;; x[y0] = XOR(x[y0],ROTATE(PLUS(x[y3],x[y2]), 7));
328
;; x[y1] = XOR(x[y1],ROTATE(PLUS(x[y0],x[y3]), 9));
329
;; x[y2] = XOR(x[y2],ROTATE(PLUS(x[y1],x[y0]),13));
330
;; x[y3] = XOR(x[y3],ROTATE(PLUS(x[y2],x[y1]),18));
331
(dword-inst mov r2 (nth-buffer-dword y2))
332
(dword-inst mov r3 (nth-buffer-dword y3))
334
(dword-inst lea r0 (ea 0 r3 r2))
335
(dword-inst rol r0 7)
336
(dword-inst xor r0 (nth-buffer-dword y0))
338
(dword-inst lea r1 (ea 0 r0 r3))
339
(dword-inst rol r1 9)
340
(dword-inst xor r1 (nth-buffer-dword y1))
342
(dword-inst lea temp (ea 0 r1 r0))
343
(dword-inst rol temp 13)
344
(dword-inst xor r2 temp)
346
(dword-inst lea temp (ea 0 r2 r1))
347
(dword-inst rol temp 18)
348
(dword-inst xor r3 temp)
350
(dword-inst mov (nth-buffer-dword y0) r0)
351
(dword-inst mov (nth-buffer-dword y1) r1)
352
(dword-inst mov (nth-buffer-dword y2) r2)
353
(dword-inst mov (nth-buffer-dword y3) r3)))
354
;; copy state to the output buffer
355
(inst movdqu x0 (nth-xmm-mem state 0))
356
(inst movdqu x1 (nth-xmm-mem state 1))
357
(inst movdqu x2 (nth-xmm-mem state 2))
358
(inst movdqu x3 (nth-xmm-mem state 3))
359
(inst movdqu (nth-xmm-mem buffer 0) x0)
360
(inst movdqu (nth-xmm-mem buffer 1) x1)
361
(inst movdqu (nth-xmm-mem buffer 2) x2)
362
(inst movdqu (nth-xmm-mem buffer 3) x3)
364
(let ((repeat (gen-label)))
365
(inst mov count n-rounds)
367
(quarter-round 4 8 12 0)
368
(quarter-round 9 13 1 5)
369
(quarter-round 14 2 6 10)
370
(quarter-round 3 7 11 15)
372
(quarter-round 1 2 3 0)
373
(quarter-round 6 7 4 5)
374
(quarter-round 11 8 9 10)
375
(quarter-round 12 13 14 15)
377
(inst jmp :nz repeat))
379
(inst paddd x0 (nth-xmm-mem buffer 0))
380
(inst paddd x1 (nth-xmm-mem buffer 1))
381
(inst paddd x2 (nth-xmm-mem buffer 2))
382
(inst paddd x3 (nth-xmm-mem buffer 3))
383
(inst movdqu (nth-xmm-mem buffer 0) x0)
384
(inst movdqu (nth-xmm-mem buffer 1) x1)
385
(inst movdqu (nth-xmm-mem buffer 2) x2)
386
(inst movdqu (nth-xmm-mem buffer 3) x3))))
389
(define-vop (chacha-core-fast)
390
(:translate ironclad::x-chacha-core)
392
(:args (buffer :scs (descriptor-reg))
393
(state :scs (descriptor-reg)))
395
(:arg-types (:constant (signed-byte 61))
396
simple-array-unsigned-byte-8
397
simple-array-unsigned-byte-32)
398
(:temporary (:sc double-reg) x0 x1 x2 x3 y0 y1 y2 y3 t0)
399
(:temporary (:sc unsigned-reg) count)
401
(flet ((nth-xmm-mem (base i)
402
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
403
other-pointer-lowtag)
410
(inst pslld-imm y3 16)
411
(inst psrld-imm t0 16)
417
(inst pslld-imm y1 12)
418
(inst psrld-imm t0 20)
424
(inst pslld-imm y3 8)
425
(inst psrld-imm t0 24)
431
(inst pslld-imm y1 7)
432
(inst psrld-imm t0 25)
434
(inst movdqu x0 (nth-xmm-mem state 0))
435
(inst movdqu x1 (nth-xmm-mem state 1))
436
(inst movdqu x2 (nth-xmm-mem state 2))
437
(inst movdqu x3 (nth-xmm-mem state 3))
443
(let ((repeat (gen-label)))
444
(inst mov count n-rounds)
448
(inst pshufd y1 y1 #b00111001)
449
(inst pshufd y2 y2 #b01001110)
450
(inst pshufd y3 y3 #b10010011)
453
(inst pshufd y1 y1 #b10010011)
454
(inst pshufd y2 y2 #b01001110)
455
(inst pshufd y3 y3 #b00111001)
458
(inst jmp :nz repeat))
464
(inst movdqu (nth-xmm-mem buffer 0) x0)
465
(inst movdqu (nth-xmm-mem buffer 1) x1)
466
(inst movdqu (nth-xmm-mem buffer 2) x2)
467
(inst movdqu (nth-xmm-mem buffer 3) x3))))
470
(define-vop (pclmulqdq-support-p)
471
(:translate ironclad::pclmulqdq-support-p)
474
(:temporary (:sc unsigned-reg :offset rax-offset) eax)
475
(:temporary (:sc unsigned-reg :offset rcx-offset) ecx)
482
(define-vop (aes-ni-support-p)
483
(:translate ironclad::aes-ni-support-p)
486
(:temporary (:sc unsigned-reg :offset rax-offset) eax)
487
(:temporary (:sc unsigned-reg :offset rcx-offset) ecx)
494
(define-vop (aes-ni-generate-round-keys)
495
(:translate ironclad::aes-ni-generate-round-keys)
497
(:args (key :scs (descriptor-reg))
498
(key-length :scs (unsigned-reg))
499
(encryption-keys :scs (descriptor-reg))
500
(decryption-keys :scs (descriptor-reg)))
501
(:arg-types simple-array-unsigned-byte-8
503
simple-array-unsigned-byte-32
504
simple-array-unsigned-byte-32)
505
(:temporary (:sc double-reg) x0 x1 x2 x3 x4 x5 x6)
507
(labels ((buffer-mem (base i)
508
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
509
other-pointer-lowtag)
513
(inst pshufd x1 x1 #b11111111)
514
(inst shufps x2 x0 #b00010000)
516
(inst shufps x2 x0 #b10001100)
520
(inst pshufd x1 x1 #b01010101)
521
(inst shufps x2 x0 #b00010000)
523
(inst shufps x2 x0 #b10001100)
529
(inst pshufd x6 x0 #b11111111)
533
(inst shufps x5 x0 #b01000100)
534
(inst shufps x1 x3 #b01001110))
536
(inst pshufd x1 x1 #b01010101)
537
(inst shufps x2 x0 #b00010000)
539
(inst shufps x2 x0 #b10001100)
544
(inst pshufd x5 x0 #b11111111)
550
(inst pshufd x1 x1 #b10101010)
551
(inst shufps x2 x3 #b00010000)
553
(inst shufps x2 x3 #b10001100)
556
(let ((key-192 (gen-label))
557
(key-128 (gen-label))
560
(inst movdqu x0 (buffer-mem key 0))
561
(inst cmp key-length 24)
562
(inst jmp :b key-128)
563
(inst jmp :z key-192)
565
(inst movdqu x3 (buffer-mem key 1))
566
(inst movdqu (buffer-mem encryption-keys 0) x0)
567
(inst movdqu (buffer-mem decryption-keys 14) x0)
568
(inst movdqu (buffer-mem encryption-keys 1) x3)
570
(inst movdqu (buffer-mem decryption-keys 13) x6)
571
(inst aeskeygenassist x1 x3 1)
573
(inst movdqu (buffer-mem encryption-keys 2) x0)
575
(inst movdqu (buffer-mem decryption-keys 12) x6)
576
(inst aeskeygenassist x1 x0 1)
578
(inst movdqu (buffer-mem encryption-keys 3) x3)
580
(inst movdqu (buffer-mem decryption-keys 11) x6)
581
(inst aeskeygenassist x1 x3 2)
583
(inst movdqu (buffer-mem encryption-keys 4) x0)
585
(inst movdqu (buffer-mem decryption-keys 10) x6)
586
(inst aeskeygenassist x1 x0 2)
588
(inst movdqu (buffer-mem encryption-keys 5) x3)
590
(inst movdqu (buffer-mem decryption-keys 9) x6)
591
(inst aeskeygenassist x1 x3 4)
593
(inst movdqu (buffer-mem encryption-keys 6) x0)
595
(inst movdqu (buffer-mem decryption-keys 8) x6)
596
(inst aeskeygenassist x1 x0 4)
598
(inst movdqu (buffer-mem encryption-keys 7) x3)
600
(inst movdqu (buffer-mem decryption-keys 7) x6)
601
(inst aeskeygenassist x1 x3 8)
603
(inst movdqu (buffer-mem encryption-keys 8) x0)
605
(inst movdqu (buffer-mem decryption-keys 6) x6)
606
(inst aeskeygenassist x1 x0 8)
608
(inst movdqu (buffer-mem encryption-keys 9) x3)
610
(inst movdqu (buffer-mem decryption-keys 5) x6)
611
(inst aeskeygenassist x1 x3 16)
613
(inst movdqu (buffer-mem encryption-keys 10) x0)
615
(inst movdqu (buffer-mem decryption-keys 4) x6)
616
(inst aeskeygenassist x1 x0 16)
618
(inst movdqu (buffer-mem encryption-keys 11) x3)
620
(inst movdqu (buffer-mem decryption-keys 3) x6)
621
(inst aeskeygenassist x1 x3 32)
623
(inst movdqu (buffer-mem encryption-keys 12) x0)
625
(inst movdqu (buffer-mem decryption-keys 2) x6)
626
(inst aeskeygenassist x1 x0 32)
628
(inst movdqu (buffer-mem encryption-keys 13) x3)
630
(inst movdqu (buffer-mem decryption-keys 1) x6)
631
(inst aeskeygenassist x1 x3 64)
633
(inst movdqu (buffer-mem encryption-keys 14) x0)
634
(inst movdqu (buffer-mem decryption-keys 0) x0)
638
(inst movdqu x3 (buffer-mem key 1))
639
(inst movdqu (buffer-mem encryption-keys 0) x0)
640
(inst movdqu (buffer-mem decryption-keys 12) x0)
641
(inst aeskeygenassist x1 x3 1)
643
(inst movdqu (buffer-mem encryption-keys 1) x5)
645
(inst movdqu (buffer-mem decryption-keys 11) x6)
646
(inst movdqu (buffer-mem encryption-keys 2) x1)
648
(inst movdqu (buffer-mem decryption-keys 10) x6)
649
(inst aeskeygenassist x1 x3 2)
651
(inst movdqu (buffer-mem encryption-keys 3) x0)
653
(inst movdqu (buffer-mem decryption-keys 9) x6)
654
(inst aeskeygenassist x1 x3 4)
656
(inst movdqu (buffer-mem encryption-keys 4) x5)
658
(inst movdqu (buffer-mem decryption-keys 8) x6)
659
(inst movdqu (buffer-mem encryption-keys 5) x1)
661
(inst movdqu (buffer-mem decryption-keys 7) x6)
662
(inst aeskeygenassist x1 x3 8)
664
(inst movdqu (buffer-mem encryption-keys 6) x0)
666
(inst movdqu (buffer-mem decryption-keys 6) x6)
667
(inst aeskeygenassist x1 x3 16)
669
(inst movdqu (buffer-mem encryption-keys 7) x5)
671
(inst movdqu (buffer-mem decryption-keys 5) x6)
672
(inst movdqu (buffer-mem encryption-keys 8) x1)
674
(inst movdqu (buffer-mem decryption-keys 4) x6)
675
(inst aeskeygenassist x1 x3 32)
677
(inst movdqu (buffer-mem encryption-keys 9) x0)
679
(inst movdqu (buffer-mem decryption-keys 3) x6)
680
(inst aeskeygenassist x1 x3 64)
682
(inst movdqu (buffer-mem encryption-keys 10) x5)
684
(inst movdqu (buffer-mem decryption-keys 2) x6)
685
(inst movdqu (buffer-mem encryption-keys 11) x1)
687
(inst movdqu (buffer-mem decryption-keys 1) x6)
688
(inst aeskeygenassist x1 x3 128)
690
(inst movdqu (buffer-mem encryption-keys 12) x0)
691
(inst movdqu (buffer-mem decryption-keys 0) x0)
695
(inst movdqu (buffer-mem encryption-keys 0) x0)
696
(inst movdqu (buffer-mem decryption-keys 10) x0)
697
(inst aeskeygenassist x1 x0 1)
699
(inst movdqu (buffer-mem encryption-keys 1) x0)
701
(inst movdqu (buffer-mem decryption-keys 9) x6)
702
(inst aeskeygenassist x1 x0 2)
704
(inst movdqu (buffer-mem encryption-keys 2) x0)
706
(inst movdqu (buffer-mem decryption-keys 8) x6)
707
(inst aeskeygenassist x1 x0 4)
709
(inst movdqu (buffer-mem encryption-keys 3) x0)
711
(inst movdqu (buffer-mem decryption-keys 7) x6)
712
(inst aeskeygenassist x1 x0 8)
714
(inst movdqu (buffer-mem encryption-keys 4) x0)
716
(inst movdqu (buffer-mem decryption-keys 6) x6)
717
(inst aeskeygenassist x1 x0 16)
719
(inst movdqu (buffer-mem encryption-keys 5) x0)
721
(inst movdqu (buffer-mem decryption-keys 5) x6)
722
(inst aeskeygenassist x1 x0 32)
724
(inst movdqu (buffer-mem encryption-keys 6) x0)
726
(inst movdqu (buffer-mem decryption-keys 4) x6)
727
(inst aeskeygenassist x1 x0 64)
729
(inst movdqu (buffer-mem encryption-keys 7) x0)
731
(inst movdqu (buffer-mem decryption-keys 3) x6)
732
(inst aeskeygenassist x1 x0 128)
734
(inst movdqu (buffer-mem encryption-keys 8) x0)
736
(inst movdqu (buffer-mem decryption-keys 2) x6)
737
(inst aeskeygenassist x1 x0 27)
739
(inst movdqu (buffer-mem encryption-keys 9) x0)
741
(inst movdqu (buffer-mem decryption-keys 1) x6)
742
(inst aeskeygenassist x1 x0 54)
744
(inst movdqu (buffer-mem encryption-keys 10) x0)
745
(inst movdqu (buffer-mem decryption-keys 0) x0)
749
(define-vop (aes-ni-encrypt)
750
(:translate ironclad::aes-ni-encrypt)
752
(:args (plaintext :scs (descriptor-reg))
753
(plaintext-start :scs (unsigned-reg))
754
(ciphertext :scs (descriptor-reg))
755
(ciphertext-start :scs (unsigned-reg))
756
(round-keys :scs (descriptor-reg))
757
(n-rounds :scs (unsigned-reg)))
758
(:arg-types simple-array-unsigned-byte-8
760
simple-array-unsigned-byte-8
762
simple-array-unsigned-byte-32
764
(:temporary (:sc double-reg) x0 x1)
766
(flet ((buffer-mem (base offset)
767
(let ((disp (- (* n-word-bytes vector-data-offset)
768
other-pointer-lowtag)))
769
(ea disp base offset)))
771
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
772
other-pointer-lowtag)
774
(ea disp round-keys))))
775
(let ((last-round (gen-label)))
776
(inst movdqu x0 (buffer-mem plaintext plaintext-start))
777
(inst movdqu x1 (round-key 0))
779
(inst movdqu x1 (round-key 1))
781
(inst movdqu x1 (round-key 2))
783
(inst movdqu x1 (round-key 3))
785
(inst movdqu x1 (round-key 4))
787
(inst movdqu x1 (round-key 5))
789
(inst movdqu x1 (round-key 6))
791
(inst movdqu x1 (round-key 7))
793
(inst movdqu x1 (round-key 8))
795
(inst movdqu x1 (round-key 9))
797
(inst movdqu x1 (round-key 10))
798
(inst cmp n-rounds 10)
799
(inst jmp :z last-round)
801
(inst movdqu x1 (round-key 11))
803
(inst movdqu x1 (round-key 12))
804
(inst cmp n-rounds 12)
805
(inst jmp :z last-round)
807
(inst movdqu x1 (round-key 13))
809
(inst movdqu x1 (round-key 14))
810
(emit-label last-round)
811
(inst aesenclast x0 x1)
812
(inst movdqu (buffer-mem ciphertext ciphertext-start) x0)))))
815
(define-vop (aes-ni-decrypt)
816
(:translate ironclad::aes-ni-decrypt)
818
(:args (ciphertext :scs (descriptor-reg))
819
(ciphertext-start :scs (unsigned-reg))
820
(plaintext :scs (descriptor-reg))
821
(plaintext-start :scs (unsigned-reg))
822
(round-keys :scs (descriptor-reg))
823
(n-rounds :scs (unsigned-reg)))
824
(:arg-types simple-array-unsigned-byte-8
826
simple-array-unsigned-byte-8
828
simple-array-unsigned-byte-32
830
(:temporary (:sc double-reg) x0 x1)
832
(flet ((buffer-mem (base offset)
833
(let ((disp (- (* n-word-bytes vector-data-offset)
834
other-pointer-lowtag)))
835
(ea disp base offset)))
837
(let ((disp (+ (- (* n-word-bytes vector-data-offset)
838
other-pointer-lowtag)
840
(ea disp round-keys))))
841
(let ((last-round (gen-label)))
842
(inst movdqu x0 (buffer-mem ciphertext ciphertext-start))
843
(inst movdqu x1 (round-key 0))
845
(inst movdqu x1 (round-key 1))
847
(inst movdqu x1 (round-key 2))
849
(inst movdqu x1 (round-key 3))
851
(inst movdqu x1 (round-key 4))
853
(inst movdqu x1 (round-key 5))
855
(inst movdqu x1 (round-key 6))
857
(inst movdqu x1 (round-key 7))
859
(inst movdqu x1 (round-key 8))
861
(inst movdqu x1 (round-key 9))
863
(inst movdqu x1 (round-key 10))
864
(inst cmp n-rounds 10)
865
(inst jmp :z last-round)
867
(inst movdqu x1 (round-key 11))
869
(inst movdqu x1 (round-key 12))
870
(inst cmp n-rounds 12)
871
(inst jmp :z last-round)
873
(inst movdqu x1 (round-key 13))
875
(inst movdqu x1 (round-key 14))
876
(emit-label last-round)
877
(inst aesdeclast x0 x1)
878
(inst movdqu (buffer-mem plaintext plaintext-start) x0)))))
881
(define-vop (fast-blake2s-mixing)
882
(:translate ironclad::fast-blake2s-mixing)
884
(:args (va :scs (unsigned-reg) :target ra)
885
(vb :scs (unsigned-reg) :target rb)
886
(vc :scs (unsigned-reg) :target rc)
887
(vd :scs (unsigned-reg) :target rd)
888
(x :scs (unsigned-reg))
889
(y :scs (unsigned-reg)))
890
(:arg-types unsigned-num
896
(:results (ra :scs (unsigned-reg) :from (:argument 0))
897
(rb :scs (unsigned-reg) :from (:argument 1))
898
(rc :scs (unsigned-reg) :from (:argument 2))
899
(rd :scs (unsigned-reg) :from (:argument 3)))
900
(:result-types unsigned-num
909
(dword-inst add ra rb)
910
(dword-inst add ra x)
911
(dword-inst xor rd ra)
912
(dword-inst ror rd 16)
913
(dword-inst add rc rd)
914
(dword-inst xor rb rc)
915
(dword-inst ror rb 12)
916
(dword-inst add ra rb)
917
(dword-inst add ra y)
918
(dword-inst xor rd ra)
919
(dword-inst ror rd 8)
920
(dword-inst add rc rd)
921
(dword-inst xor rb rc)
922
(dword-inst ror rb 7)))
925
(define-vop (gmac-mul-fast)
926
(:translate ironclad::gmac-mul-fast)
928
(:args (accumulator :scs (descriptor-reg))
929
(key :scs (descriptor-reg)))
930
(:arg-types simple-array-unsigned-byte-8
931
simple-array-unsigned-byte-8)
932
(:temporary (:sc double-reg) x0 x1 x2 x3 x4 x5 x6 x7 x8 x9)
934
(flet ((buffer-mem (base)
935
(let ((disp (- (* n-word-bytes vector-data-offset)
936
other-pointer-lowtag)))
938
(inst movdqu x0 (buffer-mem accumulator))
939
(inst movdqu x1 (buffer-mem key))
941
(inst pclmulqdq x3 x1 0)
943
(inst pclmulqdq x4 x1 16)
945
(inst pclmulqdq x5 x1 1)
947
(inst pclmulqdq x6 x1 17)
956
(inst pslld-imm x3 1)
957
(inst pslld-imm x6 1)
958
(inst psrld-imm x7 31)
959
(inst psrld-imm x8 31)
970
(inst pslld-imm x7 31)
971
(inst pslld-imm x8 30)
972
(inst pslld-imm x9 25)
982
(inst psrld-imm x2 1)
983
(inst psrld-imm x4 2)
984
(inst psrld-imm x5 7)
990
(inst movdqu (buffer-mem accumulator) x6))))
994
(:translate ironclad::xor128)
996
(:args (in1 :scs (descriptor-reg))
997
(start-in1 :scs (unsigned-reg))
998
(in2 :scs (descriptor-reg))
999
(start-in2 :scs (unsigned-reg))
1000
(out :scs (descriptor-reg))
1001
(start-out :scs (unsigned-reg)))
1002
(:arg-types simple-array-unsigned-byte-8
1004
simple-array-unsigned-byte-8
1006
simple-array-unsigned-byte-8
1008
(:temporary (:sc double-reg) x0 x1)
1010
(flet ((buffer-mem (base offset)
1011
(let ((disp (- (* n-word-bytes vector-data-offset)
1012
other-pointer-lowtag)))
1013
(ea disp base offset))))
1014
(inst movdqu x0 (buffer-mem in1 start-in1))
1015
(inst movdqu x1 (buffer-mem in2 start-in2))
1017
(inst movdqu (buffer-mem out start-out) x0))))
1020
(define-vop (mov128)
1021
(:translate ironclad::mov128)
1022
(:policy :fast-safe)
1023
(:args (in :scs (descriptor-reg))
1024
(start-in :scs (unsigned-reg))
1025
(out :scs (descriptor-reg))
1026
(start-out :scs (unsigned-reg)))
1027
(:arg-types simple-array-unsigned-byte-8
1029
simple-array-unsigned-byte-8
1031
(:temporary (:sc double-reg) x0)
1033
(flet ((buffer-mem (base offset)
1034
(let ((disp (- (* n-word-bytes vector-data-offset)
1035
other-pointer-lowtag)))
1036
(ea disp base offset))))
1037
(inst movdqu x0 (buffer-mem in start-in))
1038
(inst movdqu (buffer-mem out start-out) x0))))
1040
(define-vop (swap32)
1041
(:translate ironclad::swap32)
1042
(:policy :fast-safe)
1043
(:args (n :scs (unsigned-reg) :target r))
1044
(:arg-types unsigned-num)
1045
(:results (r :scs (unsigned-reg)))
1046
(:result-types unsigned-num)
1049
(dword-inst bswap r)))
1052
(define-vop (swap64)
1053
(:translate ironclad::swap64)
1054
(:policy :fast-safe)
1055
(:args (n :scs (unsigned-reg) :target r))
1056
(:arg-types unsigned-num)
1057
(:results (r :scs (unsigned-reg)))
1058
(:result-types unsigned-num)
1063
(define-vop (inc-counter-block)
1064
(:translate ironclad::inc-counter-block)
1065
(:policy :fast-safe)
1066
(:args (size :scs (unsigned-reg) :target idx)
1067
(counter :scs (descriptor-reg)))
1068
(:arg-types positive-fixnum
1069
simple-array-unsigned-byte-8)
1070
(:temporary (:sc unsigned-reg) idx)
1072
(let ((start (gen-label))
1077
#.(let ((disp '(- (* n-word-bytes vector-data-offset)
1078
other-pointer-lowtag 1)))
1080
`(inst adc :byte (ea ,disp counter idx) 0)
1082
`(inst adc (sb-vm::make-ea :byte :base counter :index idx :disp ,disp) 0))
1085
(inst jmp :nz start)
1086
(emit-label end)))))