author    Thordur I. Bjornsson <thib@cvs.openbsd.org>    2010-06-29 21:34:12 +0000
committer Thordur I. Bjornsson <thib@cvs.openbsd.org>    2010-06-29 21:34:12 +0000
commit    d2a69d8f01d6a5d4b82b8c45a9ad7c8768627472 (patch)
tree      7eecc9182334983ea92dc73459c90944fbd944ab /sys/arch
parent    07f81d1ec7f9a00d77ebeac722af8e828796359c (diff)
aesni, a driver for the crypto framework, similar to the via driver, supporting the AES-NI instructions found on recent Intel cores.

I would like to thank Huang Ying at Intel for getting the assembly code relicensed from GPL to a more suitable license! Initial diff by myself, but Mike Belopuhov beat this into a usable shape and fixed many bugs.

Not enabled yet.
Diffstat (limited to 'sys/arch')
-rw-r--r--  sys/arch/amd64/amd64/aes_intel.S  879
-rw-r--r--  sys/arch/amd64/amd64/aesni.c      494
2 files changed, 1373 insertions(+), 0 deletions(-)
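The AES-NI feature the driver depends on is advertised in CPUID leaf 1, ECX bit 25. For orientation, a minimal stand-alone C sketch of that probe; the detection logic is not part of this diff, so this is illustrative only:

/* Sketch: probe CPUID.01H:ECX bit 25 (AES-NI) from userland. */
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	__asm__ volatile("cpuid"
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
	    : "a" (1), "c" (0));
	printf("AES-NI %ssupported\n", (ecx & (1U << 25)) ? "" : "not ");
	return (0);
}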
diff --git a/sys/arch/amd64/amd64/aes_intel.S b/sys/arch/amd64/amd64/aes_intel.S
new file mode 100644
index 00000000000..9747b8d93e9
--- /dev/null
+++ b/sys/arch/amd64/amd64/aes_intel.S
@@ -0,0 +1,879 @@
+/* $OpenBSD: aes_intel.S,v 1.1 2010/06/29 21:34:11 thib Exp $ */
+
+/*
+ * Implement AES algorithm in Intel AES-NI instructions.
+ *
+ * The white paper of AES-NI instructions can be downloaded from:
+ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
+ *
+ * Copyright (C) 2008-2010, Intel Corporation
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal <vinodh.gopal@intel.com>
+ * Kahraman Akdemir
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Changes to the original source code released by Intel:
+ *
+ * - assembler macros were converted to the actual instructions;
+ * - aesni_ctr_enc was changed to be RFC 3686 compliant;
+ */
+
+#include <machine/param.h>
+#include <machine/asm.h>
+
+#define STATE1 %xmm0
+#define STATE2 %xmm4
+#define STATE3 %xmm5
+#define STATE4 %xmm6
+#define STATE STATE1
+#define IN1 %xmm1
+#define IN2 %xmm7
+#define IN3 %xmm8
+#define IN4 %xmm9
+#define IN IN1
+#define KEY %xmm2
+#define IV %xmm3
+#define BSWAP_MASK %xmm10
+#define CTR %xmm11
+#define INC %xmm12
+#define NONCE %xmm13
+
+#define KEYP %rdi
+#define OUTP %rsi
+#define INP %rdx
+#define LEN %rcx
+#define IVP %r8
+#define KLEN %r9d
+#define T1 %r10
+#define TKEYP T1
+#define T2 %r11
+#define TCTR_LOW T2
+
+ .data
+.align 16
+.Lbswap_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+ .text
+
+_key_expansion_128:
+_key_expansion_256a:
+ pshufd $0b11111111, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+
+_key_expansion_192a:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ movaps %xmm2, %xmm6
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, %xmm1
+ shufps $0b01000100, %xmm0, %xmm6
+ movaps %xmm6, (%rcx)
+ shufps $0b01001110, %xmm2, %xmm1
+ movaps %xmm1, 16(%rcx)
+ add $0x20, %rcx
+ ret
+
+_key_expansion_192b:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+
+_key_expansion_256b:
+ pshufd $0b10101010, %xmm1, %xmm1
+ shufps $0b00010000, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ shufps $0b10001100, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ pxor %xmm1, %xmm2
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ ret
+
+/*
+ * void aesni_set_key(struct aesni_sess *ses, uint8_t *key, size_t len)
+ */
+ENTRY(aesni_set_key)
+ movups (%rsi), %xmm0 # user key (first 16 bytes)
+ movaps %xmm0, (%rdi)
+ lea 0x10(%rdi), %rcx # key addr
+ movl %edx, 480(%rdi)
+ pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
+ cmp $24, %dl
+ jb .Lenc_key128
+ je .Lenc_key192
+ movups 0x10(%rsi), %xmm2 # other user key
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
+ call _key_expansion_256a
+ aeskeygenassist $0x1, %xmm0, %xmm1
+ call _key_expansion_256b
+ aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
+ call _key_expansion_256a
+ aeskeygenassist $0x2, %xmm0, %xmm1
+ call _key_expansion_256b
+ aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
+ call _key_expansion_256a
+ aeskeygenassist $0x4, %xmm0, %xmm1
+ call _key_expansion_256b
+ aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
+ call _key_expansion_256a
+ aeskeygenassist $0x8, %xmm0, %xmm1
+ call _key_expansion_256b
+ aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
+ call _key_expansion_256a
+ aeskeygenassist $0x10, %xmm0, %xmm1
+ call _key_expansion_256b
+ aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
+ call _key_expansion_256a
+ aeskeygenassist $0x20, %xmm0, %xmm1
+ call _key_expansion_256b
+ aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
+ call _key_expansion_256a
+ jmp .Ldec_key
+.Lenc_key192:
+ movq 0x10(%rsi), %xmm2 # other user key
+ aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
+ call _key_expansion_192a
+ aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
+ call _key_expansion_192b
+ aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
+ call _key_expansion_192a
+ aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
+ call _key_expansion_192b
+ aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
+ call _key_expansion_192a
+ aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
+ call _key_expansion_192b
+ aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
+ call _key_expansion_192a
+ aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
+ call _key_expansion_192b
+ jmp .Ldec_key
+.Lenc_key128:
+ aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
+ call _key_expansion_128
+ aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
+ call _key_expansion_128
+ aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
+ call _key_expansion_128
+ aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
+ call _key_expansion_128
+ aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
+ call _key_expansion_128
+ aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
+ call _key_expansion_128
+ aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
+ call _key_expansion_128
+ aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
+ call _key_expansion_128
+ aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
+ call _key_expansion_128
+ aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
+ call _key_expansion_128
+.Ldec_key:
+ sub $0x10, %rcx
+ movaps (%rdi), %xmm0
+ movaps (%rcx), %xmm1
+ movaps %xmm0, 240(%rcx)
+ movaps %xmm1, 240(%rdi)
+ add $0x10, %rdi
+ lea 240-16(%rcx), %rsi
+.align 4
+.Ldec_key_loop:
+ movaps (%rdi), %xmm0
+ aesimc %xmm0, %xmm1
+ movaps %xmm1, (%rsi)
+ add $0x10, %rdi
+ sub $0x10, %rsi
+ cmp %rcx, %rdi
+ jb .Ldec_key_loop
+ ret
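The .Ldec_key tail above builds the decryption schedule for AES's equivalent inverse cipher: the round-0 and final encryption round keys swap places, and every key in between is run through AESIMC (InvMixColumns). A sketch of the same derivation using the C intrinsics from <wmmintrin.h>; the function name is illustrative, not the driver's:

/* Sketch: derive the decryption schedule from the encryption one,
 * as .Ldec_key does above; nr is 10, 12 or 14.  Compile with -maes.
 */
#include <wmmintrin.h>

static void
aes_inverse_schedule(const __m128i *enc, __m128i *dec, int nr)
{
	int i;

	dec[0] = enc[nr];			/* last key becomes first */
	for (i = 1; i < nr; i++)		/* middle keys: InvMixColumns */
		dec[i] = _mm_aesimc_si128(enc[nr - i]);
	dec[nr] = enc[0];			/* round-0 key becomes last */
}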
+
+/*
+ * void aesni_enc(struct aesni_sess *ses, uint8_t *dst, uint8_t *src)
+ */
+ENTRY(aesni_enc)
+ movl 480(KEYP), KLEN # key length
+ movups (INP), STATE # input
+ call _aesni_enc1
+ movups STATE, (OUTP) # output
+ ret
+
+/*
+ * _aesni_enc1: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE: initial state (input)
+ * output:
+ * STATE: final state (output)
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_enc1:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE # round 0
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .Lenc128
+ lea 0x20(TKEYP), TKEYP
+ je .Lenc192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps -0x50(TKEYP), KEY
+ aesenc KEY, STATE
+.align 4
+.Lenc192:
+ movaps -0x40(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps -0x30(TKEYP), KEY
+ aesenc KEY, STATE
+.align 4
+.Lenc128:
+ movaps -0x20(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps -0x10(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps (TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x10(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x20(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x30(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x40(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x50(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x60(TKEYP), KEY
+ aesenc KEY, STATE
+ movaps 0x70(TKEYP), KEY
+ aesenclast KEY, STATE
+ ret
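The branch ladder above encodes the AES round counts (10 rounds for 128-bit keys, 12 for 192-bit, 14 for 256-bit) by biasing TKEYP so that all three key sizes fall through the same shared tail of aesenc steps. The equivalent control flow as a C intrinsics sketch, not the kernel's code:

/* Sketch: one-block AES encryption, nr = 10, 12 or 14 rounds.
 * rk[] holds the expanded round keys; compile with -maes.
 */
#include <wmmintrin.h>

static __m128i
aes_encrypt_block(__m128i state, const __m128i *rk, int nr)
{
	int i;

	state = _mm_xor_si128(state, rk[0]);		/* round 0 */
	for (i = 1; i < nr; i++)
		state = _mm_aesenc_si128(state, rk[i]);
	return (_mm_aesenclast_si128(state, rk[nr]));	/* final round */
}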
+
+/*
+ * _aesni_enc4: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE1: initial state (input)
+ * STATE2
+ * STATE3
+ * STATE4
+ * output:
+ * STATE1: final state (output)
+ * STATE2
+ * STATE3
+ * STATE4
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_enc4:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE1 # round 0
+ pxor KEY, STATE2
+ pxor KEY, STATE3
+ pxor KEY, STATE4
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .L4enc128
+ lea 0x20(TKEYP), TKEYP
+ je .L4enc192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps -0x50(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+#.align 4
+.L4enc192:
+ movaps -0x40(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps -0x30(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+#.align 4
+.L4enc128:
+ movaps -0x20(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps -0x10(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps (TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x10(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x20(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x30(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x40(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x50(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x60(TKEYP), KEY
+ aesenc KEY, STATE1
+ aesenc KEY, STATE2
+ aesenc KEY, STATE3
+ aesenc KEY, STATE4
+ movaps 0x70(TKEYP), KEY
+ aesenclast KEY, STATE1 # last round
+ aesenclast KEY, STATE2
+ aesenclast KEY, STATE3
+ aesenclast KEY, STATE4
+ ret
+
+/*
+ * void aesni_dec(struct aesni_sess *ses, uint8_t *dst, uint8_t *src)
+ */
+ENTRY(aesni_dec)
+ mov 480(KEYP), KLEN # key length
+ add $240, KEYP
+ movups (INP), STATE # input
+ call _aesni_dec1
+ movups STATE, (OUTP) # output
+ ret
+
+/*
+ * _aesni_dec1: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE: initial state (input)
+ * output:
+ * STATE: final state (output)
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_dec1:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE # round 0
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .Ldec128
+ lea 0x20(TKEYP), TKEYP
+ je .Ldec192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps -0x50(TKEYP), KEY
+ aesdec KEY, STATE
+.align 4
+.Ldec192:
+ movaps -0x40(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps -0x30(TKEYP), KEY
+ aesdec KEY, STATE
+.align 4
+.Ldec128:
+ movaps -0x20(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps -0x10(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps (TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x10(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x20(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x30(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x40(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x50(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x60(TKEYP), KEY
+ aesdec KEY, STATE
+ movaps 0x70(TKEYP), KEY
+ aesdeclast KEY, STATE
+ ret
+
+/*
+ * _aesni_dec4: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE1: initial state (input)
+ * STATE2
+ * STATE3
+ * STATE4
+ * output:
+ * STATE1: final state (output)
+ * STATE2
+ * STATE3
+ * STATE4
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_dec4:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE1 # round 0
+ pxor KEY, STATE2
+ pxor KEY, STATE3
+ pxor KEY, STATE4
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .L4dec128
+ lea 0x20(TKEYP), TKEYP
+ je .L4dec192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps -0x50(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+.align 4
+.L4dec192:
+ movaps -0x40(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps -0x30(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+.align 4
+.L4dec128:
+ movaps -0x20(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps -0x10(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps (TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x10(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x20(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x30(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x40(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x50(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x60(TKEYP), KEY
+ aesdec KEY, STATE1
+ aesdec KEY, STATE2
+ aesdec KEY, STATE3
+ aesdec KEY, STATE4
+ movaps 0x70(TKEYP), KEY
+ aesdeclast KEY, STATE1 # last round
+ aesdeclast KEY, STATE2
+ aesdeclast KEY, STATE3
+ aesdeclast KEY, STATE4
+ ret
+
+#if 0
+/*
+ * void aesni_ecb_enc(struct aesni_sess *ses, uint8_t *dst, uint8_t *src,
+ * size_t len)
+ */
+ENTRY(aesni_ecb_enc)
+ test LEN, LEN # check length
+ jz .Lecb_enc_ret
+ mov 480(KEYP), KLEN
+ cmp $16, LEN
+ jb .Lecb_enc_ret
+ cmp $64, LEN
+ jb .Lecb_enc_loop1
+.align 4
+.Lecb_enc_loop4:
+ movups (INP), STATE1
+ movups 0x10(INP), STATE2
+ movups 0x20(INP), STATE3
+ movups 0x30(INP), STATE4
+ call _aesni_enc4
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lecb_enc_loop4
+ cmp $16, LEN
+ jb .Lecb_enc_ret
+.align 4
+.Lecb_enc_loop1:
+ movups (INP), STATE1
+ call _aesni_enc1
+ movups STATE1, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lecb_enc_loop1
+.Lecb_enc_ret:
+ ret
+
+/*
+ * void aesni_ecb_dec(struct aesni_sess *ses, uint8_t *dst, uint8_t *src,
+ * size_t len);
+ */
+ENTRY(aesni_ecb_dec)
+ test LEN, LEN
+ jz .Lecb_dec_ret
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ cmp $16, LEN
+ jb .Lecb_dec_ret
+ cmp $64, LEN
+ jb .Lecb_dec_loop1
+.align 4
+.Lecb_dec_loop4:
+ movups (INP), STATE1
+ movups 0x10(INP), STATE2
+ movups 0x20(INP), STATE3
+ movups 0x30(INP), STATE4
+ call _aesni_dec4
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lecb_dec_loop4
+ cmp $16, LEN
+ jb .Lecb_dec_ret
+.align 4
+.Lecb_dec_loop1:
+ movups (INP), STATE1
+ call _aesni_dec1
+ movups STATE1, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lecb_dec_loop1
+.Lecb_dec_ret:
+ ret
+#endif
+
+/*
+ * void aesni_cbc_enc(struct aesni_sess *ses, uint8_t *dst, uint8_t *src,
+ * size_t len, uint8_t *iv)
+ */
+ENTRY(aesni_cbc_enc)
+ cmp $16, LEN
+ jb .Lcbc_enc_ret
+ mov 480(KEYP), KLEN
+ movups (IVP), STATE # load iv as initial state
+.align 4
+.Lcbc_enc_loop:
+ movups (INP), IN # load input
+ pxor IN, STATE
+ call _aesni_enc1
+ movups STATE, (OUTP) # store output
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lcbc_enc_loop
+ movups STATE, (IVP)
+.Lcbc_enc_ret:
+ ret
+
+/*
+ * void aesni_cbc_dec(struct aesni_sess *ses, uint8_t *dst, uint8_t *src,
+ * size_t len, uint8_t *iv)
+ */
+ENTRY(aesni_cbc_dec)
+ cmp $16, LEN
+ jb .Lcbc_dec_just_ret
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ movups (IVP), IV
+ cmp $64, LEN
+ jb .Lcbc_dec_loop1
+.align 4
+.Lcbc_dec_loop4:
+ movups (INP), IN1
+ movaps IN1, STATE1
+ movups 0x10(INP), IN2
+ movaps IN2, STATE2
+ movups 0x20(INP), IN3
+ movaps IN3, STATE3
+ movups 0x30(INP), IN4
+ movaps IN4, STATE4
+ call _aesni_dec4
+ pxor IV, STATE1
+ pxor IN1, STATE2
+ pxor IN2, STATE3
+ pxor IN3, STATE4
+ movaps IN4, IV
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lcbc_dec_loop4
+ cmp $16, LEN
+ jb .Lcbc_dec_ret
+.align 4
+.Lcbc_dec_loop1:
+ movups (INP), IN
+ movaps IN, STATE
+ call _aesni_dec1
+ pxor IV, STATE
+ movups STATE, (OUTP)
+ movaps IN, IV
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lcbc_dec_loop1
+.Lcbc_dec_ret:
+ movups IV, (IVP)
+.Lcbc_dec_just_ret:
+ ret
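Note that aesni_cbc_dec can push four blocks at a time through _aesni_dec4, while aesni_cbc_enc stays strictly one block per iteration: CBC decryption computes each plaintext as AES-decrypt(C[i]) XOR C[i-1] from ciphertext that is all available up front, whereas encryption must feed each output block into the next. A single-block C sketch of the decrypt chaining; aes_decrypt_block() is a stand-in for _aesni_dec1:

/* Sketch: CBC decryption chaining, one block at a time. */
#include <stddef.h>
#include <wmmintrin.h>

extern __m128i aes_decrypt_block(__m128i block);	/* stand-in */

static void
cbc_decrypt(const __m128i *ct, __m128i *pt, size_t nblk, __m128i iv)
{
	__m128i prev = iv;
	size_t i;

	for (i = 0; i < nblk; i++) {
		pt[i] = _mm_xor_si128(aes_decrypt_block(ct[i]), prev);
		prev = ct[i];	/* next block chains off raw ciphertext */
	}
}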
+
+/*
+ * _aesni_inc_init: internal ABI
+ * set up registers used by _aesni_inc
+ * input:
+ * IV
+ * output:
+ * CTR: == IV, in little endian
+ * TCTR_LOW: == lower dword of CTR
+ * INC: == 1, in little endian
+ * BSWAP_MASK == endian swapping mask
+ */
+_aesni_inc_init:
+ movaps .Lbswap_mask, BSWAP_MASK
+ movaps IV, CTR
+ pslldq $4, CTR
+ por NONCE, CTR
+ pshufb BSWAP_MASK, CTR
+ mov $1, TCTR_LOW
+ movd TCTR_LOW, INC
+ movd CTR, TCTR_LOW
+ ret
+
+/*
+ * _aesni_inc: internal ABI
+ * Increment IV by 1; IV is big endian
+ * input:
+ * IV
+ * CTR: == IV, in little endian
+ * TCTR_LOW: == lower dword of CTR
+ * INC: == 1, in little endian
+ * BSWAP_MASK == endian swapping mask
+ * output:
+ * IV: incremented by 1
+ * changed:
+ * CTR: == output IV, in little endian
+ * TCTR_LOW: == lower dword of CTR
+ */
+_aesni_inc:
+ paddq INC, CTR
+ add $1, TCTR_LOW
+ jnc .Linc_low
+ pslldq $8, INC
+ paddq INC, CTR
+ psrldq $8, INC
+.Linc_low:
+ movaps CTR, IV
+ pshufb BSWAP_MASK, IV
+ ret
+
+/*
+ * void aesni_ctr_enc(struct aesni_sess *ses, uint8_t *dst, uint8_t *src,
+ * size_t len, uint8_t *iv)
+ */
+ENTRY(aesni_ctr_enc)
+ cmp $16, LEN
+ jb .Lctr_enc_just_ret
+ mov 480(KEYP), KLEN
+ movd 484(KEYP), NONCE
+ movq (IVP), IV
+ call _aesni_inc_init
+ cmp $64, LEN
+ jb .Lctr_enc_loop1
+.align 4
+.Lctr_enc_loop4:
+ movaps IV, STATE1
+ call _aesni_inc
+ movups (INP), IN1
+ movaps IV, STATE2
+ call _aesni_inc
+ movups 0x10(INP), IN2
+ movaps IV, STATE3
+ call _aesni_inc
+ movups 0x20(INP), IN3
+ movaps IV, STATE4
+ call _aesni_inc
+ movups 0x30(INP), IN4
+ call _aesni_enc4
+ pxor IN1, STATE1
+ movups STATE1, (OUTP)
+ pxor IN2, STATE2
+ movups STATE2, 0x10(OUTP)
+ pxor IN3, STATE3
+ movups STATE3, 0x20(OUTP)
+ pxor IN4, STATE4
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lctr_enc_loop4
+ cmp $16, LEN
+ jb .Lctr_enc_ret
+.align 4
+.Lctr_enc_loop1:
+ call _aesni_inc
+ movaps IV, STATE
+ movups (INP), IN
+ call _aesni_enc1
+ pxor IN, STATE
+ movups STATE, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lctr_enc_loop1
+.Lctr_enc_ret:
+ movq IV, (IVP)
+.Lctr_enc_just_ret:
+ ret
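The RFC 3686 compliance noted in the file header shows up in _aesni_inc_init above: the 128-bit counter block is assembled as a 4-byte session nonce (read from 484(KEYP)), an 8-byte per-request IV, and a 4-byte big-endian block counter starting at 1, rather than treating all 16 bytes as one big-endian counter. The same layout as a C sketch:

/* Sketch: initial AES-CTR counter block per RFC 3686. */
#include <stdint.h>
#include <string.h>

static void
ctr_block_init(uint8_t blk[16], const uint8_t nonce[4], const uint8_t iv[8])
{
	memcpy(blk, nonce, 4);		/* 4-byte session nonce */
	memcpy(blk + 4, iv, 8);		/* 8-byte per-request IV */
	blk[12] = blk[13] = blk[14] = 0;
	blk[15] = 1;			/* big-endian counter, starts at 1 */
}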
diff --git a/sys/arch/amd64/amd64/aesni.c b/sys/arch/amd64/amd64/aesni.c
new file mode 100644
index 00000000000..7b6024f787c
--- /dev/null
+++ b/sys/arch/amd64/amd64/aesni.c
@@ -0,0 +1,494 @@
+/* $OpenBSD: aesni.c,v 1.1 2010/06/29 21:34:11 thib Exp $ */
+/*-
+ * Copyright (c) 2003 Jason Wright
+ * Copyright (c) 2003, 2004 Theo de Raadt
+ * Copyright (c) 2010, Thordur I. Bjornsson
+ * Copyright (c) 2010, Mike Belopuhov
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+
+#ifdef CRYPTO
+#include <crypto/cryptodev.h>
+#include <crypto/rijndael.h>
+#include <crypto/xform.h>
+#include <crypto/cryptosoft.h>
+#endif
+
+#include <dev/rndvar.h>
+
+#include <machine/fpu.h>
+
+#ifdef CRYPTO
+
+/* defines from crypto/xform.c */
+#define AESCTR_NONCESIZE 4
+#define AESCTR_IVSIZE 8
+#define AESCTR_BLOCKSIZE 16
+
+#define AESCTR_MINKEY (16+4)
+#define AESCTR_MAXKEY (32+4)
+
+struct aesni_sess {
+ uint32_t ses_ekey[4 * (AES_MAXROUNDS + 1)];
+ uint32_t ses_dkey[4 * (AES_MAXROUNDS + 1)];
+ uint32_t ses_klen;
+ uint8_t ses_nonce[AESCTR_NONCESIZE];
+ uint8_t ses_iv[16];
+ int ses_sid;
+ int ses_used;
+ struct swcr_data *ses_swd;
+ LIST_ENTRY(aesni_sess) ses_entries;
+};
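The assembly addresses this struct by fixed offsets: ses_ekey at 0, ses_dkey at 240 (hence the add $240, KEYP in aesni_dec), ses_klen at 480 (movl %edx, 480(%rdi) in aesni_set_key) and ses_nonce at 484 (movd 484(KEYP), NONCE). With AES_MAXROUNDS being 14, each key schedule is 4 * 15 * 4 = 240 bytes, so the numbers line up. A sketch of C11 static asserts that would pin that dependency down, not present in the driver:

/* Sketch: pin the struct offsets the .S file relies on. */
#include <assert.h>
#include <stddef.h>

static_assert(offsetof(struct aesni_sess, ses_ekey) == 0, "ekey offset");
static_assert(offsetof(struct aesni_sess, ses_dkey) == 240, "dkey offset");
static_assert(offsetof(struct aesni_sess, ses_klen) == 480, "klen offset");
static_assert(offsetof(struct aesni_sess, ses_nonce) == 484, "nonce offset");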
+
+struct aesni_softc {
+ uint8_t op_buf[16384];
+ int32_t sc_cid;
+ /* uint32_t sc_nsessions; */
+ LIST_HEAD(, aesni_sess) sc_sessions;
+} *aesni_sc;
+
+uint32_t aesni_nsessions, aesni_ops;
+
+/* assembler-assisted key setup */
+extern void aesni_set_key(struct aesni_sess *ses, uint8_t *key, size_t len);
+/* aes encryption/decryption */
+extern void aesni_enc(struct aesni_sess *ses, uint8_t *dst, uint8_t *src);
+extern void aesni_dec(struct aesni_sess *ses, uint8_t *dst, uint8_t *src);
+/* assembler-assisted CBC mode */
+extern void aesni_cbc_enc(struct aesni_sess *ses, uint8_t *dst,
+ uint8_t *src, size_t len, uint8_t *iv);
+extern void aesni_cbc_dec(struct aesni_sess *ses, uint8_t *dst,
+ uint8_t *src, size_t len, uint8_t *iv);
+/* assembler-assisted CTR mode */
+extern void aesni_ctr_enc(struct aesni_sess *ses, uint8_t *dst,
+ uint8_t *src, size_t len, uint8_t *iv);
+
+void aesni_setup(void);
+int aesni_newsession(u_int32_t *, struct cryptoini *);
+int aesni_freesession(u_int64_t);
+int aesni_process(struct cryptop *);
+
+int aesni_swauth(struct cryptop *, struct cryptodesc *, struct swcr_data *,
+ caddr_t);
+
+int aesni_encdec(struct cryptop *, struct cryptodesc *,
+ struct aesni_sess *);
+
+void
+aesni_setup(void)
+{
+ int algs[CRYPTO_ALGORITHM_MAX + 1];
+ /* int flags = CRYPTOCAP_F_SOFTWARE; */
+ int flags = 0; /* XXX TESTING */
+
+ aesni_sc = malloc(sizeof(*aesni_sc), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (aesni_sc == NULL)
+ return;
+
+ bzero(algs, sizeof(algs));
+ algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_CTR] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+ /* needed for ipsec, uses software crypto */
+ algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA1_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_RIPEMD160_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA2_256_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA2_384_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA2_512_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+ aesni_sc->sc_cid = crypto_get_driverid(flags);
+ if (aesni_sc->sc_cid < 0) {
+ free(aesni_sc, M_DEVBUF);
+ return;
+ }
+
+ crypto_register(aesni_sc->sc_cid, algs, aesni_newsession,
+ aesni_freesession, aesni_process);
+}
+
+int
+aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
+{
+ struct cryptoini *c;
+ struct aesni_sess *ses = NULL;
+ struct auth_hash *axf;
+ struct swcr_data *swd;
+ caddr_t ptr = NULL;
+ int i;
+
+ if (sidp == NULL || cri == NULL)
+ return (EINVAL);
+
+ LIST_FOREACH(ses, &aesni_sc->sc_sessions, ses_entries) {
+ if (ses->ses_used == 0)
+ break;
+ }
+
+ if (!ses) {
+ /* XXX use pool? */
+ ptr = malloc(sizeof(*ses) + 16, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!ptr)
+ return (ENOMEM);
+ /*
+ * align to a 16 byte boundary, "the most utterly retarded
+ * requirement".
+ */
+ ses = (struct aesni_sess *)(roundup(((uint64_t)ptr), 16));
+
+ LIST_INSERT_HEAD(&aesni_sc->sc_sessions, ses, ses_entries);
+ ses->ses_sid = ++aesni_nsessions;
+ }
+
+ ses->ses_used = 1;
+
+ if ((uint64_t)ses % 16 != 0)
+ panic("aesni: unaligned address %p", ses);
+
+ fpu_kernel_enter(0);
+ for (c = cri; c != NULL; c = c->cri_next) {
+ switch (c->cri_alg) {
+ case CRYPTO_AES_CBC:
+ ses->ses_klen = c->cri_klen / 8;
+ arc4random_buf(ses->ses_iv, 16);
+ aesni_set_key(ses, c->cri_key, ses->ses_klen);
+ break;
+
+ case CRYPTO_AES_CTR:
+ ses->ses_klen = c->cri_klen / 8 - AESCTR_NONCESIZE;
+ bcopy(c->cri_key + ses->ses_klen, ses->ses_nonce,
+ AESCTR_NONCESIZE);
+ arc4random_buf(ses->ses_iv, 8);
+ aesni_set_key(ses, c->cri_key, ses->ses_klen);
+ break;
+
+ case CRYPTO_MD5_HMAC:
+ axf = &auth_hash_hmac_md5_96;
+ goto authcommon;
+ case CRYPTO_SHA1_HMAC:
+ axf = &auth_hash_hmac_sha1_96;
+ goto authcommon;
+ case CRYPTO_RIPEMD160_HMAC:
+ axf = &auth_hash_hmac_ripemd_160_96;
+ goto authcommon;
+ case CRYPTO_SHA2_256_HMAC:
+ axf = &auth_hash_hmac_sha2_256_128;
+ goto authcommon;
+ case CRYPTO_SHA2_384_HMAC:
+ axf = &auth_hash_hmac_sha2_384_192;
+ goto authcommon;
+ case CRYPTO_SHA2_512_HMAC:
+ axf = &auth_hash_hmac_sha2_512_256;
+ authcommon:
+ swd = malloc(sizeof(struct swcr_data), M_CRYPTO_DATA,
+ M_NOWAIT|M_ZERO);
+ if (swd == NULL) {
+ fpu_kernel_exit(0);
+ aesni_freesession(ses->ses_sid);
+ return (ENOMEM);
+ }
+ ses->ses_swd = swd;
+
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ M_NOWAIT);
+ if (swd->sw_ictx == NULL) {
+ fpu_kernel_exit(0);
+ aesni_freesession(ses->ses_sid);
+ return (ENOMEM);
+ }
+
+ swd->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ M_NOWAIT);
+ if (swd->sw_octx == NULL) {
+ fpu_kernel_exit(0);
+ aesni_freesession(ses->ses_sid);
+ return (ENOMEM);
+ }
+
+ for (i = 0; i < c->cri_klen / 8; i++)
+ c->cri_key[i] ^= HMAC_IPAD_VAL;
+
+ axf->Init(swd->sw_ictx);
+ axf->Update(swd->sw_ictx, c->cri_key, c->cri_klen / 8);
+ axf->Update(swd->sw_ictx, hmac_ipad_buffer,
+ axf->blocksize - (c->cri_klen / 8));
+
+ for (i = 0; i < c->cri_klen / 8; i++)
+ c->cri_key[i] ^= (HMAC_IPAD_VAL ^
+ HMAC_OPAD_VAL);
+
+ axf->Init(swd->sw_octx);
+ axf->Update(swd->sw_octx, c->cri_key, c->cri_klen / 8);
+ axf->Update(swd->sw_octx, hmac_opad_buffer,
+ axf->blocksize - (c->cri_klen / 8));
+
+ for (i = 0; i < c->cri_klen / 8; i++)
+ c->cri_key[i] ^= HMAC_OPAD_VAL;
+
+ swd->sw_axf = axf;
+ swd->sw_alg = c->cri_alg;
+
+ break;
+ default:
+ fpu_kernel_exit(0);
+ aesni_freesession(ses->ses_sid);
+ return (EINVAL);
+ }
+ }
+ fpu_kernel_exit(0);
+
+ *sidp = ses->ses_sid;
+ return (0);
+}
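The authcommon arm above performs the classic HMAC optimization: once per session, the key XORed with the ipad byte (0x36) is absorbed into sw_ictx and the key XORed with the opad byte (0x5c) into sw_octx, so per-packet authentication only continues an already-primed hash instead of rehashing the key. A stand-alone sketch of that precomputation; hash_init/hash_update are stand-ins for axf->Init/axf->Update:

/* Sketch: HMAC inner/outer context precomputation.  blksz is the
 * hash block size (64 for MD5/SHA-1, 128 for SHA-512).
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

extern void hash_init(void *);
extern void hash_update(void *, const uint8_t *, size_t);

static void
hmac_precompute(void *ictx, void *octx, uint8_t *key, size_t klen,
    size_t blksz)
{
	uint8_t pad[128];
	size_t i;

	for (i = 0; i < klen; i++)
		key[i] ^= 0x36;			/* HMAC_IPAD_VAL */
	memset(pad, 0x36, blksz);
	hash_init(ictx);
	hash_update(ictx, key, klen);
	hash_update(ictx, pad, blksz - klen);	/* pad out the block */

	for (i = 0; i < klen; i++)
		key[i] ^= 0x36 ^ 0x5c;		/* flip to HMAC_OPAD_VAL */
	memset(pad, 0x5c, blksz);
	hash_init(octx);
	hash_update(octx, key, klen);
	hash_update(octx, pad, blksz - klen);

	for (i = 0; i < klen; i++)
		key[i] ^= 0x5c;			/* restore the key */
}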
+
+int
+aesni_freesession(u_int64_t tid)
+{
+ struct aesni_sess *ses;
+ struct swcr_data *swd;
+ struct auth_hash *axf;
+ u_int32_t sid = (u_int32_t)tid;
+
+ LIST_FOREACH(ses, &aesni_sc->sc_sessions, ses_entries) {
+ if (ses->ses_sid == sid)
+ break;
+ }
+
+ if (ses == NULL)
+ return (EINVAL);
+
+ LIST_REMOVE(ses, ses_entries);
+
+ if (ses->ses_swd) {
+ swd = ses->ses_swd;
+ axf = swd->sw_axf;
+
+ if (swd->sw_ictx) {
+ bzero(swd->sw_ictx, axf->ctxsize);
+ free(swd->sw_ictx, M_CRYPTO_DATA);
+ }
+ if (swd->sw_octx) {
+ bzero(swd->sw_octx, axf->ctxsize);
+ free(swd->sw_octx, M_CRYPTO_DATA);
+ }
+ free(swd, M_CRYPTO_DATA);
+ }
+
+ bzero(ses, sizeof (*ses));
+
+ LIST_INSERT_HEAD(&aesni_sc->sc_sessions, ses, ses_entries);
+ ses->ses_sid = sid;
+
+ return (0);
+}
+
+int
+aesni_swauth(struct cryptop *crp, struct cryptodesc *crd,
+ struct swcr_data *sw, caddr_t buf)
+{
+ int type;
+
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ type = CRYPTO_BUF_MBUF;
+ else
+ type = CRYPTO_BUF_IOV;
+
+ return (swcr_authcompute(crp, crd, sw, buf, type));
+}
+
+int
+aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
+ struct aesni_sess *ses)
+{
+ uint8_t iv[EALG_MAX_BLOCK_LEN];
+ uint8_t *buf = &aesni_sc->op_buf[0];
+ int ivlen = 0;
+ int err = 0;
+
+ if ((crd->crd_len % 16) != 0) {
+ err = EINVAL;
+ return (err);
+ }
+
+ if (crd->crd_len > sizeof (aesni_sc->op_buf)) {
+ printf("aesni: crd->crd_len > sizeof (aesni_sc->op_buf)\n");
+ return (EINVAL);
+ }
+
+ /*
+ buf = malloc(crd->crd_len, M_DEVBUF, M_NOWAIT);
+ if (buf == NULL) {
+ err = ENOMEM;
+ return (err);
+ }
+ */
+
+ /* CBC uses 16, CTR only 8 */
+ ivlen = (crd->crd_alg == CRYPTO_AES_CBC) ? 16 : 8;
+
+ /* Initialize the IV */
+ if (crd->crd_flags & CRD_F_ENCRYPT) {
+ if (crd->crd_flags & CRD_F_IV_EXPLICIT)
+ bcopy(crd->crd_iv, iv, ivlen);
+ else
+ bcopy(ses->ses_iv, iv, ivlen);
+
+ /* Do we need to write the IV? */
+ if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) {
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copyback((struct mbuf *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ else if (crp->crp_flags & CRYPTO_F_IOV)
+ cuio_copyback((struct uio *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ else
+ bcopy(iv, crp->crp_buf + crd->crd_inject,
+ ivlen);
+ }
+ } else {
+ if (crd->crd_flags & CRD_F_IV_EXPLICIT)
+ bcopy(crd->crd_iv, iv, ivlen);
+ else {
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copydata((struct mbuf *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ else if (crp->crp_flags & CRYPTO_F_IOV)
+ cuio_copydata((struct uio *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ else
+ bcopy(crp->crp_buf + crd->crd_inject,
+ iv, ivlen);
+ }
+ }
+
+ /* Copy data to be processed to the buffer */
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+ else if (crp->crp_flags & CRYPTO_F_IOV)
+ cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+ else
+ bcopy(crp->crp_buf + crd->crd_skip, buf, crd->crd_len);
+
+ /* Apply cipher */
+ if (crd->crd_alg == CRYPTO_AES_CBC) {
+ if (crd->crd_flags & CRD_F_ENCRYPT)
+ aesni_cbc_enc(ses, buf, buf, crd->crd_len, iv);
+ else
+ aesni_cbc_dec(ses, buf, buf, crd->crd_len, iv);
+ } else if (crd->crd_alg == CRYPTO_AES_CTR) {
+ aesni_ctr_enc(ses, buf, buf, crd->crd_len, iv);
+ }
+
+ aesni_ops++;
+
+ /* Copy back the result */
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copyback((struct mbuf *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+ else if (crp->crp_flags & CRYPTO_F_IOV)
+ cuio_copyback((struct uio *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+ else
+ bcopy(buf, crp->crp_buf + crd->crd_skip, crd->crd_len);
+
+ /* Copy out last block for use as next session IV for CBC */
+ if (crd->crd_alg == CRYPTO_AES_CBC && crd->crd_flags & CRD_F_ENCRYPT) {
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copydata((struct mbuf *)crp->crp_buf,
+ crd->crd_skip + crd->crd_len - ivlen, ivlen,
+ ses->ses_iv);
+ else if (crp->crp_flags & CRYPTO_F_IOV)
+ cuio_copydata((struct uio *)crp->crp_buf,
+ crd->crd_skip + crd->crd_len - ivlen, ivlen,
+ ses->ses_iv);
+ else
+ bcopy(crp->crp_buf + crd->crd_skip +
+ crd->crd_len - ivlen, ses->ses_iv, ivlen);
+ }
+
+ /*
+ if (buf != NULL) {
+ bzero(buf, crd->crd_len);
+ free(buf, M_DEVBUF);
+ }
+ */
+
+ bzero(buf, crd->crd_len);
+ return (err);
+}
+
+int
+aesni_process(struct cryptop *crp)
+{
+ struct aesni_sess *ses;
+ struct cryptodesc *crd;
+ int err = 0;
+
+ if (crp == NULL || crp->crp_callback == NULL) {
+ err = EINVAL;
+ goto out;
+ }
+
+ LIST_FOREACH(ses, &aesni_sc->sc_sessions, ses_entries) {
+ if (ses->ses_sid == crp->crp_sid)
+ break;
+ }
+
+ if (!ses) {
+ err = EINVAL;
+ goto out;
+ }
+
+ fpu_kernel_enter(0);
+ for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
+ switch (crd->crd_alg) {
+ case CRYPTO_AES_CBC:
+ case CRYPTO_AES_CTR:
+ err = aesni_encdec(crp, crd, ses);
+ if (err != 0)
+ goto cleanup;
+ break;
+
+ case CRYPTO_MD5_HMAC:
+ case CRYPTO_SHA1_HMAC:
+ case CRYPTO_RIPEMD160_HMAC:
+ case CRYPTO_SHA2_256_HMAC:
+ case CRYPTO_SHA2_384_HMAC:
+ case CRYPTO_SHA2_512_HMAC:
+ err = aesni_swauth(crp, crd, ses->ses_swd,
+ crp->crp_buf);
+ if (err != 0)
+ goto cleanup;
+ break;
+
+ default:
+ err = EINVAL;
+ goto cleanup;
+ }
+ }
+cleanup:
+ fpu_kernel_exit(0);
+out:
+ crp->crp_etype = err;
+ crypto_done(crp);
+ return (err);
+}
+
+#endif /* CRYPTO */
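For context on how this driver gets exercised: a consumer allocates a session with crypto_newsession(), fills in a cryptop/cryptodesc pair, and hands it to crypto_dispatch(), which routes it to aesni_process() above. A rough sketch against the framework API of this era; field usage is inferred from this file and the exact details should be treated as illustrative:

/* Sketch: encrypt one 16-byte block through the crypto framework.
 * my_callback is a hypothetical completion hook.
 */
extern int my_callback(struct cryptop *);

static int
aesni_example(u_int8_t *key, u_int8_t *iv, u_int8_t *buf)
{
	struct cryptoini cri;
	struct cryptop *crp;
	struct cryptodesc *crd;
	u_int64_t sid;

	bzero(&cri, sizeof(cri));
	cri.cri_alg = CRYPTO_AES_CBC;
	cri.cri_klen = 128;			/* key length in bits */
	cri.cri_key = key;
	if (crypto_newsession(&sid, &cri, 0) != 0)	/* 0: allow hardware */
		return (EINVAL);

	crp = crypto_getreq(1);			/* one descriptor */
	if (crp == NULL)
		return (ENOMEM);
	crp->crp_sid = sid;
	crp->crp_ilen = 16;
	crp->crp_buf = (caddr_t)buf;		/* linear buffer */
	crp->crp_callback = my_callback;
	crd = crp->crp_desc;
	crd->crd_alg = CRYPTO_AES_CBC;
	crd->crd_skip = 0;
	crd->crd_len = 16;
	crd->crd_inject = 0;
	crd->crd_flags = CRD_F_ENCRYPT | CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
	bcopy(iv, crd->crd_iv, 16);
	return (crypto_dispatch(crp));
}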