summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Sing <jsing@cvs.openbsd.org>2013-03-26 15:47:02 +0000
committerJoel Sing <jsing@cvs.openbsd.org>2013-03-26 15:47:02 +0000
commit86ca9d53d91cc3fd11474d05607d4420b7b2478b (patch)
tree327c2c9d01c19e8a69279df114d325c494d8024b
parentaeb73896d16495ce55d50e26803dee5b5bfa9a9c (diff)
Add AES-XTS support to the aesni crypto driver. This allows softraid(4)
to benefit from the AES-NI instructions found on newer Intel CPUs. ok mikeb@
-rw-r--r--sys/arch/amd64/amd64/aes_intel.S110
-rw-r--r--sys/arch/amd64/amd64/aesni.c72
2 files changed, 172 insertions, 10 deletions
diff --git a/sys/arch/amd64/amd64/aes_intel.S b/sys/arch/amd64/amd64/aes_intel.S
index 2cb2f5b0a31..efb8f5ae5f9 100644
--- a/sys/arch/amd64/amd64/aes_intel.S
+++ b/sys/arch/amd64/amd64/aes_intel.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: aes_intel.S,v 1.8 2012/12/09 23:14:28 mikeb Exp $ */
+/* $OpenBSD: aes_intel.S,v 1.9 2013/03/26 15:47:01 jsing Exp $ */
/*
* Implement AES algorithm in Intel AES-NI instructions.
@@ -47,8 +47,10 @@
* - assembler macros were converted to the actual instructions;
* - aesni_ctr_enc was changed to be RFC 3686 compliant;
* - aes-gcm mode added;
+ * - aes-xts implementation added;
*
* Copyright (c) 2010,2011 Mike Belopuhov
+ * Copyright (c) 2013 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -284,7 +286,7 @@ ENTRY(aesni_enc)
* KLEN: round count
* STATE: initial state (input)
* output:
- * STATE: finial state (output)
+ * STATE: final state (output)
* changed:
* KEY
* TKEYP (T1)
@@ -343,7 +345,7 @@ _aesni_enc1:
* STATE3
* STATE4
* output:
- * STATE1: finial state (output)
+ * STATE1: final state (output)
* STATE2
* STATE3
* STATE4
@@ -458,7 +460,7 @@ ENTRY(aesni_dec)
* KLEN: key length
* STATE: initial state (input)
* output:
- * STATE: finial state (output)
+ * STATE: final state (output)
* changed:
* KEY
* TKEYP (T1)
@@ -517,7 +519,7 @@ _aesni_dec1:
* STATE3
* STATE4
* output:
- * STATE1: finial state (output)
+ * STATE1: final state (output)
* STATE2
* STATE3
* STATE4
@@ -1002,3 +1004,101 @@ ENTRY(aesni_gmac_final)
pxor IN,STATE
movdqu STATE,(OUTP) # output
ret
+
+/*
+ * void aesni_xts_enc(struct aesni_xts_ctx *xts, uint8_t *dst, uint8_t *src,
+ * size_t len, uint8_t *iv) -- encrypt len bytes in XTS mode; NOTE(review): a trailing partial block (len % 16) is silently ignored -- no ciphertext stealing; confirm callers pass whole blocks.
+ */
+ENTRY(aesni_xts_enc)
+ cmp $16,%rcx # less than one full block?
+ jb 2f # nothing to do
+
+ call _aesni_xts_tweak # tweak = E_k2(IV), left in %xmm3
+
+ movl 480(KEYP),KLEN # key length (round count) of k1
+1:
+ movups (%rdx),%xmm0 # src
+ pxor %xmm3,%xmm0 # xor block with tweak
+ call _aesni_enc1 # encrypt one block under k1
+ pxor %xmm3,%xmm0 # xor block with tweak
+ movups %xmm0,(%rsi) # dst
+
+ call _aesni_xts_tweak_exp # advance tweak for the next block
+
+ add $16,%rsi # dst += blocksize
+ add $16,%rdx # src += blocksize
+ sub $16,%rcx # len -= blocksize
+ cmp $16,%rcx
+ jge 1b # loop while a whole block remains
+2:
+ ret
+
+/*
+ * void aesni_xts_dec(struct aesni_xts_ctx *xts, uint8_t *dst, uint8_t *src,
+ * size_t len, uint8_t *iv) -- decrypt len bytes in XTS mode; NOTE(review): a trailing partial block (len % 16) is silently ignored -- no ciphertext stealing; confirm callers pass whole blocks.
+ */
+ENTRY(aesni_xts_dec)
+ cmp $16,%rcx # less than one full block?
+ jb 2f # nothing to do
+
+ call _aesni_xts_tweak # tweak = E_k2(IV), left in %xmm3
+
+ movl 480(KEYP),KLEN # key length (round count) of k1
+ add $240,KEYP # decryption key schedule (aes_dkey follows aes_ekey)
+1:
+ movups (%rdx),%xmm0 # src
+ pxor %xmm3,%xmm0 # xor block with tweak
+ call _aesni_dec1 # decrypt one block under k1
+ pxor %xmm3,%xmm0 # xor block with tweak
+ movups %xmm0,(%rsi) # dst
+
+ call _aesni_xts_tweak_exp # advance tweak for the next block
+
+ add $16,%rsi # dst += blocksize
+ add $16,%rdx # src += blocksize
+ sub $16,%rcx # len -= blocksize
+ cmp $16,%rcx
+ jge 1b # loop while a whole block remains
+2:
+ ret
+
+/*
+ * Prepare tweak as E_k2(IV). IV is specified as the LE representation of a
+ * 64-bit block number which we allow to be passed in directly. Since
+ * we're on a 64-bit LE host the representation is already correct.
+ *
+ * xts is in %rdi, iv is in %r8 and we return the tweak in %xmm3.
+ */
+_aesni_xts_tweak:
+ mov (%r8),%r10 # load 64-bit block number from iv
+ movd %r10,%xmm0 # Last 64-bits of IV are always zero.
+ mov KEYP,%r11 # save caller's key pointer (k1)
+ lea 496(%rdi),KEYP # k2: second aesni_aes_ctx (240+240+16 bytes in)
+ movl 480(KEYP),KLEN # round count of k2
+ call _aesni_enc1 # tweak = E_k2(IV)
+ movdqa %xmm0,%xmm3 # return tweak in %xmm3
+ mov %r11,KEYP # restore k1 for the data loop
+ ret
+
+/*
+ * Exponentiate AES XTS tweak (in %xmm3): multiply by alpha (x) in GF(2^128), i.e. a 128-bit left shift with conditional reduction by the XTS polynomial.
+ */
+_aesni_xts_tweak_exp:
+ pextrw $7,%xmm3,%r10 # pre-shift top word of high quad (carry out of bit 127)
+ pextrw $3,%xmm3,%r11 # pre-shift top word of low quad (carry into bit 64)
+ psllq $1,%xmm3 # Left shift each 64-bit half.
+
+ and $0x8000,%r11 # Carry between quads.
+ jz 1f
+ mov $1,%r11
+ pxor %xmm0,%xmm0
+ pinsrw $4,%r11,%xmm0 # propagate carry into bit 64
+ por %xmm0,%xmm3
+1:
+ and $0x8000,%r10 # carry out of bit 127 -> reduce
+ jz 2f
+ pextrw $0,%xmm3,%r11
+ xor $0x87,%r11 # AES XTS alpha - GF(2^128).
+ pinsrw $0,%r11,%xmm3
+2:
+ ret
diff --git a/sys/arch/amd64/amd64/aesni.c b/sys/arch/amd64/amd64/aesni.c
index 4f4561cdcb7..4847b054bac 100644
--- a/sys/arch/amd64/amd64/aesni.c
+++ b/sys/arch/amd64/amd64/aesni.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: aesni.c,v 1.24 2012/12/10 15:06:45 mikeb Exp $ */
+/* $OpenBSD: aesni.c,v 1.25 2013/03/26 15:47:01 jsing Exp $ */
/*-
* Copyright (c) 2003 Jason Wright
* Copyright (c) 2003, 2004 Theo de Raadt
@@ -42,6 +42,21 @@
#define AESCTR_IVSIZE 8
#define AESCTR_BLOCKSIZE 16
+#define AES_XTS_BLOCKSIZE 16
+#define AES_XTS_IVSIZE 8
+#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
+
+struct aesni_aes_ctx {
+ uint32_t aes_ekey[4 * (AES_MAXROUNDS + 1)];
+ uint32_t aes_dkey[4 * (AES_MAXROUNDS + 1)];
+ uint32_t aes_klen;
+ uint32_t aes_pad[3];
+};
+
+struct aesni_xts_ctx {
+ struct aesni_aes_ctx xts_keys[2];
+};
+
struct aesni_session {
uint32_t ses_ekey[4 * (AES_MAXROUNDS + 1)];
uint32_t ses_dkey[4 * (AES_MAXROUNDS + 1)];
@@ -49,6 +64,7 @@ struct aesni_session {
uint8_t ses_nonce[AESCTR_NONCESIZE];
int ses_sid;
GHASH_CTX *ses_ghash;
+ struct aesni_xts_ctx *ses_xts;
struct swcr_data *ses_swd;
LIST_ENTRY(aesni_session)
ses_entries;
@@ -84,6 +100,12 @@ extern void aesni_cbc_dec(struct aesni_session *ses, uint8_t *dst,
extern void aesni_ctr_enc(struct aesni_session *ses, uint8_t *dst,
uint8_t *src, size_t len, uint8_t *icb);
+/* assembler-assisted XTS mode */
+extern void aesni_xts_enc(struct aesni_xts_ctx *xts, uint8_t *dst,
+ uint8_t *src, size_t len, uint8_t *tweak);
+extern void aesni_xts_dec(struct aesni_xts_ctx *xts, uint8_t *dst,
+ uint8_t *src, size_t len, uint8_t *tweak);
+
/* assembler-assisted GMAC */
extern void aesni_gmac_update(GHASH_CTX *ghash, uint8_t *src, size_t len);
extern void aesni_gmac_final(struct aesni_session *ses, uint8_t *tag,
@@ -114,15 +136,20 @@ aesni_setup(void)
aesni_sc->sc_buflen = PAGE_SIZE;
bzero(algs, sizeof(algs));
+
+ /* Encryption algorithms. */
algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_AES_CTR] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_AES_GCM_16] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_XTS] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+ /* Authentication algorithms. */
algs[CRYPTO_AES_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_AES_128_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_AES_192_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_AES_256_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
- /* needed for ipsec, uses software crypto */
+ /* HMACs needed for IPsec, uses software crypto. */
algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_SHA1_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_RIPEMD160_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
@@ -150,6 +177,7 @@ int
aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
{
struct aesni_session *ses = NULL;
+ struct aesni_aes_ctx *aes1, *aes2;
struct cryptoini *c;
struct auth_hash *axf;
struct swcr_data *swd;
@@ -184,6 +212,28 @@ aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
fpu_kernel_exit();
break;
+ case CRYPTO_AES_XTS:
+ ses->ses_xts = malloc(sizeof(struct aesni_xts_ctx),
+ M_CRYPTO_DATA, M_NOWAIT | M_ZERO);
+ if (ses->ses_xts == NULL) {
+ aesni_freesession(ses->ses_sid);
+ return (ENOMEM);
+ }
+
+ ses->ses_klen = c->cri_klen / 16;
+ aes1 = &ses->ses_xts->xts_keys[0];
+ aes1->aes_klen = ses->ses_klen;
+ aes2 = &ses->ses_xts->xts_keys[1];
+ aes2->aes_klen = ses->ses_klen;
+
+ fpu_kernel_enter();
+ aesni_set_key((struct aesni_session *)aes1,
+ c->cri_key, aes1->aes_klen);
+ aesni_set_key((struct aesni_session *)aes2,
+ c->cri_key + ses->ses_klen, aes2->aes_klen);
+ fpu_kernel_exit();
+ break;
+
case CRYPTO_AES_128_GMAC:
case CRYPTO_AES_192_GMAC:
case CRYPTO_AES_256_GMAC:
@@ -300,10 +350,15 @@ aesni_freesession(u_int64_t tid)
LIST_REMOVE(ses, ses_entries);
if (ses->ses_ghash) {
- bzero(ses->ses_ghash, sizeof(GHASH_CTX));
+ explicit_bzero(ses->ses_ghash, sizeof(GHASH_CTX));
free(ses->ses_ghash, M_CRYPTO_DATA);
}
+ if (ses->ses_xts) {
+ explicit_bzero(ses->ses_xts, sizeof(struct aesni_xts_ctx));
+ free(ses->ses_xts, M_CRYPTO_DATA);
+ }
+
if (ses->ses_swd) {
swd = ses->ses_swd;
axf = swd->sw_axf;
@@ -343,12 +398,12 @@ int
aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
struct cryptodesc *crda, struct aesni_session *ses)
{
+ int aadlen, err, ivlen, iskip, oskip, rlen;
uint8_t iv[EALG_MAX_BLOCK_LEN];
uint8_t icb[AESCTR_BLOCKSIZE];
uint8_t tag[GMAC_DIGEST_LEN];
uint8_t *buf = aesni_sc->sc_buf;
uint32_t *dw;
- int aadlen, err, ivlen, iskip, oskip, rlen;
aadlen = rlen = err = iskip = oskip = 0;
@@ -367,7 +422,7 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
aesni_sc->sc_buflen = rlen;
}
- /* CBC uses 16, CTR only 8 */
+ /* CBC uses 16, CTR/XTS only 8. */
ivlen = (crd->crd_alg == CRYPTO_AES_CBC) ? 16 : 8;
/* Initialize the IV */
@@ -492,6 +547,12 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
/* finalization */
aesni_gmac_final(ses, tag, icb, ses->ses_ghash->S);
break;
+ case CRYPTO_AES_XTS:
+ if (crd->crd_flags & CRD_F_ENCRYPT)
+ aesni_xts_enc(ses->ses_xts, buf, buf, crd->crd_len, iv);
+ else
+ aesni_xts_dec(ses->ses_xts, buf, buf, crd->crd_len, iv);
+ break;
}
fpu_kernel_exit();
@@ -555,6 +616,7 @@ aesni_process(struct cryptop *crp)
switch (crd->crd_alg) {
case CRYPTO_AES_CBC:
case CRYPTO_AES_CTR:
+ case CRYPTO_AES_XTS:
err = aesni_encdec(crp, crd, NULL, ses);
if (err != 0)
goto out;