author:    Joel Sing <jsing@cvs.openbsd.org>    2013-03-26 15:47:02 +0000
committer: Joel Sing <jsing@cvs.openbsd.org>    2013-03-26 15:47:02 +0000
commit:    86ca9d53d91cc3fd11474d05607d4420b7b2478b
tree:      327c2c9d01c19e8a69279df114d325c494d8024b
parent:    aeb73896d16495ce55d50e26803dee5b5bfa9a9c
Add AES-XTS support to the aesni crypto driver. This allows softraid(4)
to benefit from the AES-NI instructions found on newer Intel CPUs.
ok mikeb@
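
Background (not part of the commit): XTS mode encrypts the 64-bit block number under a second AES key to form a per-sector tweak, then processes each 16-byte block as E_k1(P xor T) xor T, multiplying T by alpha in GF(2^128) between blocks. A minimal C sketch of that flow follows, assuming a hypothetical aes_encrypt() single-block primitive (the driver's real entry points are the assembly routines added below); like the driver, it handles whole blocks only.

```c
#include <stdint.h>
#include <string.h>

#define XTS_BLOCKSIZE	16

/* Hypothetical one-block AES primitive standing in for the AES-NI routine. */
void aes_encrypt(const void *key, const uint8_t in[16], uint8_t out[16]);

void
xts_encrypt(const void *data_key, const void *tweak_key, uint64_t blkno,
    uint8_t *buf, size_t len)
{
	uint8_t tweak[XTS_BLOCKSIZE], block[XTS_BLOCKSIZE];
	size_t i, j;
	int carry, c;

	/* Tweak = E_k2(little-endian block number, upper 64 bits zero). */
	memset(tweak, 0, sizeof(tweak));
	for (i = 0; i < 8; i++)
		tweak[i] = (blkno >> (8 * i)) & 0xff;
	aes_encrypt(tweak_key, tweak, tweak);

	/* Whole blocks only, as in the driver. */
	for (i = 0; i + XTS_BLOCKSIZE <= len; i += XTS_BLOCKSIZE) {
		/* C = E_k1(P xor T) xor T */
		for (j = 0; j < XTS_BLOCKSIZE; j++)
			block[j] = buf[i + j] ^ tweak[j];
		aes_encrypt(data_key, block, block);
		for (j = 0; j < XTS_BLOCKSIZE; j++)
			buf[i + j] = block[j] ^ tweak[j];

		/* T = T * alpha in GF(2^128): shift left, reduce by 0x87. */
		carry = 0;
		for (j = 0; j < XTS_BLOCKSIZE; j++) {
			c = tweak[j] >> 7;
			tweak[j] = (uint8_t)((tweak[j] << 1) | carry);
			carry = c;
		}
		if (carry)
			tweak[0] ^= 0x87;
	}
}
```

Decryption follows the same tweak schedule, only the data blocks go through the AES decryption key schedule instead.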
-rw-r--r--  sys/arch/amd64/amd64/aes_intel.S  | 110
-rw-r--r--  sys/arch/amd64/amd64/aesni.c      |  72
2 files changed, 172 insertions, 10 deletions
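
A note on the tweak update in the assembly below (not part of the commit): psllq shifts the two 64-bit halves of %xmm3 independently, so _aesni_xts_tweak_exp extracts the top words first and then patches in the cross-quad carry and the GF(2^128) reduction by 0x87 afterwards. An equivalent C sketch on two uint64_t halves, with illustrative names only:

```c
#include <stdint.h>

/*
 * Multiply the 128-bit tweak by alpha in GF(2^128), mirroring
 * _aesni_xts_tweak_exp: t[0] is the low quadword, t[1] the high one.
 */
static void
xts_tweak_exp(uint64_t t[2])
{
	int carry_low = (t[0] >> 63) & 1;	/* bit 63 crosses into the high quad */
	int carry_high = (t[1] >> 63) & 1;	/* bit 127 leaves the 128-bit value */

	t[0] <<= 1;				/* psllq shifts each quad on its own... */
	t[1] <<= 1;
	if (carry_low)
		t[1] |= 1;			/* ...so the inter-quad carry is manual */
	if (carry_high)
		t[0] ^= 0x87;			/* reduce by the XTS polynomial (alpha) */
}
```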
diff --git a/sys/arch/amd64/amd64/aes_intel.S b/sys/arch/amd64/amd64/aes_intel.S
index 2cb2f5b0a31..efb8f5ae5f9 100644
--- a/sys/arch/amd64/amd64/aes_intel.S
+++ b/sys/arch/amd64/amd64/aes_intel.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: aes_intel.S,v 1.8 2012/12/09 23:14:28 mikeb Exp $ */
+/* $OpenBSD: aes_intel.S,v 1.9 2013/03/26 15:47:01 jsing Exp $ */
 
 /*
  * Implement AES algorithm in Intel AES-NI instructions.
@@ -47,8 +47,10 @@
  * - assembler macros were converted to the actual instructions;
  * - aesni_ctr_enc was changed to be RFC 3686 compliant;
  * - aes-gcm mode added;
+ * - aes-xts implementation added;
  *
  * Copyright (c) 2010,2011 Mike Belopuhov
+ * Copyright (c) 2013 Joel Sing <jsing@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -284,7 +286,7 @@ ENTRY(aesni_enc)
  *	KLEN:	round count
  *	STATE:	initial state (input)
  * output:
- *	STATE:	finial state (output)
+ *	STATE:	final state (output)
  * changed:
  *	KEY
  *	TKEYP (T1)
@@ -343,7 +345,7 @@ _aesni_enc1:
  *	STATE3
  *	STATE4
  * output:
- *	STATE1:	finial state (output)
+ *	STATE1:	final state (output)
  *	STATE2
  *	STATE3
  *	STATE4
@@ -458,7 +460,7 @@ ENTRY(aesni_dec)
  *	KLEN:	key length
  *	STATE:	initial state (input)
  * output:
- *	STATE:	finial state (output)
+ *	STATE:	final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
@@ -517,7 +519,7 @@ _aesni_dec1:
 *	STATE3
 *	STATE4
 * output:
- *	STATE1:	finial state (output)
+ *	STATE1:	final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
@@ -1002,3 +1004,101 @@ ENTRY(aesni_gmac_final)
 	pxor	IN,STATE
 	movdqu	STATE,(OUTP)		# output
 	ret
+
+/*
+ * void aesni_xts_enc(struct aesni_xts_ctx *xts, uint8_t *dst, uint8_t *src,
+ *     size_t len, uint8_t *iv)
+ */
+ENTRY(aesni_xts_enc)
+	cmp	$16,%rcx
+	jb	2f
+
+	call	_aesni_xts_tweak
+
+	movl	480(KEYP),KLEN		# key length
+1:
+	movups	(%rdx),%xmm0		# src
+	pxor	%xmm3,%xmm0		# xor block with tweak
+	call	_aesni_enc1
+	pxor	%xmm3,%xmm0		# xor block with tweak
+	movups	%xmm0,(%rsi)		# dst
+
+	call	_aesni_xts_tweak_exp
+
+	add	$16,%rsi
+	add	$16,%rdx
+	sub	$16,%rcx
+	cmp	$16,%rcx
+	jge	1b
+2:
+	ret
+
+/*
+ * void aesni_xts_dec(struct aesni_xts_ctx *xts, uint8_t *dst, uint8_t *src,
+ *     size_t len, uint8_t *iv)
+ */
+ENTRY(aesni_xts_dec)
+	cmp	$16,%rcx
+	jb	2f
+
+	call	_aesni_xts_tweak
+
+	movl	480(KEYP),KLEN		# key length
+	add	$240,KEYP		# decryption key
+1:
+	movups	(%rdx),%xmm0		# src
+	pxor	%xmm3,%xmm0		# xor block with tweak
+	call	_aesni_dec1
+	pxor	%xmm3,%xmm0		# xor block with tweak
+	movups	%xmm0,(%rsi)		# dst
+
+	call	_aesni_xts_tweak_exp
+
+	add	$16,%rsi
+	add	$16,%rdx
+	sub	$16,%rcx
+	cmp	$16,%rcx
+	jge	1b
+2:
+	ret
+
+/*
+ * Prepare tweak as E_k2(IV). IV is specified as LE representation of a
+ * 64-bit block number which we allow to be passed in directly. Since
+ * we're on a 64-bit LE host the representation is already correct.
+ *
+ * xts is in %rdi, iv is in %r8 and we return the tweak in %xmm3.
+ */
+_aesni_xts_tweak:
+	mov	(%r8),%r10
+	movd	%r10,%xmm0		# Last 64-bits of IV are always zero.
+	mov	KEYP,%r11
+	lea	496(%rdi),KEYP
+	movl	480(KEYP),KLEN
+	call	_aesni_enc1
+	movdqa	%xmm0,%xmm3
+	mov	%r11,KEYP
+	ret
+
+/*
+ * Exponentiate AES XTS tweak (in %xmm3).
+ */
+_aesni_xts_tweak_exp:
+	pextrw	$7,%xmm3,%r10
+	pextrw	$3,%xmm3,%r11
+	psllq	$1,%xmm3		# Left shift.
+
+	and	$0x8000,%r11		# Carry between quads.
+	jz	1f
+	mov	$1,%r11
+	pxor	%xmm0,%xmm0
+	pinsrw	$4,%r11,%xmm0
+	por	%xmm0,%xmm3
+1:
+	and	$0x8000,%r10
+	jz	2f
+	pextrw	$0,%xmm3,%r11
+	xor	$0x87,%r11		# AES XTS alpha - GF(2^128).
+	pinsrw	$0,%r11,%xmm3
+2:
+	ret
diff --git a/sys/arch/amd64/amd64/aesni.c b/sys/arch/amd64/amd64/aesni.c
index 4f4561cdcb7..4847b054bac 100644
--- a/sys/arch/amd64/amd64/aesni.c
+++ b/sys/arch/amd64/amd64/aesni.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: aesni.c,v 1.24 2012/12/10 15:06:45 mikeb Exp $ */
+/* $OpenBSD: aesni.c,v 1.25 2013/03/26 15:47:01 jsing Exp $ */
 /*-
  * Copyright (c) 2003 Jason Wright
  * Copyright (c) 2003, 2004 Theo de Raadt
@@ -42,6 +42,21 @@
 #define AESCTR_IVSIZE		8
 #define AESCTR_BLOCKSIZE	16
 
+#define AES_XTS_BLOCKSIZE	16
+#define AES_XTS_IVSIZE		8
+#define AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
+
+struct aesni_aes_ctx {
+	uint32_t		 aes_ekey[4 * (AES_MAXROUNDS + 1)];
+	uint32_t		 aes_dkey[4 * (AES_MAXROUNDS + 1)];
+	uint32_t		 aes_klen;
+	uint32_t		 aes_pad[3];
+};
+
+struct aesni_xts_ctx {
+	struct aesni_aes_ctx	 xts_keys[2];
+};
+
 struct aesni_session {
 	uint32_t		 ses_ekey[4 * (AES_MAXROUNDS + 1)];
 	uint32_t		 ses_dkey[4 * (AES_MAXROUNDS + 1)];
@@ -49,6 +64,7 @@ struct aesni_session {
 	uint8_t			 ses_nonce[AESCTR_NONCESIZE];
 	int			 ses_sid;
 	GHASH_CTX		*ses_ghash;
+	struct aesni_xts_ctx	*ses_xts;
 	struct swcr_data	*ses_swd;
 	LIST_ENTRY(aesni_session)
 				 ses_entries;
@@ -84,6 +100,12 @@ extern void aesni_cbc_dec(struct aesni_session *ses, uint8_t *dst,
 extern void aesni_ctr_enc(struct aesni_session *ses, uint8_t *dst,
     uint8_t *src, size_t len, uint8_t *icb);
 
+/* assembler-assisted XTS mode */
+extern void aesni_xts_enc(struct aesni_xts_ctx *xts, uint8_t *dst,
+    uint8_t *src, size_t len, uint8_t *tweak);
+extern void aesni_xts_dec(struct aesni_xts_ctx *xts, uint8_t *dst,
+    uint8_t *src, size_t len, uint8_t *tweak);
+
 /* assembler-assisted GMAC */
 extern void aesni_gmac_update(GHASH_CTX *ghash, uint8_t *src, size_t len);
 extern void aesni_gmac_final(struct aesni_session *ses, uint8_t *tag,
@@ -114,15 +136,20 @@ aesni_setup(void)
 	aesni_sc->sc_buflen = PAGE_SIZE;
 
 	bzero(algs, sizeof(algs));
+
+	/* Encryption algorithms. */
 	algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_AES_CTR] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_AES_GCM_16] = CRYPTO_ALG_FLAG_SUPPORTED;
+	algs[CRYPTO_AES_XTS] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+	/* Authentication algorithms. */
 	algs[CRYPTO_AES_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_AES_128_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_AES_192_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_AES_256_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
 
-	/* needed for ipsec, uses software crypto */
+	/* HMACs needed for IPsec, uses software crypto. */
 	algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_SHA1_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
 	algs[CRYPTO_RIPEMD160_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
@@ -150,6 +177,7 @@ int
 aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
 {
 	struct aesni_session *ses = NULL;
+	struct aesni_aes_ctx *aes1, *aes2;
 	struct cryptoini *c;
 	struct auth_hash *axf;
 	struct swcr_data *swd;
@@ -184,6 +212,28 @@ aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
 		fpu_kernel_exit();
 		break;
 
+	case CRYPTO_AES_XTS:
+		ses->ses_xts = malloc(sizeof(struct aesni_xts_ctx),
+		    M_CRYPTO_DATA, M_NOWAIT | M_ZERO);
+		if (ses->ses_xts == NULL) {
+			aesni_freesession(ses->ses_sid);
+			return (ENOMEM);
+		}
+
+		ses->ses_klen = c->cri_klen / 16;
+		aes1 = &ses->ses_xts->xts_keys[0];
+		aes1->aes_klen = ses->ses_klen;
+		aes2 = &ses->ses_xts->xts_keys[1];
+		aes2->aes_klen = ses->ses_klen;
+
+		fpu_kernel_enter();
+		aesni_set_key((struct aesni_session *)aes1,
+		    c->cri_key, aes1->aes_klen);
+		aesni_set_key((struct aesni_session *)aes2,
+		    c->cri_key + ses->ses_klen, aes2->aes_klen);
+		fpu_kernel_exit();
+		break;
+
 	case CRYPTO_AES_128_GMAC:
 	case CRYPTO_AES_192_GMAC:
 	case CRYPTO_AES_256_GMAC:
@@ -300,10 +350,15 @@ aesni_freesession(u_int64_t tid)
 	LIST_REMOVE(ses, ses_entries);
 
 	if (ses->ses_ghash) {
-		bzero(ses->ses_ghash, sizeof(GHASH_CTX));
+		explicit_bzero(ses->ses_ghash, sizeof(GHASH_CTX));
 		free(ses->ses_ghash, M_CRYPTO_DATA);
 	}
 
+	if (ses->ses_xts) {
+		explicit_bzero(ses->ses_xts, sizeof(struct aesni_xts_ctx));
+		free(ses->ses_xts, M_CRYPTO_DATA);
+	}
+
 	if (ses->ses_swd) {
 		swd = ses->ses_swd;
 		axf = swd->sw_axf;
@@ -343,12 +398,12 @@ int
 aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
     struct cryptodesc *crda, struct aesni_session *ses)
 {
+	int aadlen, err, ivlen, iskip, oskip, rlen;
 	uint8_t iv[EALG_MAX_BLOCK_LEN];
 	uint8_t icb[AESCTR_BLOCKSIZE];
 	uint8_t tag[GMAC_DIGEST_LEN];
 	uint8_t *buf = aesni_sc->sc_buf;
 	uint32_t *dw;
-	int aadlen, err, ivlen, iskip, oskip, rlen;
 
 	aadlen = rlen = err = iskip = oskip = 0;
@@ -367,7 +422,7 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
 		aesni_sc->sc_buflen = rlen;
 	}
 
-	/* CBC uses 16, CTR only 8 */
+	/* CBC uses 16, CTR/XTS only 8. */
 	ivlen = (crd->crd_alg == CRYPTO_AES_CBC) ? 16 : 8;
 
 	/* Initialize the IV */
@@ -492,6 +547,12 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
 		/* finalization */
 		aesni_gmac_final(ses, tag, icb, ses->ses_ghash->S);
 		break;
+	case CRYPTO_AES_XTS:
+		if (crd->crd_flags & CRD_F_ENCRYPT)
+			aesni_xts_enc(ses->ses_xts, buf, buf, crd->crd_len, iv);
+		else
+			aesni_xts_dec(ses->ses_xts, buf, buf, crd->crd_len, iv);
+		break;
 	}
 
 	fpu_kernel_exit();
@@ -555,6 +616,7 @@ aesni_process(struct cryptop *crp)
 		switch (crd->crd_alg) {
 		case CRYPTO_AES_CBC:
 		case CRYPTO_AES_CTR:
+		case CRYPTO_AES_XTS:
 			err = aesni_encdec(crp, crd, NULL, ses);
 			if (err != 0)
 				goto out;
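
A note on the key handling in aesni_newsession() above (not part of the commit): cri_klen is expressed in bits, so cri_klen / 16 is the byte length of each of the two AES keys; the first half of the supplied key material becomes the data key and the second half the tweak key (a 512-bit key yields two 256-bit keys). A hypothetical helper making the split explicit:

```c
#include <stdint.h>

/* Illustrative only; the driver keeps the expanded schedules in struct aesni_xts_ctx. */
struct xts_key_halves {
	const uint8_t	*data_key;	/* encrypts/decrypts the data blocks */
	const uint8_t	*tweak_key;	/* encrypts the block number into the tweak */
	int		 klen;		/* bytes per key */
};

static void
xts_split_key(const uint8_t *key, int klen_bits, struct xts_key_halves *kh)
{
	kh->klen = klen_bits / 16;		/* bits -> bytes, halved */
	kh->data_key = key;			/* first half: data key */
	kh->tweak_key = key + kh->klen;		/* second half: tweak key */
}
```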