summaryrefslogtreecommitdiff
path: root/sys/arch/amd64
diff options
context:
space:
mode:
authorMike Belopuhov <mikeb@cvs.openbsd.org>2011-08-17 17:00:36 +0000
committerMike Belopuhov <mikeb@cvs.openbsd.org>2011-08-17 17:00:36 +0000
commitacfc089e56799c7010cb263d8ed2ccd18bf6edd7 (patch)
tree13295d08cd24016ea0f02a00cc5f44e33002bc6a /sys/arch/amd64
parent83a4b2c1d01916a4f37477934f43d1634cb37950 (diff)
Assembler implementation of the GCM mode using the Carry-less
Multiplication (CLMUL) instruction found in the new Intel and future AMD CPUs. Done about a year ago and was rotting in my trees until Ryan prodded Theo to read the white paper and figure out the license issues. Apparently, there are none. All C code and SSE glue code was written by me, while the GF multiplication function is taken from the CLMUL white paper, specifically Figure 6: "Code Sample - Performing Ghash Using Algorithms 1 and 5".
Diffstat (limited to 'sys/arch/amd64')
-rw-r--r--sys/arch/amd64/amd64/aes_intel.S114
-rw-r--r--sys/arch/amd64/amd64/aesni.c146
2 files changed, 246 insertions, 14 deletions
diff --git a/sys/arch/amd64/amd64/aes_intel.S b/sys/arch/amd64/amd64/aes_intel.S
index 013e8e8b000..a3c7735eae1 100644
--- a/sys/arch/amd64/amd64/aes_intel.S
+++ b/sys/arch/amd64/amd64/aes_intel.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: aes_intel.S,v 1.4 2010/11/15 14:48:17 mikeb Exp $ */
+/* $OpenBSD: aes_intel.S,v 1.5 2011/08/17 17:00:35 mikeb Exp $ */
/*
* Implement AES algorithm in Intel AES-NI instructions.
@@ -71,6 +71,7 @@
#define OUTP %rsi
#define INP %rdx
#define LEN %rcx
+#define HSTATE %rcx
#define IVP %r8
#define ICBP %r8
#define KLEN %r9d
@@ -876,3 +877,114 @@ ENTRY(aesni_ctr_enc)
movq IV, (IVP)
.Lctr_enc_just_ret:
ret
+
+_aesni_gmac_gfmul:
+ movdqa %xmm0, %xmm3
+ pclmulqdq $0x00, %xmm1, %xmm3 # xmm3 holds a0*b0
+ movdqa %xmm0, %xmm4
+ pclmulqdq $0x10, %xmm1, %xmm4 # xmm4 holds a0*b1
+ movdqa %xmm0, %xmm5
+ pclmulqdq $0x01, %xmm1, %xmm5 # xmm5 holds a1*b0
+ movdqa %xmm0, %xmm6
+ pclmulqdq $0x11, %xmm1, %xmm6 # xmm6 holds a1*b1
+
+ pxor %xmm5, %xmm4 # xmm4 holds a0*b1 + a1*b0
+ movdqa %xmm4, %xmm5
+ psrldq $8, %xmm4
+ pslldq $8, %xmm5
+ pxor %xmm5, %xmm3
+ pxor %xmm4, %xmm6
+
+ /*
+ * <xmm6:xmm3> holds the result of the carry-less
+ * multiplication of xmm0 by xmm1
+ *
+ * shift the result by one bit position to the left
+ * cope for the fact that bits are reversed
+ */
+ movdqa %xmm3, %xmm7
+ movdqa %xmm6, %xmm8
+ pslld $1, %xmm3
+ pslld $1, %xmm6
+ psrld $31, %xmm7
+ psrld $31, %xmm8
+ movdqa %xmm7, %xmm9
+ pslldq $4, %xmm8
+ pslldq $4, %xmm7
+ psrldq $12, %xmm9
+ por %xmm7, %xmm3
+ por %xmm8, %xmm6
+ por %xmm9, %xmm6
+
+ /* first phase of the reduction */
+ movdqa %xmm3, %xmm7
+ movdqa %xmm3, %xmm8
+ movdqa %xmm3, %xmm9
+ pslld $31, %xmm7 # packed right shifting << 31
+ pslld $30, %xmm8 # packed right shifting shift << 30
+ pslld $25, %xmm9 # packed right shifting shift << 25
+ pxor %xmm8, %xmm7 # xor the shifted versions
+ pxor %xmm9, %xmm7
+ movdqa %xmm7, %xmm8
+ pslldq $12, %xmm7
+ psrldq $4, %xmm8
+ pxor %xmm7, %xmm3
+
+ /* second phase of the reduction */
+ movdqa %xmm3,%xmm2
+ movdqa %xmm3,%xmm4
+ movdqa %xmm3,%xmm5
+ psrld $1, %xmm2 # packed left shifting >> 1
+ psrld $2, %xmm4 # packed left shifting >> 2
+ psrld $7, %xmm5 # packed left shifting >> 7
+ pxor %xmm4, %xmm2 # xor the shifted versions
+ pxor %xmm5, %xmm2
+ pxor %xmm8, %xmm2
+ pxor %xmm2, %xmm3
+ pxor %xmm3, %xmm6 # the result is in xmm6
+ ret
+
+/*
+ * void aesni_gmac_update(GHASH_CTX *ghash, uint8_t *src, size_t len)
+ */
+ENTRY(aesni_gmac_update)
+ cmp $16, %rdx
+ jb .Lgcm_hash_just_ret
+
+ movdqu .Lbswap_mask, BSWAP_MASK # endianness swap mask
+
+ movdqu (%rdi), %xmm1 # hash subkey
+ movdqu 32(%rdi), %xmm6 # initial state
+ pshufb BSWAP_MASK, %xmm1
+ pshufb BSWAP_MASK, %xmm6
+
+.Lgcm_hash_loop:
+ movdqu (%rsi), %xmm2
+ pshufb BSWAP_MASK, %xmm2
+ movdqa %xmm6, %xmm0
+ pxor %xmm2, %xmm0
+ call _aesni_gmac_gfmul
+
+ sub $16, %rdx
+ add $16, %rsi
+ cmp $16, %rdx
+ jge .Lgcm_hash_loop
+
+ pshufb BSWAP_MASK, %xmm6
+ movdqu %xmm6, 16(%rdi)
+ movdqu %xmm6, 32(%rdi)
+.Lgcm_hash_just_ret:
+ ret
+
+/*
+ * void aesni_gmac_final(struct aesni_sess *ses, uint8_t *tag,
+ * uint8_t *icb, uint8_t *hashstate)
+ */
+ENTRY(aesni_gmac_final)
+ movl 480(KEYP), KLEN # key length
+ movdqu (INP), STATE # icb
+ call _aesni_enc1
+ movdqu (HSTATE), IN
+ pxor IN, STATE
+ movdqu STATE, (OUTP) # output
+ ret
diff --git a/sys/arch/amd64/amd64/aesni.c b/sys/arch/amd64/amd64/aesni.c
index 0f519745ae1..0f65f73b36e 100644
--- a/sys/arch/amd64/amd64/aesni.c
+++ b/sys/arch/amd64/amd64/aesni.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: aesni.c,v 1.21 2011/05/06 17:31:16 mikeb Exp $ */
+/* $OpenBSD: aesni.c,v 1.22 2011/08/17 17:00:35 mikeb Exp $ */
/*-
* Copyright (c) 2003 Jason Wright
* Copyright (c) 2003, 2004 Theo de Raadt
@@ -29,6 +29,7 @@
#include <crypto/cryptodev.h>
#include <crypto/rijndael.h>
+#include <crypto/gmac.h>
#include <crypto/xform.h>
#include <crypto/cryptosoft.h>
@@ -47,6 +48,7 @@ struct aesni_session {
uint32_t ses_klen;
uint8_t ses_nonce[AESCTR_NONCESIZE];
int ses_sid;
+ GHASH_CTX *ses_ghash;
struct swcr_data *ses_swd;
LIST_ENTRY(aesni_session)
ses_entries;
@@ -82,6 +84,11 @@ extern void aesni_cbc_dec(struct aesni_session *ses, uint8_t *dst,
extern void aesni_ctr_enc(struct aesni_session *ses, uint8_t *dst,
uint8_t *src, size_t len, uint8_t *icb);
+/* assembler-assisted GMAC */
+extern void aesni_gmac_update(GHASH_CTX *ghash, uint8_t *src, size_t len);
+extern void aesni_gmac_final(struct aesni_session *ses, uint8_t *tag,
+ uint8_t *icb, uint8_t *hashstate);
+
void aesni_setup(void);
int aesni_newsession(u_int32_t *, struct cryptoini *);
int aesni_freesession(u_int64_t);
@@ -91,7 +98,7 @@ int aesni_swauth(struct cryptop *, struct cryptodesc *, struct swcr_data *,
caddr_t);
int aesni_encdec(struct cryptop *, struct cryptodesc *,
- struct aesni_session *);
+ struct cryptodesc *, struct aesni_session *);
void
aesni_setup(void)
@@ -109,6 +116,11 @@ aesni_setup(void)
bzero(algs, sizeof(algs));
algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED;
algs[CRYPTO_AES_CTR] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_GCM_16] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_128_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_192_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_AES_256_GMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
/* needed for ipsec, uses software crypto */
algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
@@ -160,6 +172,8 @@ aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
break;
case CRYPTO_AES_CTR:
+ case CRYPTO_AES_GCM_16:
+ case CRYPTO_AES_GMAC:
ses->ses_klen = c->cri_klen / 8 - AESCTR_NONCESIZE;
bcopy(c->cri_key + ses->ses_klen, ses->ses_nonce,
AESCTR_NONCESIZE);
@@ -168,6 +182,24 @@ aesni_newsession(u_int32_t *sidp, struct cryptoini *cri)
fpu_kernel_exit();
break;
+ case CRYPTO_AES_128_GMAC:
+ case CRYPTO_AES_192_GMAC:
+ case CRYPTO_AES_256_GMAC:
+ ses->ses_ghash = malloc(sizeof(GHASH_CTX),
+ M_CRYPTO_DATA, M_NOWAIT | M_ZERO);
+ if (ses->ses_ghash == NULL) {
+ aesni_freesession(ses->ses_sid);
+ return (ENOMEM);
+ }
+
+ bzero(ses->ses_ghash->H, GMAC_BLOCK_LEN);
+ bzero(ses->ses_ghash->S, GMAC_BLOCK_LEN);
+ bzero(ses->ses_ghash->Z, GMAC_BLOCK_LEN);
+
+ /* prepare a hash subkey */
+ aesni_enc(ses, ses->ses_ghash->H, ses->ses_ghash->H);
+ break;
+
case CRYPTO_MD5_HMAC:
axf = &auth_hash_hmac_md5_96;
goto authcommon;
@@ -260,6 +292,11 @@ aesni_freesession(u_int64_t tid)
LIST_REMOVE(ses, ses_entries);
+ if (ses->ses_ghash) {
+ bzero(ses->ses_ghash, sizeof(GHASH_CTX));
+ free(ses->ses_ghash, M_CRYPTO_DATA);
+ }
+
if (ses->ses_swd) {
swd = ses->ses_swd;
axf = swd->sw_axf;
@@ -297,18 +334,15 @@ aesni_swauth(struct cryptop *crp, struct cryptodesc *crd,
int
aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
- struct aesni_session *ses)
+ struct cryptodesc *crda, struct aesni_session *ses)
{
uint8_t iv[EALG_MAX_BLOCK_LEN];
uint8_t icb[AESCTR_BLOCKSIZE];
+ uint8_t tag[GMAC_DIGEST_LEN];
uint8_t *buf = aesni_sc->sc_buf;
+ uint32_t *dw;
int ivlen, rlen, err = 0;
- if ((crd->crd_len % 16) != 0) {
- err = EINVAL;
- return (err);
- }
-
if (crd->crd_len > aesni_sc->sc_buflen) {
if (buf != NULL) {
explicit_bzero(buf, aesni_sc->sc_buflen);
@@ -359,6 +393,21 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
}
}
+ if (crda) {
+ /* Supply GMAC with AAD */
+ rlen = roundup(crda->crd_len, GMAC_BLOCK_LEN);
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copydata((struct mbuf *)crp->crp_buf, crda->crd_skip,
+ crda->crd_len, buf);
+ else
+ cuio_copydata((struct uio *)crp->crp_buf,
+ crda->crd_skip, crda->crd_len, buf);
+ fpu_kernel_enter();
+ aesni_gmac_update(ses->ses_ghash, buf, rlen);
+ fpu_kernel_exit();
+ bzero(buf, crda->crd_len);
+ }
+
/* Copy data to be processed to the buffer */
if (crp->crp_flags & CRYPTO_F_IMBUF)
m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip,
@@ -367,6 +416,16 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip,
crd->crd_len, buf);
+ if (crd->crd_alg == CRYPTO_AES_CTR ||
+ crd->crd_alg == CRYPTO_AES_GCM_16 ||
+ crd->crd_alg == CRYPTO_AES_GMAC) {
+ bzero(icb, AESCTR_BLOCKSIZE);
+ bcopy(ses->ses_nonce, icb, AESCTR_NONCESIZE);
+ bcopy(iv, icb + AESCTR_NONCESIZE, AESCTR_IVSIZE);
+ /* rlen is for gcm and gmac only */
+ rlen = roundup(crd->crd_len, AESCTR_BLOCKSIZE);
+ }
+
/* Apply cipher */
fpu_kernel_enter();
switch (crd->crd_alg) {
@@ -377,11 +436,36 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
aesni_cbc_dec(ses, buf, buf, crd->crd_len, iv);
break;
case CRYPTO_AES_CTR:
- bzero(icb, AESCTR_BLOCKSIZE);
- bcopy(ses->ses_nonce, icb, AESCTR_NONCESIZE);
- bcopy(iv, icb + AESCTR_NONCESIZE, AESCTR_IVSIZE);
aesni_ctr_enc(ses, buf, buf, crd->crd_len, icb);
break;
+ case CRYPTO_AES_GCM_16:
+ icb[AESCTR_BLOCKSIZE - 1] = 1;
+ if (crd->crd_flags & CRD_F_ENCRYPT) {
+ /* encrypt padded data */
+ aesni_ctr_enc(ses, buf, buf, rlen, icb);
+ /* zero out padding bytes */
+ bzero(buf + crd->crd_len, rlen - crd->crd_len);
+ /* hash encrypted data padded with zeroes */
+ aesni_gmac_update(ses->ses_ghash, buf, rlen);
+ } else {
+ aesni_gmac_update(ses->ses_ghash, buf, rlen);
+ aesni_ctr_enc(ses, buf, buf, rlen, icb);
+ }
+ goto gcmcommon;
+ case CRYPTO_AES_GMAC:
+ icb[AESCTR_BLOCKSIZE - 1] = 1;
+ aesni_gmac_update(ses->ses_ghash, buf, rlen);
+ gcmcommon:
+ /* lengths block */
+ bzero(tag, GMAC_BLOCK_LEN);
+ dw = (uint32_t *)tag + 1;
+ *dw = htobe32(crda->crd_len * 8);
+ dw = (uint32_t *)tag + 3;
+ *dw = htobe32(crd->crd_len * 8);
+ aesni_gmac_update(ses->ses_ghash, tag, GMAC_BLOCK_LEN);
+ /* finalization */
+ aesni_gmac_final(ses, tag, icb, ses->ses_ghash->S);
+ break;
}
fpu_kernel_exit();
@@ -398,6 +482,23 @@ aesni_encdec(struct cryptop *crp, struct cryptodesc *crd,
cuio_copyback((struct uio *)crp->crp_buf, crd->crd_skip,
crd->crd_len, buf);
+ /* Copy back the authentication tag */
+ if (crda) {
+ if (crp->crp_flags & CRYPTO_F_IMBUF) {
+ if (m_copyback((struct mbuf *)crp->crp_buf,
+ crda->crd_inject, GMAC_DIGEST_LEN, tag,
+ M_NOWAIT)) {
+ err = ENOMEM;
+ goto out;
+ }
+ } else
+ bcopy(tag, crp->crp_mac, GMAC_BLOCK_LEN);
+
+ /* clean up GHASH state */
+ bzero(ses->ses_ghash->S, GMAC_BLOCK_LEN);
+ bzero(ses->ses_ghash->Z, GMAC_BLOCK_LEN);
+ }
+
out:
explicit_bzero(buf, roundup(crd->crd_len, EALG_MAX_BLOCK_LEN));
return (err);
@@ -407,7 +508,7 @@ int
aesni_process(struct cryptop *crp)
{
struct aesni_session *ses;
- struct cryptodesc *crd;
+ struct cryptodesc *crd, *crda, *crde;
int err = 0;
if (crp == NULL || crp->crp_callback == NULL)
@@ -423,11 +524,30 @@ aesni_process(struct cryptop *crp)
goto out;
}
+ crda = crde = NULL;
for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
switch (crd->crd_alg) {
case CRYPTO_AES_CBC:
case CRYPTO_AES_CTR:
- err = aesni_encdec(crp, crd, ses);
+ err = aesni_encdec(crp, crd, NULL, ses);
+ if (err != 0)
+ goto out;
+ break;
+
+ case CRYPTO_AES_GCM_16:
+ case CRYPTO_AES_GMAC:
+ crde = crd;
+ if (!crda)
+ continue;
+ goto gcmcommon;
+ case CRYPTO_AES_128_GMAC:
+ case CRYPTO_AES_192_GMAC:
+ case CRYPTO_AES_256_GMAC:
+ crda = crd;
+ if (!crde)
+ continue;
+ gcmcommon:
+ err = aesni_encdec(crp, crde, crda, ses);
if (err != 0)
goto out;
break;