author     Patrick Wildt <patrick@cvs.openbsd.org>    2019-03-13 10:18:31 +0000
committer  Patrick Wildt <patrick@cvs.openbsd.org>    2019-03-13 10:18:31 +0000
commit     3b662f914412f8456ea61ce0077df37f9a22c1cb (patch)
tree       c00aedb15041225ead85808faa5bfecae2335d89 /lib/libcrypto
parent     f149cc20d2d70fe3fd7158d15aa684c616682f86 (diff)
Backport support for probing ARMv8 HW acceleration capabilities on armv7,
in preparation for adding the same probing code on arm64.
ok bcook@
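
For context, the backported probes work by executing a single instruction from
the target extension under a SIGILL handler: if the CPU lacks the extension,
the kernel delivers SIGILL and the handler longjmps back out before the
capability bit is set.  The following is a minimal sketch of that pattern,
condensed from the armcap.c change below; probe_aes() is an invented name, and
it assumes linking against the library's armcap.c (which defines
OPENSSL_armcap_P) and armv4cpuid.S (which provides _armv8_aes_probe):

	#include <setjmp.h>
	#include <signal.h>
	#include <string.h>

	#include "arm_arch.h"		/* ARMV8_AES, extern OPENSSL_armcap_P */

	void _armv8_aes_probe(void);	/* one aese.8 q0,q0, then bx lr */

	static sigjmp_buf ill_jmp;

	static void
	ill_handler(int sig)
	{
		/* The unsupported instruction raised SIGILL; jump back out. */
		siglongjmp(ill_jmp, sig);
	}

	static void
	probe_aes(void)
	{
		struct sigaction ill_act, ill_oact;

		memset(&ill_act, 0, sizeof(ill_act));
		ill_act.sa_handler = ill_handler;
		sigaction(SIGILL, &ill_act, &ill_oact);

		if (sigsetjmp(ill_jmp, 1) == 0) {
			_armv8_aes_probe();		/* traps if AES is absent */
			OPENSSL_armcap_P |= ARMV8_AES;	/* reached only on success */
		}

		sigaction(SIGILL, &ill_oact, NULL);	/* restore old handler */
	}

The committed code nests the ARMv8 probes inside a successful NEON probe, and
tries vmull.p64 before falling back to a bare aese probe: as the diff shows, a
successful PMULL probe is taken to imply AES support as well.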
Diffstat (limited to 'lib/libcrypto')
-rw-r--r--  lib/libcrypto/arm_arch.h   |   8
-rw-r--r--  lib/libcrypto/armcap.c     |  21
-rw-r--r--  lib/libcrypto/armv4cpuid.S | 124
3 files changed, 121 insertions(+), 32 deletions(-)
diff --git a/lib/libcrypto/arm_arch.h b/lib/libcrypto/arm_arch.h
index 8b8a05b5f71..a64c6da46eb 100644
--- a/lib/libcrypto/arm_arch.h
+++ b/lib/libcrypto/arm_arch.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: arm_arch.h,v 1.8 2018/01/07 12:35:52 kettenis Exp $ */
+/* $OpenBSD: arm_arch.h,v 1.9 2019/03/13 10:18:30 patrick Exp $ */
 
 #ifndef __ARM_ARCH_H__
 #define __ARM_ARCH_H__
@@ -41,7 +41,11 @@
 
 #if !defined(__ASSEMBLER__)
 extern unsigned int OPENSSL_armcap_P;
-#define ARMV7_NEON (1<<0)
+#define ARMV7_NEON (1<<0)
+#define ARMV8_AES (1<<1)
+#define ARMV8_SHA1 (1<<2)
+#define ARMV8_SHA256 (1<<3)
+#define ARMV8_PMULL (1<<4)
 #endif
 
 #if defined(__OpenBSD__)
diff --git a/lib/libcrypto/armcap.c b/lib/libcrypto/armcap.c
index 3fda1853261..8c4983280e8 100644
--- a/lib/libcrypto/armcap.c
+++ b/lib/libcrypto/armcap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: armcap.c,v 1.7 2018/11/11 03:27:56 bcook Exp $ */
+/* $OpenBSD: armcap.c,v 1.8 2019/03/13 10:18:30 patrick Exp $ */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -22,6 +22,10 @@ static sigjmp_buf ill_jmp;
  * ARM compilers support inline assembler...
  */
 void _armv7_neon_probe(void);
+void _armv8_aes_probe(void);
+void _armv8_sha1_probe(void);
+void _armv8_sha256_probe(void);
+void _armv8_pmull_probe(void);
 #endif
 
 #if defined(__GNUC__) && __GNUC__>=2
@@ -61,6 +65,21 @@ OPENSSL_cpuid_setup(void)
 	if (sigsetjmp(ill_jmp, 1) == 0) {
 		_armv7_neon_probe();
 		OPENSSL_armcap_P |= ARMV7_NEON;
+		if (sigsetjmp(ill_jmp, 1) == 0) {
+			_armv8_pmull_probe();
+			OPENSSL_armcap_P |= ARMV8_PMULL | ARMV8_AES;
+		} else if (sigsetjmp(ill_jmp, 1) == 0) {
+			_armv8_aes_probe();
+			OPENSSL_armcap_P |= ARMV8_AES;
+		}
+		if (sigsetjmp(ill_jmp, 1) == 0) {
+			_armv8_sha1_probe();
+			OPENSSL_armcap_P |= ARMV8_SHA1;
+		}
+		if (sigsetjmp(ill_jmp, 1) == 0) {
+			_armv8_sha256_probe();
+			OPENSSL_armcap_P |= ARMV8_SHA256;
+		}
 	}
 
 	sigaction (SIGILL, &ill_oact, NULL);
diff --git a/lib/libcrypto/armv4cpuid.S b/lib/libcrypto/armv4cpuid.S
index 5ca979f3b3e..bb9abafebe5 100644
--- a/lib/libcrypto/armv4cpuid.S
+++ b/lib/libcrypto/armv4cpuid.S
@@ -1,19 +1,16 @@
 #include "arm_arch.h"
 
 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
 .code 32
-
-.align 5
-#if __ARM_ARCH__>=7
-.global _armv7_neon_probe
-.type _armv7_neon_probe,%function
-_armv7_neon_probe:
-	.word 0xf26ee1fe @ vorr q15,q15,q15
-	.word 0xe12fff1e @ bx lr
-.size _armv7_neon_probe,.-_armv7_neon_probe
+#undef __thumb2__
 #endif
 
-.global OPENSSL_atomic_add
+.align 5
+.globl OPENSSL_atomic_add
 .type OPENSSL_atomic_add,%function
 OPENSSL_atomic_add:
 #if __ARM_ARCH__>=6
@@ -23,9 +20,9 @@ OPENSSL_atomic_add:
 	cmp r2,#0
 	bne .Ladd
 	mov r0,r3
-	.word 0xe12fff1e @ bx lr
+	bx lr
 #else
-	stmdb sp!,{r4-r6,lr}
+	stmdb sp!,{r4,r5,r6,lr}
 	ldr r2,.Lspinlock
 	adr r3,.Lspinlock
 	mov r4,r0
@@ -42,46 +39,115 @@ OPENSSL_atomic_add:
 	add r2,r2,r5
 	str r2,[r4]
 	str r0,[r6] @ release spinlock
-	ldmia sp!,{r4-r6,lr}
+	ldmia sp!,{r4,r5,r6,lr}
 	tst lr,#1
 	moveq pc,lr
-	.word 0xe12fff1e @ bx lr
+.word 0xe12fff1e @ bx lr
 #endif
 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
 
-.global OPENSSL_wipe_cpu
+#if __ARM_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.align 5
+.globl _armv7_neon_probe
+.type _armv7_neon_probe,%function
+_armv7_neon_probe:
+	vorr q0,q0,q0
+	bx lr
+.size _armv7_neon_probe,.-_armv7_neon_probe
+
+.globl _armv8_aes_probe
+.type _armv8_aes_probe,%function
+_armv8_aes_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0
+#else
+.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+#endif
+	bx lr
+.size _armv8_aes_probe,.-_armv8_aes_probe
+
+.globl _armv8_sha1_probe
+.type _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0
+#else
+.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+#endif
+	bx lr
+.size _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.globl _armv8_sha256_probe
+.type _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0
+#else
+.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+#endif
+	bx lr
+.size _armv8_sha256_probe,.-_armv8_sha256_probe
+
+.globl _armv8_pmull_probe
+.type _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0
+#else
+.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+#endif
+	bx lr
+.size _armv8_pmull_probe,.-_armv8_pmull_probe
+#endif
+
+.globl OPENSSL_wipe_cpu
 .type OPENSSL_wipe_cpu,%function
 OPENSSL_wipe_cpu:
+#if __ARM_ARCH__>=7
 	ldr r0,.LOPENSSL_armcap
 	adr r1,.LOPENSSL_armcap
 	ldr r0,[r1,r0]
+#ifdef __APPLE__
+	ldr r0,[r0]
+#endif
+#endif
 	eor r2,r2,r2
 	eor r3,r3,r3
 	eor ip,ip,ip
+#if __ARM_ARCH__>=7
 	tst r0,#1
 	beq .Lwipe_done
-	.word 0xf3000150 @ veor q0, q0, q0
-	.word 0xf3022152 @ veor q1, q1, q1
-	.word 0xf3044154 @ veor q2, q2, q2
-	.word 0xf3066156 @ veor q3, q3, q3
-	.word 0xf34001f0 @ veor q8, q8, q8
-	.word 0xf34221f2 @ veor q9, q9, q9
-	.word 0xf34441f4 @ veor q10, q10, q10
-	.word 0xf34661f6 @ veor q11, q11, q11
-	.word 0xf34881f8 @ veor q12, q12, q12
-	.word 0xf34aa1fa @ veor q13, q13, q13
-	.word 0xf34cc1fc @ veor q14, q14, q14
-	.word 0xf34ee1fe @ veor q15, q15, q15
+	veor q0, q0, q0
+	veor q1, q1, q1
+	veor q2, q2, q2
+	veor q3, q3, q3
+	veor q8, q8, q8
+	veor q9, q9, q9
+	veor q10, q10, q10
+	veor q11, q11, q11
+	veor q12, q12, q12
+	veor q13, q13, q13
+	veor q14, q14, q14
+	veor q15, q15, q15
 .Lwipe_done:
+#endif
 	mov r0,sp
+#if __ARM_ARCH__>=5
+	bx lr
+#else
 	tst lr,#1
 	moveq pc,lr
-	.word 0xe12fff1e @ bx lr
+.word 0xe12fff1e @ bx lr
+#endif
 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
 
 .align 5
+#if __ARM_ARCH__>=7
 .LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.LOPENSSL_armcap
+.word OPENSSL_armcap_P-.
+#endif
 #if __ARM_ARCH__>=6
 .align 5
 #else
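
Note that the new probes are emitted as raw .byte sequences, with separate
ARM-mode and Thumb-2 encodings, so the file still assembles with toolchains
whose assemblers predate the ARMv8 crypto extensions; the mnemonic each
encoding stands for is kept in the trailing @ comment.  Once
OPENSSL_cpuid_setup() has filled in OPENSSL_armcap_P, other code can select an
implementation by testing the new bits, along these lines (an illustrative
sketch, not code from this commit; the function name is invented):

	#include "arm_arch.h"	/* declares OPENSSL_armcap_P and the ARMV8_* bits */

	/* Report which AES path the probed capability bits would select. */
	static const char *
	aes_impl(void)
	{
		if (OPENSSL_armcap_P & ARMV8_AES)
			return "armv8";		/* hardware AES instructions */
		if (OPENSSL_armcap_P & ARMV7_NEON)
			return "neon";
		return "c";			/* generic fallback */
	}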