author     Patrick Wildt <patrick@cvs.openbsd.org>    2019-03-13 10:18:31 +0000
committer  Patrick Wildt <patrick@cvs.openbsd.org>    2019-03-13 10:18:31 +0000
commit     3b662f914412f8456ea61ce0077df37f9a22c1cb
tree       c00aedb15041225ead85808faa5bfecae2335d89 /lib/libcrypto
parent     f149cc20d2d70fe3fd7158d15aa684c616682f86
Backport support for probing ARMv8 HW acceleration capabilities on armv7,
in preparation for adding the probing code for arm64. ok bcook@
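
The mechanism being backported executes one candidate instruction per feature
and catches the SIGILL the CPU raises when that feature is absent. The C
sketch below distills that idiom from the armcap.c hunk further down: the
probe stubs are the assembly routines added in armv4cpuid.S, while the
probe_example() harness and the handler body shown here are illustrative,
not code lifted verbatim from this commit.

#include <setjmp.h>
#include <signal.h>
#include <string.h>

#define ARMV7_NEON	(1 << 0)
#define ARMV8_AES	(1 << 1)

static sigjmp_buf ill_jmp;
static unsigned int OPENSSL_armcap_P = 0;

/* Assembly stubs (see armv4cpuid.S below): each executes a single
 * NEON/crypto instruction and returns. */
void _armv7_neon_probe(void);
void _armv8_aes_probe(void);

static void
ill_handler(int sig)
{
	/* The probed instruction was undefined on this CPU. */
	siglongjmp(ill_jmp, sig);
}

void
probe_example(void)
{
	struct sigaction ill_act, ill_oact;

	memset(&ill_act, 0, sizeof(ill_act));
	ill_act.sa_handler = ill_handler;
	sigemptyset(&ill_act.sa_mask);
	sigaction(SIGILL, &ill_act, &ill_oact);

	if (sigsetjmp(ill_jmp, 1) == 0) {
		_armv7_neon_probe();		/* SIGILL if no NEON */
		OPENSSL_armcap_P |= ARMV7_NEON;
		if (sigsetjmp(ill_jmp, 1) == 0) {
			_armv8_aes_probe();	/* SIGILL if no AES */
			OPENSSL_armcap_P |= ARMV8_AES;
		}
	}

	sigaction(SIGILL, &ill_oact, NULL);	/* restore old handler */
}
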
Diffstat (limited to 'lib/libcrypto')
-rw-r--r--  lib/libcrypto/arm_arch.h   |   8
-rw-r--r--  lib/libcrypto/armcap.c     |  21
-rw-r--r--  lib/libcrypto/armv4cpuid.S | 124
3 files changed, 121 insertions, 32 deletions
diff --git a/lib/libcrypto/arm_arch.h b/lib/libcrypto/arm_arch.h
index 8b8a05b5f71..a64c6da46eb 100644
--- a/lib/libcrypto/arm_arch.h
+++ b/lib/libcrypto/arm_arch.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: arm_arch.h,v 1.8 2018/01/07 12:35:52 kettenis Exp $ */
+/* $OpenBSD: arm_arch.h,v 1.9 2019/03/13 10:18:30 patrick Exp $ */
#ifndef __ARM_ARCH_H__
#define __ARM_ARCH_H__
@@ -41,7 +41,11 @@
#if !defined(__ASSEMBLER__)
extern unsigned int OPENSSL_armcap_P;
-#define ARMV7_NEON (1<<0)
+#define ARMV7_NEON (1<<0)
+#define ARMV8_AES (1<<1)
+#define ARMV8_SHA1 (1<<2)
+#define ARMV8_SHA256 (1<<3)
+#define ARMV8_PMULL (1<<4)
#endif
#if defined(__OpenBSD__)
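
With these bits defined, consumers elsewhere in libcrypto can pick an
accelerated code path with a plain mask test against OPENSSL_armcap_P. A
minimal hedged illustration follows; aes_hw_encrypt() and aes_sw_encrypt()
are hypothetical placeholders, not identifiers from this commit.

#include "arm_arch.h"

extern unsigned int OPENSSL_armcap_P;

void aes_hw_encrypt(void);	/* hypothetical accelerated routine */
void aes_sw_encrypt(void);	/* hypothetical portable fallback */

void
aes_encrypt_dispatch(void)
{
	/* Runtime dispatch on the capability bit set by the probes. */
	if (OPENSSL_armcap_P & ARMV8_AES)
		aes_hw_encrypt();
	else
		aes_sw_encrypt();
}
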
diff --git a/lib/libcrypto/armcap.c b/lib/libcrypto/armcap.c
index 3fda1853261..8c4983280e8 100644
--- a/lib/libcrypto/armcap.c
+++ b/lib/libcrypto/armcap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: armcap.c,v 1.7 2018/11/11 03:27:56 bcook Exp $ */
+/* $OpenBSD: armcap.c,v 1.8 2019/03/13 10:18:30 patrick Exp $ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -22,6 +22,10 @@ static sigjmp_buf ill_jmp;
* ARM compilers support inline assembler...
*/
void _armv7_neon_probe(void);
+void _armv8_aes_probe(void);
+void _armv8_sha1_probe(void);
+void _armv8_sha256_probe(void);
+void _armv8_pmull_probe(void);
#endif
#if defined(__GNUC__) && __GNUC__>=2
@@ -61,6 +65,21 @@ OPENSSL_cpuid_setup(void)
if (sigsetjmp(ill_jmp, 1) == 0) {
_armv7_neon_probe();
OPENSSL_armcap_P |= ARMV7_NEON;
+ if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_pmull_probe();
+ OPENSSL_armcap_P |= ARMV8_PMULL | ARMV8_AES;
+ } else if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_aes_probe();
+ OPENSSL_armcap_P |= ARMV8_AES;
+ }
+ if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_sha1_probe();
+ OPENSSL_armcap_P |= ARMV8_SHA1;
+ }
+ if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_sha256_probe();
+ OPENSSL_armcap_P |= ARMV8_SHA256;
+ }
}
sigaction (SIGILL, &ill_oact, NULL);
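
Two details of the hunk above are worth noting: each probe re-arms ill_jmp
with its own sigsetjmp(), since siglongjmp() returns to whatever the buffer
last recorded, and a successful vmull.p64 probe sets ARMV8_AES alongside
ARMV8_PMULL, with the bare AES probe kept as a fallback. The pattern
generalizes to a small helper, sketched here under the same assumptions
(runs_without_sigill() is not an API introduced by this commit):

/* Run fn() and report whether it completed without raising SIGILL.
 * Assumes the SIGILL handler installed by OPENSSL_cpuid_setup() is
 * still active when this is called. */
static int
runs_without_sigill(void (*fn)(void))
{
	if (sigsetjmp(ill_jmp, 1) == 0) {
		fn();		/* a fault here jumps to the else path */
		return 1;
	}
	return 0;
}

Usage would collapse each probe to a single line, e.g.
if (runs_without_sigill(_armv8_sha1_probe)) OPENSSL_armcap_P |= ARMV8_SHA1;
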
diff --git a/lib/libcrypto/armv4cpuid.S b/lib/libcrypto/armv4cpuid.S
index 5ca979f3b3e..bb9abafebe5 100644
--- a/lib/libcrypto/armv4cpuid.S
+++ b/lib/libcrypto/armv4cpuid.S
@@ -1,19 +1,16 @@
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
-
-.align 5
-#if __ARM_ARCH__>=7
-.global _armv7_neon_probe
-.type _armv7_neon_probe,%function
-_armv7_neon_probe:
- .word 0xf26ee1fe @ vorr q15,q15,q15
- .word 0xe12fff1e @ bx lr
-.size _armv7_neon_probe,.-_armv7_neon_probe
+#undef __thumb2__
#endif
-.global OPENSSL_atomic_add
+.align 5
+.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
OPENSSL_atomic_add:
#if __ARM_ARCH__>=6
@@ -23,9 +20,9 @@ OPENSSL_atomic_add:
cmp r2,#0
bne .Ladd
mov r0,r3
- .word 0xe12fff1e @ bx lr
+ bx lr
#else
- stmdb sp!,{r4-r6,lr}
+ stmdb sp!,{r4,r5,r6,lr}
ldr r2,.Lspinlock
adr r3,.Lspinlock
mov r4,r0
@@ -42,46 +39,115 @@ OPENSSL_atomic_add:
add r2,r2,r5
str r2,[r4]
str r0,[r6] @ release spinlock
- ldmia sp!,{r4-r6,lr}
+ ldmia sp!,{r4,r5,r6,lr}
tst lr,#1
moveq pc,lr
- .word 0xe12fff1e @ bx lr
+.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
-.global OPENSSL_wipe_cpu
+#if __ARM_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.align 5
+.globl _armv7_neon_probe
+.type _armv7_neon_probe,%function
+_armv7_neon_probe:
+ vorr q0,q0,q0
+ bx lr
+.size _armv7_neon_probe,.-_armv7_neon_probe
+
+.globl _armv8_aes_probe
+.type _armv8_aes_probe,%function
+_armv8_aes_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0
+#else
+.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+#endif
+ bx lr
+.size _armv8_aes_probe,.-_armv8_aes_probe
+
+.globl _armv8_sha1_probe
+.type _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0
+#else
+.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+#endif
+ bx lr
+.size _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.globl _armv8_sha256_probe
+.type _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0
+#else
+.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+#endif
+ bx lr
+.size _armv8_sha256_probe,.-_armv8_sha256_probe
+.globl _armv8_pmull_probe
+.type _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+.byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0
+#else
+.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+#endif
+ bx lr
+.size _armv8_pmull_probe,.-_armv8_pmull_probe
+#endif
+
+.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu:
+#if __ARM_ARCH__>=7
ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0]
+#ifdef __APPLE__
+ ldr r0,[r0]
+#endif
+#endif
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
+#if __ARM_ARCH__>=7
tst r0,#1
beq .Lwipe_done
- .word 0xf3000150 @ veor q0, q0, q0
- .word 0xf3022152 @ veor q1, q1, q1
- .word 0xf3044154 @ veor q2, q2, q2
- .word 0xf3066156 @ veor q3, q3, q3
- .word 0xf34001f0 @ veor q8, q8, q8
- .word 0xf34221f2 @ veor q9, q9, q9
- .word 0xf34441f4 @ veor q10, q10, q10
- .word 0xf34661f6 @ veor q11, q11, q11
- .word 0xf34881f8 @ veor q12, q12, q12
- .word 0xf34aa1fa @ veor q13, q13, q13
- .word 0xf34cc1fc @ veor q14, q14, q14
- .word 0xf34ee1fe @ veor q15, q15, q15
+ veor q0, q0, q0
+ veor q1, q1, q1
+ veor q2, q2, q2
+ veor q3, q3, q3
+ veor q8, q8, q8
+ veor q9, q9, q9
+ veor q10, q10, q10
+ veor q11, q11, q11
+ veor q12, q12, q12
+ veor q13, q13, q13
+ veor q14, q14, q14
+ veor q15, q15, q15
.Lwipe_done:
+#endif
mov r0,sp
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
- .word 0xe12fff1e @ bx lr
+.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.align 5
+#if __ARM_ARCH__>=7
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.LOPENSSL_armcap
+.word OPENSSL_armcap_P-.
+#endif
#if __ARM_ARCH__>=6
.align 5
#else