summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joel Sing <jsing@cvs.openbsd.org> 2023-06-17 15:40:47 +0000
committer Joel Sing <jsing@cvs.openbsd.org> 2023-06-17 15:40:47 +0000
commit fea93fea949171985c3afccc0a385be0cbd5c253 (patch)
tree 057f2d497e2ed9fba092dd42d1b0885fddd96ae5
parent 06c3588e11fcc37685ff3392d15fc1bdf99efeb0 (diff)
Optimise bn_mul2_mulw_addtw() for aarch64.
This provides significant performance gains for bn_sqr_comba4() and bn_sqr_comba8().
-rw-r--r-- lib/libcrypto/bn/arch/aarch64/bn_arch.h 29
1 files changed, 28 insertions, 1 deletions
diff --git a/lib/libcrypto/bn/arch/aarch64/bn_arch.h b/lib/libcrypto/bn/arch/aarch64/bn_arch.h
index f658510c739..aa780e09e91 100644
--- a/lib/libcrypto/bn/arch/aarch64/bn_arch.h
+++ b/lib/libcrypto/bn/arch/aarch64/bn_arch.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_arch.h,v 1.10 2023/06/12 16:42:11 jsing Exp $ */
+/* $OpenBSD: bn_arch.h,v 1.11 2023/06/17 15:40:46 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
@@ -177,6 +177,33 @@ bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
*out_r0 = r0;
}
+#define HAVE_BN_MUL2_MULW_ADDTW
+/*
+ * bn_mul2_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), where
+ * a * b is a double word product and r2/c2 are the most significant words.
+ * The product is formed once and then added to the accumulator twice, rather
+ * than being doubled in place. The three result words are stored through
+ * out_r2, out_r1 and out_r0; any carry out of r2 is discarded.
+ *
+ * NOTE(review): HAVE_BN_MUL2_MULW_ADDTW presumably tells the generic BN code
+ * that this architecture supplies its own implementation - confirm.
+ */
+static inline void
+bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
+ BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
+{
+ BN_ULONG r2, r1, r0, x1, x0;
+
+ __asm__ (
+ "umulh %[x1], %[a], %[b] \n" /* x1 = high word of a * b */
+ "mul %[x0], %[a], %[b] \n" /* x0 = low word of a * b */
+ "adds %[r0], %[c0], %[x0] \n" /* first add: r0 = c0 + x0, sets carry */
+ "adcs %[r1], %[c1], %[x1] \n" /* r1 = c1 + x1 + carry, sets carry */
+ "adc %[r2], xzr, %[c2] \n" /* r2 = c2 + carry */
+ "adds %[r0], %[r0], %[x0] \n" /* second add: r0 += x0, sets carry */
+ "adcs %[r1], %[r1], %[x1] \n" /* r1 += x1 + carry, sets carry */
+ "adc %[r2], xzr, %[r2] \n" /* r2 += carry; no flags are set here */
+ : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1),
+ [x0]"=&r"(x0) /* early-clobber (&): outputs are written while inputs are still needed */
+ : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
+ : "cc"); /* adds/adcs modify the condition flags */
+
+ *out_r2 = r2;
+ *out_r1 = r1;
+ *out_r0 = r0;
+}
+
#define HAVE_BN_QWMULW_ADDW
static inline void