diff options
author | Joel Sing <jsing@cvs.openbsd.org> | 2023-06-17 15:40:47 +0000 |
---|---|---|
committer | Joel Sing <jsing@cvs.openbsd.org> | 2023-06-17 15:40:47 +0000 |
commit | fea93fea949171985c3afccc0a385be0cbd5c253 (patch) | |
tree | 057f2d497e2ed9fba092dd42d1b0885fddd96ae5 | |
parent | 06c3588e11fcc37685ff3392d15fc1bdf99efeb0 (diff) |
Optimise bn_mul2_mulw_addtw() for aarch64.
This provides significant performance gains for bn_sqr_comba4() and
bn_sqr_comba8().
-rw-r--r-- | lib/libcrypto/bn/arch/aarch64/bn_arch.h | 29 |
1 files changed, 28 insertions, 1 deletions
diff --git a/lib/libcrypto/bn/arch/aarch64/bn_arch.h b/lib/libcrypto/bn/arch/aarch64/bn_arch.h index f658510c739..aa780e09e91 100644 --- a/lib/libcrypto/bn/arch/aarch64/bn_arch.h +++ b/lib/libcrypto/bn/arch/aarch64/bn_arch.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_arch.h,v 1.10 2023/06/12 16:42:11 jsing Exp $ */ +/* $OpenBSD: bn_arch.h,v 1.11 2023/06/17 15:40:46 jsing Exp $ */ /* * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> * @@ -177,6 +177,33 @@ bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, *out_r0 = r0; } +#define HAVE_BN_MUL2_MULW_ADDTW +
// bn_mul2_mulw_addtw() adds twice the double-word product a * b to the
// triple-word value (c2:c1:c0) and stores the triple-word sum through
// out_r2 (most significant), out_r1 and out_r0 (least significant).
// A carry out of the top word is not captured: the final adc does not
// set flags and nothing reads them afterwards.
// NOTE(review): assumes BN_ULONG is a 64-bit word; its definition is not
// visible in this hunk - confirm.
+static inline void +bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, + BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) +{ + BN_ULONG r2, r1, r0, x1, x0; + + __asm__ (
// x1:x0 = a * b (umulh yields the high word, mul the low word). The
// product is then accumulated in two identical adds/adcs/adc passes,
// first onto c2:c1:c0 and then onto the intermediate r2:r1:r0, with the
// carry rippling from the low word up into r2 in each pass.
+ "umulh %[x1], %[a], %[b] \n" + "mul %[x0], %[a], %[b] \n" + "adds %[r0], %[c0], %[x0] \n" + "adcs %[r1], %[c1], %[x1] \n" + "adc %[r2], xzr, %[c2] \n" + "adds %[r0], %[r0], %[x0] \n" + "adcs %[r1], %[r1], %[x1] \n" + "adc %[r2], xzr, %[r2] \n"
// Outputs are marked early-clobber (=&r) so the compiler will not place
// them in a register that also holds an input; the asm writes outputs
// before it has finished reading its inputs. The cc clobber covers the
// condition flags modified by adds and adcs.
+ : [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1), + [x0]"=&r"(x0) + : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0) + : "cc"); + + *out_r2 = r2; + *out_r1 = r1; + *out_r0 = r0; +} + #define HAVE_BN_QWMULW_ADDW static inline void |