diff options
author | Joel Sing <jsing@cvs.openbsd.org> | 2023-03-07 09:42:10 +0000 |
---|---|---|
committer | Joel Sing <jsing@cvs.openbsd.org> | 2023-03-07 09:42:10 +0000 |
commit | ea9ac9be462b77e7f06448a9c1520fabc3af5d7e (patch) | |
tree | 55bb061e198f1f27da13dc894a65302276ddb35a /lib/libcrypto/bn | |
parent | 423fa3ead93633014105fce54b83577800d02f4e (diff) |
Improve bn_montgomery_multiply_words().
Rather than calling bn_mul_add_words() twice - once to multiply and once
to reduce - perform the multiplication and reduction in a single pass using
bn_mulw_addw_addw() directly. Also simplify the addition of the resulting
carries, which in turn allows us to avoid zeroing the top half of the
temporary words.
This provides a ~20-25% performance improvement for RSA operations on
aarch64.
ok tb@
Diffstat (limited to 'lib/libcrypto/bn')
-rw-r--r-- | lib/libcrypto/bn/bn_mont.c | 23 |
1 file changed, 13 insertions, 10 deletions
diff --git a/lib/libcrypto/bn/bn_mont.c b/lib/libcrypto/bn/bn_mont.c index 314d6837825..ed49ec83eb5 100644 --- a/lib/libcrypto/bn/bn_mont.c +++ b/lib/libcrypto/bn/bn_mont.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_mont.c,v 1.51 2023/03/07 06:28:36 jsing Exp $ */ +/* $OpenBSD: bn_mont.c,v 1.52 2023/03/07 09:42:09 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -345,19 +345,22 @@ void bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len) { - BN_ULONG carry, mask; - int i; + BN_ULONG carry1, carry2, mask, w, x; + int i, j; - for (i = 0; i < n_len * 2 + 2; i++) + for (i = 0; i <= n_len; i++) tp[i] = 0; for (i = 0; i < n_len; i++) { - carry = bn_mul_add_words(tp, ap, n_len, bp[i]); - bn_addw(tp[n_len], carry, &tp[n_len + 1], &tp[n_len]); - - carry = bn_mul_add_words(tp, np, n_len, tp[0] * n0); - bn_addw(tp[n_len], carry, &carry, &tp[n_len]); - bn_addw(tp[n_len + 1], carry, &carry, &tp[n_len + 1]); + /* Compute new t[0] * n0, as we need it inside the loop. */ + w = (ap[0] * bp[i] + tp[0]) * n0; + + carry1 = carry2 = 0; + for (j = 0; j < n_len; j++) { + bn_mulw_addw_addw(ap[j], bp[i], tp[j], carry1, &carry1, &x); + bn_mulw_addw_addw(np[j], w, x, carry2, &carry2, &tp[j]); + } + bn_addw_addw(carry1, carry2, tp[n_len], &tp[n_len + 1], &tp[n_len]); tp++; } |