diff options
Diffstat (limited to 'lib/libcrypto/bn')
-rw-r--r-- | lib/libcrypto/bn/Makefile.ssl | 135 | ||||
-rw-r--r-- | lib/libcrypto/bn/asm/README | 12 | ||||
-rw-r--r-- | lib/libcrypto/bn/asm/pa-risc2.s | 2024 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn.h | 12 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_asm.c | 11 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_blind.c | 4 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_ctx.c | 4 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_div.c | 2 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_err.c | 2 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_exp.c | 521 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_exp2.c | 357 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_lcl.h | 100 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_lib.c | 42 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_mont.c | 31 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_mul.c | 2 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_print.c | 12 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_rand.c | 10 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_recp.c | 4 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_shift.c | 2 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_sqr.c | 2 | ||||
-rw-r--r-- | lib/libcrypto/bn/bn_word.c | 17 | ||||
-rw-r--r-- | lib/libcrypto/bn/vms-helper.c | 2 |
22 files changed, 2351 insertions, 957 deletions
diff --git a/lib/libcrypto/bn/Makefile.ssl b/lib/libcrypto/bn/Makefile.ssl index beb9c1b5231..17b72d577f3 100644 --- a/lib/libcrypto/bn/Makefile.ssl +++ b/lib/libcrypto/bn/Makefile.ssl @@ -170,118 +170,143 @@ clean: bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_add.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_add.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_add.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_add.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_asm.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_asm.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_asm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_asm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_blind.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h +bn_blind.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_blind.o: ../../include/openssl/opensslconf.h bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_blind.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +bn_blind.o: ../cryptlib.h bn_lcl.h bn_ctx.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_ctx.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_ctx.o: ../../include/openssl/stack.h ../cryptlib.h +bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_ctx.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_div.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_div.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h -bn_err.o: ../../include/openssl/bn.h ../../include/openssl/err.h -bn_err.o: ../../include/openssl/opensslconf.h +bn_div.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_div.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h +bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h +bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/err.h +bn_err.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h +bn_err.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h +bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_exp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_exp.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_exp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_exp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_exp2.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_exp2.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_gcd.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_gcd.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_lib.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_lib.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_lib.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_mont.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mont.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_mont.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_mont.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mpi.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_mpi.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_mul.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_mul.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_mul.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_mul.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_prime.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h +bn_prime.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_prime.o: ../../include/openssl/opensslconf.h bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h bn_prime.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.h +bn_prime.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_prime.h bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_print.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h +bn_print.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_print.o: ../../include/openssl/opensslconf.h bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_print.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +bn_print.o: ../cryptlib.h bn_lcl.h bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_rand.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h -bn_rand.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_rand.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_rand.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h +bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h bn_rand.o: ../cryptlib.h bn_lcl.h bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_recp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_recp.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_recp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_recp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_shift.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h +bn_shift.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_shift.o: ../../include/openssl/opensslconf.h bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_shift.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +bn_shift.o: ../cryptlib.h bn_lcl.h bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_sqr.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_sqr.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h -bn_word.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h -bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h -bn_word.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h +bn_word.o: ../../include/openssl/err.h ../../include/openssl/lhash.h +bn_word.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h diff --git a/lib/libcrypto/bn/asm/README b/lib/libcrypto/bn/asm/README index 86bf64cfc2f..a0fe58a6771 100644 --- a/lib/libcrypto/bn/asm/README +++ b/lib/libcrypto/bn/asm/README @@ -15,9 +15,9 @@ On the 2 alpha C compilers I had access to, it was not possible to do were 64 bits). So the hand assember gives access to the 128 bit result and a 2 times speedup :-). -There are 2 versions of assember for the HP PA-RISC. -pa-risc.s is the origional one which works fine. -pa-risc2.s is a new version that often generates warnings but if the -tests pass, it gives performance that is over 2 times faster than -pa-risc.s. -Both were generated using gcc :-) +There are 3 versions of assember for the HP PA-RISC. + +pa-risc.s is the origional one which works fine and generated using gcc :-) + +pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations +by Chris Ruemmler from HP (with some help from the HP C compiler). diff --git a/lib/libcrypto/bn/asm/pa-risc2.s b/lib/libcrypto/bn/asm/pa-risc2.s index c2725996a45..7239aa2c762 100644 --- a/lib/libcrypto/bn/asm/pa-risc2.s +++ b/lib/libcrypto/bn/asm/pa-risc2.s @@ -1,416 +1,1618 @@ - .SPACE $PRIVATE$ - .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 - .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 - .SPACE $TEXT$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 - .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY - .IMPORT $global$,DATA - .IMPORT $$dyncall,MILLICODE -; gcc_compiled.: - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +; +; PA-RISC 2.0 implementation of bn_asm code, based on the +; 64-bit version of the code. This code is effectively the +; same as the 64-bit version except the register model is +; slightly different given all values must be 32-bit between +; function calls. Thus the 64-bit return values are returned +; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit +; +; +; This code is approximately 2x faster than the C version +; for RSA/DSA. +; +; See http://devresource.hp.com/ for more details on the PA-RISC +; architecture. Also see the book "PA-RISC 2.0 Architecture" +; by Gerry Kane for information on the instruction set architecture. +; +; Code written by Chris Ruemmler (with some help from the HP C +; compiler). +; +; The code compiles with HP's assembler +; + + .level 2.0N + .space $TEXT$ + .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY + +; +; Global Register definitions used for the routines. +; +; Some information about HP's runtime architecture for 32-bits. +; +; "Caller save" means the calling function must save the register +; if it wants the register to be preserved. +; "Callee save" means if a function uses the register, it must save +; the value before using it. +; +; For the floating point registers +; +; "caller save" registers: fr4-fr11, fr22-fr31 +; "callee save" registers: fr12-fr21 +; "special" registers: fr0-fr3 (status and exception registers) +; +; For the integer registers +; value zero : r0 +; "caller save" registers: r1,r19-r26 +; "callee save" registers: r3-r18 +; return register : r2 (rp) +; return values ; r28,r29 (ret0,ret1) +; Stack pointer ; r30 (sp) +; millicode return ptr ; r31 (also a caller save register) + + +; +; Arguments to the routines +; +r_ptr .reg %r26 +a_ptr .reg %r25 +b_ptr .reg %r24 +num .reg %r24 +n .reg %r23 + +; +; Note that the "w" argument for bn_mul_add_words and bn_mul_words +; is passed on the stack at a delta of -56 from the top of stack +; as the routine is entered. +; + +; +; Globals used in some routines +; + +top_overflow .reg %r23 +high_mask .reg %r22 ; value 0xffffffff80000000L + + +;------------------------------------------------------------------------------ +; +; bn_mul_add_words +; +;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, +; int num, BN_ULONG w) +; +; arg0 = r_ptr +; arg1 = a_ptr +; arg3 = num +; -56(sp) = w +; +; Local register definitions +; + +fm1 .reg %fr22 +fm .reg %fr23 +ht_temp .reg %fr24 +ht_temp_1 .reg %fr25 +lt_temp .reg %fr26 +lt_temp_1 .reg %fr27 +fm1_1 .reg %fr28 +fm_1 .reg %fr29 + +fw_h .reg %fr7L +fw_l .reg %fr7R +fw .reg %fr7 + +fht_0 .reg %fr8L +flt_0 .reg %fr8R +t_float_0 .reg %fr8 + +fht_1 .reg %fr9L +flt_1 .reg %fr9R +t_float_1 .reg %fr9 + +tmp_0 .reg %r31 +tmp_1 .reg %r21 +m_0 .reg %r20 +m_1 .reg %r19 +ht_0 .reg %r1 +ht_1 .reg %r3 +lt_0 .reg %r4 +lt_1 .reg %r5 +m1_0 .reg %r6 +m1_1 .reg %r7 +rp_val .reg %r8 +rp_val_1 .reg %r9 + bn_mul_add_words - .PROC - .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r4,64(0,%r30) - copy %r24,%r31 - stw %r3,-60(0,%r30) - ldi 0,%r20 - ldo 12(%r26),%r2 - stw %r23,-16(0,%r30) - copy %r25,%r3 - ldo 12(%r3),%r1 - fldws -16(0,%r30),%fr8L -L$0010 - copy %r20,%r25 - ldi 0,%r24 - fldws 0(0,%r3),%fr9L - ldw 0(0,%r26),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,0(0,%r26) - copy %r20,%r25 - ldi 0,%r24 - fldws -8(0,%r1),%fr9L - ldw -8(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,-8(0,%r2) - copy %r20,%r25 - ldi 0,%r24 - fldws -4(0,%r1),%fr9L - ldw -4(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,-4(0,%r2) - copy %r20,%r25 - ldi 0,%r24 - fldws 0(0,%r1),%fr9L - ldw 0(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,0(0,%r2) - ldo 16(%r1),%r1 - ldo 16(%r3),%r3 - ldo 16(%r2),%r2 - bl L$0010,0 - ldo 16(%r26),%r26 -L$0011 - copy %r20,%r28 - ldw -84(0,%r30),%r2 - ldw -60(0,%r30),%r3 - bv 0(%r2) - ldwm -64(0,%r30),%r4 - .EXIT - .PROCEND - .align 4 - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR + .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN + .proc + .callinfo frame=128 + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP ; Needed to make the loop 16-byte aligned + NOP ; needed to make the loop 16-byte aligned + + STD %r5,16(%sp) ; save r5 + NOP + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + + STD %r8,40(%sp) ; save r8 + STD %r9,48(%sp) ; save r9 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit + LDO 128(%sp),%sp ; bump stack + + ; + ; The loop is unrolled twice, so if there is only 1 number + ; then go straight to the cleanup code. + ; + CMPIB,= 1,num,bn_mul_add_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_add_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDD 8(r_ptr),rp_val_1 ; rp[1] + + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1[0] + FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] + + XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h + XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m[0] + FSTD fm_1,-40(%sp) ; -40(sp) = m[1] + + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 + + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 + + LDD -8(%sp),m_0 ; m[0] + LDD -40(%sp),m_1 ; m[1] + LDD -16(%sp),m1_0 ; m1[0] + LDD -48(%sp),m1_1 ; m1[1] + + LDD -24(%sp),ht_0 ; ht[0] + LDD -56(%sp),ht_1 ; ht[1] + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; + + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) + ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) + + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) + ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) + EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 + + EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 + ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) + ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) + + ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + + ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + + LDO -2(num),num ; num = num - 2; + ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + + ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] + ADD,DC ht_1,%r0,%ret1 ; ht[1]++ + LDO 16(a_ptr),a_ptr ; a_ptr += 2 + + STD lt_1,8(r_ptr) ; rp[1] = lt[1] + CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do + LDO 16(r_ptr),r_ptr ; r_ptr += 2 + + CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_add_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDO 8(a_ptr),a_ptr ; a_ptr++ + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 ; m1 = temp1 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD %ret1,tmp_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] + ADD,DC ht_0,%r0,%ret1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_add_words_exit + .EXIT + + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -80(%sp),%r9 ; restore r9 + LDD -88(%sp),%r8 ; restore r8 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +; +; arg0 = rp +; arg1 = ap +; arg3 = num +; w on stack at -56(sp) + bn_mul_words - .PROC - .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 - .ENTRY - stw %r2,-20(0,%r30) - copy %r25,%r2 - stwm %r4,64(0,%r30) - copy %r24,%r19 - ldi 0,%r28 - stw %r23,-16(0,%r30) - ldo 12(%r26),%r31 - ldo 12(%r2),%r29 - fldws -16(0,%r30),%fr8L -L$0026 - fldws 0(0,%r2),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,0(0,%r26) - fldws -8(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,-8(0,%r31) - fldws -4(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,-4(0,%r31) - fldws 0(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,0(0,%r31) - ldo 16(%r29),%r29 - ldo 16(%r2),%r2 - ldo 16(%r31),%r31 - bl L$0026,0 - ldo 16(%r26),%r26 -L$0027 - ldw -84(0,%r30),%r2 - bv 0(%r2) - ldwm -64(0,%r30),%r4 - .EXIT - .PROCEND - .align 4 - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR + .proc + .callinfo frame=128 + .entry + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; See if only 1 word to do, thus just do cleanup + ; + CMPIB,= 1,num,bn_mul_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l + + FSTD fm1,-16(%sp) ; -16(sp) = m1 + FSTD fm1_1,-48(%sp) ; -48(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h + + FSTD fm,-8(%sp) ; -8(sp) = m + FSTD fm_1,-40(%sp) ; -40(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h + + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt + LDD -8(%sp),m_0 + LDD -40(%sp),m_1 + + LDD -16(%sp),m1_0 + LDD -48(%sp),m1_1 + LDD -24(%sp),ht_0 + LDD -56(%sp),ht_1 + + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) + ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + EXTRD,U tmp_1,31,32,m_1 ; m>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD lt_1,m1_1,lt_1 ; lt = lt+m1; + ADD,DC ht_1,%r0,ht_1 ; ht++ + ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) + ADD,DC ht_1,%r0,ht_1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + STD lt_1,8(r_ptr) ; rp[1] = lt + + COPY ht_1,%ret1 ; carry = ht + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_mul_words_unroll2 + LDO 16(r_ptr),r_ptr ; rp++ + + CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt= lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD %ret1,lt_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + COPY ht_0,%ret1 ; copy carry + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_words_exit + .EXIT + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND + +;---------------------------------------------------------------------------- +; +;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; + bn_sqr_words + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + CMPIB,>= 0,num,bn_sqr_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; If only 1, the goto straight to cleanup + ; + CMPIB,= 1,num,bn_sqr_words_single_top + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + +bn_sqr_words_unroll2 + FLDD 0(a_ptr),t_float_0 ; a[0] + FLDD 8(a_ptr),t_float_1 ; a[1] + XMPYU fht_0,flt_0,fm ; m[0] + XMPYU fht_1,flt_1,fm_1 ; m[1] + + FSTD fm,-24(%sp) ; store m[0] + FSTD fm_1,-56(%sp) ; store m[1] + XMPYU flt_0,flt_0,lt_temp ; lt[0] + XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] + + FSTD lt_temp,-16(%sp) ; store lt[0] + FSTD lt_temp_1,-48(%sp) ; store lt[1] + XMPYU fht_0,fht_0,ht_temp ; ht[0] + XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] + + FSTD ht_temp,-8(%sp) ; store ht[0] + FSTD ht_temp_1,-40(%sp) ; store ht[1] + LDD -24(%sp),m_0 + LDD -56(%sp),m_1 + + AND m_0,high_mask,tmp_0 ; m[0] & Mask + AND m_1,high_mask,tmp_1 ; m[1] & Mask + DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 + DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 + + LDD -16(%sp),lt_0 + LDD -48(%sp),lt_1 + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 + EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 + + LDD -8(%sp),ht_0 + LDD -40(%sp),ht_1 + ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 + ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 + + ADD lt_0,m_0,lt_0 ; lt = lt+m + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + STD ht_0,8(r_ptr) ; rp[1] = ht[1] + + ADD lt_1,m_1,lt_1 ; lt = lt+m + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_1,16(r_ptr) ; rp[2] = lt[1] + STD ht_1,24(r_ptr) ; rp[3] = ht[1] + + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_sqr_words_unroll2 + LDO 32(r_ptr),r_ptr ; rp += 4 + + CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_sqr_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,flt_0,fm ; m + FSTD fm,-24(%sp) ; store m + + XMPYU flt_0,flt_0,lt_temp ; lt + FSTD lt_temp,-16(%sp) ; store lt + + XMPYU fht_0,fht_0,ht_temp ; ht + FSTD ht_temp,-8(%sp) ; store ht + + LDD -24(%sp),m_0 ; load m + AND m_0,high_mask,tmp_0 ; m & Mask + DEPD,Z m_0,30,31,m_0 ; m << 32+1 + LDD -16(%sp),lt_0 ; lt + + LDD -8(%sp),ht_0 ; ht + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 + ADD m_0,lt_0,lt_0 ; lt = lt+m + ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 + ADD,DC ht_0,%r0,ht_0 ; ht++ + + STD lt_0,0(r_ptr) ; rp[0] = lt + STD ht_0,8(r_ptr) ; rp[1] = ht + +bn_sqr_words_exit + .EXIT + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + .PROCEND ;in=23,24,25,26,29;out=28; + + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t .reg %r22 +b .reg %r21 +l .reg %r20 + +bn_add_words + .proc + .entry + .callinfo + .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + CMPIB,>= 0,n,bn_add_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_add_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_add_words_unroll2 + LDD 0(a_ptr),t + LDD 0(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + + LDD 8(a_ptr),t + LDD 8(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_add_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_add_words_exit ; are we done? + +bn_add_words_single_top + LDD 0(a_ptr),t + LDD 0(b_ptr),b + + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + +bn_add_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t1 .reg %r22 +t2 .reg %r21 +sub_tmp1 .reg %r20 +sub_tmp2 .reg %r19 + + +bn_sub_words + .proc + .callinfo + .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + CMPIB,>= 0,n,bn_sub_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_sub_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_sub_words_unroll2 + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,0(r_ptr) + + LDD 8(a_ptr),t1 + LDD 8(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_sub_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? + +bn_sub_words_single_top + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + + STD sub_tmp1,0(r_ptr) + +bn_sub_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;------------------------------------------------------------------------------ +; +; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) +; +; arg0 = h +; arg1 = l +; arg2 = d +; +; This is mainly just output from the HP C compiler. +; +;------------------------------------------------------------------------------ +bn_div_words .PROC - .CALLINFO FRAME=0,NO_CALLS - .ENTRY - ldo 28(%r26),%r19 - ldo 12(%r25),%r28 -L$0042 - fldws 0(0,%r25),%fr8L - fldws 0(0,%r25),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,0(0,%r26) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-24(0,%r19) - fldws -8(0,%r28),%fr8L - fldws -8(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-20(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-16(0,%r19) - fldws -4(0,%r28),%fr8L - fldws -4(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-12(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-8(0,%r19) - fldws 0(0,%r28),%fr8L - fldws 0(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-4(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,0(0,%r19) - ldo 16(%r28),%r28 - ldo 16(%r25),%r25 - ldo 32(%r19),%r19 - bl L$0042,0 - ldo 32(%r26),%r26 -L$0049 - bv,n 0(%r2) - .EXIT - .PROCEND - .IMPORT BN_num_bits_word,CODE - .IMPORT fprintf,CODE - .IMPORT __iob,DATA - .SPACE $TEXT$ - .SUBSPA $LIT$ - - .align 4 -L$C0000 - .STRING "Division would overflow (%d)\x0a\x00" - .IMPORT abort,CODE - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR -bn_div64 + .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN + .IMPORT BN_num_bits_word,CODE + .IMPORT __iob,DATA + .IMPORT fprintf,CODE + .IMPORT abort,CODE + .IMPORT $$div2U,MILLICODE + .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE + .ENTRY + STW %r2,-20(%r30) ;offset 0x8ec + STW,MA %r3,192(%r30) ;offset 0x8f0 + STW %r4,-188(%r30) ;offset 0x8f4 + DEPD %r5,31,32,%r6 ;offset 0x8f8 + STD %r6,-184(%r30) ;offset 0x8fc + DEPD %r7,31,32,%r8 ;offset 0x900 + STD %r8,-176(%r30) ;offset 0x904 + STW %r9,-168(%r30) ;offset 0x908 + LDD -248(%r30),%r3 ;offset 0x90c + COPY %r26,%r4 ;offset 0x910 + COPY %r24,%r5 ;offset 0x914 + DEPD %r25,31,32,%r4 ;offset 0x918 + CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c + DEPD %r23,31,32,%r5 ;offset 0x920 + MOVIB,TR -1,%r29,$00060002 ;offset 0x924 + EXTRD,U %r29,31,32,%r28 ;offset 0x928 +$0006002A + LDO -1(%r29),%r29 ;offset 0x92c + SUB %r23,%r7,%r23 ;offset 0x930 +$00060024 + SUB %r4,%r31,%r25 ;offset 0x934 + AND %r25,%r19,%r26 ;offset 0x938 + CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c + DEPD,Z %r25,31,32,%r20 ;offset 0x940 + OR %r20,%r24,%r21 ;offset 0x944 + CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 + SUB %r31,%r2,%r31 ;offset 0x94c +$00060046 +$0006002E + DEPD,Z %r23,31,32,%r25 ;offset 0x950 + EXTRD,U %r23,31,32,%r26 ;offset 0x954 + AND %r25,%r19,%r24 ;offset 0x958 + ADD,L %r31,%r26,%r31 ;offset 0x95c + CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 + LDO 1(%r31),%r31 ;offset 0x964 +$00060032 + CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 + LDO -1(%r29),%r29 ;offset 0x96c + ADD,L %r4,%r3,%r4 ;offset 0x970 +$00060036 + ADDIB,=,N -1,%r8,$D0 ;offset 0x974 + SUB %r5,%r24,%r28 ;offset 0x978 +$0006003A + SUB %r4,%r31,%r24 ;offset 0x97c + SHRPD %r24,%r28,32,%r4 ;offset 0x980 + DEPD,Z %r29,31,32,%r9 ;offset 0x984 + DEPD,Z %r28,31,32,%r5 ;offset 0x988 +$0006001C + EXTRD,U %r4,31,32,%r31 ;offset 0x98c + CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 + MOVB,TR %r6,%r29,$D1 ;offset 0x994 + STD %r29,-152(%r30) ;offset 0x998 +$0006000C + EXTRD,U %r3,31,32,%r25 ;offset 0x99c + COPY %r3,%r26 ;offset 0x9a0 + EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 + EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 + .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; + B,L BN_num_bits_word,%r2 ;offset 0x9ac + EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 + LDI 64,%r20 ;offset 0x9b4 + DEPD %r7,31,32,%r5 ;offset 0x9b8 + DEPD %r8,31,32,%r4 ;offset 0x9bc + DEPD %r9,31,32,%r3 ;offset 0x9c0 + CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 + COPY %r28,%r24 ;offset 0x9c8 + MTSARCM %r24 ;offset 0x9cc + DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 + CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 +$00060012 + SUBI 64,%r24,%r31 ;offset 0x9d8 + CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc + SUB %r4,%r3,%r4 ;offset 0x9e0 +$00060016 + CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 + COPY %r0,%r9 ;offset 0x9e8 + MTSARCM %r31 ;offset 0x9ec + DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 + SUBI 64,%r31,%r26 ;offset 0x9f4 + MTSAR %r26 ;offset 0x9f8 + SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc + MTSARCM %r31 ;offset 0xa00 + DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 +$0006001A + DEPDI,Z -1,31,32,%r19 ;offset 0xa08 + AND %r3,%r19,%r29 ;offset 0xa0c + EXTRD,U %r29,31,32,%r2 ;offset 0xa10 + DEPDI,Z -1,63,32,%r6 ;offset 0xa14 + MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 + EXTRD,U %r3,63,32,%r7 ;offset 0xa1c +$D2 + ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 + LDIL LR'C$7,%r21 ;offset 0xa24 + LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 + .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; + B,L fprintf,%r2 ;offset 0xa2c + LDO RR'C$7(%r21),%r25 ;offset 0xa30 + .CALL ; + B,L abort,%r2 ;offset 0xa34 + NOP ;offset 0xa38 + B $D3 ;offset 0xa3c + LDW -212(%r30),%r2 ;offset 0xa40 +$00060020 + COPY %r4,%r26 ;offset 0xa44 + EXTRD,U %r4,31,32,%r25 ;offset 0xa48 + COPY %r2,%r24 ;offset 0xa4c + .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) + B,L $$div2U,%r31 ;offset 0xa50 + EXTRD,U %r2,31,32,%r23 ;offset 0xa54 + DEPD %r28,31,32,%r29 ;offset 0xa58 +$00060022 + STD %r29,-152(%r30) ;offset 0xa5c +$D1 + AND %r5,%r19,%r24 ;offset 0xa60 + EXTRD,U %r24,31,32,%r24 ;offset 0xa64 + STW %r2,-160(%r30) ;offset 0xa68 + STW %r7,-128(%r30) ;offset 0xa6c + FLDD -152(%r30),%fr4 ;offset 0xa70 + FLDD -152(%r30),%fr7 ;offset 0xa74 + FLDW -160(%r30),%fr8L ;offset 0xa78 + FLDW -128(%r30),%fr5L ;offset 0xa7c + XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 + FSTD %fr10,-136(%r30) ;offset 0xa84 + XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 + FSTD %fr22,-144(%r30) ;offset 0xa8c + XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 + XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 + FSTD %fr11,-112(%r30) ;offset 0xa98 + FSTD %fr23,-120(%r30) ;offset 0xa9c + LDD -136(%r30),%r28 ;offset 0xaa0 + DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 + LDD -144(%r30),%r20 ;offset 0xaa8 + ADD,L %r20,%r31,%r31 ;offset 0xaac + LDD -112(%r30),%r22 ;offset 0xab0 + DEPD,Z %r22,31,32,%r22 ;offset 0xab4 + LDD -120(%r30),%r21 ;offset 0xab8 + B $00060024 ;offset 0xabc + ADD,L %r21,%r22,%r23 ;offset 0xac0 +$D0 + OR %r9,%r29,%r29 ;offset 0xac4 +$00060040 + EXTRD,U %r29,31,32,%r28 ;offset 0xac8 +$00060002 +$L2 + LDW -212(%r30),%r2 ;offset 0xacc +$D3 + LDW -168(%r30),%r9 ;offset 0xad0 + LDD -176(%r30),%r8 ;offset 0xad4 + EXTRD,U %r8,31,32,%r7 ;offset 0xad8 + LDD -184(%r30),%r6 ;offset 0xadc + EXTRD,U %r6,31,32,%r5 ;offset 0xae0 + LDW -188(%r30),%r4 ;offset 0xae4 + BVE (%r2) ;offset 0xae8 + .EXIT + LDW,MB -192(%r30),%r3 ;offset 0xaec + .PROCEND ;in=23,25;out=28,29;fpin=105,107; + + + + +;---------------------------------------------------------------------------- +; +; Registers to hold 64-bit values to manipulate. The "L" part +; of the register corresponds to the upper 32-bits, while the "R" +; part corresponds to the lower 32-bits +; +; Note, that when using b6 and b7, the code must save these before +; using them because they are callee save registers +; +; +; Floating point registers to use to save values that +; are manipulated. These don't collide with ftemp1-6 and +; are all caller save registers +; +a0 .reg %fr22 +a0L .reg %fr22L +a0R .reg %fr22R + +a1 .reg %fr23 +a1L .reg %fr23L +a1R .reg %fr23R + +a2 .reg %fr24 +a2L .reg %fr24L +a2R .reg %fr24R + +a3 .reg %fr25 +a3L .reg %fr25L +a3R .reg %fr25R + +a4 .reg %fr26 +a4L .reg %fr26L +a4R .reg %fr26R + +a5 .reg %fr27 +a5L .reg %fr27L +a5R .reg %fr27R + +a6 .reg %fr28 +a6L .reg %fr28L +a6R .reg %fr28R + +a7 .reg %fr29 +a7L .reg %fr29L +a7R .reg %fr29R + +b0 .reg %fr30 +b0L .reg %fr30L +b0R .reg %fr30R + +b1 .reg %fr31 +b1L .reg %fr31L +b1R .reg %fr31R + +; +; Temporary floating point variables, these are all caller save +; registers +; +ftemp1 .reg %fr4 +ftemp2 .reg %fr5 +ftemp3 .reg %fr6 +ftemp4 .reg %fr7 + +; +; The B set of registers when used. +; + +b2 .reg %fr8 +b2L .reg %fr8L +b2R .reg %fr8R + +b3 .reg %fr9 +b3L .reg %fr9L +b3R .reg %fr9R + +b4 .reg %fr10 +b4L .reg %fr10L +b4R .reg %fr10R + +b5 .reg %fr11 +b5L .reg %fr11L +b5R .reg %fr11R + +b6 .reg %fr12 +b6L .reg %fr12L +b6R .reg %fr12R + +b7 .reg %fr13 +b7L .reg %fr13L +b7R .reg %fr13R + +c1 .reg %r21 ; only reg +temp1 .reg %r20 ; only reg +temp2 .reg %r19 ; only reg +temp3 .reg %r31 ; only reg + +m1 .reg %r28 +c2 .reg %r23 +high_one .reg %r1 +ht .reg %r6 +lt .reg %r5 +m .reg %r4 +c3 .reg %r3 + +SQR_ADD_C .macro A0L,A0R,C1,C2,C3 + XMPYU A0L,A0R,ftemp1 ; m + FSTD ftemp1,-24(%sp) ; store m + + XMPYU A0R,A0R,ftemp2 ; lt + FSTD ftemp2,-16(%sp) ; store lt + + XMPYU A0L,A0L,ftemp3 ; ht + FSTD ftemp3,-8(%sp) ; store ht + + LDD -24(%sp),m ; load m + AND m,high_mask,temp2 ; m & Mask + DEPD,Z m,30,31,temp3 ; m << 32+1 + LDD -16(%sp),lt ; lt + + LDD -8(%sp),ht ; ht + EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 + ADD temp3,lt,lt ; lt = lt+m + ADD,L ht,temp1,ht ; ht += temp1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; ht++ + + ADD C2,ht,C2 ; c2=c2+ht + ADD,DC C3,%r0,C3 ; c3++ +.endm + +SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 + XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,A1L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,A1R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,A1L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD ht,ht,ht ; ht=ht+ht; + ADD,DC C3,%r0,C3 ; add in carry (c3++) + + ADD lt,lt,lt ; lt=lt+lt; + ADD,DC ht,%r0,ht ; add in carry (ht++) + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) + LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + +; +;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba8 .PROC - .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r8,128(0,%r30) - stw %r7,-124(0,%r30) - stw %r4,-112(0,%r30) - stw %r3,-108(0,%r30) - copy %r26,%r3 - copy %r25,%r4 - stw %r6,-120(0,%r30) - ldi 0,%r7 - stw %r5,-116(0,%r30) - movb,<> %r24,%r5,L$0051 - ldi 2,%r6 - bl L$0068,0 - ldi -1,%r28 -L$0051 - .CALL ARGW0=GR - bl BN_num_bits_word,%r2 - copy %r5,%r26 - copy %r28,%r24 - ldi 32,%r19 - comb,= %r19,%r24,L$0052 - subi 31,%r24,%r19 - mtsar %r19 - zvdepi 1,32,%r19 - comb,>>= %r19,%r3,L$0052 - addil LR'__iob-$global$+32,%r27 - ldo RR'__iob-$global$+32(%r1),%r26 - ldil LR'L$C0000,%r25 - .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR - bl fprintf,%r2 - ldo RR'L$C0000(%r25),%r25 - .CALL - bl abort,%r2 - nop -L$0052 - comb,>> %r5,%r3,L$0053 - subi 32,%r24,%r24 - sub %r3,%r5,%r3 -L$0053 - comib,= 0,%r24,L$0054 - subi 31,%r24,%r19 - mtsar %r19 - zvdep %r5,32,%r5 - zvdep %r3,32,%r21 - subi 32,%r24,%r20 - mtsar %r20 - vshd 0,%r4,%r20 - or %r21,%r20,%r3 - mtsar %r19 - zvdep %r4,32,%r4 -L$0054 - extru %r5,15,16,%r23 - extru %r5,31,16,%r28 -L$0055 - extru %r3,15,16,%r19 - comb,<> %r23,%r19,L$0058 - copy %r3,%r26 - bl L$0059,0 - zdepi -1,31,16,%r29 -L$0058 - .IMPORT $$divU,MILLICODE - bl $$divU,%r31 - copy %r23,%r25 -L$0059 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r23,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr9 - ldw -16(0,%r30),%r8 - fstws %fr9R,-16(0,%r30) - copy %r8,%r22 - ldw -16(0,%r30),%r8 - extru %r4,15,16,%r24 - copy %r8,%r21 -L$0060 - sub %r3,%r21,%r20 - copy %r20,%r19 - depi 0,31,16,%r19 - comib,<> 0,%r19,L$0061 - zdep %r20,15,16,%r19 - addl %r19,%r24,%r19 - comb,>>= %r19,%r22,L$0061 - sub %r22,%r28,%r22 - sub %r21,%r23,%r21 - bl L$0060,0 - ldo -1(%r29),%r29 -L$0061 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r8 - stw %r23,-16(0,%r30) - fldws -16(0,%r30),%fr10R - copy %r8,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - extru %r19,15,16,%r20 - ldw -16(0,%r30),%r8 - zdep %r19,15,16,%r19 - addl %r8,%r20,%r20 - comclr,<<= %r19,%r4,0 - addi 1,%r20,%r20 - comb,<<= %r20,%r3,L$0066 - sub %r4,%r19,%r4 - addl %r3,%r5,%r3 - ldo -1(%r29),%r29 -L$0066 - addib,= -1,%r6,L$0056 - sub %r3,%r20,%r3 - zdep %r29,15,16,%r7 - shd %r3,%r4,16,%r3 - bl L$0055,0 - zdep %r4,15,16,%r4 -L$0056 - or %r7,%r29,%r28 -L$0068 - ldw -148(0,%r30),%r2 - ldw -124(0,%r30),%r7 - ldw -120(0,%r30),%r6 - ldw -116(0,%r30),%r5 - ldw -112(0,%r30),%r4 - ldw -108(0,%r30),%r3 - bv 0(%r2) - ldwm -128(0,%r30),%r8 - .EXIT - .PROCEND + .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .ENTRY + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 + SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 + STD c2,56(r_ptr) ; r[7] = c2; + COPY %r0,c2 + + SQR_ADD_C a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 + SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 + STD c3,64(r_ptr) ; r[8] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 + STD c1,72(r_ptr) ; r[9] = c1; + COPY %r0,c1 + + SQR_ADD_C a5L,a5R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 + SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 + STD c2,80(r_ptr) ; r[10] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 + STD c3,88(r_ptr) ; r[11] = c3; + COPY %r0,c3 + + SQR_ADD_C a6L,a6R,c1,c2,c3 + SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 + STD c1,96(r_ptr) ; r[12] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 + STD c2,104(r_ptr) ; r[13] = c2; + COPY %r0,c2 + + SQR_ADD_C a7L,a7R,c3,c1,c2 + STD c3, 112(r_ptr) ; r[14] = c3 + STD c1, 120(r_ptr) ; r[15] = c1 + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + STD c2,56(r_ptr) ; r[7] = c2; + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--------------------------------------------------------------------------- + +MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 + XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,B0L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,B0R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,B0L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + + +; +;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba8 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + FLDD 32(b_ptr),b4 + FLDD 40(b_ptr),b5 + FLDD 48(b_ptr),b6 + FLDD 56(b_ptr),b7 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 + STD c1,48(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 + STD c2,56(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 + STD c3,64(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 + STD c1,72(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 + STD c2,80(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 + STD c3,88(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 + STD c1,96(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 + STD c2,104(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 + STD c3,112(r_ptr) + STD c1,120(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + STD c1,48(r_ptr) + STD c2,56(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SPACE $PRIVATE$,SORT=16 + .IMPORT $global$,DATA + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 +C$7 + .ALIGN 8 + .STRINGZ "Division would overflow (%d)\n" + .END diff --git a/lib/libcrypto/bn/bn.h b/lib/libcrypto/bn/bn.h index 009b0eb6856..1eb8395b25c 100644 --- a/lib/libcrypto/bn/bn.h +++ b/lib/libcrypto/bn/bn.h @@ -59,7 +59,7 @@ #ifndef HEADER_BN_H #define HEADER_BN_H -#ifndef WIN16 +#ifndef NO_FP_API #include <stdio.h> /* FILE */ #endif #include <openssl/opensslconf.h> @@ -233,7 +233,7 @@ typedef struct bignum_st BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ int top; /* Index of last used d +1. */ /* The next are internal book keeping for bn_expand. */ - int max; /* Size of the d array. */ + int dmax; /* Size of the d array. */ int neg; /* one if the number is negative */ int flags; } BIGNUM; @@ -364,6 +364,8 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,BN_CTX *ctx); int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); +int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2, BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx); int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, @@ -433,9 +435,9 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, /* library internal functions */ -#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->max)?\ +#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ (a):bn_expand2((a),(bits)/BN_BITS2+1)) -#define bn_wexpand(a,words) (((words) <= (a)->max)?(a):bn_expand2((a),(words))) +#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) BIGNUM *bn_expand2(BIGNUM *a, int words); #define bn_fix_top(a) \ @@ -483,7 +485,9 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); #define BN_F_BN_CTX_NEW 106 #define BN_F_BN_DIV 107 #define BN_F_BN_EXPAND2 108 +#define BN_F_BN_MOD_EXP2_MONT 118 #define BN_F_BN_MOD_EXP_MONT 109 +#define BN_F_BN_MOD_EXP_MONT_WORD 117 #define BN_F_BN_MOD_INVERSE 110 #define BN_F_BN_MOD_MUL_RECIPROCAL 111 #define BN_F_BN_MPI2BN 112 diff --git a/lib/libcrypto/bn/bn_asm.c b/lib/libcrypto/bn/bn_asm.c index 3329cc18e6c..44e52a40db8 100644 --- a/lib/libcrypto/bn/bn_asm.c +++ b/lib/libcrypto/bn/bn_asm.c @@ -227,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) #else -/* Divide h-l by d and return the result. */ +/* Divide h,l by d and return the result. */ /* I need to test this some more :-( */ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { @@ -237,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) if (d == 0) return(BN_MASK2); i=BN_num_bits_word(d); - if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) - { -#if !defined(NO_STDIO) && !defined(WIN16) - fprintf(stderr,"Division would overflow (%d)\n",i); -#endif - abort(); - } + assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i)); + i=BN_BITS2-i; if (h >= d) h-=d; diff --git a/lib/libcrypto/bn/bn_blind.c b/lib/libcrypto/bn/bn_blind.c index 1b1bb060463..2d287e6d1bb 100644 --- a/lib/libcrypto/bn/bn_blind.c +++ b/lib/libcrypto/bn/bn_blind.c @@ -67,7 +67,7 @@ BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod) bn_check_top(Ai); bn_check_top(mod); - if ((ret=(BN_BLINDING *)Malloc(sizeof(BN_BLINDING))) == NULL) + if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL) { BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE); return(NULL); @@ -91,7 +91,7 @@ void BN_BLINDING_free(BN_BLINDING *r) if (r->A != NULL) BN_free(r->A ); if (r->Ai != NULL) BN_free(r->Ai); - Free(r); + OPENSSL_free(r); } int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) diff --git a/lib/libcrypto/bn/bn_ctx.c b/lib/libcrypto/bn/bn_ctx.c index 46132fd1806..b1a8d7571e8 100644 --- a/lib/libcrypto/bn/bn_ctx.c +++ b/lib/libcrypto/bn/bn_ctx.c @@ -69,7 +69,7 @@ BN_CTX *BN_CTX_new(void) { BN_CTX *ret; - ret=(BN_CTX *)Malloc(sizeof(BN_CTX)); + ret=(BN_CTX *)OPENSSL_malloc(sizeof(BN_CTX)); if (ret == NULL) { BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); @@ -102,7 +102,7 @@ void BN_CTX_free(BN_CTX *ctx) for (i=0; i < BN_CTX_NUM; i++) BN_clear_free(&(ctx->bn[i])); if (ctx->flags & BN_FLG_MALLOCED) - Free(ctx); + OPENSSL_free(ctx); } void BN_CTX_start(BN_CTX *ctx) diff --git a/lib/libcrypto/bn/bn_div.c b/lib/libcrypto/bn/bn_div.c index 07af1d3b449..c3772c243be 100644 --- a/lib/libcrypto/bn/bn_div.c +++ b/lib/libcrypto/bn/bn_div.c @@ -205,7 +205,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_init(&wnum); wnum.d= &(snum->d[loop]); wnum.top= div_n; - wnum.max= snum->max+1; /* a bit of a lie */ + wnum.dmax= snum->dmax+1; /* a bit of a lie */ /* Get the top 2 words of sdiv */ /* i=sdiv->top; */ diff --git a/lib/libcrypto/bn/bn_err.c b/lib/libcrypto/bn/bn_err.c index 988270bcf4f..86550c4c21d 100644 --- a/lib/libcrypto/bn/bn_err.c +++ b/lib/libcrypto/bn/bn_err.c @@ -76,7 +76,9 @@ static ERR_STRING_DATA BN_str_functs[]= {ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, {ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, {ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, +{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"}, {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, +{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"}, {ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, {ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, {ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, diff --git a/lib/libcrypto/bn/bn_exp.c b/lib/libcrypto/bn/bn_exp.c index 0c116016754..d2c91628acb 100644 --- a/lib/libcrypto/bn/bn_exp.c +++ b/lib/libcrypto/bn/bn_exp.c @@ -55,18 +55,66 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + #include <stdio.h> #include "cryptlib.h" #include "bn_lcl.h" -#ifdef ATALLA -# include <alloca.h> -# include <atasi.h> -# include <assert.h> -# include <dlfcn.h> -#endif -#define TABLE_SIZE 16 +#define TABLE_SIZE 32 /* slow but works */ int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) @@ -91,42 +139,6 @@ err: return(r); } -#if 0 -/* this one works - simple but works */ -int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, BN_CTX *ctx) - { - int i,bits,ret=0; - BIGNUM *v,*tmp; - - BN_CTX_start(ctx); - v = BN_CTX_get(ctx); - tmp = BN_CTX_get(ctx); - if (v == NULL || tmp == NULL) goto err; - - if (BN_copy(v,a) == NULL) goto err; - bits=BN_num_bits(p); - - if (BN_is_odd(p)) - { if (BN_copy(r,a) == NULL) goto err; } - else { if (!BN_one(r)) goto err; } - - for (i=1; i<bits; i++) - { - if (!BN_sqr(tmp,v,ctx)) goto err; - if (!BN_mod(v,tmp,m,ctx)) goto err; - if (BN_is_bit_set(p,i)) - { - if (!BN_mul(tmp,r,v,ctx)) goto err; - if (!BN_mod(r,tmp,m,ctx)) goto err; - } - } - ret=1; -err: - BN_CTX_end(ctx); - return(ret); - } - -#endif /* this one works - simple but works */ int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx) @@ -163,172 +175,6 @@ err: return(ret); } -#ifdef ATALLA - -/* - * This routine will dynamically check for the existance of an Atalla AXL-200 - * SSL accelerator module. If one is found, the variable - * asi_accelerator_present is set to 1 and the function pointers - * ptr_ASI_xxxxxx above will be initialized to corresponding ASI API calls. - */ -typedef int tfnASI_GetPerformanceStatistics(int reset_flag, - unsigned int *ret_buf); -typedef int tfnASI_GetHardwareConfig(long card_num, unsigned int *ret_buf); -typedef int tfnASI_RSAPrivateKeyOpFn(RSAPrivateKey * rsaKey, - unsigned char *output, - unsigned char *input, - unsigned int modulus_len); - -static tfnASI_GetHardwareConfig *ptr_ASI_GetHardwareConfig; -static tfnASI_RSAPrivateKeyOpFn *ptr_ASI_RSAPrivateKeyOpFn; -static tfnASI_GetPerformanceStatistics *ptr_ASI_GetPerformanceStatistics; -static int asi_accelerator_present; -static int tried_atalla; - -void atalla_initialize_accelerator_handle(void) - { - void *dl_handle; - int status; - unsigned int config_buf[1024]; - static int tested; - - if(tested) - return; - - tested=1; - - bzero((void *)config_buf, 1024); - - /* - * Check to see if the library is present on the system - */ - dl_handle = dlopen("atasi.so", RTLD_NOW); - if (dl_handle == (void *) NULL) - { -/* printf("atasi.so library is not present on the system\n"); - printf("No HW acceleration available\n");*/ - return; - } - - /* - * The library is present. Now we'll check to insure that the - * LDM is up and running. First we'll get the address of the - * function in the atasi library that we need to see if the - * LDM is operating. - */ - - ptr_ASI_GetHardwareConfig = - (tfnASI_GetHardwareConfig *)dlsym(dl_handle,"ASI_GetHardwareConfig"); - - if (ptr_ASI_GetHardwareConfig) - { - /* - * We found the call, now we'll get our config - * status. If we get a non 0 result, the LDM is not - * running and we cannot use the Atalla ASI * - * library. - */ - status = (*ptr_ASI_GetHardwareConfig)(0L, config_buf); - if (status != 0) - { - printf("atasi.so library is present but not initialized\n"); - printf("No HW acceleration available\n"); - return; - } - } - else - { -/* printf("We found the library, but not the function. Very Strange!\n");*/ - return ; - } - - /* - * It looks like we have acceleration capabilities. Load up the - * pointers to our ASI API calls. - */ - ptr_ASI_RSAPrivateKeyOpFn= - (tfnASI_RSAPrivateKeyOpFn *)dlsym(dl_handle, "ASI_RSAPrivateKeyOpFn"); - if (ptr_ASI_RSAPrivateKeyOpFn == NULL) - { -/* printf("We found the library, but no RSA function. Very Strange!\n");*/ - return; - } - - ptr_ASI_GetPerformanceStatistics = - (tfnASI_GetPerformanceStatistics *)dlsym(dl_handle, "ASI_GetPerformanceStatistics"); - if (ptr_ASI_GetPerformanceStatistics == NULL) - { -/* printf("We found the library, but no stat function. Very Strange!\n");*/ - return; - } - - /* - * Indicate that acceleration is available - */ - asi_accelerator_present = 1; - -/* printf("This system has acceleration!\n");*/ - - return; - } - -/* make sure this only gets called once when bn_mod_exp calls bn_mod_exp_mont */ -int BN_mod_exp_atalla(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m) - { - unsigned char *abin; - unsigned char *pbin; - unsigned char *mbin; - unsigned char *rbin; - int an,pn,mn,ret; - RSAPrivateKey keydata; - - atalla_initialize_accelerator_handle(); - if(!asi_accelerator_present) - return 0; - - -/* We should be able to run without size testing */ -# define ASIZE 128 - an=BN_num_bytes(a); - pn=BN_num_bytes(p); - mn=BN_num_bytes(m); - - if(an <= ASIZE && pn <= ASIZE && mn <= ASIZE) - { - int size=mn; - - assert(an <= mn); - abin=alloca(size); - memset(abin,'\0',mn); - BN_bn2bin(a,abin+size-an); - - pbin=alloca(pn); - BN_bn2bin(p,pbin); - - mbin=alloca(size); - memset(mbin,'\0',mn); - BN_bn2bin(m,mbin+size-mn); - - rbin=alloca(size); - - memset(&keydata,'\0',sizeof keydata); - keydata.privateExponent.data=pbin; - keydata.privateExponent.len=pn; - keydata.modulus.data=mbin; - keydata.modulus.len=size; - - ret=(*ptr_ASI_RSAPrivateKeyOpFn)(&keydata,rbin,abin,keydata.modulus.len); -/*fprintf(stderr,"!%s\n",BN_bn2hex(a));*/ - if(!ret) - { - BN_bin2bn(rbin,keydata.modulus.len,r); -/*fprintf(stderr,"?%s\n",BN_bn2hex(r));*/ - return 1; - } - } - return 0; - } -#endif /* def ATALLA */ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx) @@ -339,13 +185,6 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, bn_check_top(p); bn_check_top(m); -#ifdef ATALLA - if(BN_mod_exp_atalla(r,a,p,m)) - return 1; -/* If it fails, try the other methods (but don't try atalla again) */ - tried_atalla=1; -#endif - #ifdef MONT_MUL_MOD /* I have finally been able to take out this pre-condition of * the top bit being set. It was caused by an error in BN_div @@ -354,7 +193,15 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ if (BN_is_odd(m)) - { ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); } + { + if (a->top == 1) + { + BN_ULONG A = a->d[0]; + ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); + } + else + ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); + } else #endif #ifdef RECP_MUL_MOD @@ -363,14 +210,10 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, { ret=BN_mod_exp_simple(r,a,p,m,ctx); } #endif -#ifdef ATALLA - tried_atalla=0; -#endif - return(ret); } -/* #ifdef RECP_MUL_MOD */ + int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx) { @@ -398,27 +241,22 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, ts=1; if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ - if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx)) - goto err; /* 2 */ - - if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits >= 256) - window=5; /* max size of window */ - else if (bits >= 128) - window=4; - else - window=3; - j=1<<(window-1); - for (i=1; i<j; i++) + window = BN_window_bits_for_exponent_size(bits); + if (window > 1) { - BN_init(&val[i]); - if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx)) - goto err; + if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx)) + goto err; /* 2 */ + j=1<<(window-1); + for (i=1; i<j; i++) + { + BN_init(&val[i]); + if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx)) + goto err; + } + ts=i; } - ts=i; - + start=1; /* This is used to avoid multiplication etc * when there is only the value '1' in the * buffer. */ @@ -485,9 +323,8 @@ err: BN_RECP_CTX_free(&recp); return(ret); } -/* #endif */ -/* #ifdef MONT_MUL_MOD */ + int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { @@ -502,12 +339,6 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, bn_check_top(p); bn_check_top(m); -#ifdef ATALLA - if(!tried_atalla && BN_mod_exp_atalla(rr,a,p,m)) - return 1; -/* If it fails, try the other methods */ -#endif - if (!(m->d[0] & 1)) { BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); @@ -527,11 +358,9 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, /* If this is not done, things will break in the montgomery * part */ -#if 1 if (in_mont != NULL) mont=in_mont; else -#endif { if ((mont=BN_MONT_CTX_new()) == NULL) goto err; if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; @@ -541,31 +370,27 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, ts=1; if (BN_ucmp(a,m) >= 0) { - BN_mod(&(val[0]),a,m,ctx); + if (!BN_mod(&(val[0]),a,m,ctx)) + goto err; aa= &(val[0]); } else aa=a; if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ - if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */ - - if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits >= 256) - window=5; /* max size of window */ - else if (bits >= 128) - window=4; - else - window=3; - j=1<<(window-1); - for (i=1; i<j; i++) + window = BN_window_bits_for_exponent_size(bits); + if (window > 1) { - BN_init(&(val[i])); - if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx)) - goto err; + if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */ + j=1<<(window-1); + for (i=1; i<j; i++) + { + BN_init(&(val[i])); + if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx)) + goto err; + } + ts=i; } - ts=i; start=1; /* This is used to avoid multiplication etc * when there is only the value '1' in the @@ -574,7 +399,7 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, wstart=bits-1; /* The top bit of the window */ wend=0; /* The bottom bit of the window */ - if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; + if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; for (;;) { if (BN_is_bit_set(p,wstart) == 0) @@ -626,7 +451,7 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, start=0; if (wstart < 0) break; } - BN_from_montgomery(rr,r,mont,ctx); + if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; ret=1; err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); @@ -635,7 +460,134 @@ err: BN_clear_free(&(val[i])); return(ret); } -/* #endif */ + +int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) + { + BN_MONT_CTX *mont = NULL; + int b, bits, ret=0; + int r_is_one; + BN_ULONG w, next_w; + BIGNUM *d, *r, *t; + BIGNUM *swap_tmp; +#define BN_MOD_MUL_WORD(r, w, m) \ + (BN_mul_word(r, (w)) && \ + (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ + (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) + /* BN_MOD_MUL_WORD is only used with 'w' large, + * so the BN_ucmp test is probably more overhead + * than always using BN_mod (which uses BN_copy if + * a similar test returns true). */ +#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ + (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) + + bn_check_top(p); + bn_check_top(m); + + if (!(m->d[0] & 1)) + { + BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); + return(0); + } + bits = BN_num_bits(p); + if (bits == 0) + { + BN_one(rr); + return(1); + } + BN_CTX_start(ctx); + d = BN_CTX_get(ctx); + r = BN_CTX_get(ctx); + t = BN_CTX_get(ctx); + if (d == NULL || r == NULL || t == NULL) goto err; + + if (in_mont != NULL) + mont=in_mont; + else + { + if ((mont = BN_MONT_CTX_new()) == NULL) goto err; + if (!BN_MONT_CTX_set(mont, m, ctx)) goto err; + } + + r_is_one = 1; /* except for Montgomery factor */ + + /* bits-1 >= 0 */ + + /* The result is accumulated in the product r*w. */ + w = a; /* bit 'bits-1' of 'p' is always set */ + for (b = bits-2; b >= 0; b--) + { + /* First, square r*w. */ + next_w = w*w; + if ((next_w/w) != w) /* overflow */ + { + if (r_is_one) + { + if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; + r_is_one = 0; + } + else + { + if (!BN_MOD_MUL_WORD(r, w, m)) goto err; + } + next_w = 1; + } + w = next_w; + if (!r_is_one) + { + if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err; + } + + /* Second, multiply r*w by 'a' if exponent bit is set. */ + if (BN_is_bit_set(p, b)) + { + next_w = w*a; + if ((next_w/a) != w) /* overflow */ + { + if (r_is_one) + { + if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; + r_is_one = 0; + } + else + { + if (!BN_MOD_MUL_WORD(r, w, m)) goto err; + } + next_w = a; + } + w = next_w; + } + } + + /* Finally, set r:=r*w. */ + if (w != 1) + { + if (r_is_one) + { + if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; + r_is_one = 0; + } + else + { + if (!BN_MOD_MUL_WORD(r, w, m)) goto err; + } + } + + if (r_is_one) /* can happen only if a == 1*/ + { + if (!BN_one(rr)) goto err; + } + else + { + if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; + } + ret = 1; +err: + if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); + BN_CTX_end(ctx); + return(ret); + } + /* The old fallback, simple version :-) */ int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, @@ -660,26 +612,21 @@ int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, BN_init(&(val[0])); ts=1; if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ - if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx)) - goto err; /* 2 */ - - if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits >= 256) - window=5; /* max size of window */ - else if (bits >= 128) - window=4; - else - window=3; - j=1<<(window-1); - for (i=1; i<j; i++) + window = BN_window_bits_for_exponent_size(bits); + if (window > 1) { - BN_init(&(val[i])); - if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx)) - goto err; + if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx)) + goto err; /* 2 */ + j=1<<(window-1); + for (i=1; i<j; i++) + { + BN_init(&(val[i])); + if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx)) + goto err; + } + ts=i; } - ts=i; start=1; /* This is used to avoid multiplication etc * when there is only the value '1' in the diff --git a/lib/libcrypto/bn/bn_exp2.c b/lib/libcrypto/bn/bn_exp2.c index 4f4e9e32998..29029f4c724 100644 --- a/lib/libcrypto/bn/bn_exp2.c +++ b/lib/libcrypto/bn/bn_exp2.c @@ -1,27 +1,128 @@ +/* crypto/bn/bn_exp2.c */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ +/* ==================================================================== + * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + #include <stdio.h> #include "cryptlib.h" #include "bn_lcl.h" -/* I've done some timing with different table sizes. - * The main hassle is that even with bits set at 3, this requires - * 63 BIGNUMs to store the pre-calculated values. - * 512 1024 - * bits=1 75.4% 79.4% - * bits=2 61.2% 62.4% - * bits=3 61.3% 59.3% - * The lack of speed improvement is also a function of the pre-calculation - * which could be removed. - */ -#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */ -#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */ +#define TABLE_SIZE 32 int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue; - int start=1,ts=0,x,y; - BIGNUM *d,*aa1,*aa2,*r; - BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE]; + int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2; + int r_is_one=1,ts1=0,ts2=0; + BIGNUM *d,*r; + BIGNUM *a_mod_m; + BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE]; BN_MONT_CTX *mont=NULL; bn_check_top(a1); @@ -32,7 +133,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, if (!(m->d[0] & 1)) { - BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); + BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); return(0); } bits1=BN_num_bits(p1); @@ -42,17 +143,13 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, BN_one(rr); return(1); } + bits=(bits1 > bits2)?bits1:bits2; BN_CTX_start(ctx); d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); if (d == NULL || r == NULL) goto err; - bits=(bits1 > bits2)?bits1:bits2; - - /* If this is not done, things will break in the montgomery - * part */ - if (in_mont != NULL) mont=in_mont; else @@ -61,139 +158,143 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; } - BN_init(&(val[0][0])); - BN_init(&(val[1][1])); - BN_init(&(val[0][1])); - BN_init(&(val[1][0])); - ts=1; + window1 = BN_window_bits_for_exponent_size(bits1); + window2 = BN_window_bits_for_exponent_size(bits2); + + /* + * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1) + */ + BN_init(&val1[0]); + ts1=1; if (BN_ucmp(a1,m) >= 0) { - BN_mod(&(val[1][0]),a1,m,ctx); - aa1= &(val[1][0]); + if (!BN_mod(&(val1[0]),a1,m,ctx)) + goto err; + a_mod_m = &(val1[0]); } else - aa1=a1; + a_mod_m = a1; + if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err; + if (window1 > 1) + { + if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err; + + j=1<<(window1-1); + for (i=1; i<j; i++) + { + BN_init(&(val1[i])); + if (!BN_mod_mul_montgomery(&(val1[i]),&(val1[i-1]),d,mont,ctx)) + goto err; + } + ts1=i; + } + + + /* + * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1) + */ + BN_init(&val2[0]); + ts2=1; if (BN_ucmp(a2,m) >= 0) { - BN_mod(&(val[0][1]),a2,m,ctx); - aa2= &(val[0][1]); + if (!BN_mod(&(val2[0]),a2,m,ctx)) + goto err; + a_mod_m = &(val2[0]); } else - aa2=a2; - if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err; - if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err; - if (!BN_mod_mul_montgomery(&(val[1][1]), - &(val[1][0]),&(val[0][1]),mont,ctx)) - goto err; - -#if 0 - if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits > 250) - window=5; /* max size of window */ - else if (bits >= 120) - window=4; - else - window=3; -#else - window=EXP2_TABLE_BITS; -#endif - - k=1<<window; - for (x=0; x<k; x++) + a_mod_m = a2; + if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err; + if (window2 > 1) { - if (x >= 2) - { - BN_init(&(val[x][0])); - BN_init(&(val[x][1])); - if (!BN_mod_mul_montgomery(&(val[x][0]), - &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err; - if (!BN_mod_mul_montgomery(&(val[x][1]), - &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err; - } - for (y=2; y<k; y++) + if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err; + + j=1<<(window2-1); + for (i=1; i<j; i++) { - BN_init(&(val[x][y])); - if (!BN_mod_mul_montgomery(&(val[x][y]), - &(val[x][y-1]),&(val[0][1]),mont,ctx)) + BN_init(&(val2[i])); + if (!BN_mod_mul_montgomery(&(val2[i]),&(val2[i-1]),d,mont,ctx)) goto err; } + ts2=i; } - ts=k; - - start=1; /* This is used to avoid multiplication etc - * when there is only the value '1' in the - * buffer. */ - xvalue=0; /* The 'x value' of the window */ - yvalue=0; /* The 'y value' of the window */ - wstart=bits-1; /* The top bit of the window */ - wend=0; /* The bottom bit of the window */ - - if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; - for (;;) + + + /* Now compute the power product, using independent windows. */ + r_is_one=1; + wvalue1=0; /* The 'value' of the first window */ + wvalue2=0; /* The 'value' of the second window */ + wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */ + wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */ + + if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; + for (b=bits-1; b>=0; b--) { - xvalue=BN_is_bit_set(p1,wstart); - yvalue=BN_is_bit_set(p2,wstart); - if (!(xvalue || yvalue)) + if (!r_is_one) { - if (!start) + if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) + goto err; + } + + if (!wvalue1) + if (BN_is_bit_set(p1, b)) { - if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) - goto err; + /* consider bits b-window1+1 .. b for this window */ + i = b-window1+1; + while (!BN_is_bit_set(p1, i)) /* works for i<0 */ + i++; + wpos1 = i; + wvalue1 = 1; + for (i = b-1; i >= wpos1; i--) + { + wvalue1 <<= 1; + if (BN_is_bit_set(p1, i)) + wvalue1++; + } } - wstart--; - if (wstart < 0) break; - continue; - } - /* We now have wstart on a 'set' bit, we now need to work out - * how bit a window to do. To do this we need to scan - * forward until the last set bit before the end of the - * window */ - j=wstart; - /* xvalue=BN_is_bit_set(p1,wstart); already set */ - /* yvalue=BN_is_bit_set(p1,wstart); already set */ - wend=0; - for (i=1; i<window; i++) - { - if (wstart-i < 0) break; - xvalue+=xvalue; - xvalue|=BN_is_bit_set(p1,wstart-i); - yvalue+=yvalue; - yvalue|=BN_is_bit_set(p2,wstart-i); - } - - /* i is the size of the current window */ - /* add the 'bytes above' */ - if (!start) - for (j=0; j<i; j++) + + if (!wvalue2) + if (BN_is_bit_set(p2, b)) { - if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) - goto err; + /* consider bits b-window2+1 .. b for this window */ + i = b-window2+1; + while (!BN_is_bit_set(p2, i)) + i++; + wpos2 = i; + wvalue2 = 1; + for (i = b-1; i >= wpos2; i--) + { + wvalue2 <<= 1; + if (BN_is_bit_set(p2, i)) + wvalue2++; + } } + + if (wvalue1 && b == wpos1) + { + /* wvalue1 is odd and < 2^window1 */ + if (!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx)) + goto err; + wvalue1 = 0; + r_is_one = 0; + } - /* wvalue will be an odd number < 2^window */ - if (xvalue || yvalue) + if (wvalue2 && b == wpos2) { - if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]), - mont,ctx)) goto err; + /* wvalue2 is odd and < 2^window2 */ + if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx)) + goto err; + wvalue2 = 0; + r_is_one = 0; } - - /* move the 'window' down further */ - wstart-=i; - start=0; - if (wstart < 0) break; } BN_from_montgomery(rr,r,mont,ctx); ret=1; err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); BN_CTX_end(ctx); - for (i=0; i<ts; i++) - { - for (j=0; j<ts; j++) - { - BN_clear_free(&(val[i][j])); - } - } + for (i=0; i<ts1; i++) + BN_clear_free(&(val1[i])); + for (i=0; i<ts2; i++) + BN_clear_free(&(val2[i])); return(ret); } diff --git a/lib/libcrypto/bn/bn_lcl.h b/lib/libcrypto/bn/bn_lcl.h index e36ccbc4c2d..9c959921b49 100644 --- a/lib/libcrypto/bn/bn_lcl.h +++ b/lib/libcrypto/bn/bn_lcl.h @@ -55,6 +55,59 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ #ifndef HEADER_BN_LCL_H #define HEADER_BN_LCL_H @@ -65,6 +118,51 @@ extern "C" { #endif + +/* + * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions + * + * + * For window size 'w' (w >= 2) and a random 'b' bits exponent, + * the number of multiplications is a constant plus on average + * + * 2^(w-1) + (b-w)/(w+1); + * + * here 2^(w-1) is for precomputing the table (we actually need + * entries only for windows that have the lowest bit set), and + * (b-w)/(w+1) is an approximation for the expected number of + * w-bit windows, not counting the first one. + * + * Thus we should use + * + * w >= 6 if b > 671 + * w = 5 if 671 > b > 239 + * w = 4 if 239 > b > 79 + * w = 3 if 79 > b > 23 + * w <= 2 if 23 > b + * + * (with draws in between). Very small exponents are often selected + * with low Hamming weight, so we use w = 1 for b <= 23. + */ +#if 1 +#define BN_window_bits_for_exponent_size(b) \ + ((b) > 671 ? 6 : \ + (b) > 239 ? 5 : \ + (b) > 79 ? 4 : \ + (b) > 23 ? 3 : 1) +#else +/* Old SSLeay/OpenSSL table. + * Maximum window size was 5, so this table differs for b==1024; + * but it coincides for other interesting values (b==160, b==512). + */ +#define BN_window_bits_for_exponent_size(b) \ + ((b) > 255 ? 5 : \ + (b) > 127 ? 4 : \ + (b) > 17 ? 3 : 1) +#endif + + + /* Pentium pro 16,16,16,32,64 */ /* Alpha 16,16,16,16.64 */ #define BN_MULL_SIZE_NORMAL (16) /* 32 */ @@ -130,7 +228,7 @@ extern "C" { /* This is used for internal error checking and is not normally used */ #ifdef BN_DEBUG # include <assert.h> -# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->max); +# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax); #else # define bn_check_top(a) #endif diff --git a/lib/libcrypto/bn/bn_lib.c b/lib/libcrypto/bn/bn_lib.c index 0e6b12d9c36..b6b0ce4b3c9 100644 --- a/lib/libcrypto/bn/bn_lib.c +++ b/lib/libcrypto/bn/bn_lib.c @@ -56,6 +56,12 @@ * [including the GNU Public Licence.] */ +#ifndef BN_DEBUG +# undef NDEBUG /* avoid conflicting definitions */ +# define NDEBUG +#endif + +#include <assert.h> #include <stdio.h> #include "cryptlib.h" #include "bn_lcl.h" @@ -244,14 +250,8 @@ int BN_num_bits(const BIGNUM *a) if (a->top == 0) return(0); l=a->d[a->top-1]; + assert(l != 0); i=(a->top-1)*BN_BITS2; - if (l == 0) - { -#if !defined(NO_STDIO) && !defined(WIN16) - fprintf(stderr,"BAD TOP VALUE\n"); -#endif - abort(); - } return(i+BN_num_bits_word(l)); } @@ -262,24 +262,24 @@ void BN_clear_free(BIGNUM *a) if (a == NULL) return; if (a->d != NULL) { - memset(a->d,0,a->max*sizeof(a->d[0])); + memset(a->d,0,a->dmax*sizeof(a->d[0])); if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) - Free(a->d); + OPENSSL_free(a->d); } i=BN_get_flags(a,BN_FLG_MALLOCED); memset(a,0,sizeof(BIGNUM)); if (i) - Free(a); + OPENSSL_free(a); } void BN_free(BIGNUM *a) { if (a == NULL) return; if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) - Free(a->d); + OPENSSL_free(a->d); a->flags|=BN_FLG_FREE; /* REMOVE? */ if (a->flags & BN_FLG_MALLOCED) - Free(a); + OPENSSL_free(a); } void BN_init(BIGNUM *a) @@ -291,7 +291,7 @@ BIGNUM *BN_new(void) { BIGNUM *ret; - if ((ret=(BIGNUM *)Malloc(sizeof(BIGNUM))) == NULL) + if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL) { BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); return(NULL); @@ -299,7 +299,7 @@ BIGNUM *BN_new(void) ret->flags=BN_FLG_MALLOCED; ret->top=0; ret->neg=0; - ret->max=0; + ret->dmax=0; ret->d=NULL; return(ret); } @@ -317,7 +317,7 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) bn_check_top(b); - if (words > b->max) + if (words > b->dmax) { bn_check_top(b); if (BN_get_flags(b,BN_FLG_STATIC_DATA)) @@ -325,7 +325,7 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); return(NULL); } - a=A=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(words+1)); + a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1)); if (A == NULL) { BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); @@ -423,21 +423,21 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) case 0: ; /* ultrix cc workaround, see above */ } #endif - Free(b->d); + OPENSSL_free(b->d); } b->d=a; - b->max=words; + b->dmax=words; /* Now need to zero any data between b->top and b->max */ A= &(b->d[b->top]); - for (i=(b->max - b->top)>>3; i>0; i--,A+=8) + for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8) { A[0]=0; A[1]=0; A[2]=0; A[3]=0; A[4]=0; A[5]=0; A[6]=0; A[7]=0; } - for (i=(b->max - b->top)&7; i>0; i--,A++) + for (i=(b->dmax - b->top)&7; i>0; i--,A++) A[0]=0; #else memset(A,0,sizeof(BN_ULONG)*(words+1)); @@ -508,7 +508,7 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) void BN_clear(BIGNUM *a) { if (a->d != NULL) - memset(a->d,0,a->max*sizeof(a->d[0])); + memset(a->d,0,a->dmax*sizeof(a->d[0])); a->top=0; a->neg=0; } diff --git a/lib/libcrypto/bn/bn_mont.c b/lib/libcrypto/bn/bn_mont.c index 598fecbf0c8..8cf1febacca 100644 --- a/lib/libcrypto/bn/bn_mont.c +++ b/lib/libcrypto/bn/bn_mont.c @@ -85,16 +85,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b, if (a == b) { -#if 0 - bn_wexpand(tmp,a->top*2); - bn_wexpand(tmp2,a->top*4); - bn_sqr_recursive(tmp->d,a->d,a->top,tmp2->d); - tmp->top=a->top*2; - if (tmp->d[tmp->top-1] == 0) - tmp->top--; -#else if (!BN_sqr(tmp,a,ctx)) goto err; -#endif } else { @@ -157,7 +148,22 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont, #endif for (i=0; i<nl; i++) { +#ifdef __TANDEM + { + long long t1; + long long t2; + long long t3; + t1 = rp[0] * (n0 & 0177777); + t2 = 037777600000l; + t2 = n0 & t2; + t3 = rp[0] & 0177777; + t2 = (t3 * t2) & BN_MASK2; + t1 = t1 + t2; + v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1); + } +#else v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); +#endif nrp++; rp++; if (((nrp[-1]+=v)&BN_MASK2) >= v) @@ -175,6 +181,7 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont, #if 0 BN_rshift(ret,r,mont->ri); #else + ret->neg = r->neg; x=ri; rp=ret->d; ap= &(r->d[x]); @@ -234,7 +241,7 @@ BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret; - if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL) + if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL) return(NULL); BN_MONT_CTX_init(ret); @@ -260,7 +267,7 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) BN_free(&(mont->N)); BN_free(&(mont->Ni)); if (mont->flags & BN_FLG_MALLOCED) - Free(mont); + OPENSSL_free(mont); } int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) @@ -284,7 +291,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) buf[1]=0; tmod.d=buf; tmod.top=1; - tmod.max=2; + tmod.dmax=2; tmod.neg=mod->neg; /* Ri = R^-1 mod N*/ if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) diff --git a/lib/libcrypto/bn/bn_mul.c b/lib/libcrypto/bn/bn_mul.c index 3e8baaad9a0..3e8d8b9567a 100644 --- a/lib/libcrypto/bn/bn_mul.c +++ b/lib/libcrypto/bn/bn_mul.c @@ -631,7 +631,6 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) al=a->top; bl=b->top; - r->neg=a->neg^b->neg; if ((al == 0) || (bl == 0)) { @@ -647,6 +646,7 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) } else rr = r; + rr->neg=a->neg^b->neg; #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) i = al-bl; diff --git a/lib/libcrypto/bn/bn_print.c b/lib/libcrypto/bn/bn_print.c index 782a96e7e0a..532e66bcc39 100644 --- a/lib/libcrypto/bn/bn_print.c +++ b/lib/libcrypto/bn/bn_print.c @@ -64,14 +64,14 @@ static const char *Hex="0123456789ABCDEF"; -/* Must 'Free' the returned data */ +/* Must 'OPENSSL_free' the returned data */ char *BN_bn2hex(const BIGNUM *a) { int i,j,v,z=0; char *buf; char *p; - buf=(char *)Malloc(a->top*BN_BYTES*2+2); + buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2); if (buf == NULL) { BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE); @@ -99,7 +99,7 @@ err: return(buf); } -/* Must 'Free' the returned data */ +/* Must 'OPENSSL_free' the returned data */ char *BN_bn2dec(const BIGNUM *a) { int i=0,num; @@ -110,8 +110,8 @@ char *BN_bn2dec(const BIGNUM *a) i=BN_num_bits(a)*3; num=(i/10+i/1000+3)+1; - bn_data=(BN_ULONG *)Malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); - buf=(char *)Malloc(num+3); + bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); + buf=(char *)OPENSSL_malloc(num+3); if ((buf == NULL) || (bn_data == NULL)) { BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE); @@ -149,7 +149,7 @@ char *BN_bn2dec(const BIGNUM *a) } } err: - if (bn_data != NULL) Free(bn_data); + if (bn_data != NULL) OPENSSL_free(bn_data); if (t != NULL) BN_free(t); return(buf); } diff --git a/lib/libcrypto/bn/bn_rand.c b/lib/libcrypto/bn/bn_rand.c index 943712c15b8..21ecbc04ed0 100644 --- a/lib/libcrypto/bn/bn_rand.c +++ b/lib/libcrypto/bn/bn_rand.c @@ -68,11 +68,17 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) int ret=0,bit,bytes,mask; time_t tim; + if (bits == 0) + { + BN_zero(rnd); + return 1; + } + bytes=(bits+7)/8; bit=(bits-1)%8; mask=0xff<<bit; - buf=(unsigned char *)Malloc(bytes); + buf=(unsigned char *)OPENSSL_malloc(bytes); if (buf == NULL) { BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE); @@ -120,7 +126,7 @@ err: if (buf != NULL) { memset(buf,0,bytes); - Free(buf); + OPENSSL_free(buf); } return(ret); } diff --git a/lib/libcrypto/bn/bn_recp.c b/lib/libcrypto/bn/bn_recp.c index a8796bd0aac..d019941d6be 100644 --- a/lib/libcrypto/bn/bn_recp.c +++ b/lib/libcrypto/bn/bn_recp.c @@ -72,7 +72,7 @@ BN_RECP_CTX *BN_RECP_CTX_new(void) { BN_RECP_CTX *ret; - if ((ret=(BN_RECP_CTX *)Malloc(sizeof(BN_RECP_CTX))) == NULL) + if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL) return(NULL); BN_RECP_CTX_init(ret); @@ -88,7 +88,7 @@ void BN_RECP_CTX_free(BN_RECP_CTX *recp) BN_free(&(recp->N)); BN_free(&(recp->Nr)); if (recp->flags & BN_FLG_MALLOCED) - Free(recp); + OPENSSL_free(recp); } int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) diff --git a/lib/libcrypto/bn/bn_shift.c b/lib/libcrypto/bn/bn_shift.c index 61aae65a6bf..0883247384e 100644 --- a/lib/libcrypto/bn/bn_shift.c +++ b/lib/libcrypto/bn/bn_shift.c @@ -162,7 +162,7 @@ int BN_rshift(BIGNUM *r, BIGNUM *a, int n) nw=n/BN_BITS2; rb=n%BN_BITS2; lb=BN_BITS2-rb; - if (nw > a->top) + if (nw > a->top || a->top == 0) { BN_zero(r); return(1); diff --git a/lib/libcrypto/bn/bn_sqr.c b/lib/libcrypto/bn/bn_sqr.c index fe00c5f69a0..75f4f38392d 100644 --- a/lib/libcrypto/bn/bn_sqr.c +++ b/lib/libcrypto/bn/bn_sqr.c @@ -188,7 +188,7 @@ void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp) #ifdef BN_RECURSION /* r is 2*n words in size, - * a and b are both n words in size. + * a and b are both n words in size. (There's not actually a 'b' here ...) * n must be a power of 2. * We multiply and return the result. * t must be 2*n words in size diff --git a/lib/libcrypto/bn/bn_word.c b/lib/libcrypto/bn/bn_word.c index 73157a7d43f..cd59baa2c49 100644 --- a/lib/libcrypto/bn/bn_word.c +++ b/lib/libcrypto/bn/bn_word.c @@ -115,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) a->neg=0; i=BN_sub_word(a,w); if (!BN_is_zero(a)) - a->neg=1; + a->neg=!(a->neg); return(i); } w&=BN_MASK2; @@ -140,7 +140,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) { int i; - if (a->neg) + if (BN_is_zero(a) || a->neg) { a->neg=0; i=BN_add_word(a,w); @@ -182,11 +182,16 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w) w&=BN_MASK2; if (a->top) { - ll=bn_mul_words(a->d,a->d,a->top,w); - if (ll) + if (w == 0) + BN_zero(a); + else { - if (bn_wexpand(a,a->top+1) == NULL) return(0); - a->d[a->top++]=ll; + ll=bn_mul_words(a->d,a->d,a->top,w); + if (ll) + { + if (bn_wexpand(a,a->top+1) == NULL) return(0); + a->d[a->top++]=ll; + } } } return(1); diff --git a/lib/libcrypto/bn/vms-helper.c b/lib/libcrypto/bn/vms-helper.c index 73af3370695..0fa79c4edb5 100644 --- a/lib/libcrypto/bn/vms-helper.c +++ b/lib/libcrypto/bn/vms-helper.c @@ -59,8 +59,10 @@ bn_div_words_abort(int i) { +#ifdef BN_DEBUG #if !defined(NO_STDIO) && !defined(WIN16) fprintf(stderr,"Division would overflow (%d)\n",i); #endif abort(); +#endif } |