author    | Damien Miller <djm@cvs.openbsd.org> | 2010-10-01 22:59:02 +0000
committer | Damien Miller <djm@cvs.openbsd.org> | 2010-10-01 22:59:02 +0000
commit    | 367b2622e0527401666a65476f4111fdda2e3c12 (patch)
tree      | dc507d2394eb3b616bd9eae56d17671899a24a05 /lib/libcrypto/bn/asm
parent    | ae9cbeba0dc25f0e95e6a0e50b6c161bf6384e17 (diff)
resolve conflicts, fix local changes
Diffstat (limited to 'lib/libcrypto/bn/asm')
34 files changed, 166 insertions, 9895 deletions
diff --git a/lib/libcrypto/bn/asm/alpha.s b/lib/libcrypto/bn/asm/alpha.s
deleted file mode 100644
index 555ff0b92d1..00000000000
--- a/lib/libcrypto/bn/asm/alpha.s
+++ /dev/null
@@ -1,3199 +0,0 @@
- # DEC Alpha assember
- # The bn_div_words is actually gcc output but the other parts are hand done.
- # Thanks to tzeruch@ceddec.com for sending me the gcc output for
- # bn_div_words.
- # I've gone back and re-done most of routines.
- # The key thing to remeber for the 164 CPU is that while a
- # multiply operation takes 8 cycles, another one can only be issued
- # after 4 cycles have elapsed. I've done modification to help
- # improve this. Also, normally, a ld instruction will not be available
- # for about 3 cycles.
[... the rest of this 3,199-line deleted file is elided here for readability.
 It consists of the hand-scheduled Alpha routines bn_mul_add_words,
 bn_mul_words, bn_sqr_words, bn_add_words, bn_div_words (compiler output,
 per an in-file comment), bn_sub_words, bn_mul_comba4, bn_mul_comba8,
 bn_sqr_comba4 and bn_sqr_comba8, all removed by this commit. ...]
$28, $18, $28 - addq $23, $20, $23 - addq $8, $25, $8 - addq $22, $28, $22 - cmpult $8, $25, $19 - cmpult $22, $28, $17 - addq $22, $19, $22 - addq $23, $17, $23 - stq $8, 96($16) - bis $31, $31, $8 - mulq $7, $6, $24 - umulh $7, $6, $21 - cmplt $24, $31, $27 - cmplt $21, $31, $18 - addq $24, $24, $24 - addq $21, $21, $21 - addq $21, $27, $21 - addq $8, $18, $8 - addq $22, $24, $22 - addq $23, $21, $23 - cmpult $22, $24, $20 - cmpult $23, $21, $25 - addq $23, $20, $23 - addq $8, $25, $8 - stq $22, 104($16) - bis $31, $31, $22 - mulq $7, $7, $28 - umulh $7, $7, $19 - addq $23, $28, $23 - addq $8, $19, $8 - cmpult $23, $28, $17 - cmpult $8, $19, $27 - addq $8, $17, $8 - addq $22, $27, $22 - stq $23, 112($16) - stq $8, 120($16) - ret $31,($26),1 - .end bn_sqr_comba8 diff --git a/lib/libcrypto/bn/asm/alpha.s.works b/lib/libcrypto/bn/asm/alpha.s.works deleted file mode 100644 index ee6c5878099..00000000000 --- a/lib/libcrypto/bn/asm/alpha.s.works +++ /dev/null @@ -1,533 +0,0 @@ - - # DEC Alpha assember - # The bn_div64 is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div64. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - .align 3 -$42: - mulq $20,$19,$5 # 1 2 1 ###### - ldq $21,8($17) # 2 1 - ldq $2,8($16) # 2 1 - umulh $20,$19,$20 # 1 2 ###### - ldq $27,16($17) # 3 1 - ldq $3,16($16) # 3 1 - mulq $21,$19,$6 # 2 2 1 ###### - ldq $28,24($17) # 4 1 - addq $1,$5,$1 # 1 2 2 - ldq $4,24($16) # 4 1 - umulh $21,$19,$21 # 2 2 ###### - cmpult $1,$5,$22 # 1 2 3 1 - addq $20,$22,$20 # 1 3 1 - addq $1,$0,$1 # 1 2 3 1 - mulq $27,$19,$7 # 3 2 1 ###### - cmpult $1,$0,$0 # 1 2 3 2 - addq $2,$6,$2 # 2 2 2 - addq $20,$0,$0 # 1 3 2 - cmpult $2,$6,$23 # 2 2 3 1 - addq $21,$23,$21 # 2 3 1 - umulh $27,$19,$27 # 3 2 ###### - addq $2,$0,$2 # 2 2 3 1 - cmpult $2,$0,$0 # 2 2 3 2 - subq $18,4,$18 - mulq $28,$19,$8 # 4 2 1 ###### - addq $21,$0,$0 # 2 3 2 - addq $3,$7,$3 # 3 2 2 - addq $16,32,$16 - cmpult $3,$7,$24 # 3 2 3 1 - stq $1,-32($16) # 1 2 4 - umulh $28,$19,$28 # 4 2 ###### - addq $27,$24,$27 # 3 3 1 - addq $3,$0,$3 # 3 2 3 1 - stq $2,-24($16) # 2 2 4 - cmpult $3,$0,$0 # 3 2 3 2 - stq $3,-16($16) # 3 2 4 - addq $4,$8,$4 # 4 2 2 - addq $27,$0,$0 # 3 3 2 - cmpult $4,$8,$25 # 4 2 3 1 - addq $17,32,$17 - addq $28,$25,$28 # 4 3 1 - addq $4,$0,$4 # 4 2 3 1 - cmpult $4,$0,$0 # 4 2 3 2 - stq $4,-8($16) # 4 2 4 - addq $28,$0,$0 # 4 3 2 - blt $18,$43 - - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $20,0($17) # 4 1 - ldq $1,0($16) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $20,$19,$20 # 4 2 - addq $1,$5,$1 # 4 2 2 - cmpult $1,$5,$22 # 4 2 3 1 - addq $20,$22,$20 # 4 3 1 - addq $1,$0,$1 # 4 2 3 1 - cmpult $1,$0,$0 # 4 2 3 2 - addq $20,$0,$0 # 4 3 2 - stq $1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - 
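The bn_sqr_comba8 code above repeats one pattern for every off-diagonal term: multiply with mulq/umulh, capture the top bit of each half with cmplt against $31 before doubling, then fold the doubled product into a three-word column with cmpult carry checks. A minimal C rendering of that step, assuming a compiler that provides unsigned __int128 for the 64x64->128 product (the name sqr_add_c2 is borrowed from the Perl generators further down, not from the removed assembly itself):

    #include <stdint.h>

    typedef uint64_t BN_ULONG;            /* one Alpha machine word */

    /* c2:c1:c0 += 2*a*b, mirroring the mulq/umulh/cmplt/addq sequence */
    static void sqr_add_c2(BN_ULONG a, BN_ULONG b,
                           BN_ULONG *c0, BN_ULONG *c1, BN_ULONG *c2)
    {
        unsigned __int128 t = (unsigned __int128)a * b;
        BN_ULONG lo = (BN_ULONG)t, hi = (BN_ULONG)(t >> 64);

        /* grab the bits that fall off when the product is doubled
           (what the cmplt ...,$31 instructions do) */
        BN_ULONG lo_top = lo >> 63, hi_top = hi >> 63;
        lo <<= 1;
        hi = (hi << 1) | lo_top;
        *c2 += hi_top;

        /* three-word accumulate, carries recovered by comparison */
        *c0 += lo;
        if (*c0 < lo && ++*c1 == 0)
            ++*c2;
        *c1 += hi;
        if (*c1 < hi)
            ++*c2;
    }

The diagonal a*a terms skip the doubling and use the plain two-carry accumulate.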
- .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$142: - - mulq $20,$19,$5 # 1 2 1 ##### - ldq $21,8($17) # 2 1 - ldq $27,16($17) # 3 1 - umulh $20,$19,$20 # 1 2 ##### - ldq $28,24($17) # 4 1 - mulq $21,$19,$6 # 2 2 1 ##### - addq $5,$0,$5 # 1 2 3 1 - subq $18,4,$18 - cmpult $5,$0,$0 # 1 2 3 2 - umulh $21,$19,$21 # 2 2 ##### - addq $20,$0,$0 # 1 3 2 - addq $17,32,$17 - addq $6,$0,$6 # 2 2 3 1 - mulq $27,$19,$7 # 3 2 1 ##### - cmpult $6,$0,$0 # 2 2 3 2 - addq $21,$0,$0 # 2 3 2 - addq $16,32,$16 - umulh $27,$19,$27 # 3 2 ##### - stq $5,-32($16) # 1 2 4 - mulq $28,$19,$8 # 4 2 1 ##### - addq $7,$0,$7 # 3 2 3 1 - stq $6,-24($16) # 2 2 4 - cmpult $7,$0,$0 # 3 2 3 2 - umulh $28,$19,$28 # 4 2 ##### - addq $27,$0,$0 # 3 3 2 - stq $7,-16($16) # 3 2 4 - addq $8,$0,$8 # 4 2 3 1 - cmpult $8,$0,$0 # 4 2 3 2 - - addq $28,$0,$0 # 4 3 2 - - stq $8,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $20,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $20,0($17) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - umulh $20,$19,$20 # 4 2 - addq $5,$0,$5 # 4 2 3 1 - addq $16,8,$16 - cmpult $5,$0,$0 # 4 2 3 2 - addq $17,8,$17 - addq $20,$0,$0 # 4 3 2 - stq $5,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$542: - mulq $20,$20,$5 ###### - ldq $21,8($17) # 1 1 - subq $18,4 - umulh $20,$20,$1 ###### - ldq $27,16($17) # 1 1 - mulq $21,$21,$6 ###### - ldq $28,24($17) # 1 1 - stq $5,0($16) # r[0] - umulh $21,$21,$2 ###### - stq $1,8($16) # r[1] - mulq $27,$27,$7 ###### - stq $6,16($16) # r[0] - umulh $27,$27,$3 ###### - stq $2,24($16) # r[1] - mulq $28,$28,$8 ###### - stq $7,32($16) # r[0] - umulh $28,$28,$4 ###### - stq $3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $8,-16($16) # r[0] - stq $4,-8($16) # r[1] - - blt $18,$543 - ldq $20,0($17) # 1 1 - br $542 - -$442: - ldq $20,0($17) # a[0] - mulq $20,$20,$5 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $20,$20,$1 # a[0]*w high part r3 - stq $5,-16($16) # r[0] - stq $1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$0 # carry = 0 - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - .align 3 -$901: - addq $1,$5,$1 # r=a+b; - ldq $6,8($17) # a[1] - cmpult $1,$5,$22 # did we overflow? - ldq $2,8($18) # b[1] - addq $1,$0,$1 # c+= overflow - ldq $7,16($17) # a[2] - cmpult $1,$0,$0 # overflow? - ldq $3,16($18) # b[2] - addq $0,$22,$0 - ldq $8,24($17) # a[3] - addq $2,$6,$2 # r=a+b; - ldq $4,24($18) # b[3] - cmpult $2,$6,$23 # did we overflow? - addq $3,$7,$3 # r=a+b; - addq $2,$0,$2 # c+= overflow - cmpult $3,$7,$24 # did we overflow? 
- cmpult $2,$0,$0 # overflow? - addq $4,$8,$4 # r=a+b; - addq $0,$23,$0 - cmpult $4,$8,$25 # did we overflow? - addq $3,$0,$3 # c+= overflow - stq $1,0($16) # r[0]=c - cmpult $3,$0,$0 # overflow? - stq $2,8($16) # r[1]=c - addq $0,$24,$0 - stq $3,16($16) # r[2]=c - addq $4,$0,$4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $4,$0,$0 # overflow? - addq $17,32,$17 # a++ - addq $0,$25,$0 - stq $4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - addq $1,$5,$1 # r=a+b; - subq $19,1,$19 # loop-- - addq $1,$0,$1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $1,$5,$22 # did we overflow? - cmpult $1,$0,$0 # overflow? - addq $18,8,$18 # b++ - stq $1,0($16) # r[0]=c - addq $0,$22,$0 - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div64 - .ent bn_div64 -bn_div64: - ldgp $29,0($27) -bn_div64..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div64 - - .set noat - .text - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19, 4, $19 - bis $31, $31, $0 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) -$101: - ldq $3, 8($17) - cmpult $1, $2, $4 - ldq $5, 8($18) - subq $1, $2, $1 - ldq $6, 16($17) - cmpult $1, $0, $2 - ldq $7, 16($18) - subq $1, $0, $23 - ldq $8, 24($17) - addq $2, $4, $0 - cmpult $3, $5, $24 - 
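The bn_div64 routine above divides the two-word value h:l by a single word d, with the quotient assumed to fit in one word (h < d); since the Alpha has no 128-by-64 divide, the code builds the result 32 bits at a time with divqu on the halves. Its contract, sketched in C under the assumption of a compiler-provided unsigned __int128 (the _sketch name is illustrative, not from the source):

    #include <stdint.h>

    typedef uint64_t BN_ULONG;

    /* quotient of (h:l) / d, valid when h < d so it fits in one word */
    static BN_ULONG bn_div64_sketch(BN_ULONG h, BN_ULONG l, BN_ULONG d)
    {
        if (d == 0)
            return (BN_ULONG)-1;      /* the assembly also returns -1 here */
        unsigned __int128 n = ((unsigned __int128)h << 64) | l;
        return (BN_ULONG)(n / d);
    }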
subq $3, $5, $3 - ldq $22, 24($18) - cmpult $3, $0, $5 - subq $3, $0, $25 - addq $5, $24, $0 - cmpult $6, $7, $27 - subq $6, $7, $6 - stq $23, 0($16) - cmpult $6, $0, $7 - subq $6, $0, $28 - addq $7, $27, $0 - cmpult $8, $22, $21 - subq $8, $22, $8 - stq $25, 8($16) - cmpult $8, $0, $22 - subq $8, $0, $20 - addq $22, $21, $0 - stq $28, 16($16) - subq $19, 4, $19 - stq $20, 24($16) - addq $17, 32, $17 - addq $18, 32, $18 - addq $16, 32, $16 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) - br $101 -$102: - ldq $1, 0($17) - ldq $2, 0($18) - cmpult $1, $2, $27 - subq $1, $2, $1 - cmpult $1, $0, $2 - subq $1, $0, $1 - stq $1, 0($16) - addq $2, $27, $0 - addq $17, 8, $17 - addq $18, 8, $18 - addq $16, 8, $16 - subq $19, 1, $19 - bgt $19, $102 - ret $31,($26),1 -$100: - addq $19, 4, $19 - bgt $19, $102 -$103: - ret $31,($26),1 - .end bn_sub_words diff --git a/lib/libcrypto/bn/asm/alpha.works/add.pl b/lib/libcrypto/bn/asm/alpha.works/add.pl deleted file mode 100644 index 4dc76e6b69f..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/add.pl +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_add_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - ($t0,$o0)=&NR(2); - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); # will we borrow? - &add($o0,$cc,$o0); # will we borrow? - &cmpult($o0,$cc,$cc); # will we borrow? 
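Every routine in these files recovers carries and borrows the same way: after an add or subtract, an unsigned compare (cmpult) against one of the operands tells whether the result wrapped, because the Alpha has no flags register. The Perl version of bn_add_words being removed here emits exactly that sequence four words at a time; a straight-line C equivalent, with BN_ULONG standing for one machine word and the _sketch name being illustrative:

    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t BN_ULONG;

    /* r[] = a[] + b[], returning the final carry; the cmpult idiom in C */
    static BN_ULONG bn_add_words_sketch(BN_ULONG *r, const BN_ULONG *a,
                                        const BN_ULONG *b, size_t n)
    {
        BN_ULONG carry = 0;

        for (size_t i = 0; i < n; i++) {
            BN_ULONG t = a[i] + b[i];
            BN_ULONG c1 = (t < a[i]);     /* did a+b wrap? */
            t += carry;
            BN_ULONG c2 = (t < carry);    /* did adding the old carry wrap? */
            r[i] = t;
            carry = c1 + c2;              /* never both set, so at most 1 */
        }
        return carry;
    }

bn_sub_words is the mirror image, with cmpult testing a < b before the subtract to detect the borrow.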
- &add($cc,$t0,$cc); # add the borrows - &st($o0,&QWPw(0,$rp)); # save - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($o0,$t0,$a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/div.pl b/lib/libcrypto/bn/asm/alpha.works/div.pl deleted file mode 100644 index 7ec144377fa..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/div.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/local/bin/perl - -sub bn_div64 - { - local($data)=<<'EOF'; - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .set noreorder - .set volatile - .align 3 - .globl bn_div64 - .ent bn_div64 -bn_div64: - ldgp $29,0($27) -bn_div64..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$9119 - lda $0,-1 - br $31,$9136 - .align 4 -$9119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$9120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$9120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$9120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$9122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$9122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$9123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$9126 - zapnot $7,15,$27 - br $31,$9127 - .align 4 -$9126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$9127: - srl $10,32,$4 - .align 5 -$9128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$9129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$9129 - subq $27,1,$27 - br $31,$9128 - .align 4 -$9129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$9134 - addq $9,$11,$9 - subq $27,1,$27 -$9134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$9124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$9123 - .align 4 -$9124: - bis $13,$27,$0 -$9136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div64 -EOF - &asm_add($data); - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/mul.pl b/lib/libcrypto/bn/asm/alpha.works/mul.pl deleted file mode 100644 index b182bae4520..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/mul.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - 
($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$word,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - &add($l0,$cc,$l0); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &cmpult($l0,$cc,$cc); - &st($l0,&QWPw(-1,$rp)); &FR($l0); - &add($h0,$cc,$cc); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/lib/libcrypto/bn/asm/alpha.works/mul_add.pl deleted file mode 100644 index e37f6315fbc..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/mul_add.pl +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_add_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - 
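mul.pl and mul_add.pl generate the two word-by-scalar loops: bn_mul_words writes a[i]*w plus the running carry into r[i], while bn_mul_add_words adds the same product into whatever r[i] already holds. Their semantics in C, assuming unsigned __int128 in place of the mul/muh instruction pair (the _sketch names are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t BN_ULONG;

    /* r[i] = lo(a[i]*w + carry); returns the final carry */
    static BN_ULONG bn_mul_words_sketch(BN_ULONG *r, const BN_ULONG *a,
                                        size_t n, BN_ULONG w)
    {
        BN_ULONG carry = 0;
        for (size_t i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)a[i] * w + carry;
            r[i] = (BN_ULONG)t;
            carry = (BN_ULONG)(t >> 64);
        }
        return carry;
    }

    /* r[i] += lo(a[i]*w + carry); the sum still fits in 128 bits */
    static BN_ULONG bn_mul_add_words_sketch(BN_ULONG *r, const BN_ULONG *a,
                                            size_t n, BN_ULONG w)
    {
        BN_ULONG carry = 0;
        for (size_t i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)a[i] * w + carry + r[i];
            r[i] = (BN_ULONG)t;
            carry = (BN_ULONG)(t >> 64);
        }
        return carry;
    }

The generated assembly unrolls these loops four words deep to hide the multiply latency the file comments describe.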
&add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b - &mul($a0,$word,($l0)=&NR(1)); - &sub($count,1,$count); - &add($ap,$QWS,$ap); - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - &add($r0,$l0,$r0); - &add($rp,$QWS,$rp); - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &st($r0,&QWPw(-1,$rp)); &FR($r0); - &add($h0,$cc,$cc); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl deleted file mode 100644 index 5efd2012814..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &mul($a[0],$b[0],($r00)=&NR(1)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &muh($a[0],$b[0],($r01)=&NR(1)); - &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &mul($a[0],$b[1],($r02)=&NR(1)); - - ($R,$H1,$H2)=&NR(3); - - &st($r00,&QWPw(0,$rp)); &FR($r00); - - &mov("zero",$R); - &mul($a[1],$b[0],($r03)=&NR(1)); - - &mov("zero",$H1); - &mov("zero",$H0); - &add($R,$r01,$R); - &muh($a[0],$b[1],($r04)=&NR(1)); - &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); - &add($R,$r02,$R); - &add($H1,$t01,$H1) &FR($t01); - &muh($a[1],$b[0],($r05)=&NR(1)); - &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); - &add($R,$r03,$R); - &add($H2,$t02,$H2) &FR($t02); - &mul($a[0],$b[2],($r06)=&NR(1)); - &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); - &add($H1,$t03,$H1) &FR($t03); - &st($R,&QWPw(1,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r04,$R); - &mov("zero",$H2); - &mul($a[1],$b[1],($r07)=&NR(1)); - &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); - &add($R,$r05,$R); - &add($H1,$t04,$H1) &FR($t04); - &mul($a[2],$b[0],($r08)=&NR(1)); - 
&cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); - &add($R,$r01,$R); - &add($H2,$t05,$H2) &FR($t05); - &muh($a[0],$b[2],($r09)=&NR(1)); - &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); - &add($R,$r07,$R); - &add($H1,$t06,$H1) &FR($t06); - &muh($a[1],$b[1],($r10)=&NR(1)); - &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); - &add($R,$r08,$R); - &add($H2,$t07,$H2) &FR($t07); - &muh($a[2],$b[0],($r11)=&NR(1)); - &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); - &add($H1,$t08,$H1) &FR($t08); - &st($R,&QWPw(2,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r09,$R); - &mov("zero",$H2); - &mul($a[0],$b[3],($r12)=&NR(1)); - &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); - &add($R,$r10,$R); - &add($H1,$t09,$H1) &FR($t09); - &mul($a[1],$b[2],($r13)=&NR(1)); - &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); - &add($R,$r11,$R); - &add($H1,$t10,$H1) &FR($t10); - &mul($a[2],$b[1],($r14)=&NR(1)); - &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); - &add($R,$r12,$R); - &add($H1,$t11,$H1) &FR($t11); - &mul($a[3],$b[0],($r15)=&NR(1)); - &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); - &add($R,$r13,$R); - &add($H1,$t12,$H1) &FR($t12); - &muh($a[0],$b[3],($r16)=&NR(1)); - &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); - &add($R,$r14,$R); - &add($H1,$t13,$H1) &FR($t13); - &muh($a[1],$b[2],($r17)=&NR(1)); - &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); - &add($R,$r15,$R); - &add($H1,$t14,$H1) &FR($t14); - &muh($a[2],$b[1],($r18)=&NR(1)); - &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); - &add($H1,$t15,$H1) &FR($t15); - &st($R,&QWPw(3,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r16,$R); - &mov("zero",$H2); - &muh($a[3],$b[0],($r19)=&NR(1)); - &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); - &add($R,$r17,$R); - &add($H1,$t16,$H1) &FR($t16); - &mul($a[1],$b[3],($r20)=&NR(1)); - &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); - &add($R,$r18,$R); - &add($H1,$t17,$H1) &FR($t17); - &mul($a[2],$b[2],($r21)=&NR(1)); - &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); - &add($R,$r19,$R); - &add($H1,$t18,$H1) &FR($t18); - &mul($a[3],$b[1],($r22)=&NR(1)); - &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); - &add($R,$r20,$R); - &add($H1,$t19,$H1) &FR($t19); - &muh($a[1],$b[3],($r23)=&NR(1)); - &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); - &add($R,$r21,$R); - &add($H1,$t20,$H1) &FR($t20); - &muh($a[2],$b[2],($r24)=&NR(1)); - &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); - &add($R,$r22,$R); - &add($H1,$t21,$H1) &FR($t21); - &muh($a[3],$b[1],($r25)=&NR(1)); - &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); - &add($H1,$t22,$H1) &FR($t22); - &st($R,&QWPw(4,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r23,$R); - &mov("zero",$H2); - &mul($a[2],$b[3],($r26)=&NR(1)); - &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); - &add($R,$r24,$R); - &add($H1,$t23,$H1) &FR($t23); - &mul($a[3],$b[2],($r27)=&NR(1)); - &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); - &add($R,$r25,$R); - &add($H1,$t24,$H1) &FR($t24); - &muh($a[2],$b[3],($r28)=&NR(1)); - &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); - &add($R,$r26,$R); - &add($H1,$t25,$H1) &FR($t25); - &muh($a[3],$b[2],($r29)=&NR(1)); - &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); - &add($R,$r27,$R); - &add($H1,$t26,$H1) &FR($t26); - &mul($a[3],$b[3],($r30)=&NR(1)); - &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); - &add($H1,$t27,$H1) &FR($t27); - &st($R,&QWPw(5,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r28,$R); - &mov("zero",$H2); - &muh($a[3],$b[3],($r31)=&NR(1)); - &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); - &add($R,$r29,$R); - &add($H1,$t28,$H1) &FR($t28); - ############ - &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); - &add($R,$r30,$R); 
- &add($H1,$t29,$H1) &FR($t29); - ############ - &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); - &add($H1,$t30,$H1) &FR($t30); - &st($R,&QWPw(6,$rp)); - &add($H1,$H2,$R); - - &add($R,$r31,$R); &FR($r31); - &st($R,&QWPw(7,$rp)); - - &FR($R,$H1,$H2); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl deleted file mode 100644 index 79d86dd25cd..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - -print STDERR "count=$cnt\n"; $cnt++; - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &FR($c0,$c1,$c2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl deleted file mode 100644 index 525ca7494b7..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &stack_push(2); - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - 
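Both mul_c4 generators are built on the single primitive mul_add_c: add the double-word product a*b into the running column c2:c1:c0; the comba routine then stores c0, rotates the three accumulators, and moves on to the next output word. The Perl emits a mul/muh pair plus cmpult carry checks; a C sketch of the same step, assuming unsigned __int128:

    #include <stdint.h>

    typedef uint64_t BN_ULONG;

    /* c2:c1:c0 += a*b (one comba column contribution) */
    static void mul_add_c(BN_ULONG a, BN_ULONG b,
                          BN_ULONG *c0, BN_ULONG *c1, BN_ULONG *c2)
    {
        unsigned __int128 t = (unsigned __int128)a * b;
        BN_ULONG lo = (BN_ULONG)t, hi = (BN_ULONG)(t >> 64);

        *c0 += lo;
        hi  += (*c0 < lo);            /* carry out of the low word */
        *c1 += hi;
        *c2 += (*c1 < hi);            /* carry out of the middle word */
    }

In bn_mul_comba4, column k collects every a[i]*b[j] with i+j == k, so the 4x4 product works through all sixteen pairs and eight stored result words.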
&ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &st($reg_s0,&swtmp(0)); &FR($reg_s0); - &st($reg_s1,&swtmp(1)); &FR($reg_s1); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &ld(($a[4])=&NR(1),&QWPw(1,$ap)); - &ld(($b[4])=&NR(1),&QWPw(1,$bp)); - &ld(($a[5])=&NR(1),&QWPw(1,$ap)); - &ld(($b[5])=&NR(1),&QWPw(1,$bp)); - &ld(($a[6])=&NR(1),&QWPw(1,$ap)); - &ld(($b[6])=&NR(1),&QWPw(1,$bp)); - &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); - &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[4],$c0,$c1,$c2); - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); - &mul_add_c($a[4],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[5],$c0,$c1,$c2); - &mul_add_c($a[1],$b[4],$c0,$c1,$c2); - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); - &mul_add_c($a[4],$b[1],$c0,$c1,$c2); - &mul_add_c($a[5],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[6],$c0,$c1,$c2); - &mul_add_c($a[1],$b[5],$c0,$c1,$c2); - &mul_add_c($a[2],$b[4],$c0,$c1,$c2); - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); - &mul_add_c($a[4],$b[2],$c0,$c1,$c2); - &mul_add_c($a[5],$b[1],$c0,$c1,$c2); - &mul_add_c($a[6],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[6],$c0,$c1,$c2); - &mul_add_c($a[2],$b[5],$c0,$c1,$c2); - &mul_add_c($a[3],$b[4],$c0,$c1,$c2); - &mul_add_c($a[4],$b[3],$c0,$c1,$c2); - &mul_add_c($a[5],$b[2],$c0,$c1,$c2); - &mul_add_c($a[6],$b[1],$c0,$c1,$c2); - &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[6],$c0,$c1,$c2); - &mul_add_c($a[3],$b[5],$c0,$c1,$c2); - &mul_add_c($a[4],$b[4],$c0,$c1,$c2); - &mul_add_c($a[5],$b[3],$c0,$c1,$c2); - &mul_add_c($a[6],$b[2],$c0,$c1,$c2); - &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[6],$c0,$c1,$c2); - &mul_add_c($a[4],$b[5],$c0,$c1,$c2); - &mul_add_c($a[5],$b[4],$c0,$c1,$c2); - &mul_add_c($a[6],$b[3],$c0,$c1,$c2); - 
&mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); - &mul_add_c($a[4],$b[6],$c0,$c1,$c2); - &mul_add_c($a[5],$b[5],$c0,$c1,$c2); - &mul_add_c($a[6],$b[4],$c0,$c1,$c2); - &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); - &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); - &mul_add_c($a[5],$b[6],$c0,$c1,$c2); - &mul_add_c($a[6],$b[5],$c0,$c1,$c2); - &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); - &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); - &mul_add_c($a[6],$b[6],$c0,$c1,$c2); - &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); - &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); - &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); - &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &FR($c0,$c1,$c2); - - &ld($reg_s0,&swtmp(0)); - &ld($reg_s1,&swtmp(1)); - &stack_pop(2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/lib/libcrypto/bn/asm/alpha.works/sqr.pl deleted file mode 100644 index a55b696906e..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/sqr.pl +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(3); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the 
last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$a0,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &add($rp,2*$QWS,$rp); - &sub($count,1,$count); - &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); - &st($l0,&QWPw(-2,$rp)); &FR($l0); - &st($h0,&QWPw(-1,$rp)); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl deleted file mode 100644 index bf33f5b5037..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub sqr_add_c - { - local($a,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$a,($l1)=&NR(1)); - &muh($a,$a,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c1,$t1,$c1); &FR($t1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub sqr_add_c2 - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &cmplt($l1,"zero",($lc1)=&NR(1)); - &cmplt($h1,"zero",($hc1)=&NR(1)); - &add($l1,$l1,$l1); - &add($h1,$h1,$h1); - &add($h1,$lc1,$h1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); - - &add($c1,$lc1,$c1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - } - - -sub bn_sqr_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl deleted file mode 100644 index b4afe085f1c..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - 
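sqr.pl generates the simple bn_sqr_words loop: each input word is squared into two adjacent output words, with no cross terms and no carry chain between iterations (those belong to the comba squaring in sqr_c4.pl and sqr_c8.pl, which reuse the sqr_add_c/sqr_add_c2 accumulation sketched earlier). In C, again assuming unsigned __int128 for the 64x64->128 square and an illustrative _sketch name:

    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t BN_ULONG;

    /* r has 2*n words: r[2i] = lo(a[i]^2), r[2i+1] = hi(a[i]^2) */
    static void bn_sqr_words_sketch(BN_ULONG *r, const BN_ULONG *a, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)a[i] * a[i];
            r[2 * i]     = (BN_ULONG)t;
            r[2 * i + 1] = (BN_ULONG)(t >> 64);
        }
    }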
&ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($a[4])=&NR(1),&QWPw(4,$ap)); - &ld(($a[5])=&NR(1),&QWPw(5,$ap)); - &ld(($a[6])=&NR(1),&QWPw(6,$ap)); - &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(7,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[4],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(8,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(9,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[5],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); - &st($c0,&QWPw(10,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); - &st($c0,&QWPw(11,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[6],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); - &st($c0,&QWPw(12,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); - &st($c0,&QWPw(13,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[7],$c0,$c1,$c2); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha.works/sub.pl b/lib/libcrypto/bn/asm/alpha.works/sub.pl deleted file mode 100644 index d998da5c21a..00000000000 --- a/lib/libcrypto/bn/asm/alpha.works/sub.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - 
&ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$o0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$o1); # will we borrow? - &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($o0,&QWPw(0,$rp)); &FR($o0); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$o2); # will we borrow? - &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($o1,&QWPw(1,$rp)); &FR($o1); - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$o3); # will we borrow? - &add($b3,$t3,$cc); &FR($t3,$a3,$b3); - - &st($o2,&QWPw(2,$rp)); &FR($o2); - &sub($count,4,$count); # count-=4 - &st($o3,&QWPw(3,$rp)); &FR($o3); - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? 
- &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/add.pl b/lib/libcrypto/bn/asm/alpha/add.pl deleted file mode 100644 index 13bf5164281..00000000000 --- a/lib/libcrypto/bn/asm/alpha/add.pl +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_add_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - -########################################################## - &set_label("loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - &ld(($b0)=&NR(1),&QWPw(0,$bp)); - &ld(($a1)=&NR(1),&QWPw(1,$ap)); - &ld(($b1)=&NR(1),&QWPw(1,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &ld(($a2)=&NR(1),&QWPw(2,$ap)); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &ld(($b2)=&NR(1),&QWPw(2,$bp)); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &ld(($a3)=&NR(1),&QWPw(3,$ap)); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &ld(($b3)=&NR(1),&QWPw(3,$bp)); - &st($o0,&QWPw(0,$rp)); &FR($o0); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &st($o1,&QWPw(0,$rp)); &FR($o1); - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &cmpult($o3,$cc,$cc); - &st($o3,&QWPw(0,$rp)); &FR($o3); - &add($cc,$t3,$cc); &FR($t3); - - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - ### - &bge($count,&label("loop")); - ### - &br(&label("finish")); -################################################## - # Do the last 0..3 words - - ($t0,$o0)=&NR(2); - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($a0,$b0,$o0); - &sub($count,1,$count); - &cmpult($o0,$b0,$t0); # will we borrow? - &add($o0,$cc,$o0); # will we borrow? - &cmpult($o0,$cc,$cc); # will we borrow? 
- &add($rp,$QWS,$rp); - &st($o0,&QWPw(-1,$rp)); # save - &add($cc,$t0,$cc); # add the borrows - - ### - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($o0,$t0,$a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/div.pl b/lib/libcrypto/bn/asm/alpha/div.pl deleted file mode 100644 index e9e680897aa..00000000000 --- a/lib/libcrypto/bn/asm/alpha/div.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/local/bin/perl - -sub bn_div_words - { - local($data)=<<'EOF'; - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .set noreorder - .set volatile - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words - ldgp $29,0($27) -bn_div_words.ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$9119 - lda $0,-1 - br $31,$9136 - .align 4 -$9119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$9120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$9120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$9120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$9122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$9122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$9123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$9126 - zapnot $7,15,$27 - br $31,$9127 - .align 4 -$9126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$9127: - srl $10,32,$4 - .align 5 -$9128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$9129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$9129 - subq $27,1,$27 - br $31,$9128 - .align 4 -$9129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$9134 - addq $9,$11,$9 - subq $27,1,$27 -$9134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$9124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$9123 - .align 4 -$9124: - bis $13,$27,$0 -$9136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words -EOF - &asm_add($data); - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/mul.pl b/lib/libcrypto/bn/asm/alpha/mul.pl deleted file mode 100644 index 76c926566c7..00000000000 --- a/lib/libcrypto/bn/asm/alpha/mul.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - ### - &blt($count,&label("finish")); - - ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); - - &set_label("loop"); - - ($a1)=&NR(1); 
&ld($a1,&QWPw(1,$ap)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ### wait 8 - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - ### wait 8 - &muh($a1,$word,($h1)=&NR(1)); &FR($a1); - &add($l0,$cc,$l0); ### wait 8 - &mul($a1,$word,($l1)=&NR(1)); &FR($a1); - &cmpult($l0,$cc,$cc); ### wait 8 - &muh($a2,$word,($h2)=&NR(1)); &FR($a2); - &add($h0,$cc,$cc); &FR($h0); ### wait 8 - &mul($a2,$word,($l2)=&NR(1)); &FR($a2); - &add($l1,$cc,$l1); ### wait 8 - &st($l0,&QWPw(0,$rp)); &FR($l0); - &cmpult($l1,$cc,$cc); ### wait 8 - &muh($a3,$word,($h3)=&NR(1)); &FR($a3); - &add($h1,$cc,$cc); &FR($h1); - &mul($a3,$word,($l3)=&NR(1)); &FR($a3); - &add($l2,$cc,$l2); - &st($l1,&QWPw(1,$rp)); &FR($l1); - &cmpult($l2,$cc,$cc); - &add($h2,$cc,$cc); &FR($h2); - &sub($count,4,$count); # count-=4 - &st($l2,&QWPw(2,$rp)); &FR($l2); - &add($l3,$cc,$l3); - &cmpult($l3,$cc,$cc); - &add($bp,4*$QWS,$bp); # count+=4 - &add($h3,$cc,$cc); &FR($h3); - &add($ap,4*$QWS,$ap); # count+=4 - &st($l3,&QWPw(3,$rp)); &FR($l3); - &add($rp,4*$QWS,$rp); # count+=4 - ### - &blt($count,&label("finish")); - ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); - &br(&label("finish")); -################################################## - -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - ### - ### - ### - &muh($a0,$word,($h0)=&NR(1)); - ### Wait 8 for next mul issue - &mul($a0,$word,($l0)=&NR(1)); &FR($a0) - &add($ap,$QWS,$ap); - ### Loose 12 until result is available - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &add($l0,$cc,$l0); - ### - &st($l0,&QWPw(-1,$rp)); &FR($l0); - &cmpult($l0,$cc,$cc); - &add($h0,$cc,$cc); &FR($h0); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/mul_add.pl b/lib/libcrypto/bn/asm/alpha/mul_add.pl deleted file mode 100644 index 0d6df69bc4b..00000000000 --- a/lib/libcrypto/bn/asm/alpha/mul_add.pl +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_add_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - ### - &blt($count,&label("finish")); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - &ld(($r0)=&NR(1),&QWPw(0,$rp)); - &ld(($a1)=&NR(1),&QWPw(1,$ap)); - &muh($a0,$word,($h0)=&NR(1)); - &ld(($r1)=&NR(1),&QWPw(1,$rp)); - &ld(($a2)=&NR(1),&QWPw(2,$ap)); - ### - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - &ld(($r2)=&NR(1),&QWPw(2,$rp)); - &muh($a1,$word,($h1)=&NR(1)); - &ld(($a3)=&NR(1),&QWPw(3,$ap)); - &mul($a1,$word,($l1)=&NR(1)); &FR($a1); - &ld(($r3)=&NR(1),&QWPw(3,$rp)); - &add($r0,$l0,$r0); - &add($r1,$l1,$r1); - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); - &muh($a2,$word,($h2)=&NR(1)); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &add($h1,$t1,$h1); &FR($t1); - &add($h0,$cc,$cc); &FR($h0); - &mul($a2,$word,($l2)=&NR(1)); &FR($a2); - &add($r1,$cc,$r1); - &cmpult($r1,$cc,$cc); - 
&add($r2,$l2,$r2); - &add($h1,$cc,$cc); &FR($h1); - &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); - &muh($a3,$word,($h3)=&NR(1)); - &add($r2,$cc,$r2); - &st($r0,&QWPw(0,$rp)); &FR($r0); - &add($h2,$t2,$h2); &FR($t2); - &st($r1,&QWPw(1,$rp)); &FR($r1); - &cmpult($r2,$cc,$cc); - &mul($a3,$word,($l3)=&NR(1)); &FR($a3); - &add($h2,$cc,$cc); &FR($h2); - &st($r2,&QWPw(2,$rp)); &FR($r2); - &sub($count,4,$count); # count-=4 - &add($rp,4*$QWS,$rp); # count+=4 - &add($r3,$l3,$r3); - &add($ap,4*$QWS,$ap); # count+=4 - &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); - &add($r3,$cc,$r3); - &add($h3,$t3,$h3); &FR($t3); - &cmpult($r3,$cc,$cc); - &st($r3,&QWPw(-1,$rp)); &FR($r3); - &add($h3,$cc,$cc); &FR($h3); - - ### - &blt($count,&label("finish")); - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b - ### - ### - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - ### wait 8 - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - &add($rp,$QWS,$rp); - &add($ap,$QWS,$ap); - &sub($count,1,$count); - ### wait 3 until l0 is available - &add($r0,$l0,$r0); - ### - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &add($h0,$cc,$cc); &FR($h0); - - &st($r0,&QWPw(-1,$rp)); &FR($r0); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/lib/libcrypto/bn/asm/alpha/mul_c4.pl deleted file mode 100644 index 9cc876ded4a..00000000000 --- a/lib/libcrypto/bn/asm/alpha/mul_c4.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -# upto - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &mul($a[0],$b[0],($r00)=&NR(1)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &muh($a[0],$b[0],($r01)=&NR(1)); - &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &mul($a[0],$b[1],($r02)=&NR(1)); - - ($R,$H1,$H2)=&NR(3); - - &st($r00,&QWPw(0,$rp)); &FR($r00); - - &mov("zero",$R); - &mul($a[1],$b[0],($r03)=&NR(1)); - - &mov("zero",$H1); - &mov("zero",$H0); - &add($R,$r01,$R); - &muh($a[0],$b[1],($r04)=&NR(1)); - &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); - &add($R,$r02,$R); - &add($H1,$t01,$H1) &FR($t01); - &muh($a[1],$b[0],($r05)=&NR(1)); - &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); - &add($R,$r03,$R); - &add($H2,$t02,$H2) &FR($t02); - &mul($a[0],$b[2],($r06)=&NR(1)); - &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); - &add($H1,$t03,$H1) &FR($t03); - &st($R,&QWPw(1,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - 
&add($R,$r04,$R); - &mov("zero",$H2); - &mul($a[1],$b[1],($r07)=&NR(1)); - &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); - &add($R,$r05,$R); - &add($H1,$t04,$H1) &FR($t04); - &mul($a[2],$b[0],($r08)=&NR(1)); - &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); - &add($R,$r01,$R); - &add($H2,$t05,$H2) &FR($t05); - &muh($a[0],$b[2],($r09)=&NR(1)); - &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); - &add($R,$r07,$R); - &add($H1,$t06,$H1) &FR($t06); - &muh($a[1],$b[1],($r10)=&NR(1)); - &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); - &add($R,$r08,$R); - &add($H2,$t07,$H2) &FR($t07); - &muh($a[2],$b[0],($r11)=&NR(1)); - &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); - &add($H1,$t08,$H1) &FR($t08); - &st($R,&QWPw(2,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r09,$R); - &mov("zero",$H2); - &mul($a[0],$b[3],($r12)=&NR(1)); - &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); - &add($R,$r10,$R); - &add($H1,$t09,$H1) &FR($t09); - &mul($a[1],$b[2],($r13)=&NR(1)); - &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); - &add($R,$r11,$R); - &add($H1,$t10,$H1) &FR($t10); - &mul($a[2],$b[1],($r14)=&NR(1)); - &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); - &add($R,$r12,$R); - &add($H1,$t11,$H1) &FR($t11); - &mul($a[3],$b[0],($r15)=&NR(1)); - &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); - &add($R,$r13,$R); - &add($H1,$t12,$H1) &FR($t12); - &muh($a[0],$b[3],($r16)=&NR(1)); - &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); - &add($R,$r14,$R); - &add($H1,$t13,$H1) &FR($t13); - &muh($a[1],$b[2],($r17)=&NR(1)); - &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); - &add($R,$r15,$R); - &add($H1,$t14,$H1) &FR($t14); - &muh($a[2],$b[1],($r18)=&NR(1)); - &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); - &add($H1,$t15,$H1) &FR($t15); - &st($R,&QWPw(3,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r16,$R); - &mov("zero",$H2); - &muh($a[3],$b[0],($r19)=&NR(1)); - &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); - &add($R,$r17,$R); - &add($H1,$t16,$H1) &FR($t16); - &mul($a[1],$b[3],($r20)=&NR(1)); - &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); - &add($R,$r18,$R); - &add($H1,$t17,$H1) &FR($t17); - &mul($a[2],$b[2],($r21)=&NR(1)); - &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); - &add($R,$r19,$R); - &add($H1,$t18,$H1) &FR($t18); - &mul($a[3],$b[1],($r22)=&NR(1)); - &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); - &add($R,$r20,$R); - &add($H1,$t19,$H1) &FR($t19); - &muh($a[1],$b[3],($r23)=&NR(1)); - &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); - &add($R,$r21,$R); - &add($H1,$t20,$H1) &FR($t20); - &muh($a[2],$b[2],($r24)=&NR(1)); - &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); - &add($R,$r22,$R); - &add($H1,$t21,$H1) &FR($t21); - &muh($a[3],$b[1],($r25)=&NR(1)); - &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); - &add($H1,$t22,$H1) &FR($t22); - &st($R,&QWPw(4,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r23,$R); - &mov("zero",$H2); - &mul($a[2],$b[3],($r26)=&NR(1)); - &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); - &add($R,$r24,$R); - &add($H1,$t23,$H1) &FR($t23); - &mul($a[3],$b[2],($r27)=&NR(1)); - &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); - &add($R,$r25,$R); - &add($H1,$t24,$H1) &FR($t24); - &muh($a[2],$b[3],($r28)=&NR(1)); - &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); - &add($R,$r26,$R); - &add($H1,$t25,$H1) &FR($t25); - &muh($a[3],$b[2],($r29)=&NR(1)); - &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); - &add($R,$r27,$R); - &add($H1,$t26,$H1) &FR($t26); - &mul($a[3],$b[3],($r30)=&NR(1)); - &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); - &add($H1,$t27,$H1) &FR($t27); - &st($R,&QWPw(5,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r28,$R); - &mov("zero",$H2); - 
&muh($a[3],$b[3],($r31)=&NR(1)); - &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); - &add($R,$r29,$R); - &add($H1,$t28,$H1) &FR($t28); - ############ - &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); - &add($R,$r30,$R); - &add($H1,$t29,$H1) &FR($t29); - ############ - &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); - &add($H1,$t30,$H1) &FR($t30); - &st($R,&QWPw(6,$rp)); - &add($H1,$H2,$R); - - &add($R,$r31,$R); &FR($r31); - &st($R,&QWPw(7,$rp)); - - &FR($R,$H1,$H2); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl deleted file mode 100644 index 79d86dd25cd..00000000000 --- a/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - -print STDERR "count=$cnt\n"; $cnt++; - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &FR($c0,$c1,$c2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/lib/libcrypto/bn/asm/alpha/mul_c8.pl deleted file mode 100644 index 525ca7494b7..00000000000 --- a/lib/libcrypto/bn/asm/alpha/mul_c8.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - 
- $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &stack_push(2); - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &st($reg_s0,&swtmp(0)); &FR($reg_s0); - &st($reg_s1,&swtmp(1)); &FR($reg_s1); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &ld(($a[4])=&NR(1),&QWPw(1,$ap)); - &ld(($b[4])=&NR(1),&QWPw(1,$bp)); - &ld(($a[5])=&NR(1),&QWPw(1,$ap)); - &ld(($b[5])=&NR(1),&QWPw(1,$bp)); - &ld(($a[6])=&NR(1),&QWPw(1,$ap)); - &ld(($b[6])=&NR(1),&QWPw(1,$bp)); - &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); - &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[4],$c0,$c1,$c2); - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); - &mul_add_c($a[4],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[5],$c0,$c1,$c2); - &mul_add_c($a[1],$b[4],$c0,$c1,$c2); - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); - &mul_add_c($a[4],$b[1],$c0,$c1,$c2); - &mul_add_c($a[5],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[6],$c0,$c1,$c2); - &mul_add_c($a[1],$b[5],$c0,$c1,$c2); - &mul_add_c($a[2],$b[4],$c0,$c1,$c2); - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); - &mul_add_c($a[4],$b[2],$c0,$c1,$c2); - &mul_add_c($a[5],$b[1],$c0,$c1,$c2); - &mul_add_c($a[6],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[6],$c0,$c1,$c2); - &mul_add_c($a[2],$b[5],$c0,$c1,$c2); - &mul_add_c($a[3],$b[4],$c0,$c1,$c2); - &mul_add_c($a[4],$b[3],$c0,$c1,$c2); - &mul_add_c($a[5],$b[2],$c0,$c1,$c2); - &mul_add_c($a[6],$b[1],$c0,$c1,$c2); - &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[6],$c0,$c1,$c2); - &mul_add_c($a[3],$b[5],$c0,$c1,$c2); - &mul_add_c($a[4],$b[4],$c0,$c1,$c2); - &mul_add_c($a[5],$b[3],$c0,$c1,$c2); - &mul_add_c($a[6],$b[2],$c0,$c1,$c2); - &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); - 
&mul_add_c($a[3],$b[6],$c0,$c1,$c2); - &mul_add_c($a[4],$b[5],$c0,$c1,$c2); - &mul_add_c($a[5],$b[4],$c0,$c1,$c2); - &mul_add_c($a[6],$b[3],$c0,$c1,$c2); - &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); - &mul_add_c($a[4],$b[6],$c0,$c1,$c2); - &mul_add_c($a[5],$b[5],$c0,$c1,$c2); - &mul_add_c($a[6],$b[4],$c0,$c1,$c2); - &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); - &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); - &mul_add_c($a[5],$b[6],$c0,$c1,$c2); - &mul_add_c($a[6],$b[5],$c0,$c1,$c2); - &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); - &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); - &mul_add_c($a[6],$b[6],$c0,$c1,$c2); - &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); - &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); - &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); - &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &FR($c0,$c1,$c2); - - &ld($reg_s0,&swtmp(0)); - &ld($reg_s1,&swtmp(1)); - &stack_pop(2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/sqr.pl b/lib/libcrypto/bn/asm/alpha/sqr.pl deleted file mode 100644 index a55b696906e..00000000000 --- a/lib/libcrypto/bn/asm/alpha/sqr.pl +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(3); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - 
&ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$a0,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &add($rp,2*$QWS,$rp); - &sub($count,1,$count); - &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); - &st($l0,&QWPw(-2,$rp)); &FR($l0); - &st($h0,&QWPw(-1,$rp)); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/lib/libcrypto/bn/asm/alpha/sqr_c4.pl deleted file mode 100644 index bf33f5b5037..00000000000 --- a/lib/libcrypto/bn/asm/alpha/sqr_c4.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub sqr_add_c - { - local($a,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$a,($l1)=&NR(1)); - &muh($a,$a,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c1,$t1,$c1); &FR($t1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub sqr_add_c2 - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &cmplt($l1,"zero",($lc1)=&NR(1)); - &cmplt($h1,"zero",($hc1)=&NR(1)); - &add($l1,$l1,$l1); - &add($h1,$h1,$h1); - &add($h1,$lc1,$h1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); - - &add($c1,$lc1,$c1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - } - - -sub bn_sqr_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/lib/libcrypto/bn/asm/alpha/sqr_c8.pl deleted file mode 100644 index b4afe085f1c..00000000000 --- a/lib/libcrypto/bn/asm/alpha/sqr_c8.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - 
$rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($a[4])=&NR(1),&QWPw(4,$ap)); - &ld(($a[5])=&NR(1),&QWPw(5,$ap)); - &ld(($a[6])=&NR(1),&QWPw(6,$ap)); - &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(7,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[4],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(8,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(9,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[5],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); - &st($c0,&QWPw(10,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); - &st($c0,&QWPw(11,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[6],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); - &st($c0,&QWPw(12,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); - &st($c0,&QWPw(13,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[7],$c0,$c1,$c2); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/alpha/sub.pl b/lib/libcrypto/bn/asm/alpha/sub.pl deleted file mode 100644 index d998da5c21a..00000000000 --- a/lib/libcrypto/bn/asm/alpha/sub.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - 
&mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$o0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$o1); # will we borrow? - &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($o0,&QWPw(0,$rp)); &FR($o0); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$o2); # will we borrow? - &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($o1,&QWPw(1,$rp)); &FR($o1); - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$o3); # will we borrow? - &add($b3,$t3,$cc); &FR($t3,$a3,$b3); - - &st($o2,&QWPw(2,$rp)); &FR($o2); - &sub($count,4,$count); # count-=4 - &st($o3,&QWPw(3,$rp)); &FR($o3); - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? 
- &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl index 26c2685a726..332ef3e91d6 100644 --- a/lib/libcrypto/bn/asm/bn-586.pl +++ b/lib/libcrypto/bn/asm/bn-586.pl @@ -1,6 +1,7 @@ #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],$0); @@ -24,38 +25,25 @@ sub bn_mul_add_words { local($name)=@_; - &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ebp"; - $r="edi"; - $c="esi"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - - &mov("ecx",&wparam(2)); # - &mov($a,&wparam(1)); # - - &and("ecx",0xfffffff8); # num / 8 - &mov($w,&wparam(3)); # - - &push("ecx"); # Up the stack for a tmp variable - - &jz(&label("maw_finish")); + $r="eax"; + $a="edx"; + $c="ecx"; if ($sse2) { &picmeup("eax","OPENSSL_ia32cap_P"); &bt(&DWP(0,"eax"),26); - &jnc(&label("maw_loop")); + &jnc(&label("maw_non_sse2")); - &movd("mm0",$w); # mm0 = w + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + &movd("mm0",&wparam(3)); # mm0 = w &pxor("mm1","mm1"); # mm1 = carry_in - - &set_label("maw_sse2_loop",0); + &jmp(&label("maw_sse2_entry")); + + &set_label("maw_sse2_unrolled",16); &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] &paddq("mm1","mm3"); # mm1 = carry_in + r[0] &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] @@ -112,42 +100,82 @@ sub bn_mul_add_words &psrlq("mm1",32); # mm1 = carry6 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] &movd(&DWP(28,$r,"",0),"mm1"); - &add($r,32); + &lea($r,&DWP(32,$r)); &psrlq("mm1",32); # mm1 = carry_out - &sub("ecx",8); + &sub($c,8); + &jz(&label("maw_sse2_exit")); + &set_label("maw_sse2_entry"); + &test($c,0xfffffff8); + &jnz(&label("maw_sse2_unrolled")); + + &set_label("maw_sse2_loop",4); + &movd("mm2",&DWP(0,$a)); # mm2 = a[i] + &movd("mm3",&DWP(0,$r)); # mm3 = r[i] + &pmuludq("mm2","mm0"); # a[i] *= w + &lea($a,&DWP(4,$a)); + &paddq("mm1","mm3"); # carry += r[i] + &paddq("mm1","mm2"); # carry += a[i]*w + &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low + &sub($c,1); + &psrlq("mm1",32); # carry = carry_high + &lea($r,&DWP(4,$r)); &jnz(&label("maw_sse2_loop")); - - &movd($c,"mm1"); # c = carry_out + &set_label("maw_sse2_exit"); + &movd("eax","mm1"); # c = carry_out &emms(); + &ret(); - &jmp(&label("maw_finish")); + &set_label("maw_non_sse2",16); } - &set_label("maw_loop",0); + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + &comment(""); + $Low="eax"; + $High="edx"; + $a="ebx"; + $w="ebp"; + $r="edi"; + $c="esi"; + + &xor($c,$c); # clear carry + &mov($r,&wparam(0)); # + + &mov("ecx",&wparam(2)); # + &mov($a,&wparam(1)); # + + &and("ecx",0xfffffff8); # num / 8 + &mov($w,&wparam(3)); # - &mov(&swtmp(0),"ecx"); # + &push("ecx"); # Up the stack for a tmp variable + + &jz(&label("maw_finish")); + + &set_label("maw_loop",16); for ($i=0; $i<32; $i+=4) { &comment("Round $i"); - 
&mov("eax",&DWP($i,$a,"",0)); # *a + &mov("eax",&DWP($i,$a)); # *a &mul($w); # *a * w - &add("eax",$c); # L(t)+= *r - &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r + &add("eax",$c); # L(t)+= c &adc("edx",0); # H(t)+=carry - &add("eax",$c); # L(t)+=c + &add("eax",&DWP($i,$r)); # L(t)+= *r &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); + &mov(&DWP($i,$r),"eax"); # *r= L(t); &mov($c,"edx"); # c= H(t); } &comment(""); - &mov("ecx",&swtmp(0)); # - &add($a,32); - &add($r,32); &sub("ecx",8); + &lea($a,&DWP(32,$a)); + &lea($r,&DWP(32,$r)); &jnz(&label("maw_loop")); &set_label("maw_finish",0); @@ -160,16 +188,15 @@ sub bn_mul_add_words for ($i=0; $i<7; $i++) { &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a + &mov("eax",&DWP($i*4,$a)); # *a &mul($w); # *a * w &add("eax",$c); # L(t)+=c - &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r &adc("edx",0); # H(t)+=carry - &add("eax",$c); + &add("eax",&DWP($i*4,$r)); # L(t)+= *r &adc("edx",0); # H(t)+=carry &dec("ecx") if ($i != 7-1); - &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); + &mov(&DWP($i*4,$r),"eax"); # *r= L(t); + &mov($c,"edx"); # c= H(t); &jz(&label("maw_end")) if ($i != 7-1); } &set_label("maw_end",0); @@ -184,7 +211,45 @@ sub bn_mul_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + + $r="eax"; + $a="edx"; + $c="ecx"; + + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("mw_non_sse2")); + + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + &movd("mm0",&wparam(3)); # mm0 = w + &pxor("mm1","mm1"); # mm1 = carry = 0 + + &set_label("mw_sse2_loop",16); + &movd("mm2",&DWP(0,$a)); # mm2 = a[i] + &pmuludq("mm2","mm0"); # a[i] *= w + &lea($a,&DWP(4,$a)); + &paddq("mm1","mm2"); # carry += a[i]*w + &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low + &sub($c,1); + &psrlq("mm1",32); # carry = carry_high + &lea($r,&DWP(4,$r)); + &jnz(&label("mw_sse2_loop")); + + &movd("eax","mm1"); # return carry + &emms(); + &ret(); + &set_label("mw_non_sse2",16); + } + + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); &comment(""); $Low="eax"; @@ -257,7 +322,40 @@ sub bn_sqr_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + + $r="eax"; + $a="edx"; + $c="ecx"; + + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("sqr_non_sse2")); + + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + + &set_label("sqr_sse2_loop",16); + &movd("mm0",&DWP(0,$a)); # mm0 = a[i] + &pmuludq("mm0","mm0"); # a[i] *= a[i] + &lea($a,&DWP(4,$a)); # a++ + &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i] + &sub($c,1); + &lea($r,&DWP(8,$r)); # r += 2 + &jnz(&label("sqr_sse2_loop")); + + &emms(); + &ret(); + &set_label("sqr_non_sse2",16); + } + + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); &comment(""); $r="esi"; @@ -313,12 +411,13 @@ sub bn_div_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,""); &mov("edx",&wparam(0)); # &mov("eax",&wparam(1)); # - &mov("ebx",&wparam(2)); # - &div("ebx"); - &function_end($name); + &mov("ecx",&wparam(2)); # + &div("ecx"); + &ret(); + &function_end_B($name); } sub bn_add_words diff --git a/lib/libcrypto/bn/asm/bn-alpha.pl b/lib/libcrypto/bn/asm/bn-alpha.pl deleted file mode 100644 index 302edf23767..00000000000 --- 
a/lib/libcrypto/bn/asm/bn-alpha.pl +++ /dev/null @@ -1,571 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -$d=&data(); -$d =~ s/CC/0/g; -$d =~ s/R1/1/g; -$d =~ s/R2/2/g; -$d =~ s/R3/3/g; -$d =~ s/R4/4/g; -$d =~ s/L1/5/g; -$d =~ s/L2/6/g; -$d =~ s/L3/7/g; -$d =~ s/L4/8/g; -$d =~ s/O1/22/g; -$d =~ s/O2/23/g; -$d =~ s/O3/24/g; -$d =~ s/O4/25/g; -$d =~ s/A1/20/g; -$d =~ s/A2/21/g; -$d =~ s/A3/27/g; -$d =~ s/A4/28/g; -if (0){ -} - -print $d; - -sub data - { - local($data)=<<'EOF'; - - # DEC Alpha assember - # The bn_div_words is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div_words. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$CC - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - ldq $R1,0($16) # 1 1 - .align 3 -$42: - mulq $A1,$19,$L1 # 1 2 1 ###### - ldq $A2,8($17) # 2 1 - ldq $R2,8($16) # 2 1 - umulh $A1,$19,$A1 # 1 2 ###### - ldq $A3,16($17) # 3 1 - ldq $R3,16($16) # 3 1 - mulq $A2,$19,$L2 # 2 2 1 ###### - ldq $A4,24($17) # 4 1 - addq $R1,$L1,$R1 # 1 2 2 - ldq $R4,24($16) # 4 1 - umulh $A2,$19,$A2 # 2 2 ###### - cmpult $R1,$L1,$O1 # 1 2 3 1 - addq $A1,$O1,$A1 # 1 3 1 - addq $R1,$CC,$R1 # 1 2 3 1 - mulq $A3,$19,$L3 # 3 2 1 ###### - cmpult $R1,$CC,$CC # 1 2 3 2 - addq $R2,$L2,$R2 # 2 2 2 - addq $A1,$CC,$CC # 1 3 2 - cmpult $R2,$L2,$O2 # 2 2 3 1 - addq $A2,$O2,$A2 # 2 3 1 - umulh $A3,$19,$A3 # 3 2 ###### - addq $R2,$CC,$R2 # 2 2 3 1 - cmpult $R2,$CC,$CC # 2 2 3 2 - subq $18,4,$18 - mulq $A4,$19,$L4 # 4 2 1 ###### - addq $A2,$CC,$CC # 2 3 2 - addq $R3,$L3,$R3 # 3 2 2 - addq $16,32,$16 - cmpult $R3,$L3,$O3 # 3 2 3 1 - stq $R1,-32($16) # 1 2 4 - umulh $A4,$19,$A4 # 4 2 ###### - addq $A3,$O3,$A3 # 3 3 1 - addq $R3,$CC,$R3 # 3 2 3 1 - stq $R2,-24($16) # 2 2 4 - cmpult $R3,$CC,$CC # 3 2 3 2 - stq $R3,-16($16) # 3 2 4 - addq $R4,$L4,$R4 # 4 2 2 - addq $A3,$CC,$CC # 3 3 2 - cmpult $R4,$L4,$O4 # 4 2 3 1 - addq $17,32,$17 - addq $A4,$O4,$A4 # 4 3 1 - addq $R4,$CC,$R4 # 4 2 3 1 - cmpult $R4,$CC,$CC # 4 2 3 2 - stq $R4,-8($16) # 4 2 4 - addq $A4,$CC,$CC # 4 3 2 - blt $18,$43 - - ldq $A1,0($17) # 1 1 - ldq $R1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $A1,0($17) # 4 1 - ldq $R1,0($16) # 4 1 - mulq $A1,$19,$L1 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $A1,$19,$A1 # 4 2 - addq $R1,$L1,$R1 # 4 2 2 - cmpult $R1,$L1,$O1 # 4 2 3 1 - addq $A1,$O1,$A1 # 4 3 1 - addq $R1,$CC,$R1 # 4 2 3 1 - cmpult $R1,$CC,$CC # 4 2 3 2 - addq $A1,$CC,$CC # 4 3 2 - stq $R1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$CC - blt $18,$143 # if we are 
-1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - .align 3 -$142: - - mulq $A1,$19,$L1 # 1 2 1 ##### - ldq $A2,8($17) # 2 1 - ldq $A3,16($17) # 3 1 - umulh $A1,$19,$A1 # 1 2 ##### - ldq $A4,24($17) # 4 1 - mulq $A2,$19,$L2 # 2 2 1 ##### - addq $L1,$CC,$L1 # 1 2 3 1 - subq $18,4,$18 - cmpult $L1,$CC,$CC # 1 2 3 2 - umulh $A2,$19,$A2 # 2 2 ##### - addq $A1,$CC,$CC # 1 3 2 - addq $17,32,$17 - addq $L2,$CC,$L2 # 2 2 3 1 - mulq $A3,$19,$L3 # 3 2 1 ##### - cmpult $L2,$CC,$CC # 2 2 3 2 - addq $A2,$CC,$CC # 2 3 2 - addq $16,32,$16 - umulh $A3,$19,$A3 # 3 2 ##### - stq $L1,-32($16) # 1 2 4 - mulq $A4,$19,$L4 # 4 2 1 ##### - addq $L3,$CC,$L3 # 3 2 3 1 - stq $L2,-24($16) # 2 2 4 - cmpult $L3,$CC,$CC # 3 2 3 2 - umulh $A4,$19,$A4 # 4 2 ##### - addq $A3,$CC,$CC # 3 3 2 - stq $L3,-16($16) # 3 2 4 - addq $L4,$CC,$L4 # 4 2 3 1 - cmpult $L4,$CC,$CC # 4 2 3 2 - - addq $A4,$CC,$CC # 4 3 2 - - stq $L4,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $A1,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $A1,0($17) # 4 1 - mulq $A1,$19,$L1 # 4 2 1 - subq $18,1,$18 - umulh $A1,$19,$A1 # 4 2 - addq $L1,$CC,$L1 # 4 2 3 1 - addq $16,8,$16 - cmpult $L1,$CC,$CC # 4 2 3 2 - addq $17,8,$17 - addq $A1,$CC,$CC # 4 3 2 - stq $L1,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - .align 3 -$542: - mulq $A1,$A1,$L1 ###### - ldq $A2,8($17) # 1 1 - subq $18,4 - umulh $A1,$A1,$R1 ###### - ldq $A3,16($17) # 1 1 - mulq $A2,$A2,$L2 ###### - ldq $A4,24($17) # 1 1 - stq $L1,0($16) # r[0] - umulh $A2,$A2,$R2 ###### - stq $R1,8($16) # r[1] - mulq $A3,$A3,$L3 ###### - stq $L2,16($16) # r[0] - umulh $A3,$A3,$R3 ###### - stq $R2,24($16) # r[1] - mulq $A4,$A4,$L4 ###### - stq $L3,32($16) # r[0] - umulh $A4,$A4,$R4 ###### - stq $R3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $L4,-16($16) # r[0] - stq $R4,-8($16) # r[1] - - blt $18,$543 - ldq $A1,0($17) # 1 1 - br $542 - -$442: - ldq $A1,0($17) # a[0] - mulq $A1,$A1,$L1 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $A1,$A1,$R1 # a[0]*w high part r3 - stq $L1,-16($16) # r[0] - stq $R1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$CC # carry = 0 - blt $19,$900 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - .align 3 -$901: - addq $R1,$L1,$R1 # r=a+b; - ldq $L2,8($17) # a[1] - cmpult $R1,$L1,$O1 # did we overflow? - ldq $R2,8($18) # b[1] - addq $R1,$CC,$R1 # c+= overflow - ldq $L3,16($17) # a[2] - cmpult $R1,$CC,$CC # overflow? - ldq $R3,16($18) # b[2] - addq $CC,$O1,$CC - ldq $L4,24($17) # a[3] - addq $R2,$L2,$R2 # r=a+b; - ldq $R4,24($18) # b[3] - cmpult $R2,$L2,$O2 # did we overflow? - addq $R3,$L3,$R3 # r=a+b; - addq $R2,$CC,$R2 # c+= overflow - cmpult $R3,$L3,$O3 # did we overflow? - cmpult $R2,$CC,$CC # overflow? - addq $R4,$L4,$R4 # r=a+b; - addq $CC,$O2,$CC - cmpult $R4,$L4,$O4 # did we overflow? - addq $R3,$CC,$R3 # c+= overflow - stq $R1,0($16) # r[0]=c - cmpult $R3,$CC,$CC # overflow? 
- stq $R2,8($16) # r[1]=c - addq $CC,$O3,$CC - stq $R3,16($16) # r[2]=c - addq $R4,$CC,$R4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $R4,$CC,$CC # overflow? - addq $17,32,$17 # a++ - addq $CC,$O4,$CC - stq $R4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - addq $R1,$L1,$R1 # r=a+b; - subq $19,1,$19 # loop-- - addq $R1,$CC,$R1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $R1,$L1,$O1 # did we overflow? - cmpult $R1,$CC,$CC # overflow? - addq $18,8,$18 # b++ - stq $R1,0($16) # r[0]=c - addq $CC,$O1,$CC - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$CC # carry = 0 - br $800 - blt $19,$800 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - .align 3 -$801: - addq $R1,$L1,$R1 # r=a+b; - ldq $L2,8($17) # a[1] - cmpult $R1,$L1,$O1 # did we overflow? - ldq $R2,8($18) # b[1] - addq $R1,$CC,$R1 # c+= overflow - ldq $L3,16($17) # a[2] - cmpult $R1,$CC,$CC # overflow? - ldq $R3,16($18) # b[2] - addq $CC,$O1,$CC - ldq $L4,24($17) # a[3] - addq $R2,$L2,$R2 # r=a+b; - ldq $R4,24($18) # b[3] - cmpult $R2,$L2,$O2 # did we overflow? - addq $R3,$L3,$R3 # r=a+b; - addq $R2,$CC,$R2 # c+= overflow - cmpult $R3,$L3,$O3 # did we overflow? - cmpult $R2,$CC,$CC # overflow? - addq $R4,$L4,$R4 # r=a+b; - addq $CC,$O2,$CC - cmpult $R4,$L4,$O4 # did we overflow? - addq $R3,$CC,$R3 # c+= overflow - stq $R1,0($16) # r[0]=c - cmpult $R3,$CC,$CC # overflow? - stq $R2,8($16) # r[1]=c - addq $CC,$O3,$CC - stq $R3,16($16) # r[2]=c - addq $R4,$CC,$R4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $R4,$CC,$CC # overflow? - addq $17,32,$17 # a++ - addq $CC,$O4,$CC - stq $R4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$800 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - br $801 - .align 4 -$845: - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - cmpult $L1,$R1,$O1 # will we borrow? - subq $L1,$R1,$R1 # r=a-b; - subq $19,1,$19 # loop-- - cmpult $R1,$CC,$O2 # will we borrow? - subq $R1,$CC,$R1 # c+= overflow - addq $17,8,$17 # a++ - addq $18,8,$18 # b++ - stq $R1,0($16) # r[0]=c - addq $O2,$O1,$CC - addq $16,8,$16 # r++ - - bgt $19,$845 - ret $31,($26),1 # else exit - -$800: - addq $19,4,$19 - bgt $19,$845 # goto tail code - ret $31,($26),1 # else exit - .end bn_sub_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. 
- # -.text - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words: - ldgp $29,0($27) -bn_div_words..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words -EOF - return($data); - } - diff --git a/lib/libcrypto/bn/asm/ca.pl b/lib/libcrypto/bn/asm/ca.pl deleted file mode 100644 index c1ce67a6b4d..00000000000 --- a/lib/libcrypto/bn/asm/ca.pl +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. 
-# - -push(@INC,"perlasm","../../perlasm"); -require "alpha.pl"; -require "alpha/mul_add.pl"; -require "alpha/mul.pl"; -require "alpha/sqr.pl"; -require "alpha/add.pl"; -require "alpha/sub.pl"; -require "alpha/mul_c8.pl"; -require "alpha/mul_c4.pl"; -require "alpha/sqr_c4.pl"; -require "alpha/sqr_c8.pl"; -require "alpha/div.pl"; - -&asm_init($ARGV[0],$0); - -&bn_mul_words("bn_mul_words"); -&bn_sqr_words("bn_sqr_words"); -&bn_mul_add_words("bn_mul_add_words"); -&bn_add_words("bn_add_words"); -&bn_sub_words("bn_sub_words"); -&bn_div_words("bn_div_words"); -&bn_mul_comba8("bn_mul_comba8"); -&bn_mul_comba4("bn_mul_comba4"); -&bn_sqr_comba4("bn_sqr_comba4"); -&bn_sqr_comba8("bn_sqr_comba8"); - -&asm_finish(); - diff --git a/lib/libcrypto/bn/asm/co-586.pl b/lib/libcrypto/bn/asm/co-586.pl index 5d962cb957d..57101a6bd77 100644 --- a/lib/libcrypto/bn/asm/co-586.pl +++ b/lib/libcrypto/bn/asm/co-586.pl @@ -1,6 +1,7 @@ #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],$0); diff --git a/lib/libcrypto/bn/asm/co-alpha.pl b/lib/libcrypto/bn/asm/co-alpha.pl deleted file mode 100644 index 67dad3e3d5f..00000000000 --- a/lib/libcrypto/bn/asm/co-alpha.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -push(@INC,"perlasm","../../perlasm"); -require "alpha.pl"; - -&asm_init($ARGV[0],$0); - -print &bn_sub_words("bn_sub_words"); - -&asm_finish(); - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - $cc="r0"; - $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13"; - $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14"; - $a2="r3"; $b2="r7"; $r2="r11"; - $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15"; - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$a0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); # add the borrows - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$a1); # will we borrow? - &add($b1,$t1,$cc); # add the borrows - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($a0,&QWPw(0,$rp)); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$a2); # will we borrow? - &add($b2,$tmp,$cc); # add the borrows - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($a1,&QWPw(1,$rp)); # save - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$a3); # will we borrow? 
- &add($b3,$t3,$cc); # add the borrows - - &st($a2,&QWPw(2,$rp)); # save - &sub($count,4,$count); # count-=4 - &st($a3,&QWPw(3,$rp)); # save - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - } - diff --git a/lib/libcrypto/bn/asm/mips1.s b/lib/libcrypto/bn/asm/mips1.s deleted file mode 100644 index 44fa1254c76..00000000000 --- a/lib/libcrypto/bn/asm/mips1.s +++ /dev/null @@ -1,539 +0,0 @@ -/* This assember is for R2000/R3000 machines, or higher ones that do - * no want to do any 64 bit arithmatic. - * Make sure that the SSLeay bignum library is compiled with - * THIRTY_TWO_BIT set. - * This must either be compiled with the system CC, or, if you use GNU gas, - * cc -E mips1.s|gas -o mips1.o - */ - .set reorder - .set noat - -#define R1 $1 -#define CC $2 -#define R2 $3 -#define R3 $8 -#define R4 $9 -#define L1 $10 -#define L2 $11 -#define L3 $12 -#define L4 $13 -#define H1 $14 -#define H2 $15 -#define H3 $24 -#define H4 $25 - -#define P1 $4 -#define P2 $5 -#define P3 $6 -#define P4 $7 - - .align 2 - .ent bn_mul_add_words - .globl bn_mul_add_words -.text -bn_mul_add_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - #blt P3,4,$lab34 - - subu R1,P3,4 - move CC,$0 - bltz R1,$lab34 -$lab2: - lw R1,0(P1) - lw L1,0(P2) - lw R2,4(P1) - lw L2,4(P2) - lw R3,8(P1) - lw L3,8(P2) - lw R4,12(P1) - lw L4,12(P2) - multu L1,P4 - addu R1,R1,CC - mflo L1 - sltu CC,R1,CC - addu R1,R1,L1 - mfhi H1 - sltu L1,R1,L1 - sw R1,0(P1) - addu CC,CC,L1 - multu L2,P4 - addu CC,H1,CC - mflo L2 - addu R2,R2,CC - sltu CC,R2,CC - mfhi H2 - addu R2,R2,L2 - addu P2,P2,16 - sltu L2,R2,L2 - sw R2,4(P1) - addu CC,CC,L2 - multu L3,P4 - addu CC,H2,CC - mflo L3 - addu R3,R3,CC - sltu CC,R3,CC - mfhi H3 - addu R3,R3,L3 - addu P1,P1,16 - sltu L3,R3,L3 - sw R3,-8(P1) - addu CC,CC,L3 - multu L4,P4 - addu CC,H3,CC - mflo L4 - addu R4,R4,CC - sltu CC,R4,CC - mfhi H4 - addu R4,R4,L4 - subu P3,P3,4 - sltu L4,R4,L4 - addu CC,CC,L4 - addu CC,H4,CC - - subu R1,P3,4 - sw R4,-4(P1) # delay slot - bgez R1,$lab2 - - bleu P3,0,$lab3 - .align 2 -$lab33: - lw L1,0(P2) - lw R1,0(P1) - multu L1,P4 - addu R1,R1,CC - sltu CC,R1,CC - addu P1,P1,4 - mflo L1 - mfhi H1 - addu R1,R1,L1 - addu P2,P2,4 - sltu L1,R1,L1 - subu P3,P3,1 - addu CC,CC,L1 - sw R1,-4(P1) - addu CC,H1,CC - bgtz P3,$lab33 - j $31 - .align 2 -$lab3: - j $31 - .align 2 -$lab34: - bgt P3,0,$lab33 - j $31 - .end bn_mul_add_words - - .align 2 - # Program Unit: bn_mul_words - .ent bn_mul_words - .globl bn_mul_words -.text -bn_mul_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P3,P3,4 - move CC,$0 - bltz P3,$lab45 -$lab44: - lw L1,0(P2) - lw 
L2,4(P2) - lw L3,8(P2) - lw L4,12(P2) - multu L1,P4 - subu P3,P3,4 - mflo L1 - mfhi H1 - addu L1,L1,CC - multu L2,P4 - sltu CC,L1,CC - sw L1,0(P1) - addu CC,H1,CC - mflo L2 - mfhi H2 - addu L2,L2,CC - multu L3,P4 - sltu CC,L2,CC - sw L2,4(P1) - addu CC,H2,CC - mflo L3 - mfhi H3 - addu L3,L3,CC - multu L4,P4 - sltu CC,L3,CC - sw L3,8(P1) - addu CC,H3,CC - mflo L4 - mfhi H4 - addu L4,L4,CC - addu P1,P1,16 - sltu CC,L4,CC - addu P2,P2,16 - addu CC,H4,CC - sw L4,-4(P1) - - bgez P3,$lab44 - b $lab45 -$lab46: - lw L1,0(P2) - addu P1,P1,4 - multu L1,P4 - addu P2,P2,4 - mflo L1 - mfhi H1 - addu L1,L1,CC - subu P3,P3,1 - sltu CC,L1,CC - sw L1,-4(P1) - addu CC,H1,CC - bgtz P3,$lab46 - j $31 -$lab45: - addu P3,P3,4 - bgtz P3,$lab46 - j $31 - .align 2 - .end bn_mul_words - - # Program Unit: bn_sqr_words - .ent bn_sqr_words - .globl bn_sqr_words -.text -bn_sqr_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P3,P3,4 - bltz P3,$lab55 -$lab54: - lw L1,0(P2) - lw L2,4(P2) - lw L3,8(P2) - lw L4,12(P2) - - multu L1,L1 - subu P3,P3,4 - mflo L1 - mfhi H1 - sw L1,0(P1) - sw H1,4(P1) - - multu L2,L2 - addu P1,P1,32 - mflo L2 - mfhi H2 - sw L2,-24(P1) - sw H2,-20(P1) - - multu L3,L3 - addu P2,P2,16 - mflo L3 - mfhi H3 - sw L3,-16(P1) - sw H3,-12(P1) - - multu L4,L4 - - mflo L4 - mfhi H4 - sw L4,-8(P1) - sw H4,-4(P1) - - bgtz P3,$lab54 - b $lab55 -$lab56: - lw L1,0(P2) - addu P1,P1,8 - multu L1,L1 - addu P2,P2,4 - subu P3,P3,1 - mflo L1 - mfhi H1 - sw L1,-8(P1) - sw H1,-4(P1) - - bgtz P3,$lab56 - j $31 -$lab55: - addu P3,P3,4 - bgtz P3,$lab56 - j $31 - .align 2 - .end bn_sqr_words - - # Program Unit: bn_add_words - .ent bn_add_words - .globl bn_add_words -.text -bn_add_words: # 0x590 - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P4,P4,4 - move CC,$0 - bltz P4,$lab65 -$lab64: - lw L1,0(P2) - lw R1,0(P3) - lw L2,4(P2) - lw R2,4(P3) - - addu L1,L1,CC - lw L3,8(P2) - sltu CC,L1,CC - addu L1,L1,R1 - sltu R1,L1,R1 - lw R3,8(P3) - addu CC,CC,R1 - lw L4,12(P2) - - addu L2,L2,CC - lw R4,12(P3) - sltu CC,L2,CC - addu L2,L2,R2 - sltu R2,L2,R2 - sw L1,0(P1) - addu CC,CC,R2 - addu P1,P1,16 - addu L3,L3,CC - sw L2,-12(P1) - - sltu CC,L3,CC - addu L3,L3,R3 - sltu R3,L3,R3 - addu P2,P2,16 - addu CC,CC,R3 - - addu L4,L4,CC - addu P3,P3,16 - sltu CC,L4,CC - addu L4,L4,R4 - subu P4,P4,4 - sltu R4,L4,R4 - sw L3,-8(P1) - addu CC,CC,R4 - sw L4,-4(P1) - - bgtz P4,$lab64 - b $lab65 -$lab66: - lw L1,0(P2) - lw R1,0(P3) - addu L1,L1,CC - addu P1,P1,4 - sltu CC,L1,CC - addu P2,P2,4 - addu P3,P3,4 - addu L1,L1,R1 - subu P4,P4,1 - sltu R1,L1,R1 - sw L1,-4(P1) - addu CC,CC,R1 - - bgtz P4,$lab66 - j $31 -$lab65: - addu P4,P4,4 - bgtz P4,$lab66 - j $31 - .end bn_add_words - - # Program Unit: bn_div64 - .set at - .set reorder - .text - .align 2 - .globl bn_div64 - # 321 { - .ent bn_div64 2 -bn_div64: - subu $sp, 64 - sw $31, 56($sp) - sw $16, 48($sp) - .mask 0x80010000, -56 - .frame $sp, 64, $31 - move $9, $4 - move $12, $5 - move $16, $6 - # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t; - move $31, $0 - # 323 int i,count=2; - li $13, 2 - # 324 - # 325 if (d == 0) return(BN_MASK2); - bne $16, 0, $80 - li $2, -1 - b $93 -$80: - # 326 - # 327 i=BN_num_bits_word(d); - move $4, $16 - sw $31, 16($sp) - sw $9, 24($sp) - sw $12, 32($sp) - sw $13, 40($sp) - .livereg 0x800ff0e,0xfff - jal BN_num_bits_word - li $4, 32 - lw $31, 16($sp) - lw $9, 24($sp) - lw $12, 32($sp) - lw $13, 40($sp) - move $3, $2 - # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) - beq $2, $4, $81 - li $14, 1 - sll $15, $14, $2 - bleu $9, 
$15, $81 - # 329 { - # 330 #if !defined(NO_STDIO) && !defined(WIN16) - # 331 fprintf(stderr,"Division would overflow (%d)\n",i); - # 332 #endif - # 333 abort(); - sw $3, 8($sp) - sw $9, 24($sp) - sw $12, 32($sp) - sw $13, 40($sp) - sw $31, 26($sp) - .livereg 0xff0e,0xfff - jal abort - lw $3, 8($sp) - li $4, 32 - lw $9, 24($sp) - lw $12, 32($sp) - lw $13, 40($sp) - lw $31, 26($sp) - # 334 } -$81: - # 335 i=BN_BITS2-i; - subu $3, $4, $3 - # 336 if (h >= d) h-=d; - bltu $9, $16, $82 - subu $9, $9, $16 -$82: - # 337 - # 338 if (i) - beq $3, 0, $83 - # 339 { - # 340 d<<=i; - sll $16, $16, $3 - # 341 h=(h<<i)|(l>>(BN_BITS2-i)); - sll $24, $9, $3 - subu $25, $4, $3 - srl $14, $12, $25 - or $9, $24, $14 - # 342 l<<=i; - sll $12, $12, $3 - # 343 } -$83: - # 344 dh=(d&BN_MASK2h)>>BN_BITS4; - # 345 dl=(d&BN_MASK2l); - and $8, $16, -65536 - srl $8, $8, 16 - and $10, $16, 65535 - li $6, -65536 -$84: - # 346 for (;;) - # 347 { - # 348 if ((h>>BN_BITS4) == dh) - srl $15, $9, 16 - bne $8, $15, $85 - # 349 q=BN_MASK2l; - li $5, 65535 - b $86 -$85: - # 350 else - # 351 q=h/dh; - divu $5, $9, $8 -$86: - # 352 - # 353 for (;;) - # 354 { - # 355 t=(h-q*dh); - mul $4, $5, $8 - subu $2, $9, $4 - move $3, $2 - # 356 if ((t&BN_MASK2h) || - # 357 ((dl*q) <= ( - # 358 (t<<BN_BITS4)+ - # 359 ((l&BN_MASK2h)>>BN_BITS4)))) - and $25, $2, $6 - bne $25, $0, $87 - mul $24, $10, $5 - sll $14, $3, 16 - and $15, $12, $6 - srl $25, $15, 16 - addu $15, $14, $25 - bgtu $24, $15, $88 -$87: - # 360 break; - mul $3, $10, $5 - b $89 -$88: - # 361 q--; - addu $5, $5, -1 - # 362 } - b $86 -$89: - # 363 th=q*dh; - # 364 tl=q*dl; - # 365 t=(tl>>BN_BITS4); - # 366 tl=(tl<<BN_BITS4)&BN_MASK2h; - sll $14, $3, 16 - and $2, $14, $6 - move $11, $2 - # 367 th+=t; - srl $25, $3, 16 - addu $7, $4, $25 - # 368 - # 369 if (l < tl) th++; - bgeu $12, $2, $90 - addu $7, $7, 1 -$90: - # 370 l-=tl; - subu $12, $12, $11 - # 371 if (h < th) - bgeu $9, $7, $91 - # 372 { - # 373 h+=d; - addu $9, $9, $16 - # 374 q--; - addu $5, $5, -1 - # 375 } -$91: - # 376 h-=th; - subu $9, $9, $7 - # 377 - # 378 if (--count == 0) break; - addu $13, $13, -1 - beq $13, 0, $92 - # 379 - # 380 ret=q<<BN_BITS4; - sll $31, $5, 16 - # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; - sll $24, $9, 16 - srl $15, $12, 16 - or $9, $24, $15 - # 382 l=(l&BN_MASK2l)<<BN_BITS4; - and $12, $12, 65535 - sll $12, $12, 16 - # 383 } - b $84 -$92: - # 384 ret|=q; - or $31, $31, $5 - # 385 return(ret); - move $2, $31 -$93: - lw $16, 48($sp) - lw $31, 56($sp) - addu $sp, 64 - j $31 - .end bn_div64 - diff --git a/lib/libcrypto/bn/asm/mo-586.pl b/lib/libcrypto/bn/asm/mo-586.pl deleted file mode 100644 index 0982293094d..00000000000 --- a/lib/libcrypto/bn/asm/mo-586.pl +++ /dev/null @@ -1,603 +0,0 @@ -#!/usr/bin/env perl - -# This is crypto/bn/asm/x86-mont.pl (with asciz from crypto/perlasm/x86asm.pl) -# from OpenSSL 0.9.9-dev - -sub ::asciz -{ my @str=unpack("C*",shift); - push @str,0; - while ($#str>15) { - &data_byte(@str[0..15]); - foreach (0..15) { shift @str; } - } - &data_byte(@str) if (@str); -} - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# This is a "teaser" code, as it can be improved in several ways... 
-# First of all non-SSE2 path should be implemented (yes, for now it -# performs Montgomery multiplication/convolution only on SSE2-capable -# CPUs such as P4, others fall down to original code). Then inner loop -# can be unrolled and modulo-scheduled to improve ILP and possibly -# moved to 128-bit XMM register bank (though it would require input -# rearrangement and/or increase bus bandwidth utilization). Dedicated -# squaring procedure should give further performance improvement... -# Yet, for being draft, the code improves rsa512 *sign* benchmark by -# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-) - -# December 2006 -# -# Modulo-scheduling SSE2 loops results in further 15-20% improvement. -# Integer-only code [being equipped with dedicated squaring procedure] -# gives ~40% on rsa512 sign benchmark... - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -&function_begin("bn_mul_mont"); - -$i="edx"; -$j="ecx"; -$ap="esi"; $tp="esi"; # overlapping variables!!! -$rp="edi"; $bp="edi"; # overlapping variables!!! -$np="ebp"; -$num="ebx"; - -$_num=&DWP(4*0,"esp"); # stack top layout -$_rp=&DWP(4*1,"esp"); -$_ap=&DWP(4*2,"esp"); -$_bp=&DWP(4*3,"esp"); -$_np=&DWP(4*4,"esp"); -$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp"); -$_sp=&DWP(4*6,"esp"); -$_bpend=&DWP(4*7,"esp"); -$frame=32; # size of above frame rounded up to 16n - - &xor ("eax","eax"); - &mov ("edi",&wparam(5)); # int num - &cmp ("edi",4); - &jl (&label("just_leave")); - - &lea ("esi",&wparam(0)); # put aside pointer to argument block - &lea ("edx",&wparam(1)); # load ap - &mov ("ebp","esp"); # saved stack pointer! - &add ("edi",2); # extra two words on top of tp - &neg ("edi"); - &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2)) - &neg ("edi"); - - # minimize cache contention by arraning 2K window between stack - # pointer and ap argument [np is also position sensitive vector, - # but it's assumed to be near ap, as it's allocated at ~same - # time]. - &mov ("eax","esp"); - &sub ("eax","edx"); - &and ("eax",2047); - &sub ("esp","eax"); # this aligns sp and ap modulo 2048 - - &xor ("edx","esp"); - &and ("edx",2048); - &xor ("edx",2048); - &sub ("esp","edx"); # this splits them apart modulo 4096 - - &and ("esp",-64); # align to cache line - - ################################# load argument block... - &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp - &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap - &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp - &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np - &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0 - #&mov ("edi",&DWP(5*4,"esi"));# int num - - &mov ("esi",&DWP(0,"esi")); # pull n0[0] - &mov ($_rp,"eax"); # ... save a copy of argument block - &mov ($_ap,"ebx"); - &mov ($_bp,"ecx"); - &mov ($_np,"edx"); - &mov ($_n0,"esi"); - &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling - #&mov ($_num,$num); # redundant as $num is not reused - &mov ($_sp,"ebp"); # saved stack pointer! 
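For reference, the arithmetic that bn_mul_mont implements on both the SSE2 and the integer path is ordinary word-level Montgomery multiplication: rp = ap * bp * R^-1 mod np with R = 2^(32*num), accumulated in a temporary vector tp[] of num+2 words and finished with a conditional subtraction, and the prologue above bails out for num < 4. The portable C sketch below is illustrative only; it is neither the deleted assembly nor OpenSSL's reference code, and the 32-bit BN_ULONG, the function name and the caller-supplied tp buffer are assumptions made for the example.

#include <stdint.h>

typedef uint32_t BN_ULONG;   /* assumption: 32-bit words, as in the i386 code */

/*
 * rp = ap * bp * R^-1 mod np, R = 2^(32*num).
 * Preconditions: num >= 1, ap,bp < np, n0 = -np[0]^-1 mod 2^32,
 * tp points to num+2 scratch words.
 */
static void mont_mul_sketch(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                            const BN_ULONG *np, BN_ULONG n0, int num, BN_ULONG *tp)
{
    int i, j;
    uint64_t c, d;
    BN_ULONG m, borrow;

    for (j = 0; j < num + 2; j++)
        tp[j] = 0;

    for (i = 0; i < num; i++) {
        /* tp += ap * bp[i] */
        c = 0;
        for (j = 0; j < num; j++) {
            c += (uint64_t)ap[j] * bp[i] + tp[j];
            tp[j] = (BN_ULONG)c;
            c >>= 32;
        }
        c += tp[num];
        tp[num]     = (BN_ULONG)c;
        tp[num + 1] = (BN_ULONG)(c >> 32);

        /* m makes tp divisible by 2^32; add m*np and shift tp down one word */
        m = (BN_ULONG)((uint64_t)tp[0] * n0);
        c = ((uint64_t)m * np[0] + tp[0]) >> 32;
        for (j = 1; j < num; j++) {
            c += (uint64_t)m * np[j] + tp[j];
            tp[j - 1] = (BN_ULONG)c;
            c >>= 32;
        }
        c += tp[num];
        tp[num - 1] = (BN_ULONG)c;
        tp[num]     = tp[num + 1] + (BN_ULONG)(c >> 32);   /* 0 or 1 */
    }

    /* conditional subtraction: result is tp - np unless that underflows */
    borrow = 0;
    for (j = 0; j < num; j++) {
        d = (uint64_t)tp[j] - np[j] - borrow;
        rp[j] = (BN_ULONG)d;
        borrow = (BN_ULONG)((d >> 32) & 1);
    }
    if (tp[num] < borrow)               /* tp < np: keep tp */
        for (j = 0; j < num; j++)
            rp[j] = tp[j];
}

The SSE2 path below keeps the two carries in 64-bit MMX registers (paddq/psrlq) instead of an add/adc chain, but it computes the same tp[] recurrence.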
- -if($sse2) { -$acc0="mm0"; # mmx register bank layout -$acc1="mm1"; -$car0="mm2"; -$car1="mm3"; -$mul0="mm4"; -$mul1="mm5"; -$temp="mm6"; -$mask="mm7"; - - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"eax"),26); - &jnc (&label("non_sse2")); - - &mov ("eax",-1); - &movd ($mask,"eax"); # mask 32 lower bits - - &mov ($ap,$_ap); # load input pointers - &mov ($bp,$_bp); - &mov ($np,$_np); - - &xor ($i,$i); # i=0 - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp)); # bp[0] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($car1,&DWP(0,$np)); # np[0] - - &pmuludq($mul1,$mul0); # ap[0]*bp[0] - &movq ($car0,$mul1); - &movq ($acc0,$mul1); # I wish movd worked for - &pand ($acc0,$mask); # inter-register transfers - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0 - &paddq ($car1,$acc0); - - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &inc ($j); # j++ -&set_label("1st",16); - &pmuludq($acc0,$mul0); # ap[j]*bp[0] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[0]; - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]= - &psrlq ($car1,32); - - &lea ($j,&DWP(1,$j)); - &cmp ($j,$num); - &jl (&label("1st")); - - &pmuludq($acc0,$mul0); # ap[num-1]*bp[0] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[0]; - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &paddq ($car1,$car0); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &inc ($i); # i++ -&set_label("outer"); - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($temp,&DWP($frame,"esp")); # tp[0] - &movd ($car1,&DWP(0,$np)); # np[0] - &pmuludq($mul1,$mul0); # ap[0]*bp[i] - - &paddq ($mul1,$temp); # +=tp[0] - &movq ($acc0,$mul1); - &movq ($car0,$mul1); - &pand ($acc0,$mask); - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); - &paddq ($car1,$acc0); - - &movd ($temp,&DWP($frame+4,"esp")); # tp[1] - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[1] - - &inc ($j); # j++ - &dec ($num); -&set_label("inner"); - &pmuludq($acc0,$mul0); # ap[j]*bp[i] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1] - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j] - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]= - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[j+1] - - &dec ($num); - &lea ($j,&DWP(1,$j)); # j++ - &jnz (&label("inner")); - - &mov ($num,$j); - &pmuludq($acc0,$mul0); # ap[num-1]*bp[i] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1] - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - &psrlq ($car0,32); - &psrlq ($car1,32); - - &movd 
($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num] - &paddq ($car1,$car0); - &paddq ($car1,$temp); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &lea ($i,&DWP(1,$i)); # i++ - &cmp ($i,$num); - &jle (&label("outer")); - - &emms (); # done with mmx bank - &jmp (&label("common_tail")); - -&set_label("non_sse2",16); -} - -if (0) { - &mov ("esp",$_sp); - &xor ("eax","eax"); # signal "not fast enough [yet]" - &jmp (&label("just_leave")); - # While the below code provides competitive performance for - # all key lengthes on modern Intel cores, it's still more - # than 10% slower for 4096-bit key elsewhere:-( "Competitive" - # means compared to the original integer-only assembler. - # 512-bit RSA sign is better by ~40%, but that's about all - # one can say about all CPUs... -} else { -$inp="esi"; # integer path uses these registers differently -$word="edi"; -$carry="ebp"; - - &mov ($inp,$_ap); - &lea ($carry,&DWP(1,$num)); - &mov ($word,$_bp); - &xor ($j,$j); # j=0 - &mov ("edx",$inp); - &and ($carry,1); # see if num is even - &sub ("edx",$word); # see if ap==bp - &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num] - &or ($carry,"edx"); - &mov ($word,&DWP(0,$word)); # bp[0] - &jz (&label("bn_sqr_mont")); - &mov ($_bpend,"eax"); - &mov ("eax",&DWP(0,$inp)); - &xor ("edx","edx"); - -&set_label("mull",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[0] - &add ($carry,"eax"); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("mull")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[0] - &mov ($word,$_n0); - &add ("eax",$carry); - &mov ($inp,$_np); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]= - &xor ($j,$j); - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mov ("eax",&DWP(0,$inp)); # np[0] - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &inc ($j); - - &jmp (&label("2ndmadd")); - -&set_label("1stmadd",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[i] - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("1stmadd")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[i] - &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &add ($carry,"eax"); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &xor ($j,$j); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]= - &adc ($j,0); - &mov ("eax",&DWP(0,$inp)); # np[0] - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &mov ($j,1); - -&set_label("2ndmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]= - 
&jl (&label("2ndmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &xor ("eax","eax"); - &mov ($j,$_bp); # &bp[i] - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &lea ($j,&DWP(4,$j)); - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$_bpend); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(0,$j)); # bp[i+1] - &mov ($inp,$_ap); - &mov ($_bp,$j); # &bp[++i] - &xor ($j,$j); - &xor ("edx","edx"); - &mov ("eax",&DWP(0,$inp)); - &jmp (&label("1stmadd")); - -&set_label("bn_sqr_mont",16); -$sbit=$num; - &mov ($_num,$num); - &mov ($_bp,$j); # i=0 - - &mov ("eax",$word); # ap[0] - &mul ($word); # ap[0]*ap[0] - &mov (&DWP($frame,"esp"),"eax"); # tp[0]= - &mov ($sbit,"edx"); - &shr ("edx",1); - &and ($sbit,1); - &inc ($j); -&set_label("sqr",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[0] - &add ("eax",$carry); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &shr ("eax",31); - &cmp ($j,$_num); - &mov ($sbit,"eax"); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("sqr")); - - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1] - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*ap[0] - &add ("eax",$carry); - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - &shr ("eax",31); - &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]= - - &lea ($carry,&DWP(0,"eax","edx",2)); - &mov ("eax",&DWP(0,$inp)); # np[0] - &shr ("edx",31); - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]= - &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ($num,$j); - &adc ("edx",0); - &mov ("eax",&DWP(4,$inp)); # np[1] - &mov ($j,1); - -&set_label("3rdmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]= - - &mov ($carry,"edx"); - &mul ($word); # np[j+1]*m - &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1] - &lea ($j,&DWP(2,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]= - &jl (&label("3rdmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &mov ($j,$_bp); # i - &xor ("eax","eax"); - &mov ($inp,$_ap); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$num); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(4,$inp,$j,4)); # ap[i] - &lea ($j,&DWP(1,$j)); - &mov ("eax",$word); - &mov ($_bp,$j); # ++i - &mul ($word); # ap[i]*ap[i] - &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i] - &adc ("edx",0); - &mov 
(&DWP($frame,"esp",$j,4),"eax"); # tp[i]= - &xor ($carry,$carry); - &cmp ($j,$num); - &lea ($j,&DWP(1,$j)); - &je (&label("sqrlast")); - - &mov ($sbit,"edx"); # zaps $num - &shr ("edx",1); - &and ($sbit,1); -&set_label("sqradd",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[i] - &add ("eax",$carry); - &lea ($carry,&DWP(0,"eax","eax")); - &adc ("edx",0); - &shr ("eax",31); - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("eax",0); - &add ($carry,$sbit); - &adc ("eax",0); - &cmp ($j,$_num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &mov ($sbit,"eax"); - &jle (&label("sqradd")); - - &mov ($carry,"edx"); - &lea ("edx",&DWP(0,$sbit,"edx",2)); - &shr ($carry,31); -&set_label("sqrlast"); - &mov ($word,$_n0); - &mov ($inp,$_np); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num] - &mov ("eax",&DWP(0,$inp)); # np[0] - &adc ($carry,0); - &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]= - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &lea ($num,&DWP(-1,$j)); - &adc ("edx",0); - &mov ($j,1); - &mov ("eax",&DWP(4,$inp)); # np[1] - - &jmp (&label("3rdmadd")); -} - -&set_label("common_tail",16); - &mov ($np,$_np); # load modulus pointer - &mov ($rp,$_rp); # load result pointer - &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped] - - &mov ("eax",&DWP(0,$tp)); # tp[0] - &mov ($j,$num); # j=num-1 - &xor ($i,$i); # i=0 and clear CF! - -&set_label("sub",16); - &sbb ("eax",&DWP(0,$np,$i,4)); - &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i] - &dec ($j); # doesn't affect CF! - &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1] - &lea ($i,&DWP(1,$i)); # i++ - &jge (&label("sub")); - - &sbb ("eax",0); # handle upmost overflow bit - &and ($tp,"eax"); - ¬ ("eax"); - &mov ($np,$rp); - &and ($np,"eax"); - &or ($tp,$np); # tp=carry?tp:rp - -&set_label("copy",16); # copy or in-place refresh - &mov ("eax",&DWP(0,$tp,$num,4)); - &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i] - &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector - &dec ($num); - &jge (&label("copy")); - - &mov ("esp",$_sp); # pull saved stack pointer - &mov ("eax",1); -&set_label("just_leave"); -&function_end("bn_mul_mont"); - -&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); - -&asm_finish(); diff --git a/lib/libcrypto/bn/asm/pa-risc.s b/lib/libcrypto/bn/asm/pa-risc.s deleted file mode 100644 index 775130a1912..00000000000 --- a/lib/libcrypto/bn/asm/pa-risc.s +++ /dev/null @@ -1,710 +0,0 @@ - .SPACE $PRIVATE$ - .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 - .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 - .SPACE $TEXT$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 - .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY - .IMPORT $global$,DATA - .IMPORT $$dyncall,MILLICODE -; gcc_compiled.: - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_add_words - .PROC - .CALLINFO FRAME=0,CALLS,SAVE_RP - .ENTRY - stw %r2,-20(0,%r30) - ldi 0,%r28 - extru %r23,31,16,%r2 - stw %r2,-16(0,%r30) - extru %r23,15,16,%r23 - ldil L'65536,%r31 - fldws -16(0,%r30),%fr11R - stw %r23,-16(0,%r30) - ldo 12(%r25),%r29 - ldo 12(%r26),%r23 - fldws -16(0,%r30),%fr11L -L$0002 - ldw 0(0,%r25),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu 
%fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0005 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw 0(0,%r26),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,0(0,%r26) - ldw -8(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0010 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw -8(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,-8(0,%r23) - ldw -4(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0015 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw -4(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,-4(0,%r23) - ldw 0(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 
-L$0020 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw 0(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,0(0,%r23) - ldo 16(%r29),%r29 - ldo 16(%r25),%r25 - ldo 16(%r23),%r23 - bl L$0002,0 - ldo 16(%r26),%r26 -L$0003 - ldw -20(0,%r30),%r2 - bv,n 0(%r2) - .EXIT - .PROCEND - .align 4 - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_words - .PROC - .CALLINFO FRAME=0,CALLS,SAVE_RP - .ENTRY - stw %r2,-20(0,%r30) - ldi 0,%r28 - extru %r23,31,16,%r2 - stw %r2,-16(0,%r30) - extru %r23,15,16,%r23 - ldil L'65536,%r31 - fldws -16(0,%r30),%fr11R - stw %r23,-16(0,%r30) - ldo 12(%r26),%r29 - ldo 12(%r25),%r23 - fldws -16(0,%r30),%fr11L -L$0026 - ldw 0(0,%r25),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0029 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,0(0,%r26) - ldw -8(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0033 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,-8(0,%r29) - ldw -4(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0037 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - 
comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,-4(0,%r29) - ldw 0(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0041 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,0(0,%r29) - ldo 16(%r23),%r23 - ldo 16(%r25),%r25 - ldo 16(%r29),%r29 - bl L$0026,0 - ldo 16(%r26),%r26 -L$0027 - ldw -20(0,%r30),%r2 - bv,n 0(%r2) - .EXIT - .PROCEND - .align 4 - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR -bn_sqr_words - .PROC - .CALLINFO FRAME=0,NO_CALLS - .ENTRY - ldo 28(%r26),%r23 - ldo 12(%r25),%r28 -L$0046 - ldw 0(0,%r25),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,0(0,%r26) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-24(0,%r23) - ldw -8(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-20(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-16(0,%r23) - ldw -4(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu 
%fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-12(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-8(0,%r23) - ldw 0(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-4(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,0(0,%r23) - ldo 16(%r28),%r28 - ldo 16(%r25),%r25 - ldo 32(%r23),%r23 - bl L$0046,0 - ldo 32(%r26),%r26 -L$0057 - bv,n 0(%r2) - .EXIT - .PROCEND - .IMPORT BN_num_bits_word,CODE - .IMPORT fprintf,CODE - .IMPORT __iob,DATA - .SPACE $TEXT$ - .SUBSPA $LIT$ - - .align 4 -L$C0000 - .STRING "Division would overflow\x0a\x00" - .IMPORT abort,CODE - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR -bn_div64 - .PROC - .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r8,128(0,%r30) - stw %r7,-124(0,%r30) - stw %r4,-112(0,%r30) - stw %r3,-108(0,%r30) - copy %r26,%r3 - copy %r25,%r4 - stw %r6,-120(0,%r30) - ldi 0,%r7 - stw %r5,-116(0,%r30) - movb,<> %r24,%r5,L$0059 - ldi 2,%r6 - bl L$0076,0 - ldi -1,%r28 -L$0059 - .CALL ARGW0=GR - bl BN_num_bits_word,%r2 - copy %r5,%r26 - ldi 32,%r19 - comb,= %r19,%r28,L$0060 - subi 31,%r28,%r19 - mtsar %r19 - zvdepi 1,32,%r19 - comb,>>= %r19,%r3,L$0060 - addil LR'__iob-$global$+32,%r27 - ldo RR'__iob-$global$+32(%r1),%r26 - ldil LR'L$C0000,%r25 - .CALL ARGW0=GR,ARGW1=GR - bl fprintf,%r2 - ldo RR'L$C0000(%r25),%r25 - .CALL - bl abort,%r2 - nop -L$0060 - comb,>> %r5,%r3,L$0061 - subi 32,%r28,%r28 - sub %r3,%r5,%r3 -L$0061 - comib,= 0,%r28,L$0062 - subi 31,%r28,%r19 - mtsar %r19 - zvdep %r5,32,%r5 - zvdep %r3,32,%r21 - subi 32,%r28,%r20 - mtsar %r20 - vshd 0,%r4,%r20 - or %r21,%r20,%r3 - mtsar %r19 - zvdep %r4,32,%r4 -L$0062 - extru %r5,15,16,%r23 - extru %r5,31,16,%r28 -L$0063 - extru %r3,15,16,%r19 - comb,<> %r23,%r19,L$0066 - copy %r3,%r26 - bl L$0067,0 - zdepi -1,31,16,%r29 -L$0066 - .IMPORT $$divU,MILLICODE - bl $$divU,%r31 - copy %r23,%r25 -L$0067 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r23,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr9 - ldw -16(0,%r30),%r8 - fstws %fr9R,-16(0,%r30) - copy %r8,%r22 - ldw -16(0,%r30),%r8 - extru %r4,15,16,%r24 - copy %r8,%r21 -L$0068 - sub %r3,%r21,%r20 - copy %r20,%r19 - depi 0,31,16,%r19 - comib,<> 0,%r19,L$0069 - zdep %r20,15,16,%r19 - addl %r19,%r24,%r19 - comb,>>= %r19,%r22,L$0069 - sub %r22,%r28,%r22 - sub %r21,%r23,%r21 - bl L$0068,0 - ldo -1(%r29),%r29 -L$0069 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r8 - stw %r23,-16(0,%r30) - fldws -16(0,%r30),%fr10R - copy %r8,%r19 - xmpyu %fr10L,%fr10R,%fr8 
- fstws %fr8R,-16(0,%r30) - extru %r19,15,16,%r20 - ldw -16(0,%r30),%r8 - zdep %r19,15,16,%r19 - addl %r8,%r20,%r20 - comclr,<<= %r19,%r4,0 - addi 1,%r20,%r20 - comb,<<= %r20,%r3,L$0074 - sub %r4,%r19,%r4 - addl %r3,%r5,%r3 - ldo -1(%r29),%r29 -L$0074 - addib,= -1,%r6,L$0064 - sub %r3,%r20,%r3 - zdep %r29,15,16,%r7 - shd %r3,%r4,16,%r3 - bl L$0063,0 - zdep %r4,15,16,%r4 -L$0064 - or %r7,%r29,%r28 -L$0076 - ldw -148(0,%r30),%r2 - ldw -124(0,%r30),%r7 - ldw -120(0,%r30),%r6 - ldw -116(0,%r30),%r5 - ldw -112(0,%r30),%r4 - ldw -108(0,%r30),%r3 - bv 0(%r2) - ldwm -128(0,%r30),%r8 - .EXIT - .PROCEND diff --git a/lib/libcrypto/bn/asm/r3000.s b/lib/libcrypto/bn/asm/r3000.s deleted file mode 100644 index e95269afa38..00000000000 --- a/lib/libcrypto/bn/asm/r3000.s +++ /dev/null @@ -1,646 +0,0 @@ - .file 1 "../bn_mulw.c" - .set nobopt - .option pic2 - - # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C - - # Cc1 defaults: - # -mabicalls - - # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1): - # -quiet -dumpbase -O2 -o - -gcc2_compiled.: -__gnu_compiled_c: - .rdata - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24 - .byte 0x0 - .text - .align 2 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $12,$4 - move $14,$5 - move $9,$6 - move $13,$7 - move $8,$0 - addu $10,$12,12 - addu $11,$14,12 -$L2: - lw $6,0($14) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,0($12) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,0($12) - .set macro - .set reorder - - lw $6,-8($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,-8($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,-8($10) - .set macro - .set reorder - - lw $6,-4($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,-4($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 
- addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,-4($10) - .set macro - .set reorder - - lw $6,0($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,0($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,0($10) - .set macro - .set reorder - - addu $11,$11,16 - addu $14,$14,16 - addu $10,$10,16 - .set noreorder - .set nomacro - j $L2 - addu $12,$12,16 - .set macro - .set reorder - -$L3: - .set noreorder - .set nomacro - j $31 - move $2,$8 - .set macro - .set reorder - - .end bn_mul_add_words - .align 2 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $11,$4 - move $12,$5 - move $8,$6 - move $6,$0 - addu $10,$11,12 - addu $9,$12,12 -$L10: - lw $4,0($12) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,0($11) - .set macro - .set reorder - - lw $4,-8($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,-8($10) - .set macro - .set reorder - - lw $4,-4($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,-4($10) - .set macro - .set reorder - - lw $4,0($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,0($10) - .set macro - .set reorder - - addu $9,$9,16 - addu $12,$12,16 - addu $10,$10,16 - .set noreorder - .set nomacro - j $L10 - addu $11,$11,16 - .set macro - .set reorder - -$L11: - .set noreorder - .set nomacro - j $31 - move $2,$6 - .set macro - .set reorder - - .end bn_mul_words - .align 2 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $9,$4 - addu $7,$9,28 - addu $8,$5,12 -$L18: - lw $2,0($5) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,0($9) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-24($7) - .set macro - .set reorder - - lw $2,-8($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-20($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-16($7) - .set macro - .set reorder - - lw $2,-4($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-12($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-8($7) - .set macro - .set reorder - - lw $2,0($8) - 
#nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-4($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,0($7) - .set macro - .set reorder - - addu $8,$8,16 - addu $5,$5,16 - addu $7,$7,32 - .set noreorder - .set nomacro - j $L18 - addu $9,$9,32 - .set macro - .set reorder - -$L19: - j $31 - .end bn_sqr_words - .rdata - .align 2 -$LC0: - - .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e - .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f - .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa - .byte 0x0 - .text - .align 2 - .globl bn_div64 - .ent bn_div64 -bn_div64: - .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8 - .mask 0x901f0000,-8 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - subu $sp,$sp,56 - .cprestore 16 - sw $16,24($sp) - move $16,$4 - sw $17,28($sp) - move $17,$5 - sw $18,32($sp) - move $18,$6 - sw $20,40($sp) - move $20,$0 - sw $19,36($sp) - li $19,0x00000002 # 2 - sw $31,48($sp) - .set noreorder - .set nomacro - bne $18,$0,$L26 - sw $28,44($sp) - .set macro - .set reorder - - .set noreorder - .set nomacro - j $L43 - li $2,-1 # 0xffffffff - .set macro - .set reorder - -$L26: - move $4,$18 - jal BN_num_bits_word - move $4,$2 - li $2,0x00000020 # 32 - .set noreorder - .set nomacro - beq $4,$2,$L27 - li $2,0x00000001 # 1 - .set macro - .set reorder - - sll $2,$2,$4 - sltu $2,$2,$16 - .set noreorder - .set nomacro - beq $2,$0,$L44 - li $5,0x00000020 # 32 - .set macro - .set reorder - - la $4,__iob+32 - la $5,$LC0 - jal fprintf - jal abort -$L27: - li $5,0x00000020 # 32 -$L44: - sltu $2,$16,$18 - .set noreorder - .set nomacro - bne $2,$0,$L28 - subu $4,$5,$4 - .set macro - .set reorder - - subu $16,$16,$18 -$L28: - .set noreorder - .set nomacro - beq $4,$0,$L29 - li $10,-65536 # 0xffff0000 - .set macro - .set reorder - - sll $18,$18,$4 - sll $3,$16,$4 - subu $2,$5,$4 - srl $2,$17,$2 - or $16,$3,$2 - sll $17,$17,$4 -$L29: - srl $7,$18,16 - andi $9,$18,0xffff -$L30: - srl $2,$16,16 - .set noreorder - .set nomacro - beq $2,$7,$L34 - li $6,0x0000ffff # 65535 - .set macro - .set reorder - - divu $6,$16,$7 -$L34: - mult $6,$9 - mflo $5 - #nop - #nop - mult $6,$7 - and $2,$17,$10 - srl $8,$2,16 - mflo $4 -$L35: - subu $3,$16,$4 - and $2,$3,$10 - .set noreorder - .set nomacro - bne $2,$0,$L36 - sll $2,$3,16 - .set macro - .set reorder - - addu $2,$2,$8 - sltu $2,$2,$5 - .set noreorder - .set nomacro - beq $2,$0,$L36 - subu $5,$5,$9 - .set macro - .set reorder - - subu $4,$4,$7 - .set noreorder - .set nomacro - j $L35 - addu $6,$6,-1 - .set macro - .set reorder - -$L36: - mult $6,$7 - mflo $5 - #nop - #nop - mult $6,$9 - mflo $4 - #nop - #nop - srl $3,$4,16 - sll $2,$4,16 - and $4,$2,$10 - sltu $2,$17,$4 - .set noreorder - .set nomacro - beq $2,$0,$L40 - addu $5,$5,$3 - .set macro - .set reorder - - addu $5,$5,1 -$L40: - sltu $2,$16,$5 - .set noreorder - .set nomacro - beq $2,$0,$L41 - subu $17,$17,$4 - .set macro - .set reorder - - addu $16,$16,$18 - addu $6,$6,-1 -$L41: - addu $19,$19,-1 - .set noreorder - .set nomacro - beq $19,$0,$L31 - subu $16,$16,$5 - .set macro - .set reorder - - sll $20,$6,16 - sll $3,$16,16 - srl $2,$17,16 - or $16,$3,$2 - .set noreorder - .set nomacro - j $L30 - sll $17,$17,16 - .set macro - .set reorder - -$L31: - or $2,$20,$6 -$L43: - lw $31,48($sp) - lw $20,40($sp) - lw $19,36($sp) - lw $18,32($sp) - lw $17,28($sp) - lw $16,24($sp) - addu $sp,$sp,56 - j $31 - .end bn_div64 - - .globl abort .text - .globl fprintf .text - .globl BN_num_bits_word .text diff --git 
a/lib/libcrypto/bn/asm/sparcv8plus.S b/lib/libcrypto/bn/asm/sparcv8plus.S index 8c56e2e7e7c..63de1860f28 100644 --- a/lib/libcrypto/bn/asm/sparcv8plus.S +++ b/lib/libcrypto/bn/asm/sparcv8plus.S @@ -144,6 +144,19 @@ * } */ +#if defined(__SUNPRO_C) && defined(__sparcv9) + /* They've said -xarch=v9 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#elif defined(__GNUC__) && defined(__arch64__) + /* They've said -m64 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#else +# define FRAME_SIZE -96 +#endif /* * GNU assembler can't stand stuw:-( */ @@ -619,8 +632,6 @@ bn_sub_words: * Andy. */ -#define FRAME_SIZE -96 - /* * Here is register usage map for *all* routines below. */ |
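The sparcv8plus.S hunk above replaces the single hard-coded FRAME_SIZE with a compiler-dependent choice: Sun cc invoked with -xarch=v9 predefines __SUNPRO_C and __sparcv9, and gcc invoked with -m64 predefines __arch64__, so either 64-bit toolchain gets the 192-byte save area together with the .register scratch declarations for %g2/%g3, while everything else keeps the old 96-byte V8 frame. A stand-alone probe such as the hypothetical frame_probe.c below, which simply repeats the same preprocessor test, shows which branch a given compiler invocation selects.

/* frame_probe.c -- illustrative only; mirrors the #if chain added to sparcv8plus.S */
#include <stdio.h>

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define FRAME_SIZE -192        /* Sun cc, -xarch=v9 */
#elif defined(__GNUC__) && defined(__arch64__)
# define FRAME_SIZE -192        /* gcc, -m64 */
#else
# define FRAME_SIZE -96         /* 32-bit V8/V8+ ABI */
#endif

int main(void)
{
    printf("FRAME_SIZE = %d\n", FRAME_SIZE);
    return 0;
}

For example, cc -xarch=v9 frame_probe.c or gcc -m64 frame_probe.c should report -192 on a 64-bit SPARC toolchain, while a plain 32-bit build reports -96.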