path: root/lib/libcrypto/bn/asm
author     Damien Miller <djm@cvs.openbsd.org>  2010-10-01 22:59:02 +0000
committer  Damien Miller <djm@cvs.openbsd.org>  2010-10-01 22:59:02 +0000
commit     367b2622e0527401666a65476f4111fdda2e3c12 (patch)
tree       dc507d2394eb3b616bd9eae56d17671899a24a05 /lib/libcrypto/bn/asm
parent     ae9cbeba0dc25f0e95e6a0e50b6c161bf6384e17 (diff)
resolve conflicts, fix local changes
Diffstat (limited to 'lib/libcrypto/bn/asm')
-rw-r--r--  lib/libcrypto/bn/asm/alpha.s                      3199
-rw-r--r--  lib/libcrypto/bn/asm/alpha.s.works                 533
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/add.pl            119
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/div.pl            144
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/mul.pl            116
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/mul_add.pl        120
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/mul_c4.pl         213
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl    98
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/mul_c8.pl         177
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/sqr.pl            113
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl         109
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl         132
-rw-r--r--  lib/libcrypto/bn/asm/alpha.works/sub.pl            108
-rw-r--r--  lib/libcrypto/bn/asm/alpha/add.pl                  118
-rw-r--r--  lib/libcrypto/bn/asm/alpha/div.pl                  144
-rw-r--r--  lib/libcrypto/bn/asm/alpha/mul.pl                  104
-rw-r--r--  lib/libcrypto/bn/asm/alpha/mul_add.pl              123
-rw-r--r--  lib/libcrypto/bn/asm/alpha/mul_c4.pl               215
-rw-r--r--  lib/libcrypto/bn/asm/alpha/mul_c4.works.pl          98
-rw-r--r--  lib/libcrypto/bn/asm/alpha/mul_c8.pl               177
-rw-r--r--  lib/libcrypto/bn/asm/alpha/sqr.pl                  113
-rw-r--r--  lib/libcrypto/bn/asm/alpha/sqr_c4.pl               109
-rw-r--r--  lib/libcrypto/bn/asm/alpha/sqr_c8.pl               132
-rw-r--r--  lib/libcrypto/bn/asm/alpha/sub.pl                  108
-rw-r--r--  lib/libcrypto/bn/asm/bn-586.pl                     203
-rw-r--r--  lib/libcrypto/bn/asm/bn-alpha.pl                   571
-rw-r--r--  lib/libcrypto/bn/asm/ca.pl                          33
-rw-r--r--  lib/libcrypto/bn/asm/co-586.pl                       3
-rw-r--r--  lib/libcrypto/bn/asm/co-alpha.pl                   116
-rw-r--r--  lib/libcrypto/bn/asm/mips1.s                       539
-rw-r--r--  lib/libcrypto/bn/asm/mo-586.pl                     603
-rw-r--r--  lib/libcrypto/bn/asm/pa-risc.s                     710
-rw-r--r--  lib/libcrypto/bn/asm/r3000.s                       646
-rw-r--r--  lib/libcrypto/bn/asm/sparcv8plus.S                  15
34 files changed, 166 insertions, 9895 deletions
diff --git a/lib/libcrypto/bn/asm/alpha.s b/lib/libcrypto/bn/asm/alpha.s
deleted file mode 100644
index 555ff0b92d1..00000000000
--- a/lib/libcrypto/bn/asm/alpha.s
+++ /dev/null
@@ -1,3199 +0,0 @@
-	# DEC Alpha assembler
-	# The bn_div_words is actually gcc output but the other parts are hand done.
-	# Thanks to tzeruch@ceddec.com for sending me the gcc output for
-	# bn_div_words.
-	# I've gone back and re-done most of the routines.
-	# The key thing to remember for the 164 CPU is that while a
-	# multiply operation takes 8 cycles, another one can only be issued
-	# after 4 cycles have elapsed.  I've made modifications to help
-	# improve this.  Also, normally, the result of an ld instruction will
-	# not be available for about 3 cycles.
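For orientation, the word-level operation that bn_mul_add_words implements is sketched below in C. This is a minimal sketch, assuming a 64-bit BN_ULONG and a compiler that provides unsigned __int128; it is not the library's portable C code. The hand-scheduled bn_mul_add_words that follows interleaves four such iterations per loop so that the mulq/umulh results and the loads are ready by the time the dependent adds and carry compares issue.

    #include <stdint.h>

    typedef uint64_t BN_ULONG;                 /* 64-bit words, as on Alpha */

    /* r[i] = (r[i] + a[i] * w + carry) mod 2^64; returns the final carry word. */
    BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
    {
        BN_ULONG carry = 0;

        while (num-- > 0) {
            /* Full 128-bit product: low half is mulq, high half is umulh. */
            unsigned __int128 t = (unsigned __int128)(*ap++) * w;
            t += *rp;                          /* add the existing result word   */
            t += carry;                        /* add the carry from the previous word */
            *rp++ = (BN_ULONG)t;               /* low 64 bits go back into r[]   */
            carry = (BN_ULONG)(t >> 64);       /* high 64 bits become the new carry */
        }
        return carry;
    }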
- .file 1 "bn_asm.c"
- .set noat
-gcc2_compiled.:
-__gnu_compiled_c:
- .text
- .align 3
- .globl bn_mul_add_words
- .ent bn_mul_add_words
-bn_mul_add_words:
-bn_mul_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$0
- blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
- .align 3
-$42:
- mulq $20,$19,$5 # 1 2 1 ######
- ldq $21,8($17) # 2 1
- ldq $2,8($16) # 2 1
- umulh $20,$19,$20 # 1 2 ######
- ldq $27,16($17) # 3 1
- ldq $3,16($16) # 3 1
- mulq $21,$19,$6 # 2 2 1 ######
- ldq $28,24($17) # 4 1
- addq $1,$5,$1 # 1 2 2
- ldq $4,24($16) # 4 1
- umulh $21,$19,$21 # 2 2 ######
- cmpult $1,$5,$22 # 1 2 3 1
- addq $20,$22,$20 # 1 3 1
- addq $1,$0,$1 # 1 2 3 1
- mulq $27,$19,$7 # 3 2 1 ######
- cmpult $1,$0,$0 # 1 2 3 2
- addq $2,$6,$2 # 2 2 2
- addq $20,$0,$0 # 1 3 2
- cmpult $2,$6,$23 # 2 2 3 1
- addq $21,$23,$21 # 2 3 1
- umulh $27,$19,$27 # 3 2 ######
- addq $2,$0,$2 # 2 2 3 1
- cmpult $2,$0,$0 # 2 2 3 2
- subq $18,4,$18
- mulq $28,$19,$8 # 4 2 1 ######
- addq $21,$0,$0 # 2 3 2
- addq $3,$7,$3 # 3 2 2
- addq $16,32,$16
- cmpult $3,$7,$24 # 3 2 3 1
- stq $1,-32($16) # 1 2 4
- umulh $28,$19,$28 # 4 2 ######
- addq $27,$24,$27 # 3 3 1
- addq $3,$0,$3 # 3 2 3 1
- stq $2,-24($16) # 2 2 4
- cmpult $3,$0,$0 # 3 2 3 2
- stq $3,-16($16) # 3 2 4
- addq $4,$8,$4 # 4 2 2
- addq $27,$0,$0 # 3 3 2
- cmpult $4,$8,$25 # 4 2 3 1
- addq $17,32,$17
- addq $28,$25,$28 # 4 3 1
- addq $4,$0,$4 # 4 2 3 1
- cmpult $4,$0,$0 # 4 2 3 2
- stq $4,-8($16) # 4 2 4
- addq $28,$0,$0 # 4 3 2
- blt $18,$43
-
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
-
- br $42
-
- .align 4
-$45:
- ldq $20,0($17) # 4 1
- ldq $1,0($16) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18
- addq $16,8,$16
- addq $17,8,$17
- umulh $20,$19,$20 # 4 2
- addq $1,$5,$1 # 4 2 2
- cmpult $1,$5,$22 # 4 2 3 1
- addq $20,$22,$20 # 4 3 1
- addq $1,$0,$1 # 4 2 3 1
- cmpult $1,$0,$0 # 4 2 3 2
- addq $20,$0,$0 # 4 3 2
- stq $1,-8($16) # 4 2 4
- bgt $18,$45
- ret $31,($26),1 # else exit
-
- .align 4
-$43:
- addq $18,4,$18
- bgt $18,$45 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_add_words
- .align 3
- .globl bn_mul_words
- .ent bn_mul_words
-bn_mul_words:
-bn_mul_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$0
- blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- .align 3
-$142:
-
- mulq $20,$19,$5 # 1 2 1 #####
- ldq $21,8($17) # 2 1
- ldq $27,16($17) # 3 1
- umulh $20,$19,$20 # 1 2 #####
- ldq $28,24($17) # 4 1
- mulq $21,$19,$6 # 2 2 1 #####
- addq $5,$0,$5 # 1 2 3 1
- subq $18,4,$18
- cmpult $5,$0,$0 # 1 2 3 2
- umulh $21,$19,$21 # 2 2 #####
- addq $20,$0,$0 # 1 3 2
- addq $17,32,$17
- addq $6,$0,$6 # 2 2 3 1
- mulq $27,$19,$7 # 3 2 1 #####
- cmpult $6,$0,$0 # 2 2 3 2
- addq $21,$0,$0 # 2 3 2
- addq $16,32,$16
- umulh $27,$19,$27 # 3 2 #####
- stq $5,-32($16) # 1 2 4
- mulq $28,$19,$8 # 4 2 1 #####
- addq $7,$0,$7 # 3 2 3 1
- stq $6,-24($16) # 2 2 4
- cmpult $7,$0,$0 # 3 2 3 2
- umulh $28,$19,$28 # 4 2 #####
- addq $27,$0,$0 # 3 3 2
- stq $7,-16($16) # 3 2 4
- addq $8,$0,$8 # 4 2 3 1
- cmpult $8,$0,$0 # 4 2 3 2
-
- addq $28,$0,$0 # 4 3 2
-
- stq $8,-8($16) # 4 2 4
-
- blt $18,$143
-
- ldq $20,0($17) # 1 1
-
- br $142
-
- .align 4
-$145:
- ldq $20,0($17) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18
- umulh $20,$19,$20 # 4 2
- addq $5,$0,$5 # 4 2 3 1
- addq $16,8,$16
- cmpult $5,$0,$0 # 4 2 3 2
- addq $17,8,$17
- addq $20,$0,$0 # 4 3 2
- stq $5,-8($16) # 4 2 4
-
- bgt $18,$145
- ret $31,($26),1 # else exit
-
- .align 4
-$143:
- addq $18,4,$18
- bgt $18,$145 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_words
- .align 3
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
-bn_sqr_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $18,4,$18
- blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- .align 3
-$542:
- mulq $20,$20,$5 ######
- ldq $21,8($17) # 1 1
- subq $18,4
- umulh $20,$20,$1 ######
- ldq $27,16($17) # 1 1
- mulq $21,$21,$6 ######
- ldq $28,24($17) # 1 1
- stq $5,0($16) # r[0]
- umulh $21,$21,$2 ######
- stq $1,8($16) # r[1]
- mulq $27,$27,$7 ######
- stq $6,16($16) # r[0]
- umulh $27,$27,$3 ######
- stq $2,24($16) # r[1]
- mulq $28,$28,$8 ######
- stq $7,32($16) # r[0]
- umulh $28,$28,$4 ######
- stq $3,40($16) # r[1]
-
- addq $16,64,$16
- addq $17,32,$17
- stq $8,-16($16) # r[0]
- stq $4,-8($16) # r[1]
-
- blt $18,$543
- ldq $20,0($17) # 1 1
- br $542
-
-$442:
- ldq $20,0($17) # a[0]
- mulq $20,$20,$5 # a[0]*w low part r2
- addq $16,16,$16
- addq $17,8,$17
- subq $18,1,$18
- umulh $20,$20,$1 # a[0]*w high part r3
- stq $5,-16($16) # r[0]
- stq $1,-8($16) # r[1]
-
- bgt $18,$442
- ret $31,($26),1 # else exit
-
- .align 4
-$543:
- addq $18,4,$18
- bgt $18,$442 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sqr_words
-
- .align 3
- .globl bn_add_words
- .ent bn_add_words
-bn_add_words:
-bn_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19,4,$19
- bis $31,$31,$0 # carry = 0
- blt $19,$900
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[1]
- .align 3
-$901:
- addq $1,$5,$1 # r=a+b;
- ldq $6,8($17) # a[1]
- cmpult $1,$5,$22 # did we overflow?
- ldq $2,8($18) # b[1]
- addq $1,$0,$1 # c+= overflow
- ldq $7,16($17) # a[2]
- cmpult $1,$0,$0 # overflow?
- ldq $3,16($18) # b[2]
- addq $0,$22,$0
- ldq $8,24($17) # a[3]
- addq $2,$6,$2 # r=a+b;
- ldq $4,24($18) # b[3]
- cmpult $2,$6,$23 # did we overflow?
- addq $3,$7,$3 # r=a+b;
- addq $2,$0,$2 # c+= overflow
- cmpult $3,$7,$24 # did we overflow?
- cmpult $2,$0,$0 # overflow?
- addq $4,$8,$4 # r=a+b;
- addq $0,$23,$0
- cmpult $4,$8,$25 # did we overflow?
- addq $3,$0,$3 # c+= overflow
- stq $1,0($16) # r[0]=c
- cmpult $3,$0,$0 # overflow?
- stq $2,8($16) # r[1]=c
- addq $0,$24,$0
- stq $3,16($16) # r[2]=c
- addq $4,$0,$4 # c+= overflow
- subq $19,4,$19 # loop--
- cmpult $4,$0,$0 # overflow?
- addq $17,32,$17 # a++
- addq $0,$25,$0
- stq $4,24($16) # r[3]=c
- addq $18,32,$18 # b++
- addq $16,32,$16 # r++
-
- blt $19,$900
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[1]
- br $901
- .align 4
-$945:
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[1]
- addq $1,$5,$1 # r=a+b;
- subq $19,1,$19 # loop--
- addq $1,$0,$1 # c+= overflow
- addq $17,8,$17 # a++
- cmpult $1,$5,$22 # did we overflow?
- cmpult $1,$0,$0 # overflow?
- addq $18,8,$18 # b++
- stq $1,0($16) # r[0]=c
- addq $0,$22,$0
- addq $16,8,$16 # r++
-
- bgt $19,$945
- ret $31,($26),1 # else exit
-
-$900:
- addq $19,4,$19
- bgt $19,$945 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_add_words
-
- #
- # What follows was taken directly from the C compiler with a few
-	# hacks to redo the labels.
- #
-.text
- .align 3
- .globl bn_div_words
- .ent bn_div_words
-bn_div_words:
- ldgp $29,0($27)
-bn_div_words..ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$119
- lda $0,-1
- br $31,$136
- .align 4
-$119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$126
- zapnot $7,15,$27
- br $31,$127
- .align 4
-$126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$127:
- srl $10,32,$4
- .align 5
-$128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$129
- subq $27,1,$27
- br $31,$128
- .align 4
-$129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$134
- addq $9,$11,$9
- subq $27,1,$27
-$134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$123
- .align 4
-$124:
- bis $13,$27,$0
-$136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div_words
-
- .set noat
- .text
- .align 3
- .globl bn_sub_words
- .ent bn_sub_words
-bn_sub_words:
-bn_sub_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19, 4, $19
- bis $31, $31, $0
- blt $19, $100
- ldq $1, 0($17)
- ldq $2, 0($18)
-$101:
- ldq $3, 8($17)
- cmpult $1, $2, $4
- ldq $5, 8($18)
- subq $1, $2, $1
- ldq $6, 16($17)
- cmpult $1, $0, $2
- ldq $7, 16($18)
- subq $1, $0, $23
- ldq $8, 24($17)
- addq $2, $4, $0
- cmpult $3, $5, $24
- subq $3, $5, $3
- ldq $22, 24($18)
- cmpult $3, $0, $5
- subq $3, $0, $25
- addq $5, $24, $0
- cmpult $6, $7, $27
- subq $6, $7, $6
- stq $23, 0($16)
- cmpult $6, $0, $7
- subq $6, $0, $28
- addq $7, $27, $0
- cmpult $8, $22, $21
- subq $8, $22, $8
- stq $25, 8($16)
- cmpult $8, $0, $22
- subq $8, $0, $20
- addq $22, $21, $0
- stq $28, 16($16)
- subq $19, 4, $19
- stq $20, 24($16)
- addq $17, 32, $17
- addq $18, 32, $18
- addq $16, 32, $16
- blt $19, $100
- ldq $1, 0($17)
- ldq $2, 0($18)
- br $101
-$102:
- ldq $1, 0($17)
- ldq $2, 0($18)
- cmpult $1, $2, $27
- subq $1, $2, $1
- cmpult $1, $0, $2
- subq $1, $0, $1
- stq $1, 0($16)
- addq $2, $27, $0
- addq $17, 8, $17
- addq $18, 8, $18
- addq $16, 8, $16
- subq $19, 1, $19
- bgt $19, $102
- ret $31,($26),1
-$100:
- addq $19, 4, $19
- bgt $19, $102
-$103:
- ret $31,($26),1
- .end bn_sub_words
- .text
- .align 3
- .globl bn_mul_comba4
- .ent bn_mul_comba4
-bn_mul_comba4:
-bn_mul_comba4..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- ldq $0, 0($17)
- ldq $1, 0($18)
- ldq $2, 8($17)
- ldq $3, 8($18)
- ldq $4, 16($17)
- ldq $5, 16($18)
- ldq $6, 24($17)
- ldq $7, 24($18)
- bis $31, $31, $23
- mulq $0, $1, $8
- umulh $0, $1, $22
- stq $8, 0($16)
- bis $31, $31, $8
- mulq $0, $3, $24
- umulh $0, $3, $25
- addq $22, $24, $22
- cmpult $22, $24, $27
- addq $27, $25, $25
- addq $23, $25, $23
- cmpult $23, $25, $28
- addq $8, $28, $8
- mulq $2, $1, $21
- umulh $2, $1, $20
- addq $22, $21, $22
- cmpult $22, $21, $19
- addq $19, $20, $20
- addq $23, $20, $23
- cmpult $23, $20, $17
- addq $8, $17, $8
- stq $22, 8($16)
- bis $31, $31, $22
- mulq $2, $3, $18
- umulh $2, $3, $24
- addq $23, $18, $23
- cmpult $23, $18, $27
- addq $27, $24, $24
- addq $8, $24, $8
- cmpult $8, $24, $25
- addq $22, $25, $22
- mulq $0, $5, $28
- umulh $0, $5, $21
- addq $23, $28, $23
- cmpult $23, $28, $19
- addq $19, $21, $21
- addq $8, $21, $8
- cmpult $8, $21, $20
- addq $22, $20, $22
- mulq $4, $1, $17
- umulh $4, $1, $18
- addq $23, $17, $23
- cmpult $23, $17, $27
- addq $27, $18, $18
- addq $8, $18, $8
- cmpult $8, $18, $24
- addq $22, $24, $22
- stq $23, 16($16)
- bis $31, $31, $23
- mulq $0, $7, $25
- umulh $0, $7, $28
- addq $8, $25, $8
- cmpult $8, $25, $19
- addq $19, $28, $28
- addq $22, $28, $22
- cmpult $22, $28, $21
- addq $23, $21, $23
- mulq $2, $5, $20
- umulh $2, $5, $17
- addq $8, $20, $8
- cmpult $8, $20, $27
- addq $27, $17, $17
- addq $22, $17, $22
- cmpult $22, $17, $18
- addq $23, $18, $23
- mulq $4, $3, $24
- umulh $4, $3, $25
- addq $8, $24, $8
- cmpult $8, $24, $19
- addq $19, $25, $25
- addq $22, $25, $22
- cmpult $22, $25, $28
- addq $23, $28, $23
- mulq $6, $1, $21
- umulh $6, $1, $0
- addq $8, $21, $8
- cmpult $8, $21, $20
- addq $20, $0, $0
- addq $22, $0, $22
- cmpult $22, $0, $27
- addq $23, $27, $23
- stq $8, 24($16)
- bis $31, $31, $8
- mulq $2, $7, $17
- umulh $2, $7, $18
- addq $22, $17, $22
- cmpult $22, $17, $24
- addq $24, $18, $18
- addq $23, $18, $23
- cmpult $23, $18, $19
- addq $8, $19, $8
- mulq $4, $5, $25
- umulh $4, $5, $28
- addq $22, $25, $22
- cmpult $22, $25, $21
- addq $21, $28, $28
- addq $23, $28, $23
- cmpult $23, $28, $20
- addq $8, $20, $8
- mulq $6, $3, $0
- umulh $6, $3, $27
- addq $22, $0, $22
- cmpult $22, $0, $1
- addq $1, $27, $27
- addq $23, $27, $23
- cmpult $23, $27, $17
- addq $8, $17, $8
- stq $22, 32($16)
- bis $31, $31, $22
- mulq $4, $7, $24
- umulh $4, $7, $18
- addq $23, $24, $23
- cmpult $23, $24, $19
- addq $19, $18, $18
- addq $8, $18, $8
- cmpult $8, $18, $2
- addq $22, $2, $22
- mulq $6, $5, $25
- umulh $6, $5, $21
- addq $23, $25, $23
- cmpult $23, $25, $28
- addq $28, $21, $21
- addq $8, $21, $8
- cmpult $8, $21, $20
- addq $22, $20, $22
- stq $23, 40($16)
- bis $31, $31, $23
- mulq $6, $7, $0
- umulh $6, $7, $1
- addq $8, $0, $8
- cmpult $8, $0, $27
- addq $27, $1, $1
- addq $22, $1, $22
- cmpult $22, $1, $17
- addq $23, $17, $23
- stq $8, 48($16)
- stq $22, 56($16)
- ret $31,($26),1
- .end bn_mul_comba4
- .text
- .align 3
- .globl bn_mul_comba8
- .ent bn_mul_comba8
-bn_mul_comba8:
-bn_mul_comba8..ng:
- .frame $30,0,$26,0
- .prologue 0
- ldq $1, 0($17)
- ldq $2, 0($18)
- zapnot $1, 15, $7
- srl $2, 32, $8
- mulq $8, $7, $22
- srl $1, 32, $6
- zapnot $2, 15, $5
- mulq $5, $6, $4
- mulq $7, $5, $24
- addq $22, $4, $22
- cmpult $22, $4, $1
- mulq $6, $8, $3
- beq $1, $173
- bis $31, 1, $1
- sll $1, 32, $1
- addq $3, $1, $3
-$173:
- sll $22, 32, $4
- addq $24, $4, $24
- stq $24, 0($16)
- ldq $2, 0($17)
- ldq $1, 8($18)
- zapnot $2, 15, $7
- srl $1, 32, $8
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $2, 32, $6
- mulq $5, $6, $23
- mulq $6, $8, $6
- srl $22, 32, $1
- cmpult $24, $4, $2
- addq $3, $1, $3
- addq $2, $3, $22
- addq $25, $23, $25
- cmpult $25, $23, $1
- bis $31, 1, $2
- beq $1, $177
- sll $2, 32, $1
- addq $6, $1, $6
-$177:
- sll $25, 32, $23
- ldq $1, 0($18)
- addq $0, $23, $0
- bis $0, $0, $7
- ldq $3, 8($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $4
- zapnot $3, 15, $7
- mulq $8, $7, $28
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $25, 32, $1
- cmpult $0, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $4, $6, $24
- srl $3, 32, $6
- mulq $5, $6, $2
- mulq $6, $8, $6
- addq $28, $2, $28
- cmpult $28, $2, $1
- bis $31, 1, $2
- beq $1, $181
- sll $2, 32, $1
- addq $6, $1, $6
-$181:
- sll $28, 32, $2
- addq $21, $2, $21
- bis $21, $21, $7
- addq $22, $7, $22
- stq $22, 8($16)
- ldq $3, 16($17)
- ldq $1, 0($18)
- cmpult $22, $7, $4
- zapnot $3, 15, $7
- srl $1, 32, $8
- mulq $8, $7, $22
- zapnot $1, 15, $5
- mulq $7, $5, $20
- srl $28, 32, $1
- cmpult $21, $2, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $4, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $3, 32, $6
- mulq $5, $6, $2
- mulq $6, $8, $6
- addq $22, $2, $22
- cmpult $22, $2, $1
- bis $31, 1, $2
- beq $1, $185
- sll $2, 32, $1
- addq $6, $1, $6
-$185:
- sll $22, 32, $2
- ldq $1, 8($18)
- addq $20, $2, $20
- bis $20, $20, $7
- ldq $4, 8($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $22, 32, $1
- cmpult $20, $2, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $189
- sll $21, 32, $1
- addq $6, $1, $6
-$189:
- sll $25, 32, $5
- ldq $2, 16($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 0($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $193
- sll $21, 32, $1
- addq $6, $1, $6
-$193:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 16($16)
- ldq $4, 0($17)
- ldq $5, 24($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $0, $24, $0
- cmpult $0, $24, $1
- mulq $6, $8, $6
- beq $1, $197
- sll $21, 32, $1
- addq $6, $1, $6
-$197:
- sll $0, 32, $24
- ldq $1, 16($18)
- addq $2, $24, $2
- bis $2, $2, $7
- ldq $4, 8($17)
- addq $23, $7, $23
- srl $1, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $24, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $201
- sll $20, 32, $1
- addq $6, $1, $6
-$201:
- sll $25, 32, $5
- ldq $2, 8($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 16($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $205
- sll $20, 32, $1
- addq $6, $1, $6
-$205:
- sll $28, 32, $25
- ldq $2, 0($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $209
- sll $20, 32, $1
- addq $6, $1, $6
-$209:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 24($16)
- ldq $4, 32($17)
- ldq $5, 0($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $28, $23, $28
- cmpult $28, $23, $1
- mulq $6, $8, $6
- beq $1, $213
- sll $20, 32, $1
- addq $6, $1, $6
-$213:
- sll $28, 32, $23
- ldq $1, 8($18)
- addq $2, $23, $2
- bis $2, $2, $7
- ldq $4, 24($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $217
- sll $21, 32, $1
- addq $6, $1, $6
-$217:
- sll $25, 32, $5
- ldq $2, 16($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 16($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $221
- sll $21, 32, $1
- addq $6, $1, $6
-$221:
- sll $28, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 8($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $225
- sll $21, 32, $1
- addq $6, $1, $6
-$225:
- sll $0, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 0($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $229
- sll $21, 32, $1
- addq $6, $1, $6
-$229:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 32($16)
- ldq $4, 0($17)
- ldq $5, 40($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $0, $22, $0
- cmpult $0, $22, $1
- mulq $6, $8, $6
- beq $1, $233
- sll $21, 32, $1
- addq $6, $1, $6
-$233:
- sll $0, 32, $22
- ldq $1, 32($18)
- addq $2, $22, $2
- bis $2, $2, $7
- ldq $4, 8($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $22, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $237
- sll $20, 32, $1
- addq $6, $1, $6
-$237:
- sll $25, 32, $5
- ldq $2, 24($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 16($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $241
- sll $20, 32, $1
- addq $6, $1, $6
-$241:
- sll $28, 32, $25
- ldq $2, 16($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $245
- sll $20, 32, $1
- addq $6, $1, $6
-$245:
- sll $0, 32, $25
- ldq $2, 8($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $249
- sll $20, 32, $1
- addq $6, $1, $6
-$249:
- sll $28, 32, $25
- ldq $2, 0($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $253
- sll $20, 32, $1
- addq $6, $1, $6
-$253:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 40($16)
- ldq $4, 48($17)
- ldq $5, 0($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $28, $24, $28
- cmpult $28, $24, $1
- mulq $6, $8, $6
- beq $1, $257
- sll $20, 32, $1
- addq $6, $1, $6
-$257:
- sll $28, 32, $24
- ldq $1, 8($18)
- addq $2, $24, $2
- bis $2, $2, $7
- ldq $4, 40($17)
- addq $23, $7, $23
- srl $1, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $24, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $261
- sll $21, 32, $1
- addq $6, $1, $6
-$261:
- sll $25, 32, $5
- ldq $2, 16($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 32($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $265
- sll $21, 32, $1
- addq $6, $1, $6
-$265:
- sll $28, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $269
- sll $21, 32, $1
- addq $6, $1, $6
-$269:
- sll $0, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 16($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $273
- sll $21, 32, $1
- addq $6, $1, $6
-$273:
- sll $28, 32, $25
- ldq $2, 40($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 8($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $277
- sll $21, 32, $1
- addq $6, $1, $6
-$277:
- sll $0, 32, $25
- ldq $2, 48($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 0($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $281
- sll $21, 32, $1
- addq $6, $1, $6
-$281:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 48($16)
- ldq $4, 0($17)
- ldq $5, 56($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $23, $0
- cmpult $0, $23, $1
- mulq $6, $8, $6
- beq $1, $285
- sll $21, 32, $1
- addq $6, $1, $6
-$285:
- sll $0, 32, $23
- ldq $1, 48($18)
- addq $2, $23, $2
- bis $2, $2, $7
- ldq $4, 8($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $289
- sll $20, 32, $1
- addq $6, $1, $6
-$289:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 16($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $293
- sll $20, 32, $1
- addq $6, $1, $6
-$293:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $297
- sll $20, 32, $1
- addq $6, $1, $6
-$297:
- sll $0, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $301
- sll $20, 32, $1
- addq $6, $1, $6
-$301:
- sll $28, 32, $25
- ldq $2, 16($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $305
- sll $20, 32, $1
- addq $6, $1, $6
-$305:
- sll $0, 32, $25
- ldq $2, 8($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 48($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $309
- sll $20, 32, $1
- addq $6, $1, $6
-$309:
- sll $28, 32, $25
- ldq $2, 0($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $313
- sll $20, 32, $1
- addq $6, $1, $6
-$313:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 56($16)
- ldq $4, 56($17)
- ldq $5, 8($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $22, $28
- cmpult $28, $22, $1
- mulq $6, $8, $6
- beq $1, $317
- sll $20, 32, $1
- addq $6, $1, $6
-$317:
- sll $28, 32, $22
- ldq $1, 16($18)
- addq $2, $22, $2
- bis $2, $2, $7
- ldq $4, 48($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $22, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $321
- sll $21, 32, $1
- addq $6, $1, $6
-$321:
- sll $25, 32, $5
- ldq $2, 24($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 40($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $325
- sll $21, 32, $1
- addq $6, $1, $6
-$325:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $329
- sll $21, 32, $1
- addq $6, $1, $6
-$329:
- sll $0, 32, $25
- ldq $2, 40($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $333
- sll $21, 32, $1
- addq $6, $1, $6
-$333:
- sll $28, 32, $25
- ldq $2, 48($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 16($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $337
- sll $21, 32, $1
- addq $6, $1, $6
-$337:
- sll $0, 32, $25
- ldq $2, 56($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 8($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $341
- sll $21, 32, $1
- addq $6, $1, $6
-$341:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 64($16)
- ldq $4, 16($17)
- ldq $5, 56($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $0, $24, $0
- cmpult $0, $24, $1
- mulq $6, $8, $6
- beq $1, $345
- sll $21, 32, $1
- addq $6, $1, $6
-$345:
- sll $0, 32, $24
- ldq $1, 48($18)
- addq $2, $24, $2
- bis $2, $2, $7
- ldq $4, 24($17)
- addq $23, $7, $23
- srl $1, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $24, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $349
- sll $20, 32, $1
- addq $6, $1, $6
-$349:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 32($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $353
- sll $20, 32, $1
- addq $6, $1, $6
-$353:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $357
- sll $20, 32, $1
- addq $6, $1, $6
-$357:
- sll $0, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 48($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $361
- sll $20, 32, $1
- addq $6, $1, $6
-$361:
- sll $28, 32, $25
- ldq $2, 16($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $365
- sll $20, 32, $1
- addq $6, $1, $6
-$365:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 72($16)
- ldq $4, 56($17)
- ldq $5, 24($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $28, $23, $28
- cmpult $28, $23, $1
- mulq $6, $8, $6
- beq $1, $369
- sll $20, 32, $1
- addq $6, $1, $6
-$369:
- sll $28, 32, $23
- ldq $1, 32($18)
- addq $2, $23, $2
- bis $2, $2, $7
- ldq $4, 48($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $373
- sll $21, 32, $1
- addq $6, $1, $6
-$373:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 40($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $377
- sll $21, 32, $1
- addq $6, $1, $6
-$377:
- sll $28, 32, $25
- ldq $2, 48($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $381
- sll $21, 32, $1
- addq $6, $1, $6
-$381:
- sll $0, 32, $25
- ldq $2, 56($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $385
- sll $21, 32, $1
- addq $6, $1, $6
-$385:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 80($16)
- ldq $4, 32($17)
- ldq $5, 56($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $0, $22, $0
- cmpult $0, $22, $1
- mulq $6, $8, $6
- beq $1, $389
- sll $21, 32, $1
- addq $6, $1, $6
-$389:
- sll $0, 32, $22
- ldq $1, 48($18)
- addq $2, $22, $2
- bis $2, $2, $7
- ldq $4, 40($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $22, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $393
- sll $20, 32, $1
- addq $6, $1, $6
-$393:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 48($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $397
- sll $20, 32, $1
- addq $6, $1, $6
-$397:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $21
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $21, $25, $21
- cmpult $21, $25, $1
- mulq $6, $8, $6
- beq $1, $401
- sll $20, 32, $1
- addq $6, $1, $6
-$401:
- sll $21, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 88($16)
- ldq $4, 56($17)
- ldq $5, 40($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $21, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $24, $0
- cmpult $0, $24, $1
- mulq $6, $8, $6
- beq $1, $405
- sll $20, 32, $1
- addq $6, $1, $6
-$405:
- sll $0, 32, $24
- ldq $2, 48($18)
- addq $5, $24, $5
- bis $5, $5, $7
- ldq $4, 48($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $24, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $409
- sll $20, 32, $1
- addq $6, $1, $6
-$409:
- sll $28, 32, $25
- ldq $2, 56($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $413
- sll $20, 32, $1
- addq $6, $1, $6
-$413:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 96($16)
- ldq $4, 48($17)
- ldq $5, 56($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $23, $28
- cmpult $28, $23, $1
- mulq $6, $8, $6
- beq $1, $417
- sll $20, 32, $1
- addq $6, $1, $6
-$417:
- sll $28, 32, $23
- ldq $2, 48($18)
- addq $5, $23, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $23, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $421
- sll $20, 32, $1
- addq $6, $1, $6
-$421:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 104($16)
- ldq $4, 56($17)
- ldq $5, 56($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $22, $28
- cmpult $28, $22, $1
- mulq $6, $8, $3
- beq $1, $425
- sll $20, 32, $1
- addq $3, $1, $3
-$425:
- sll $28, 32, $22
- srl $28, 32, $1
- addq $2, $22, $2
- addq $3, $1, $3
- bis $2, $2, $7
- addq $24, $7, $24
- cmpult $7, $22, $1
- cmpult $24, $7, $2
- addq $1, $3, $6
- addq $2, $6, $6
- stq $24, 112($16)
- addq $23, $6, $23
- stq $23, 120($16)
- ret $31, ($26), 1
- .end bn_mul_comba8
- .text
- .align 3
- .globl bn_sqr_comba4
- .ent bn_sqr_comba4
-bn_sqr_comba4:
-bn_sqr_comba4..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- ldq $0, 0($17)
- ldq $1, 8($17)
- ldq $2, 16($17)
- ldq $3, 24($17)
- bis $31, $31, $6
- mulq $0, $0, $4
- umulh $0, $0, $5
- stq $4, 0($16)
- bis $31, $31, $4
- mulq $0, $1, $7
- umulh $0, $1, $8
- cmplt $7, $31, $22
- cmplt $8, $31, $23
- addq $7, $7, $7
- addq $8, $8, $8
- addq $8, $22, $8
- addq $4, $23, $4
- addq $5, $7, $5
- addq $6, $8, $6
- cmpult $5, $7, $24
- cmpult $6, $8, $25
- addq $6, $24, $6
- addq $4, $25, $4
- stq $5, 8($16)
- bis $31, $31, $5
- mulq $1, $1, $27
- umulh $1, $1, $28
- addq $6, $27, $6
- addq $4, $28, $4
- cmpult $6, $27, $21
- cmpult $4, $28, $20
- addq $4, $21, $4
- addq $5, $20, $5
- mulq $2, $0, $19
- umulh $2, $0, $18
- cmplt $19, $31, $17
- cmplt $18, $31, $22
- addq $19, $19, $19
- addq $18, $18, $18
- addq $18, $17, $18
- addq $5, $22, $5
- addq $6, $19, $6
- addq $4, $18, $4
- cmpult $6, $19, $23
- cmpult $4, $18, $7
- addq $4, $23, $4
- addq $5, $7, $5
- stq $6, 16($16)
- bis $31, $31, $6
- mulq $3, $0, $8
- umulh $3, $0, $24
- cmplt $8, $31, $25
- cmplt $24, $31, $27
- addq $8, $8, $8
- addq $24, $24, $24
- addq $24, $25, $24
- addq $6, $27, $6
- addq $4, $8, $4
- addq $5, $24, $5
- cmpult $4, $8, $28
- cmpult $5, $24, $21
- addq $5, $28, $5
- addq $6, $21, $6
- mulq $2, $1, $20
- umulh $2, $1, $17
- cmplt $20, $31, $22
- cmplt $17, $31, $19
- addq $20, $20, $20
- addq $17, $17, $17
- addq $17, $22, $17
- addq $6, $19, $6
- addq $4, $20, $4
- addq $5, $17, $5
- cmpult $4, $20, $18
- cmpult $5, $17, $23
- addq $5, $18, $5
- addq $6, $23, $6
- stq $4, 24($16)
- bis $31, $31, $4
- mulq $2, $2, $7
- umulh $2, $2, $25
- addq $5, $7, $5
- addq $6, $25, $6
- cmpult $5, $7, $27
- cmpult $6, $25, $8
- addq $6, $27, $6
- addq $4, $8, $4
- mulq $3, $1, $24
- umulh $3, $1, $28
- cmplt $24, $31, $21
- cmplt $28, $31, $22
- addq $24, $24, $24
- addq $28, $28, $28
- addq $28, $21, $28
- addq $4, $22, $4
- addq $5, $24, $5
- addq $6, $28, $6
- cmpult $5, $24, $19
- cmpult $6, $28, $20
- addq $6, $19, $6
- addq $4, $20, $4
- stq $5, 32($16)
- bis $31, $31, $5
- mulq $3, $2, $17
- umulh $3, $2, $18
- cmplt $17, $31, $23
- cmplt $18, $31, $7
- addq $17, $17, $17
- addq $18, $18, $18
- addq $18, $23, $18
- addq $5, $7, $5
- addq $6, $17, $6
- addq $4, $18, $4
- cmpult $6, $17, $25
- cmpult $4, $18, $27
- addq $4, $25, $4
- addq $5, $27, $5
- stq $6, 40($16)
- bis $31, $31, $6
- mulq $3, $3, $8
- umulh $3, $3, $21
- addq $4, $8, $4
- addq $5, $21, $5
- cmpult $4, $8, $22
- cmpult $5, $21, $24
- addq $5, $22, $5
- addq $6, $24, $6
- stq $4, 48($16)
- stq $5, 56($16)
- ret $31,($26),1
- .end bn_sqr_comba4
- .text
- .align 3
- .globl bn_sqr_comba8
- .ent bn_sqr_comba8
-bn_sqr_comba8:
-bn_sqr_comba8..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- ldq $0, 0($17)
- ldq $1, 8($17)
- ldq $2, 16($17)
- ldq $3, 24($17)
- ldq $4, 32($17)
- ldq $5, 40($17)
- ldq $6, 48($17)
- ldq $7, 56($17)
- bis $31, $31, $23
- mulq $0, $0, $8
- umulh $0, $0, $22
- stq $8, 0($16)
- bis $31, $31, $8
- mulq $1, $0, $24
- umulh $1, $0, $25
- cmplt $24, $31, $27
- cmplt $25, $31, $28
- addq $24, $24, $24
- addq $25, $25, $25
- addq $25, $27, $25
- addq $8, $28, $8
- addq $22, $24, $22
- addq $23, $25, $23
- cmpult $22, $24, $21
- cmpult $23, $25, $20
- addq $23, $21, $23
- addq $8, $20, $8
- stq $22, 8($16)
- bis $31, $31, $22
- mulq $1, $1, $19
- umulh $1, $1, $18
- addq $23, $19, $23
- addq $8, $18, $8
- cmpult $23, $19, $17
- cmpult $8, $18, $27
- addq $8, $17, $8
- addq $22, $27, $22
- mulq $2, $0, $28
- umulh $2, $0, $24
- cmplt $28, $31, $25
- cmplt $24, $31, $21
- addq $28, $28, $28
- addq $24, $24, $24
- addq $24, $25, $24
- addq $22, $21, $22
- addq $23, $28, $23
- addq $8, $24, $8
- cmpult $23, $28, $20
- cmpult $8, $24, $19
- addq $8, $20, $8
- addq $22, $19, $22
- stq $23, 16($16)
- bis $31, $31, $23
- mulq $2, $1, $18
- umulh $2, $1, $17
- cmplt $18, $31, $27
- cmplt $17, $31, $25
- addq $18, $18, $18
- addq $17, $17, $17
- addq $17, $27, $17
- addq $23, $25, $23
- addq $8, $18, $8
- addq $22, $17, $22
- cmpult $8, $18, $21
- cmpult $22, $17, $28
- addq $22, $21, $22
- addq $23, $28, $23
- mulq $3, $0, $24
- umulh $3, $0, $20
- cmplt $24, $31, $19
- cmplt $20, $31, $27
- addq $24, $24, $24
- addq $20, $20, $20
- addq $20, $19, $20
- addq $23, $27, $23
- addq $8, $24, $8
- addq $22, $20, $22
- cmpult $8, $24, $25
- cmpult $22, $20, $18
- addq $22, $25, $22
- addq $23, $18, $23
- stq $8, 24($16)
- bis $31, $31, $8
- mulq $2, $2, $17
- umulh $2, $2, $21
- addq $22, $17, $22
- addq $23, $21, $23
- cmpult $22, $17, $28
- cmpult $23, $21, $19
- addq $23, $28, $23
- addq $8, $19, $8
- mulq $3, $1, $27
- umulh $3, $1, $24
- cmplt $27, $31, $20
- cmplt $24, $31, $25
- addq $27, $27, $27
- addq $24, $24, $24
- addq $24, $20, $24
- addq $8, $25, $8
- addq $22, $27, $22
- addq $23, $24, $23
- cmpult $22, $27, $18
- cmpult $23, $24, $17
- addq $23, $18, $23
- addq $8, $17, $8
- mulq $4, $0, $21
- umulh $4, $0, $28
- cmplt $21, $31, $19
- cmplt $28, $31, $20
- addq $21, $21, $21
- addq $28, $28, $28
- addq $28, $19, $28
- addq $8, $20, $8
- addq $22, $21, $22
- addq $23, $28, $23
- cmpult $22, $21, $25
- cmpult $23, $28, $27
- addq $23, $25, $23
- addq $8, $27, $8
- stq $22, 32($16)
- bis $31, $31, $22
- mulq $3, $2, $24
- umulh $3, $2, $18
- cmplt $24, $31, $17
- cmplt $18, $31, $19
- addq $24, $24, $24
- addq $18, $18, $18
- addq $18, $17, $18
- addq $22, $19, $22
- addq $23, $24, $23
- addq $8, $18, $8
- cmpult $23, $24, $20
- cmpult $8, $18, $21
- addq $8, $20, $8
- addq $22, $21, $22
- mulq $4, $1, $28
- umulh $4, $1, $25
- cmplt $28, $31, $27
- cmplt $25, $31, $17
- addq $28, $28, $28
- addq $25, $25, $25
- addq $25, $27, $25
- addq $22, $17, $22
- addq $23, $28, $23
- addq $8, $25, $8
- cmpult $23, $28, $19
- cmpult $8, $25, $24
- addq $8, $19, $8
- addq $22, $24, $22
- mulq $5, $0, $18
- umulh $5, $0, $20
- cmplt $18, $31, $21
- cmplt $20, $31, $27
- addq $18, $18, $18
- addq $20, $20, $20
- addq $20, $21, $20
- addq $22, $27, $22
- addq $23, $18, $23
- addq $8, $20, $8
- cmpult $23, $18, $17
- cmpult $8, $20, $28
- addq $8, $17, $8
- addq $22, $28, $22
- stq $23, 40($16)
- bis $31, $31, $23
- mulq $3, $3, $25
- umulh $3, $3, $19
- addq $8, $25, $8
- addq $22, $19, $22
- cmpult $8, $25, $24
- cmpult $22, $19, $21
- addq $22, $24, $22
- addq $23, $21, $23
- mulq $4, $2, $27
- umulh $4, $2, $18
- cmplt $27, $31, $20
- cmplt $18, $31, $17
- addq $27, $27, $27
- addq $18, $18, $18
- addq $18, $20, $18
- addq $23, $17, $23
- addq $8, $27, $8
- addq $22, $18, $22
- cmpult $8, $27, $28
- cmpult $22, $18, $25
- addq $22, $28, $22
- addq $23, $25, $23
- mulq $5, $1, $19
- umulh $5, $1, $24
- cmplt $19, $31, $21
- cmplt $24, $31, $20
- addq $19, $19, $19
- addq $24, $24, $24
- addq $24, $21, $24
- addq $23, $20, $23
- addq $8, $19, $8
- addq $22, $24, $22
- cmpult $8, $19, $17
- cmpult $22, $24, $27
- addq $22, $17, $22
- addq $23, $27, $23
- mulq $6, $0, $18
- umulh $6, $0, $28
- cmplt $18, $31, $25
- cmplt $28, $31, $21
- addq $18, $18, $18
- addq $28, $28, $28
- addq $28, $25, $28
- addq $23, $21, $23
- addq $8, $18, $8
- addq $22, $28, $22
- cmpult $8, $18, $20
- cmpult $22, $28, $19
- addq $22, $20, $22
- addq $23, $19, $23
- stq $8, 48($16)
- bis $31, $31, $8
- mulq $4, $3, $24
- umulh $4, $3, $17
- cmplt $24, $31, $27
- cmplt $17, $31, $25
- addq $24, $24, $24
- addq $17, $17, $17
- addq $17, $27, $17
- addq $8, $25, $8
- addq $22, $24, $22
- addq $23, $17, $23
- cmpult $22, $24, $21
- cmpult $23, $17, $18
- addq $23, $21, $23
- addq $8, $18, $8
- mulq $5, $2, $28
- umulh $5, $2, $20
- cmplt $28, $31, $19
- cmplt $20, $31, $27
- addq $28, $28, $28
- addq $20, $20, $20
- addq $20, $19, $20
- addq $8, $27, $8
- addq $22, $28, $22
- addq $23, $20, $23
- cmpult $22, $28, $25
- cmpult $23, $20, $24
- addq $23, $25, $23
- addq $8, $24, $8
- mulq $6, $1, $17
- umulh $6, $1, $21
- cmplt $17, $31, $18
- cmplt $21, $31, $19
- addq $17, $17, $17
- addq $21, $21, $21
- addq $21, $18, $21
- addq $8, $19, $8
- addq $22, $17, $22
- addq $23, $21, $23
- cmpult $22, $17, $27
- cmpult $23, $21, $28
- addq $23, $27, $23
- addq $8, $28, $8
- mulq $7, $0, $20
- umulh $7, $0, $25
- cmplt $20, $31, $24
- cmplt $25, $31, $18
- addq $20, $20, $20
- addq $25, $25, $25
- addq $25, $24, $25
- addq $8, $18, $8
- addq $22, $20, $22
- addq $23, $25, $23
- cmpult $22, $20, $19
- cmpult $23, $25, $17
- addq $23, $19, $23
- addq $8, $17, $8
- stq $22, 56($16)
- bis $31, $31, $22
- mulq $4, $4, $21
- umulh $4, $4, $27
- addq $23, $21, $23
- addq $8, $27, $8
- cmpult $23, $21, $28
- cmpult $8, $27, $24
- addq $8, $28, $8
- addq $22, $24, $22
- mulq $5, $3, $18
- umulh $5, $3, $20
- cmplt $18, $31, $25
- cmplt $20, $31, $19
- addq $18, $18, $18
- addq $20, $20, $20
- addq $20, $25, $20
- addq $22, $19, $22
- addq $23, $18, $23
- addq $8, $20, $8
- cmpult $23, $18, $17
- cmpult $8, $20, $21
- addq $8, $17, $8
- addq $22, $21, $22
- mulq $6, $2, $27
- umulh $6, $2, $28
- cmplt $27, $31, $24
- cmplt $28, $31, $25
- addq $27, $27, $27
- addq $28, $28, $28
- addq $28, $24, $28
- addq $22, $25, $22
- addq $23, $27, $23
- addq $8, $28, $8
- cmpult $23, $27, $19
- cmpult $8, $28, $18
- addq $8, $19, $8
- addq $22, $18, $22
- mulq $7, $1, $20
- umulh $7, $1, $17
- cmplt $20, $31, $21
- cmplt $17, $31, $24
- addq $20, $20, $20
- addq $17, $17, $17
- addq $17, $21, $17
- addq $22, $24, $22
- addq $23, $20, $23
- addq $8, $17, $8
- cmpult $23, $20, $25
- cmpult $8, $17, $27
- addq $8, $25, $8
- addq $22, $27, $22
- stq $23, 64($16)
- bis $31, $31, $23
- mulq $5, $4, $28
- umulh $5, $4, $19
- cmplt $28, $31, $18
- cmplt $19, $31, $21
- addq $28, $28, $28
- addq $19, $19, $19
- addq $19, $18, $19
- addq $23, $21, $23
- addq $8, $28, $8
- addq $22, $19, $22
- cmpult $8, $28, $24
- cmpult $22, $19, $20
- addq $22, $24, $22
- addq $23, $20, $23
- mulq $6, $3, $17
- umulh $6, $3, $25
- cmplt $17, $31, $27
- cmplt $25, $31, $18
- addq $17, $17, $17
- addq $25, $25, $25
- addq $25, $27, $25
- addq $23, $18, $23
- addq $8, $17, $8
- addq $22, $25, $22
- cmpult $8, $17, $21
- cmpult $22, $25, $28
- addq $22, $21, $22
- addq $23, $28, $23
- mulq $7, $2, $19
- umulh $7, $2, $24
- cmplt $19, $31, $20
- cmplt $24, $31, $27
- addq $19, $19, $19
- addq $24, $24, $24
- addq $24, $20, $24
- addq $23, $27, $23
- addq $8, $19, $8
- addq $22, $24, $22
- cmpult $8, $19, $18
- cmpult $22, $24, $17
- addq $22, $18, $22
- addq $23, $17, $23
- stq $8, 72($16)
- bis $31, $31, $8
- mulq $5, $5, $25
- umulh $5, $5, $21
- addq $22, $25, $22
- addq $23, $21, $23
- cmpult $22, $25, $28
- cmpult $23, $21, $20
- addq $23, $28, $23
- addq $8, $20, $8
- mulq $6, $4, $27
- umulh $6, $4, $19
- cmplt $27, $31, $24
- cmplt $19, $31, $18
- addq $27, $27, $27
- addq $19, $19, $19
- addq $19, $24, $19
- addq $8, $18, $8
- addq $22, $27, $22
- addq $23, $19, $23
- cmpult $22, $27, $17
- cmpult $23, $19, $25
- addq $23, $17, $23
- addq $8, $25, $8
- mulq $7, $3, $21
- umulh $7, $3, $28
- cmplt $21, $31, $20
- cmplt $28, $31, $24
- addq $21, $21, $21
- addq $28, $28, $28
- addq $28, $20, $28
- addq $8, $24, $8
- addq $22, $21, $22
- addq $23, $28, $23
- cmpult $22, $21, $18
- cmpult $23, $28, $27
- addq $23, $18, $23
- addq $8, $27, $8
- stq $22, 80($16)
- bis $31, $31, $22
- mulq $6, $5, $19
- umulh $6, $5, $17
- cmplt $19, $31, $25
- cmplt $17, $31, $20
- addq $19, $19, $19
- addq $17, $17, $17
- addq $17, $25, $17
- addq $22, $20, $22
- addq $23, $19, $23
- addq $8, $17, $8
- cmpult $23, $19, $24
- cmpult $8, $17, $21
- addq $8, $24, $8
- addq $22, $21, $22
- mulq $7, $4, $28
- umulh $7, $4, $18
- cmplt $28, $31, $27
- cmplt $18, $31, $25
- addq $28, $28, $28
- addq $18, $18, $18
- addq $18, $27, $18
- addq $22, $25, $22
- addq $23, $28, $23
- addq $8, $18, $8
- cmpult $23, $28, $20
- cmpult $8, $18, $19
- addq $8, $20, $8
- addq $22, $19, $22
- stq $23, 88($16)
- bis $31, $31, $23
- mulq $6, $6, $17
- umulh $6, $6, $24
- addq $8, $17, $8
- addq $22, $24, $22
- cmpult $8, $17, $21
- cmpult $22, $24, $27
- addq $22, $21, $22
- addq $23, $27, $23
- mulq $7, $5, $25
- umulh $7, $5, $28
- cmplt $25, $31, $18
- cmplt $28, $31, $20
- addq $25, $25, $25
- addq $28, $28, $28
- addq $28, $18, $28
- addq $23, $20, $23
- addq $8, $25, $8
- addq $22, $28, $22
- cmpult $8, $25, $19
- cmpult $22, $28, $17
- addq $22, $19, $22
- addq $23, $17, $23
- stq $8, 96($16)
- bis $31, $31, $8
- mulq $7, $6, $24
- umulh $7, $6, $21
- cmplt $24, $31, $27
- cmplt $21, $31, $18
- addq $24, $24, $24
- addq $21, $21, $21
- addq $21, $27, $21
- addq $8, $18, $8
- addq $22, $24, $22
- addq $23, $21, $23
- cmpult $22, $24, $20
- cmpult $23, $21, $25
- addq $23, $20, $23
- addq $8, $25, $8
- stq $22, 104($16)
- bis $31, $31, $22
- mulq $7, $7, $28
- umulh $7, $7, $19
- addq $23, $28, $23
- addq $8, $19, $8
- cmpult $23, $28, $17
- cmpult $8, $19, $27
- addq $8, $17, $8
- addq $22, $27, $22
- stq $23, 112($16)
- stq $8, 120($16)
- ret $31,($26),1
- .end bn_sqr_comba8
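
Each repeated block in the unrolled bn_sqr_comba8 above handles one cross
product a[i]*a[j]: mulq/umulh form the 128-bit product, cmplt against $31
records the top bit of each half before it is doubled, and the cmpult/addq
pairs fold the doubled product plus carries into a three-word column
accumulator.  Roughly, in C (an illustrative sketch only: it uses the
compiler extension unsigned __int128 for the high half, and the function
name is made up; this is not code from the tree):

	#include <stdint.h>

	/* One comba-squaring step: (c2:c1:c0) += 2 * a * b, with the
	 * carries recovered by unsigned compares much as the assembly
	 * above does with cmplt/cmpult.  Sketch only. */
	static void sqr_add_c2_sketch(uint64_t a, uint64_t b,
	    uint64_t *c0, uint64_t *c1, uint64_t *c2)
	{
		unsigned __int128 p = (unsigned __int128)a * b;
		uint64_t lo = (uint64_t)p, hi = (uint64_t)(p >> 64);

		/* remember the bits that doubling will shift out */
		uint64_t lo_top = lo >> 63, hi_top = hi >> 63;
		lo += lo;			/* double the low half      */
		hi = hi + hi + lo_top;		/* double the high half and
						 * fold in the low's lost bit */
		*c2 += hi_top;			/* high's lost bit -> c2    */

		*c0 += lo;
		uint64_t k0 = (*c0 < lo);	/* carry out of c0          */
		*c1 += hi;
		uint64_t k1 = (*c1 < hi);	/* carry out of c1          */
		*c1 += k0;
		*c2 += (*c1 < k0) + k1;		/* propagate into c2        */
	}
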
diff --git a/lib/libcrypto/bn/asm/alpha.s.works b/lib/libcrypto/bn/asm/alpha.s.works
deleted file mode 100644
index ee6c5878099..00000000000
--- a/lib/libcrypto/bn/asm/alpha.s.works
+++ /dev/null
@@ -1,533 +0,0 @@
-
-	# DEC Alpha assembler
-	# The bn_div64 is actually gcc output but the other parts are hand done.
-	# Thanks to tzeruch@ceddec.com for sending me the gcc output for
-	# bn_div64.
-	# I've gone back and re-done most of the routines.
-	# The key thing to remember for the 164 CPU is that while a
-	# multiply operation takes 8 cycles, another one can only be issued
-	# after 4 cycles have elapsed. I've made modifications to help
-	# improve this. Also, normally, an ld instruction will not be available
-	# for about 3 cycles.
- .file 1 "bn_asm.c"
- .set noat
-gcc2_compiled.:
-__gnu_compiled_c:
- .text
- .align 3
- .globl bn_mul_add_words
- .ent bn_mul_add_words
-bn_mul_add_words:
-bn_mul_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$0
- blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
- .align 3
-$42:
- mulq $20,$19,$5 # 1 2 1 ######
- ldq $21,8($17) # 2 1
- ldq $2,8($16) # 2 1
- umulh $20,$19,$20 # 1 2 ######
- ldq $27,16($17) # 3 1
- ldq $3,16($16) # 3 1
- mulq $21,$19,$6 # 2 2 1 ######
- ldq $28,24($17) # 4 1
- addq $1,$5,$1 # 1 2 2
- ldq $4,24($16) # 4 1
- umulh $21,$19,$21 # 2 2 ######
- cmpult $1,$5,$22 # 1 2 3 1
- addq $20,$22,$20 # 1 3 1
- addq $1,$0,$1 # 1 2 3 1
- mulq $27,$19,$7 # 3 2 1 ######
- cmpult $1,$0,$0 # 1 2 3 2
- addq $2,$6,$2 # 2 2 2
- addq $20,$0,$0 # 1 3 2
- cmpult $2,$6,$23 # 2 2 3 1
- addq $21,$23,$21 # 2 3 1
- umulh $27,$19,$27 # 3 2 ######
- addq $2,$0,$2 # 2 2 3 1
- cmpult $2,$0,$0 # 2 2 3 2
- subq $18,4,$18
- mulq $28,$19,$8 # 4 2 1 ######
- addq $21,$0,$0 # 2 3 2
- addq $3,$7,$3 # 3 2 2
- addq $16,32,$16
- cmpult $3,$7,$24 # 3 2 3 1
- stq $1,-32($16) # 1 2 4
- umulh $28,$19,$28 # 4 2 ######
- addq $27,$24,$27 # 3 3 1
- addq $3,$0,$3 # 3 2 3 1
- stq $2,-24($16) # 2 2 4
- cmpult $3,$0,$0 # 3 2 3 2
- stq $3,-16($16) # 3 2 4
- addq $4,$8,$4 # 4 2 2
- addq $27,$0,$0 # 3 3 2
- cmpult $4,$8,$25 # 4 2 3 1
- addq $17,32,$17
- addq $28,$25,$28 # 4 3 1
- addq $4,$0,$4 # 4 2 3 1
- cmpult $4,$0,$0 # 4 2 3 2
- stq $4,-8($16) # 4 2 4
- addq $28,$0,$0 # 4 3 2
- blt $18,$43
-
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
-
- br $42
-
- .align 4
-$45:
- ldq $20,0($17) # 4 1
- ldq $1,0($16) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18
- addq $16,8,$16
- addq $17,8,$17
- umulh $20,$19,$20 # 4 2
- addq $1,$5,$1 # 4 2 2
- cmpult $1,$5,$22 # 4 2 3 1
- addq $20,$22,$20 # 4 3 1
- addq $1,$0,$1 # 4 2 3 1
- cmpult $1,$0,$0 # 4 2 3 2
- addq $20,$0,$0 # 4 3 2
- stq $1,-8($16) # 4 2 4
- bgt $18,$45
- ret $31,($26),1 # else exit
-
- .align 4
-$43:
- addq $18,4,$18
- bgt $18,$45 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_add_words
- .align 3
- .globl bn_mul_words
- .ent bn_mul_words
-bn_mul_words:
-bn_mul_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$0
- blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- .align 3
-$142:
-
- mulq $20,$19,$5 # 1 2 1 #####
- ldq $21,8($17) # 2 1
- ldq $27,16($17) # 3 1
- umulh $20,$19,$20 # 1 2 #####
- ldq $28,24($17) # 4 1
- mulq $21,$19,$6 # 2 2 1 #####
- addq $5,$0,$5 # 1 2 3 1
- subq $18,4,$18
- cmpult $5,$0,$0 # 1 2 3 2
- umulh $21,$19,$21 # 2 2 #####
- addq $20,$0,$0 # 1 3 2
- addq $17,32,$17
- addq $6,$0,$6 # 2 2 3 1
- mulq $27,$19,$7 # 3 2 1 #####
- cmpult $6,$0,$0 # 2 2 3 2
- addq $21,$0,$0 # 2 3 2
- addq $16,32,$16
- umulh $27,$19,$27 # 3 2 #####
- stq $5,-32($16) # 1 2 4
- mulq $28,$19,$8 # 4 2 1 #####
- addq $7,$0,$7 # 3 2 3 1
- stq $6,-24($16) # 2 2 4
- cmpult $7,$0,$0 # 3 2 3 2
- umulh $28,$19,$28 # 4 2 #####
- addq $27,$0,$0 # 3 3 2
- stq $7,-16($16) # 3 2 4
- addq $8,$0,$8 # 4 2 3 1
- cmpult $8,$0,$0 # 4 2 3 2
-
- addq $28,$0,$0 # 4 3 2
-
- stq $8,-8($16) # 4 2 4
-
- blt $18,$143
-
- ldq $20,0($17) # 1 1
-
- br $142
-
- .align 4
-$145:
- ldq $20,0($17) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18
- umulh $20,$19,$20 # 4 2
- addq $5,$0,$5 # 4 2 3 1
- addq $16,8,$16
- cmpult $5,$0,$0 # 4 2 3 2
- addq $17,8,$17
- addq $20,$0,$0 # 4 3 2
- stq $5,-8($16) # 4 2 4
-
- bgt $18,$145
- ret $31,($26),1 # else exit
-
- .align 4
-$143:
- addq $18,4,$18
- bgt $18,$145 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_words
- .align 3
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
-bn_sqr_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $18,4,$18
- blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- .align 3
-$542:
- mulq $20,$20,$5 ######
- ldq $21,8($17) # 1 1
- subq $18,4
- umulh $20,$20,$1 ######
- ldq $27,16($17) # 1 1
- mulq $21,$21,$6 ######
- ldq $28,24($17) # 1 1
- stq $5,0($16) # r[0]
- umulh $21,$21,$2 ######
- stq $1,8($16) # r[1]
- mulq $27,$27,$7 ######
- stq $6,16($16) # r[0]
- umulh $27,$27,$3 ######
- stq $2,24($16) # r[1]
- mulq $28,$28,$8 ######
- stq $7,32($16) # r[0]
- umulh $28,$28,$4 ######
- stq $3,40($16) # r[1]
-
- addq $16,64,$16
- addq $17,32,$17
- stq $8,-16($16) # r[0]
- stq $4,-8($16) # r[1]
-
- blt $18,$543
- ldq $20,0($17) # 1 1
- br $542
-
-$442:
- ldq $20,0($17) # a[0]
- mulq $20,$20,$5 # a[0]*w low part r2
- addq $16,16,$16
- addq $17,8,$17
- subq $18,1,$18
- umulh $20,$20,$1 # a[0]*w high part r3
- stq $5,-16($16) # r[0]
- stq $1,-8($16) # r[1]
-
- bgt $18,$442
- ret $31,($26),1 # else exit
-
- .align 4
-$543:
- addq $18,4,$18
- bgt $18,$442 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sqr_words
-
- .align 3
- .globl bn_add_words
- .ent bn_add_words
-bn_add_words:
-bn_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19,4,$19
- bis $31,$31,$0 # carry = 0
- blt $19,$900
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[1]
- .align 3
-$901:
- addq $1,$5,$1 # r=a+b;
- ldq $6,8($17) # a[1]
- cmpult $1,$5,$22 # did we overflow?
- ldq $2,8($18) # b[1]
- addq $1,$0,$1 # c+= overflow
- ldq $7,16($17) # a[2]
- cmpult $1,$0,$0 # overflow?
- ldq $3,16($18) # b[2]
- addq $0,$22,$0
- ldq $8,24($17) # a[3]
- addq $2,$6,$2 # r=a+b;
- ldq $4,24($18) # b[3]
- cmpult $2,$6,$23 # did we overflow?
- addq $3,$7,$3 # r=a+b;
- addq $2,$0,$2 # c+= overflow
- cmpult $3,$7,$24 # did we overflow?
- cmpult $2,$0,$0 # overflow?
- addq $4,$8,$4 # r=a+b;
- addq $0,$23,$0
- cmpult $4,$8,$25 # did we overflow?
- addq $3,$0,$3 # c+= overflow
- stq $1,0($16) # r[0]=c
- cmpult $3,$0,$0 # overflow?
- stq $2,8($16) # r[1]=c
- addq $0,$24,$0
- stq $3,16($16) # r[2]=c
- addq $4,$0,$4 # c+= overflow
- subq $19,4,$19 # loop--
- cmpult $4,$0,$0 # overflow?
- addq $17,32,$17 # a++
- addq $0,$25,$0
- stq $4,24($16) # r[3]=c
- addq $18,32,$18 # b++
- addq $16,32,$16 # r++
-
- blt $19,$900
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[1]
- br $901
- .align 4
-$945:
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[1]
- addq $1,$5,$1 # r=a+b;
- subq $19,1,$19 # loop--
- addq $1,$0,$1 # c+= overflow
- addq $17,8,$17 # a++
- cmpult $1,$5,$22 # did we overflow?
- cmpult $1,$0,$0 # overflow?
- addq $18,8,$18 # b++
- stq $1,0($16) # r[0]=c
- addq $0,$22,$0
- addq $16,8,$16 # r++
-
- bgt $19,$945
- ret $31,($26),1 # else exit
-
-$900:
- addq $19,4,$19
- bgt $19,$945 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_add_words
-
- #
- # What follows was taken directly from the C compiler with a few
-	# hacks to redo the labels.
- #
-.text
- .align 3
- .globl bn_div64
- .ent bn_div64
-bn_div64:
- ldgp $29,0($27)
-bn_div64..ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$119
- lda $0,-1
- br $31,$136
- .align 4
-$119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$126
- zapnot $7,15,$27
- br $31,$127
- .align 4
-$126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$127:
- srl $10,32,$4
- .align 5
-$128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$129
- subq $27,1,$27
- br $31,$128
- .align 4
-$129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$134
- addq $9,$11,$9
- subq $27,1,$27
-$134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$123
- .align 4
-$124:
- bis $13,$27,$0
-$136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div64
-
- .set noat
- .text
- .align 3
- .globl bn_sub_words
- .ent bn_sub_words
-bn_sub_words:
-bn_sub_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19, 4, $19
- bis $31, $31, $0
- blt $19, $100
- ldq $1, 0($17)
- ldq $2, 0($18)
-$101:
- ldq $3, 8($17)
- cmpult $1, $2, $4
- ldq $5, 8($18)
- subq $1, $2, $1
- ldq $6, 16($17)
- cmpult $1, $0, $2
- ldq $7, 16($18)
- subq $1, $0, $23
- ldq $8, 24($17)
- addq $2, $4, $0
- cmpult $3, $5, $24
- subq $3, $5, $3
- ldq $22, 24($18)
- cmpult $3, $0, $5
- subq $3, $0, $25
- addq $5, $24, $0
- cmpult $6, $7, $27
- subq $6, $7, $6
- stq $23, 0($16)
- cmpult $6, $0, $7
- subq $6, $0, $28
- addq $7, $27, $0
- cmpult $8, $22, $21
- subq $8, $22, $8
- stq $25, 8($16)
- cmpult $8, $0, $22
- subq $8, $0, $20
- addq $22, $21, $0
- stq $28, 16($16)
- subq $19, 4, $19
- stq $20, 24($16)
- addq $17, 32, $17
- addq $18, 32, $18
- addq $16, 32, $16
- blt $19, $100
- ldq $1, 0($17)
- ldq $2, 0($18)
- br $101
-$102:
- ldq $1, 0($17)
- ldq $2, 0($18)
- cmpult $1, $2, $27
- subq $1, $2, $1
- cmpult $1, $0, $2
- subq $1, $0, $1
- stq $1, 0($16)
- addq $2, $27, $0
- addq $17, 8, $17
- addq $18, 8, $18
- addq $16, 8, $16
- subq $19, 1, $19
- bgt $19, $102
- ret $31,($26),1
-$100:
- addq $19, 4, $19
- bgt $19, $102
-$103:
- ret $31,($26),1
- .end bn_sub_words
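
The bn_add_words loop above (the one annotated with "did we overflow?")
rests on a single idiom: after each addq, a cmpult of the result against
one of its inputs turns any wraparound into a 0/1 carry that is folded
into the next word.  One word of that loop, restated as a hedged C sketch
(the name is invented; this is not code from the tree):

	#include <stdint.h>

	/* r[i] = a[i] + b[i] + carry-in; carry-out left in *cc (0 or 1). */
	static uint64_t add_word_sketch(uint64_t a, uint64_t b, uint64_t *cc)
	{
		uint64_t r  = a + b;		/* addq              */
		uint64_t c1 = (r < b);		/* cmpult: wrapped?  */
		r += *cc;			/* add the carry-in  */
		*cc = (r < *cc) + c1;		/* carry-out         */
		return r;
	}
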
diff --git a/lib/libcrypto/bn/asm/alpha.works/add.pl b/lib/libcrypto/bn/asm/alpha.works/add.pl
deleted file mode 100644
index 4dc76e6b69f..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/add.pl
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_add_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
-	&add($ap,4*$QWS,$ap);	# ap+=4
-	&add($bp,4*$QWS,$bp);	# bp+=4
-	&add($rp,4*$QWS,$rp);	# rp+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- ($t0,$o0)=&NR(2);
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
-
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0); # will we borrow?
- &add($o0,$cc,$o0); # will we borrow?
- &cmpult($o0,$cc,$cc); # will we borrow?
- &add($cc,$t0,$cc); # add the borrows
- &st($o0,&QWPw(0,$rp)); # save
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &FR($o0,$t0,$a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/div.pl b/lib/libcrypto/bn/asm/alpha.works/div.pl
deleted file mode 100644
index 7ec144377fa..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/div.pl
+++ /dev/null
@@ -1,144 +0,0 @@
-#!/usr/local/bin/perl
-
-sub bn_div64
- {
- local($data)=<<'EOF';
- #
- # What follows was taken directly from the C compiler with a few
-	# hacks to redo the labels.
- #
-.text
- .set noreorder
- .set volatile
- .align 3
- .globl bn_div64
- .ent bn_div64
-bn_div64:
- ldgp $29,0($27)
-bn_div64..ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$9119
- lda $0,-1
- br $31,$9136
- .align 4
-$9119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$9120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$9120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$9120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$9122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$9122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$9123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$9126
- zapnot $7,15,$27
- br $31,$9127
- .align 4
-$9126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$9127:
- srl $10,32,$4
- .align 5
-$9128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$9129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$9129
- subq $27,1,$27
- br $31,$9128
- .align 4
-$9129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$9134
- addq $9,$11,$9
- subq $27,1,$27
-$9134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$9124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$9123
- .align 4
-$9124:
- bis $13,$27,$0
-$9136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div64
-EOF
- &asm_add($data);
- }
-
-1;
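
The gcc output wrapped above implements bn_div64, which, under the usual
contract (d != 0 and h < d), returns the low 64 bits of the quotient
(h*2^64 + l) / d; it has to build the result 32 bits at a time with divqu
because Alpha has no 128-by-64-bit divide.  A hedged sketch of what the
routine computes, assuming that contract (not the code the script emits;
unsigned __int128 is a compiler extension):

	#include <stdint.h>

	/* Quotient of the 128-bit value h:l divided by d; with h < d the
	 * result fits in 64 bits.  The routine above also special-cases
	 * d == 0 by returning (uint64_t)-1. */
	static uint64_t bn_div64_sketch(uint64_t h, uint64_t l, uint64_t d)
	{
		unsigned __int128 n = ((unsigned __int128)h << 64) | l;
		return (uint64_t)(n / d);
	}
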
diff --git a/lib/libcrypto/bn/asm/alpha.works/mul.pl b/lib/libcrypto/bn/asm/alpha.works/mul.pl
deleted file mode 100644
index b182bae4520..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/mul.pl
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$r0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
-	&add($ap,4*$QWS,$ap);	# ap+=4
-	&add($bp,4*$QWS,$bp);	# bp+=4
-	&add($rp,4*$QWS,$rp);	# rp+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &mul($a0,$word,($l0)=&NR(1));
- &add($ap,$QWS,$ap);
- &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
- &add($l0,$cc,$l0);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &cmpult($l0,$cc,$cc);
- &st($l0,&QWPw(-1,$rp)); &FR($l0);
- &add($h0,$cc,$cc); &FR($h0);
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/lib/libcrypto/bn/asm/alpha.works/mul_add.pl
deleted file mode 100644
index e37f6315fbc..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/mul_add.pl
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_add_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$r0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
-	&add($ap,4*$QWS,$ap);	# ap+=4
-	&add($bp,4*$QWS,$bp);	# bp+=4
-	&add($rp,4*$QWS,$rp);	# rp+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b
- &mul($a0,$word,($l0)=&NR(1));
- &sub($count,1,$count);
- &add($ap,$QWS,$ap);
- &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
- &add($r0,$l0,$r0);
- &add($rp,$QWS,$rp);
- &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
- &add($r0,$cc,$r0);
- &add($h0,$t0,$h0); &FR($t0);
- &cmpult($r0,$cc,$cc);
- &st($r0,&QWPw(-1,$rp)); &FR($r0);
- &add($h0,$cc,$cc); &FR($h0);
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
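
The tail loop above is the scalar form of bn_mul_add_words: each word of r
gets a[i]*w plus the running carry added into it, and the high half of the
product plus any wraparounds becomes the carry for the next word.  A rough
C equivalent of one iteration (an illustrative sketch, not the code the
script generates):

	#include <stdint.h>

	/* r[i] += a * w + carry; returns the new carry. */
	static uint64_t mul_add_word_sketch(uint64_t *r, uint64_t a,
	    uint64_t w, uint64_t carry)
	{
		unsigned __int128 p = (unsigned __int128)a * w;
		uint64_t lo = (uint64_t)p;		/* mul */
		uint64_t hi = (uint64_t)(p >> 64);	/* muh */

		*r += lo;
		hi += (*r < lo);		/* carry from the low half  */
		*r += carry;
		return hi + (*r < carry);	/* carry from the carry-in  */
	}
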
diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl
deleted file mode 100644
index 5efd2012814..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl
+++ /dev/null
@@ -1,213 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub mul_add_c
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$b,($l1)=&NR(1));
- &muh($a,$b,($h1)=&NR(1));
- &add($c0,$l1,$c0);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
- &add($t1,$h1,$h1); &FR($t1);
- &add($c1,$h1,$c1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-sub bn_mul_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &mul($a[0],$b[0],($r00)=&NR(1));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &muh($a[0],$b[0],($r01)=&NR(1));
- &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &mul($a[0],$b[1],($r02)=&NR(1));
-
- ($R,$H1,$H2)=&NR(3);
-
- &st($r00,&QWPw(0,$rp)); &FR($r00);
-
- &mov("zero",$R);
- &mul($a[1],$b[0],($r03)=&NR(1));
-
- &mov("zero",$H1);
- &mov("zero",$H0);
- &add($R,$r01,$R);
- &muh($a[0],$b[1],($r04)=&NR(1));
- &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
- &add($R,$r02,$R);
- &add($H1,$t01,$H1) &FR($t01);
- &muh($a[1],$b[0],($r05)=&NR(1));
- &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
- &add($R,$r03,$R);
- &add($H2,$t02,$H2) &FR($t02);
- &mul($a[0],$b[2],($r06)=&NR(1));
- &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
- &add($H1,$t03,$H1) &FR($t03);
- &st($R,&QWPw(1,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r04,$R);
- &mov("zero",$H2);
- &mul($a[1],$b[1],($r07)=&NR(1));
- &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
- &add($R,$r05,$R);
- &add($H1,$t04,$H1) &FR($t04);
- &mul($a[2],$b[0],($r08)=&NR(1));
- &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
- &add($R,$r01,$R);
- &add($H2,$t05,$H2) &FR($t05);
- &muh($a[0],$b[2],($r09)=&NR(1));
- &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
- &add($R,$r07,$R);
- &add($H1,$t06,$H1) &FR($t06);
- &muh($a[1],$b[1],($r10)=&NR(1));
- &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
- &add($R,$r08,$R);
- &add($H2,$t07,$H2) &FR($t07);
- &muh($a[2],$b[0],($r11)=&NR(1));
- &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
- &add($H1,$t08,$H1) &FR($t08);
- &st($R,&QWPw(2,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r09,$R);
- &mov("zero",$H2);
- &mul($a[0],$b[3],($r12)=&NR(1));
- &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
- &add($R,$r10,$R);
- &add($H1,$t09,$H1) &FR($t09);
- &mul($a[1],$b[2],($r13)=&NR(1));
- &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
- &add($R,$r11,$R);
- &add($H1,$t10,$H1) &FR($t10);
- &mul($a[2],$b[1],($r14)=&NR(1));
- &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
- &add($R,$r12,$R);
- &add($H1,$t11,$H1) &FR($t11);
- &mul($a[3],$b[0],($r15)=&NR(1));
- &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
- &add($R,$r13,$R);
- &add($H1,$t12,$H1) &FR($t12);
- &muh($a[0],$b[3],($r16)=&NR(1));
- &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
- &add($R,$r14,$R);
- &add($H1,$t13,$H1) &FR($t13);
- &muh($a[1],$b[2],($r17)=&NR(1));
- &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
- &add($R,$r15,$R);
- &add($H1,$t14,$H1) &FR($t14);
- &muh($a[2],$b[1],($r18)=&NR(1));
- &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
- &add($H1,$t15,$H1) &FR($t15);
- &st($R,&QWPw(3,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r16,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[0],($r19)=&NR(1));
- &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
- &add($R,$r17,$R);
- &add($H1,$t16,$H1) &FR($t16);
- &mul($a[1],$b[3],($r20)=&NR(1));
- &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
- &add($R,$r18,$R);
- &add($H1,$t17,$H1) &FR($t17);
- &mul($a[2],$b[2],($r21)=&NR(1));
- &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
- &add($R,$r19,$R);
- &add($H1,$t18,$H1) &FR($t18);
- &mul($a[3],$b[1],($r22)=&NR(1));
- &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
- &add($R,$r20,$R);
- &add($H1,$t19,$H1) &FR($t19);
- &muh($a[1],$b[3],($r23)=&NR(1));
- &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
- &add($R,$r21,$R);
- &add($H1,$t20,$H1) &FR($t20);
- &muh($a[2],$b[2],($r24)=&NR(1));
- &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
- &add($R,$r22,$R);
- &add($H1,$t21,$H1) &FR($t21);
- &muh($a[3],$b[1],($r25)=&NR(1));
- &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
- &add($H1,$t22,$H1) &FR($t22);
- &st($R,&QWPw(4,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r23,$R);
- &mov("zero",$H2);
- &mul($a[2],$b[3],($r26)=&NR(1));
- &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
- &add($R,$r24,$R);
- &add($H1,$t23,$H1) &FR($t23);
- &mul($a[3],$b[2],($r27)=&NR(1));
- &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
- &add($R,$r25,$R);
- &add($H1,$t24,$H1) &FR($t24);
- &muh($a[2],$b[3],($r28)=&NR(1));
- &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
- &add($R,$r26,$R);
- &add($H1,$t25,$H1) &FR($t25);
- &muh($a[3],$b[2],($r29)=&NR(1));
- &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
- &add($R,$r27,$R);
- &add($H1,$t26,$H1) &FR($t26);
- &mul($a[3],$b[3],($r30)=&NR(1));
- &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
- &add($H1,$t27,$H1) &FR($t27);
- &st($R,&QWPw(5,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r28,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[3],($r31)=&NR(1));
- &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
- &add($R,$r29,$R);
- &add($H1,$t28,$H1) &FR($t28);
- ############
- &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
- &add($R,$r30,$R);
- &add($H1,$t29,$H1) &FR($t29);
- ############
- &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
- &add($H1,$t30,$H1) &FR($t30);
- &st($R,&QWPw(6,$rp));
- &add($H1,$H2,$R);
-
- &add($R,$r31,$R); &FR($r31);
- &st($R,&QWPw(7,$rp));
-
- &FR($R,$H1,$H2);
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
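
The mul_add_c helper at the top of this file is the comba building block:
it adds the 128-bit product a*b into the running three-word column
(c0,c1,c2), detecting each carry with a cmpult just as the emitted code
does.  A hedged C sketch of that step (the name is invented; unsigned
__int128 stands in for the mul/muh pair):

	#include <stdint.h>

	/* (c2:c1:c0) += a * b */
	static void mul_add_c_sketch(uint64_t a, uint64_t b,
	    uint64_t *c0, uint64_t *c1, uint64_t *c2)
	{
		unsigned __int128 p = (unsigned __int128)a * b;
		uint64_t lo = (uint64_t)p, hi = (uint64_t)(p >> 64);

		*c0 += lo;
		hi  += (*c0 < lo);	/* carry out of the low column    */
		*c1 += hi;
		*c2 += (*c1 < hi);	/* carry out of the middle column */
	}

	/* bn_mul_comba4 then just walks the columns: column k
	 * accumulates every a[i]*b[j] with i + j == k. */
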
diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl
deleted file mode 100644
index 79d86dd25cd..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub mul_add_c
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
-print STDERR "count=$cnt\n"; $cnt++;
- &mul($a,$b,($l1)=&NR(1));
- &muh($a,$b,($h1)=&NR(1));
- &add($c0,$l1,$c0);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
- &add($t1,$h1,$h1); &FR($t1);
- &add($c1,$h1,$c1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-sub bn_mul_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
- &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
-
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &FR($c0,$c1,$c2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl
deleted file mode 100644
index 525ca7494b7..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_comba8
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &stack_push(2);
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &st($reg_s0,&swtmp(0)); &FR($reg_s0);
- &st($reg_s1,&swtmp(1)); &FR($reg_s1);
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &ld(($a[4])=&NR(1),&QWPw(1,$ap));
- &ld(($b[4])=&NR(1),&QWPw(1,$bp));
- &ld(($a[5])=&NR(1),&QWPw(1,$ap));
- &ld(($b[5])=&NR(1),&QWPw(1,$bp));
- &ld(($a[6])=&NR(1),&QWPw(1,$ap));
- &ld(($b[6])=&NR(1),&QWPw(1,$bp));
- &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap);
- &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp);
-
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
- &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
- &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
- &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
- &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
- &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
- &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
- &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
- &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &FR($c0,$c1,$c2);
-
- &ld($reg_s0,&swtmp(0));
- &ld($reg_s1,&swtmp(1));
- &stack_pop(2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/lib/libcrypto/bn/asm/alpha.works/sqr.pl
deleted file mode 100644
index a55b696906e..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/sqr.pl
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(3);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$r0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
-	&add($ap,4*$QWS,$ap);	# ap+=4
-	&add($bp,4*$QWS,$bp);	# bp+=4
-	&add($rp,4*$QWS,$rp);	# rp+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &mul($a0,$a0,($l0)=&NR(1));
- &add($ap,$QWS,$ap);
- &add($rp,2*$QWS,$rp);
- &sub($count,1,$count);
- &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
- &st($l0,&QWPw(-2,$rp)); &FR($l0);
- &st($h0,&QWPw(-1,$rp)); &FR($h0);
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl
deleted file mode 100644
index bf33f5b5037..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub sqr_add_c
- {
- local($a,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$a,($l1)=&NR(1));
- &muh($a,$a,($h1)=&NR(1));
- &add($c0,$l1,$c0);
- &add($c1,$h1,$c1);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
- &add($c1,$t1,$c1); &FR($t1);
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-sub sqr_add_c2
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$b,($l1)=&NR(1));
- &muh($a,$b,($h1)=&NR(1));
- &cmplt($l1,"zero",($lc1)=&NR(1));
- &cmplt($h1,"zero",($hc1)=&NR(1));
- &add($l1,$l1,$l1);
- &add($h1,$h1,$h1);
- &add($h1,$lc1,$h1); &FR($lc1);
- &add($c2,$hc1,$c2); &FR($hc1);
-
- &add($c0,$l1,$c0);
- &add($c1,$h1,$c1);
- &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1);
- &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1);
-
- &add($c1,$lc1,$c1); &FR($lc1);
- &add($c2,$hc1,$c2); &FR($hc1);
- }
-
-
-sub bn_sqr_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3);
-
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
- &st($c0,&QWPw(0,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[3],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
deleted file mode 100644
index b4afe085f1c..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_comba8
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($a[4])=&NR(1),&QWPw(4,$ap));
- &ld(($a[5])=&NR(1),&QWPw(5,$ap));
- &ld(($a[6])=&NR(1),&QWPw(6,$ap));
- &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3);
-
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
- &st($c0,&QWPw(0,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(7,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(8,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
- &st($c0,&QWPw(9,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[5],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
- &st($c0,&QWPw(10,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
- &st($c0,&QWPw(11,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[6],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
- &st($c0,&QWPw(12,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
- &st($c0,&QWPw(13,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[7],$c0,$c1,$c2);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha.works/sub.pl b/lib/libcrypto/bn/asm/alpha.works/sub.pl
deleted file mode 100644
index d998da5c21a..00000000000
--- a/lib/libcrypto/bn/asm/alpha.works/sub.pl
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sub_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
-
- ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
- &ld($a1,&QWPw(1,$ap));
- &cmpult($a0,$b0,$tmp); # will we borrow?
- &ld($b1,&QWPw(1,$bp));
- &sub($a0,$b0,$a0); # do the subtract
- &ld($a2,&QWPw(2,$ap));
- &cmpult($a0,$cc,$b0); # will we borrow?
- &ld($b2,&QWPw(2,$bp));
- &sub($a0,$cc,$o0); # will we borrow?
- &ld($a3,&QWPw(3,$ap));
- &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp);
-
- &cmpult($a1,$b1,$t1); # will we borrow?
- &sub($a1,$b1,$a1); # do the subtract
- &ld($b3,&QWPw(3,$bp));
- &cmpult($a1,$cc,$b1); # will we borrow?
- &sub($a1,$cc,$o1); # will we borrow?
- &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
-
- &cmpult($a2,$b2,$tmp); # will we borrow?
- &sub($a2,$b2,$a2); # do the subtract
- &st($o0,&QWPw(0,$rp)); &FR($o0); # save
- &cmpult($a2,$cc,$b2); # will we borrow?
- &sub($a2,$cc,$o2); # will we borrow?
- &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
-
- &cmpult($a3,$b3,$t3); # will we borrow?
- &sub($a3,$b3,$a3); # do the subtract
- &st($o1,&QWPw(1,$rp)); &FR($o1);
- &cmpult($a3,$cc,$b3); # will we borrow?
- &sub($a3,$cc,$o3); # will we borrow?
- &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
-
- &st($o2,&QWPw(2,$rp)); &FR($o2);
- &sub($count,4,$count); # count-=4
- &st($o3,&QWPw(3,$rp)); &FR($o3);
-	&add($ap,4*$QWS,$ap);	# ap+=4
-	&add($bp,4*$QWS,$bp);	# bp+=4
-	&add($rp,4*$QWS,$rp);	# rp+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &cmpult($a0,$b0,$tmp); # will we borrow?
- &sub($a0,$b0,$a0); # do the subtract
- &cmpult($a0,$cc,$b0); # will we borrow?
- &sub($a0,$cc,$a0); # will we borrow?
- &st($a0,&QWPw(0,$rp)); # save
- &add($b0,$tmp,$cc); # add the borrows
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &FR($a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
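
The subtract loop above mirrors the add loop, with one ordering subtlety
the "will we borrow?" comments point at: the borrow has to be sampled with
cmpult before the subq overwrites its operands, and the same is done again
for the incoming borrow.  One word of bn_sub_words in C (illustrative
sketch only; the name is invented):

	#include <stdint.h>

	/* r[i] = a[i] - b[i] - borrow-in; returns the borrow-out (0 or 1). */
	static uint64_t sub_word_sketch(uint64_t *r, uint64_t a, uint64_t b,
	    uint64_t borrow)
	{
		uint64_t b1 = (a < b);		/* test before subtracting  */
		uint64_t t  = a - b;
		uint64_t b2 = (t < borrow);	/* will the borrow wrap it? */
		*r = t - borrow;
		return b1 + b2;			/* add the borrows          */
	}
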
diff --git a/lib/libcrypto/bn/asm/alpha/add.pl b/lib/libcrypto/bn/asm/alpha/add.pl
deleted file mode 100644
index 13bf5164281..00000000000
--- a/lib/libcrypto/bn/asm/alpha/add.pl
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_add_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
-
-##########################################################
- &set_label("loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap));
- &ld(($b0)=&NR(1),&QWPw(0,$bp));
- &ld(($a1)=&NR(1),&QWPw(1,$ap));
- &ld(($b1)=&NR(1),&QWPw(1,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &ld(($a2)=&NR(1),&QWPw(2,$ap));
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &ld(($b2)=&NR(1),&QWPw(2,$bp));
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &ld(($a3)=&NR(1),&QWPw(3,$ap));
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &ld(($b3)=&NR(1),&QWPw(3,$bp));
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &cmpult($o3,$cc,$cc);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
- &add($cc,$t3,$cc); &FR($t3);
-
-
- &sub($count,4,$count); # count-=4
-	&add($ap,4*$QWS,$ap);	# ap+=4
-	&add($bp,4*$QWS,$bp);	# bp+=4
-	&add($rp,4*$QWS,$rp);	# rp+=4
-
- ###
- &bge($count,&label("loop"));
- ###
- &br(&label("finish"));
-##################################################
- # Do the last 0..3 words
-
- ($t0,$o0)=&NR(2);
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($a0,$b0,$o0);
- &sub($count,1,$count);
- &cmpult($o0,$b0,$t0); # will we borrow?
- &add($o0,$cc,$o0); # will we borrow?
- &cmpult($o0,$cc,$cc); # will we borrow?
- &add($rp,$QWS,$rp);
- &st($o0,&QWPw(-1,$rp)); # save
- &add($cc,$t0,$cc); # add the borrows
-
- ###
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &FR($o0,$t0,$a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha/div.pl b/lib/libcrypto/bn/asm/alpha/div.pl
deleted file mode 100644
index e9e680897aa..00000000000
--- a/lib/libcrypto/bn/asm/alpha/div.pl
+++ /dev/null
@@ -1,144 +0,0 @@
-#!/usr/local/bin/perl
-
-sub bn_div_words
- {
- local($data)=<<'EOF';
- #
- # What follows was taken directly from the C compiler with a few
-	# hacks to redo the labels.
- #
-.text
- .set noreorder
- .set volatile
- .align 3
- .globl bn_div_words
- .ent bn_div_words
-bn_div_words
- ldgp $29,0($27)
-bn_div_words.ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$9119
- lda $0,-1
- br $31,$9136
- .align 4
-$9119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$9120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$9120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$9120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$9122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$9122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$9123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$9126
- zapnot $7,15,$27
- br $31,$9127
- .align 4
-$9126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$9127:
- srl $10,32,$4
- .align 5
-$9128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$9129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$9129
- subq $27,1,$27
- br $31,$9128
- .align 4
-$9129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$9134
- addq $9,$11,$9
- subq $27,1,$27
-$9134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$9124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$9123
- .align 4
-$9124:
- bis $13,$27,$0
-$9136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div_words
-EOF
- &asm_add($data);
- }
-
-1;
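
The gcc-derived assembly in the deleted div.pl implements the bn_div_words contract: return the single-word quotient of the two-word value h:l divided by d, assuming h < d so the quotient fits in one word. A hedged C sketch of that contract, leaning on the unsigned __int128 GCC/Clang extension instead of the normalise-and-estimate loop above:

	#include <stdint.h>

	/* Quotient of the 128-bit value (h:l) by a 64-bit divisor d.
	 * Assumes h < d; the compiler extension stands in for the
	 * shift/estimate/correct loop the assembly performs. */
	static uint64_t
	div_words(uint64_t h, uint64_t l, uint64_t d)
	{
		unsigned __int128 n = ((unsigned __int128)h << 64) | l;

		return (uint64_t)(n / d);
	}
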
diff --git a/lib/libcrypto/bn/asm/alpha/mul.pl b/lib/libcrypto/bn/asm/alpha/mul.pl
deleted file mode 100644
index 76c926566c7..00000000000
--- a/lib/libcrypto/bn/asm/alpha/mul.pl
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- ###
- &blt($count,&label("finish"));
-
- ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
-
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
-
- &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ### wait 8
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
- ### wait 8
- &muh($a1,$word,($h1)=&NR(1)); &FR($a1);
- &add($l0,$cc,$l0); ### wait 8
- &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
- &cmpult($l0,$cc,$cc); ### wait 8
- &muh($a2,$word,($h2)=&NR(1)); &FR($a2);
- &add($h0,$cc,$cc); &FR($h0); ### wait 8
- &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
- &add($l1,$cc,$l1); ### wait 8
- &st($l0,&QWPw(0,$rp)); &FR($l0);
- &cmpult($l1,$cc,$cc); ### wait 8
- &muh($a3,$word,($h3)=&NR(1)); &FR($a3);
- &add($h1,$cc,$cc); &FR($h1);
- &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
- &add($l2,$cc,$l2);
- &st($l1,&QWPw(1,$rp)); &FR($l1);
- &cmpult($l2,$cc,$cc);
- &add($h2,$cc,$cc); &FR($h2);
- &sub($count,4,$count); # count-=4
- &st($l2,&QWPw(2,$rp)); &FR($l2);
- &add($l3,$cc,$l3);
- &cmpult($l3,$cc,$cc);
- &add($bp,4*$QWS,$bp); # count+=4
- &add($h3,$cc,$cc); &FR($h3);
- &add($ap,4*$QWS,$ap); # count+=4
- &st($l3,&QWPw(3,$rp)); &FR($l3);
- &add($rp,4*$QWS,$rp); # count+=4
- ###
- &blt($count,&label("finish"));
- ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
- &br(&label("finish"));
-##################################################
-
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- ###
- ###
- ###
- &muh($a0,$word,($h0)=&NR(1));
- ### Wait 8 for next mul issue
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0)
- &add($ap,$QWS,$ap);
-	### Lose 12 until result is available
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &add($l0,$cc,$l0);
- ###
- &st($l0,&QWPw(-1,$rp)); &FR($l0);
- &cmpult($l0,$cc,$cc);
- &add($h0,$cc,$cc); &FR($h0);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha/mul_add.pl b/lib/libcrypto/bn/asm/alpha/mul_add.pl
deleted file mode 100644
index 0d6df69bc4b..00000000000
--- a/lib/libcrypto/bn/asm/alpha/mul_add.pl
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_add_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- ###
- &blt($count,&label("finish"));
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- &ld(($r0)=&NR(1),&QWPw(0,$rp));
- &ld(($a1)=&NR(1),&QWPw(1,$ap));
- &muh($a0,$word,($h0)=&NR(1));
- &ld(($r1)=&NR(1),&QWPw(1,$rp));
- &ld(($a2)=&NR(1),&QWPw(2,$ap));
- ###
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
- &ld(($r2)=&NR(1),&QWPw(2,$rp));
- &muh($a1,$word,($h1)=&NR(1));
- &ld(($a3)=&NR(1),&QWPw(3,$ap));
- &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
- &ld(($r3)=&NR(1),&QWPw(3,$rp));
- &add($r0,$l0,$r0);
- &add($r1,$l1,$r1);
- &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
- &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1);
- &muh($a2,$word,($h2)=&NR(1));
- &add($r0,$cc,$r0);
- &add($h0,$t0,$h0); &FR($t0);
- &cmpult($r0,$cc,$cc);
- &add($h1,$t1,$h1); &FR($t1);
- &add($h0,$cc,$cc); &FR($h0);
- &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
- &add($r1,$cc,$r1);
- &cmpult($r1,$cc,$cc);
- &add($r2,$l2,$r2);
- &add($h1,$cc,$cc); &FR($h1);
- &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2);
- &muh($a3,$word,($h3)=&NR(1));
- &add($r2,$cc,$r2);
- &st($r0,&QWPw(0,$rp)); &FR($r0);
- &add($h2,$t2,$h2); &FR($t2);
- &st($r1,&QWPw(1,$rp)); &FR($r1);
- &cmpult($r2,$cc,$cc);
- &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
- &add($h2,$cc,$cc); &FR($h2);
- &st($r2,&QWPw(2,$rp)); &FR($r2);
- &sub($count,4,$count); # count-=4
- &add($rp,4*$QWS,$rp); # count+=4
- &add($r3,$l3,$r3);
- &add($ap,4*$QWS,$ap); # count+=4
- &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3);
- &add($r3,$cc,$r3);
- &add($h3,$t3,$h3); &FR($t3);
- &cmpult($r3,$cc,$cc);
- &st($r3,&QWPw(-1,$rp)); &FR($r3);
- &add($h3,$cc,$cc); &FR($h3);
-
- ###
- &blt($count,&label("finish"));
- &ld(($a0)=&NR(1),&QWPw(0,$ap));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b
- ###
- ###
- &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
- ### wait 8
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
- &add($rp,$QWS,$rp);
- &add($ap,$QWS,$ap);
- &sub($count,1,$count);
- ### wait 3 until l0 is available
- &add($r0,$l0,$r0);
- ###
- &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
- &add($r0,$cc,$r0);
- &add($h0,$t0,$h0); &FR($t0);
- &cmpult($r0,$cc,$cc);
- &add($h0,$cc,$cc); &FR($h0);
-
- &st($r0,&QWPw(-1,$rp)); &FR($r0);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
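
mul_add.pl pairs each mul/muh to get the low and high halves of a[i]*word and folds both, plus the running carry, into r[i]. The same recurrence in C, as a sketch only, with the unsigned __int128 extension standing in for the 64x64->128 multiply:

	#include <stddef.h>
	#include <stdint.h>

	/* r[i] = low(r[i] + a[i]*w + carry); the high word becomes the
	 * next carry.  The full sum always fits in 128 bits. */
	static uint64_t
	mul_add_words(uint64_t *r, const uint64_t *a, size_t n, uint64_t w)
	{
		uint64_t carry = 0;

		for (size_t i = 0; i < n; i++) {
			unsigned __int128 t = (unsigned __int128)a[i] * w;

			t += r[i];
			t += carry;
			r[i] = (uint64_t)t;
			carry = (uint64_t)(t >> 64);
		}
		return carry;
	}
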
diff --git a/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/lib/libcrypto/bn/asm/alpha/mul_c4.pl
deleted file mode 100644
index 9cc876ded4a..00000000000
--- a/lib/libcrypto/bn/asm/alpha/mul_c4.pl
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-# upto
-
-sub mul_add_c
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$b,($l1)=&NR(1));
- &muh($a,$b,($h1)=&NR(1));
- &add($c0,$l1,$c0);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
- &add($t1,$h1,$h1); &FR($t1);
- &add($c1,$h1,$c1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-sub bn_mul_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &mul($a[0],$b[0],($r00)=&NR(1));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &muh($a[0],$b[0],($r01)=&NR(1));
- &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &mul($a[0],$b[1],($r02)=&NR(1));
-
- ($R,$H1,$H2)=&NR(3);
-
- &st($r00,&QWPw(0,$rp)); &FR($r00);
-
- &mov("zero",$R);
- &mul($a[1],$b[0],($r03)=&NR(1));
-
- &mov("zero",$H1);
- &mov("zero",$H0);
- &add($R,$r01,$R);
- &muh($a[0],$b[1],($r04)=&NR(1));
- &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
- &add($R,$r02,$R);
- &add($H1,$t01,$H1) &FR($t01);
- &muh($a[1],$b[0],($r05)=&NR(1));
- &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
- &add($R,$r03,$R);
- &add($H2,$t02,$H2) &FR($t02);
- &mul($a[0],$b[2],($r06)=&NR(1));
- &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
- &add($H1,$t03,$H1) &FR($t03);
- &st($R,&QWPw(1,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r04,$R);
- &mov("zero",$H2);
- &mul($a[1],$b[1],($r07)=&NR(1));
- &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
- &add($R,$r05,$R);
- &add($H1,$t04,$H1) &FR($t04);
- &mul($a[2],$b[0],($r08)=&NR(1));
- &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
- &add($R,$r01,$R);
- &add($H2,$t05,$H2) &FR($t05);
- &muh($a[0],$b[2],($r09)=&NR(1));
- &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
- &add($R,$r07,$R);
- &add($H1,$t06,$H1) &FR($t06);
- &muh($a[1],$b[1],($r10)=&NR(1));
- &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
- &add($R,$r08,$R);
- &add($H2,$t07,$H2) &FR($t07);
- &muh($a[2],$b[0],($r11)=&NR(1));
- &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
- &add($H1,$t08,$H1) &FR($t08);
- &st($R,&QWPw(2,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r09,$R);
- &mov("zero",$H2);
- &mul($a[0],$b[3],($r12)=&NR(1));
- &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
- &add($R,$r10,$R);
- &add($H1,$t09,$H1) &FR($t09);
- &mul($a[1],$b[2],($r13)=&NR(1));
- &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
- &add($R,$r11,$R);
- &add($H1,$t10,$H1) &FR($t10);
- &mul($a[2],$b[1],($r14)=&NR(1));
- &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
- &add($R,$r12,$R);
- &add($H1,$t11,$H1) &FR($t11);
- &mul($a[3],$b[0],($r15)=&NR(1));
- &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
- &add($R,$r13,$R);
- &add($H1,$t12,$H1) &FR($t12);
- &muh($a[0],$b[3],($r16)=&NR(1));
- &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
- &add($R,$r14,$R);
- &add($H1,$t13,$H1) &FR($t13);
- &muh($a[1],$b[2],($r17)=&NR(1));
- &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
- &add($R,$r15,$R);
- &add($H1,$t14,$H1) &FR($t14);
- &muh($a[2],$b[1],($r18)=&NR(1));
- &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
- &add($H1,$t15,$H1) &FR($t15);
- &st($R,&QWPw(3,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r16,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[0],($r19)=&NR(1));
- &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
- &add($R,$r17,$R);
- &add($H1,$t16,$H1) &FR($t16);
- &mul($a[1],$b[3],($r20)=&NR(1));
- &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
- &add($R,$r18,$R);
- &add($H1,$t17,$H1) &FR($t17);
- &mul($a[2],$b[2],($r21)=&NR(1));
- &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
- &add($R,$r19,$R);
- &add($H1,$t18,$H1) &FR($t18);
- &mul($a[3],$b[1],($r22)=&NR(1));
- &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
- &add($R,$r20,$R);
- &add($H1,$t19,$H1) &FR($t19);
- &muh($a[1],$b[3],($r23)=&NR(1));
- &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
- &add($R,$r21,$R);
- &add($H1,$t20,$H1) &FR($t20);
- &muh($a[2],$b[2],($r24)=&NR(1));
- &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
- &add($R,$r22,$R);
- &add($H1,$t21,$H1) &FR($t21);
- &muh($a[3],$b[1],($r25)=&NR(1));
- &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
- &add($H1,$t22,$H1) &FR($t22);
- &st($R,&QWPw(4,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r23,$R);
- &mov("zero",$H2);
- &mul($a[2],$b[3],($r26)=&NR(1));
- &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
- &add($R,$r24,$R);
- &add($H1,$t23,$H1) &FR($t23);
- &mul($a[3],$b[2],($r27)=&NR(1));
- &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
- &add($R,$r25,$R);
- &add($H1,$t24,$H1) &FR($t24);
- &muh($a[2],$b[3],($r28)=&NR(1));
- &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
- &add($R,$r26,$R);
- &add($H1,$t25,$H1) &FR($t25);
- &muh($a[3],$b[2],($r29)=&NR(1));
- &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
- &add($R,$r27,$R);
- &add($H1,$t26,$H1) &FR($t26);
- &mul($a[3],$b[3],($r30)=&NR(1));
- &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
- &add($H1,$t27,$H1) &FR($t27);
- &st($R,&QWPw(5,$rp));
- &add($H1,$H2,$R);
-
- &mov("zero",$H1);
- &add($R,$r28,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[3],($r31)=&NR(1));
- &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
- &add($R,$r29,$R);
- &add($H1,$t28,$H1) &FR($t28);
- ############
- &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
- &add($R,$r30,$R);
- &add($H1,$t29,$H1) &FR($t29);
- ############
- &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
- &add($H1,$t30,$H1) &FR($t30);
- &st($R,&QWPw(6,$rp));
- &add($H1,$H2,$R);
-
- &add($R,$r31,$R); &FR($r31);
- &st($R,&QWPw(7,$rp));
-
- &FR($R,$H1,$H2);
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
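
mul_c4.pl unrolls a comba (column-wise) 4x4 multiply: every partial product a[i]*b[j] with i+j equal to the current result column is folded into a three-word accumulator before that column is stored, so each result word is written exactly once. A compact C sketch of the same scheme; the helper name mirrors the generator's mul_add_c, but this is not the emitted Alpha code:

	#include <stdint.h>

	/* Fold one 128-bit partial product into the (c0,c1,c2) column
	 * accumulator, mirroring the mul/muh/cmpult sequences above. */
	static void
	mul_add_c(uint64_t a, uint64_t b, uint64_t *c0, uint64_t *c1, uint64_t *c2)
	{
		unsigned __int128 t = (unsigned __int128)a * b;
		uint64_t lo = (uint64_t)t, hi = (uint64_t)(t >> 64);

		*c0 += lo;
		hi += (*c0 < lo);	/* carry out of the low word */
		*c1 += hi;
		*c2 += (*c1 < hi);	/* carry out of the middle word */
	}

	static void
	mul_comba4(uint64_t r[8], const uint64_t a[4], const uint64_t b[4])
	{
		uint64_t c0 = 0, c1 = 0, c2 = 0;

		for (int k = 0; k < 7; k++) {		/* columns 0..6 */
			for (int i = 0; i < 4; i++) {
				int j = k - i;
				if (j >= 0 && j < 4)
					mul_add_c(a[i], b[j], &c0, &c1, &c2);
			}
			r[k] = c0;
			c0 = c1; c1 = c2; c2 = 0;	/* rotate accumulator */
		}
		r[7] = c0;				/* final carry word */
	}
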
diff --git a/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl
deleted file mode 100644
index 79d86dd25cd..00000000000
--- a/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub mul_add_c
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
-print STDERR "count=$cnt\n"; $cnt++;
- &mul($a,$b,($l1)=&NR(1));
- &muh($a,$b,($h1)=&NR(1));
- &add($c0,$l1,$c0);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
- &add($t1,$h1,$h1); &FR($t1);
- &add($c1,$h1,$c1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-sub bn_mul_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
- &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
-
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &FR($c0,$c1,$c2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/lib/libcrypto/bn/asm/alpha/mul_c8.pl
deleted file mode 100644
index 525ca7494b7..00000000000
--- a/lib/libcrypto/bn/asm/alpha/mul_c8.pl
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_comba8
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &stack_push(2);
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &st($reg_s0,&swtmp(0)); &FR($reg_s0);
- &st($reg_s1,&swtmp(1)); &FR($reg_s1);
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &ld(($a[4])=&NR(1),&QWPw(1,$ap));
- &ld(($b[4])=&NR(1),&QWPw(1,$bp));
- &ld(($a[5])=&NR(1),&QWPw(1,$ap));
- &ld(($b[5])=&NR(1),&QWPw(1,$bp));
- &ld(($a[6])=&NR(1),&QWPw(1,$ap));
- &ld(($b[6])=&NR(1),&QWPw(1,$bp));
- &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap);
- &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp);
-
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
- &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
- &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
- &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
- &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
- &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
- &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
- &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
- &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &FR($c0,$c1,$c2);
-
- &ld($reg_s0,&swtmp(0));
- &ld($reg_s1,&swtmp(1));
- &stack_pop(2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha/sqr.pl b/lib/libcrypto/bn/asm/alpha/sqr.pl
deleted file mode 100644
index a55b696906e..00000000000
--- a/lib/libcrypto/bn/asm/alpha/sqr.pl
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(3);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$r0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &mul($a0,$a0,($l0)=&NR(1));
- &add($ap,$QWS,$ap);
- &add($rp,2*$QWS,$rp);
- &sub($count,1,$count);
- &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
- &st($l0,&QWPw(-2,$rp)); &FR($l0);
- &st($h0,&QWPw(-1,$rp)); &FR($h0);
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
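
bn_sqr_words is the simplest of these routines: each input word contributes an independent two-word square, so the output has 2*n words and no carry crosses between iterations, which is why the deleted sqr.pl can branch straight to its one-word tail loop. Sketch under the same assumptions as the earlier examples:

	#include <stddef.h>
	#include <stdint.h>

	/* r must hold 2*n words: r[2i] and r[2i+1] get the low and high
	 * halves of a[i]^2, exactly what the mul/muh pair above stores. */
	static void
	sqr_words(uint64_t *r, const uint64_t *a, size_t n)
	{
		for (size_t i = 0; i < n; i++) {
			unsigned __int128 t = (unsigned __int128)a[i] * a[i];

			r[2 * i] = (uint64_t)t;
			r[2 * i + 1] = (uint64_t)(t >> 64);
		}
	}
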
diff --git a/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/lib/libcrypto/bn/asm/alpha/sqr_c4.pl
deleted file mode 100644
index bf33f5b5037..00000000000
--- a/lib/libcrypto/bn/asm/alpha/sqr_c4.pl
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub sqr_add_c
- {
- local($a,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$a,($l1)=&NR(1));
- &muh($a,$a,($h1)=&NR(1));
- &add($c0,$l1,$c0);
- &add($c1,$h1,$c1);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
- &add($c1,$t1,$c1); &FR($t1);
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-sub sqr_add_c2
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$b,($l1)=&NR(1));
- &muh($a,$b,($h1)=&NR(1));
- &cmplt($l1,"zero",($lc1)=&NR(1));
- &cmplt($h1,"zero",($hc1)=&NR(1));
- &add($l1,$l1,$l1);
- &add($h1,$h1,$h1);
- &add($h1,$lc1,$h1); &FR($lc1);
- &add($c2,$hc1,$c2); &FR($hc1);
-
- &add($c0,$l1,$c0);
- &add($c1,$h1,$c1);
- &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1);
- &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1);
-
- &add($c1,$lc1,$c1); &FR($lc1);
- &add($c2,$hc1,$c2); &FR($hc1);
- }
-
-
-sub bn_sqr_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3);
-
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
- &st($c0,&QWPw(0,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[3],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
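
In a square every off-diagonal product a[i]*a[j] occurs twice, which is what sqr_add_c2 accounts for by doubling the 128-bit product and carrying the bits shifted out of each half. The sketch below takes the simpler, equivalent route of folding the product into the column twice; hypothetical names, 64-bit words assumed, not the generated code:

	#include <stdint.h>

	/* Add 2*a*b into the (c0,c1,c2) column accumulator.  Folding the
	 * product twice sidesteps the top-bit bookkeeping of the doubled
	 * form; hi <= 2^64-2, so hi plus a carry bit cannot wrap. */
	static void
	sqr_add_c2(uint64_t a, uint64_t b,
	    uint64_t *c0, uint64_t *c1, uint64_t *c2)
	{
		unsigned __int128 t = (unsigned __int128)a * b;

		for (int k = 0; k < 2; k++) {
			uint64_t lo = (uint64_t)t, hi = (uint64_t)(t >> 64);

			*c0 += lo;
			hi += (*c0 < lo);
			*c1 += hi;
			*c2 += (*c1 < hi);
		}
	}
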
diff --git a/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/lib/libcrypto/bn/asm/alpha/sqr_c8.pl
deleted file mode 100644
index b4afe085f1c..00000000000
--- a/lib/libcrypto/bn/asm/alpha/sqr_c8.pl
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_comba8
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($a[4])=&NR(1),&QWPw(4,$ap));
- &ld(($a[5])=&NR(1),&QWPw(5,$ap));
- &ld(($a[6])=&NR(1),&QWPw(6,$ap));
- &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3);
-
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
- &st($c0,&QWPw(0,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(7,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(8,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
- &st($c0,&QWPw(9,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[5],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
- &st($c0,&QWPw(10,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
- &st($c0,&QWPw(11,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[6],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
- &st($c0,&QWPw(12,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
- &st($c0,&QWPw(13,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- &sqr_add_c($a[7],$c0,$c1,$c2);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
diff --git a/lib/libcrypto/bn/asm/alpha/sub.pl b/lib/libcrypto/bn/asm/alpha/sub.pl
deleted file mode 100644
index d998da5c21a..00000000000
--- a/lib/libcrypto/bn/asm/alpha/sub.pl
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sub_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
-
- ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
- &ld($a1,&QWPw(1,$ap));
- &cmpult($a0,$b0,$tmp); # will we borrow?
- &ld($b1,&QWPw(1,$bp));
- &sub($a0,$b0,$a0); # do the subtract
- &ld($a2,&QWPw(2,$ap));
- &cmpult($a0,$cc,$b0); # will we borrow?
- &ld($b2,&QWPw(2,$bp));
- &sub($a0,$cc,$o0); # will we borrow?
- &ld($a3,&QWPw(3,$ap));
- &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp);
-
- &cmpult($a1,$b1,$t1); # will we borrow?
- &sub($a1,$b1,$a1); # do the subtract
- &ld($b3,&QWPw(3,$bp));
- &cmpult($a1,$cc,$b1); # will we borrow?
- &sub($a1,$cc,$o1); # will we borrow?
- &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
-
- &cmpult($a2,$b2,$tmp); # will we borrow?
- &sub($a2,$b2,$a2); # do the subtract
- &st($o0,&QWPw(0,$rp)); &FR($o0); # save
- &cmpult($a2,$cc,$b2); # will we borrow?
- &sub($a2,$cc,$o2); # will we borrow?
- &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
-
- &cmpult($a3,$b3,$t3); # will we borrow?
- &sub($a3,$b3,$a3); # do the subtract
- &st($o1,&QWPw(1,$rp)); &FR($o1);
- &cmpult($a3,$cc,$b3); # will we borrow?
- &sub($a3,$cc,$o3); # will we borrow?
- &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
-
- &st($o2,&QWPw(2,$rp)); &FR($o2);
- &sub($count,4,$count); # count-=4
- &st($o3,&QWPw(3,$rp)); &FR($o3);
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &cmpult($a0,$b0,$tmp); # will we borrow?
- &sub($a0,$b0,$a0); # do the subtract
- &cmpult($a0,$cc,$b0); # will we borrow?
- &sub($a0,$cc,$a0); # will we borrow?
- &st($a0,&QWPw(0,$rp)); # save
- &add($b0,$tmp,$cc); # add the borrows
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &FR($a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
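
sub.pl is the mirror image of the add case: a cmpult before the subtract records whether a borrow is about to happen, and a second compare covers the borrow coming in from the previous word. The same logic in C, hedged the same way as the earlier add sketch:

	#include <stddef.h>
	#include <stdint.h>

	/* Borrow-propagating word subtract: r = a - b - borrow_in. */
	static uint64_t
	sub_words(uint64_t *r, const uint64_t *a, const uint64_t *b, size_t n)
	{
		uint64_t borrow = 0;

		for (size_t i = 0; i < n; i++) {
			uint64_t b1 = (a[i] < b[i]);	/* will a-b borrow? */
			uint64_t t = a[i] - b[i];
			uint64_t b2 = (t < borrow);	/* will -borrow borrow? */
			r[i] = t - borrow;
			borrow = b1 + b2;	/* never both 1 */
		}
		return borrow;
	}
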
diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl
index 26c2685a726..332ef3e91d6 100644
--- a/lib/libcrypto/bn/asm/bn-586.pl
+++ b/lib/libcrypto/bn/asm/bn-586.pl
@@ -1,6 +1,7 @@
#!/usr/local/bin/perl
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],$0);
@@ -24,38 +25,25 @@ sub bn_mul_add_words
{
local($name)=@_;
- &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+ &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
- &comment("");
- $Low="eax";
- $High="edx";
- $a="ebx";
- $w="ebp";
- $r="edi";
- $c="esi";
-
- &xor($c,$c); # clear carry
- &mov($r,&wparam(0)); #
-
- &mov("ecx",&wparam(2)); #
- &mov($a,&wparam(1)); #
-
- &and("ecx",0xfffffff8); # num / 8
- &mov($w,&wparam(3)); #
-
- &push("ecx"); # Up the stack for a tmp variable
-
- &jz(&label("maw_finish"));
+ $r="eax";
+ $a="edx";
+ $c="ecx";
if ($sse2) {
&picmeup("eax","OPENSSL_ia32cap_P");
&bt(&DWP(0,"eax"),26);
- &jnc(&label("maw_loop"));
+ &jnc(&label("maw_non_sse2"));
- &movd("mm0",$w); # mm0 = w
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &mov($c,&wparam(2));
+ &movd("mm0",&wparam(3)); # mm0 = w
&pxor("mm1","mm1"); # mm1 = carry_in
-
- &set_label("maw_sse2_loop",0);
+ &jmp(&label("maw_sse2_entry"));
+
+ &set_label("maw_sse2_unrolled",16);
&movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
&paddq("mm1","mm3"); # mm1 = carry_in + r[0]
&movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
@@ -112,42 +100,82 @@ sub bn_mul_add_words
&psrlq("mm1",32); # mm1 = carry6
&paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
&movd(&DWP(28,$r,"",0),"mm1");
- &add($r,32);
+ &lea($r,&DWP(32,$r));
&psrlq("mm1",32); # mm1 = carry_out
- &sub("ecx",8);
+ &sub($c,8);
+ &jz(&label("maw_sse2_exit"));
+ &set_label("maw_sse2_entry");
+ &test($c,0xfffffff8);
+ &jnz(&label("maw_sse2_unrolled"));
+
+ &set_label("maw_sse2_loop",4);
+ &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
+ &movd("mm3",&DWP(0,$r)); # mm3 = r[i]
+ &pmuludq("mm2","mm0"); # a[i] *= w
+ &lea($a,&DWP(4,$a));
+ &paddq("mm1","mm3"); # carry += r[i]
+ &paddq("mm1","mm2"); # carry += a[i]*w
+ &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
+ &sub($c,1);
+ &psrlq("mm1",32); # carry = carry_high
+ &lea($r,&DWP(4,$r));
&jnz(&label("maw_sse2_loop"));
-
- &movd($c,"mm1"); # c = carry_out
+ &set_label("maw_sse2_exit");
+ &movd("eax","mm1"); # c = carry_out
&emms();
+ &ret();
- &jmp(&label("maw_finish"));
+ &set_label("maw_non_sse2",16);
}
- &set_label("maw_loop",0);
+ # function_begin prologue
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
+
+ &comment("");
+ $Low="eax";
+ $High="edx";
+ $a="ebx";
+ $w="ebp";
+ $r="edi";
+ $c="esi";
+
+ &xor($c,$c); # clear carry
+ &mov($r,&wparam(0)); #
+
+ &mov("ecx",&wparam(2)); #
+ &mov($a,&wparam(1)); #
+
+ &and("ecx",0xfffffff8); # num / 8
+ &mov($w,&wparam(3)); #
- &mov(&swtmp(0),"ecx"); #
+ &push("ecx"); # Up the stack for a tmp variable
+
+ &jz(&label("maw_finish"));
+
+ &set_label("maw_loop",16);
for ($i=0; $i<32; $i+=4)
{
&comment("Round $i");
- &mov("eax",&DWP($i,$a,"",0)); # *a
+ &mov("eax",&DWP($i,$a)); # *a
&mul($w); # *a * w
- &add("eax",$c); # L(t)+= *r
- &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
+ &add("eax",$c); # L(t)+= c
&adc("edx",0); # H(t)+=carry
- &add("eax",$c); # L(t)+=c
+ &add("eax",&DWP($i,$r)); # L(t)+= *r
&adc("edx",0); # H(t)+=carry
- &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
+ &mov(&DWP($i,$r),"eax"); # *r= L(t);
&mov($c,"edx"); # c= H(t);
}
&comment("");
- &mov("ecx",&swtmp(0)); #
- &add($a,32);
- &add($r,32);
&sub("ecx",8);
+ &lea($a,&DWP(32,$a));
+ &lea($r,&DWP(32,$r));
&jnz(&label("maw_loop"));
&set_label("maw_finish",0);
@@ -160,16 +188,15 @@ sub bn_mul_add_words
for ($i=0; $i<7; $i++)
{
&comment("Tail Round $i");
- &mov("eax",&DWP($i*4,$a,"",0));# *a
+ &mov("eax",&DWP($i*4,$a)); # *a
&mul($w); # *a * w
&add("eax",$c); # L(t)+=c
- &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
&adc("edx",0); # H(t)+=carry
- &add("eax",$c);
+ &add("eax",&DWP($i*4,$r)); # L(t)+= *r
&adc("edx",0); # H(t)+=carry
&dec("ecx") if ($i != 7-1);
- &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
- &mov($c,"edx"); # c= H(t);
+ &mov(&DWP($i*4,$r),"eax"); # *r= L(t);
+ &mov($c,"edx"); # c= H(t);
&jz(&label("maw_end")) if ($i != 7-1);
}
&set_label("maw_end",0);
@@ -184,7 +211,45 @@ sub bn_mul_words
{
local($name)=@_;
- &function_begin($name,"");
+ &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+ $r="eax";
+ $a="edx";
+ $c="ecx";
+
+ if ($sse2) {
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt(&DWP(0,"eax"),26);
+ &jnc(&label("mw_non_sse2"));
+
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &mov($c,&wparam(2));
+ &movd("mm0",&wparam(3)); # mm0 = w
+ &pxor("mm1","mm1"); # mm1 = carry = 0
+
+ &set_label("mw_sse2_loop",16);
+ &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
+ &pmuludq("mm2","mm0"); # a[i] *= w
+ &lea($a,&DWP(4,$a));
+ &paddq("mm1","mm2"); # carry += a[i]*w
+ &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
+ &sub($c,1);
+ &psrlq("mm1",32); # carry = carry_high
+ &lea($r,&DWP(4,$r));
+ &jnz(&label("mw_sse2_loop"));
+
+ &movd("eax","mm1"); # return carry
+ &emms();
+ &ret();
+ &set_label("mw_non_sse2",16);
+ }
+
+ # function_begin prologue
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
&comment("");
$Low="eax";
@@ -257,7 +322,40 @@ sub bn_sqr_words
{
local($name)=@_;
- &function_begin($name,"");
+ &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+ $r="eax";
+ $a="edx";
+ $c="ecx";
+
+ if ($sse2) {
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt(&DWP(0,"eax"),26);
+ &jnc(&label("sqr_non_sse2"));
+
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &mov($c,&wparam(2));
+
+ &set_label("sqr_sse2_loop",16);
+ &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
+ &pmuludq("mm0","mm0"); # a[i] *= a[i]
+ &lea($a,&DWP(4,$a)); # a++
+ &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
+ &sub($c,1);
+ &lea($r,&DWP(8,$r)); # r += 2
+ &jnz(&label("sqr_sse2_loop"));
+
+ &emms();
+ &ret();
+ &set_label("sqr_non_sse2",16);
+ }
+
+ # function_begin prologue
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
&comment("");
$r="esi";
@@ -313,12 +411,13 @@ sub bn_div_words
{
local($name)=@_;
- &function_begin($name,"");
+ &function_begin_B($name,"");
&mov("edx",&wparam(0)); #
&mov("eax",&wparam(1)); #
- &mov("ebx",&wparam(2)); #
- &div("ebx");
- &function_end($name);
+ &mov("ecx",&wparam(2)); #
+ &div("ecx");
+ &ret();
+ &function_end_B($name);
}
sub bn_add_words
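
The new maw_sse2_loop and mw_sse2_loop bodies above keep the entire carry in one 64-bit MMX register: each step is a 32x32->64 pmuludq plus 64-bit adds, the low half is stored and the high half becomes the next carry. The same dataflow in portable C for 32-bit words, as a sketch of the idea rather than the perlasm output:

	#include <stddef.h>
	#include <stdint.h>

	/* bn_mul_add_words dataflow of the SSE2 path: a 64-bit
	 * accumulator plays the role of mm1, and the 32x32->64 multiply
	 * is what pmuludq provides. */
	static uint32_t
	mul_add_words32(uint32_t *r, const uint32_t *a, size_t n, uint32_t w)
	{
		uint64_t carry = 0;

		for (size_t i = 0; i < n; i++) {
			carry += r[i];			/* paddq mm1, r[i]  */
			carry += (uint64_t)a[i] * w;	/* pmuludq + paddq  */
			r[i] = (uint32_t)carry;		/* movd low half    */
			carry >>= 32;			/* psrlq mm1, 32    */
		}
		return (uint32_t)carry;
	}

The worst case, carry + r[i] + a[i]*w, is at most 2^64 - 1, so the 64-bit accumulator never overflows, which is what lets the assembly hold the whole carry in mm1.
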
diff --git a/lib/libcrypto/bn/asm/bn-alpha.pl b/lib/libcrypto/bn/asm/bn-alpha.pl
deleted file mode 100644
index 302edf23767..00000000000
--- a/lib/libcrypto/bn/asm/bn-alpha.pl
+++ /dev/null
@@ -1,571 +0,0 @@
-#!/usr/local/bin/perl
-# I have this in perl so I can use more useful register names and then convert
-# them into alpha registers.
-#
-
-$d=&data();
-$d =~ s/CC/0/g;
-$d =~ s/R1/1/g;
-$d =~ s/R2/2/g;
-$d =~ s/R3/3/g;
-$d =~ s/R4/4/g;
-$d =~ s/L1/5/g;
-$d =~ s/L2/6/g;
-$d =~ s/L3/7/g;
-$d =~ s/L4/8/g;
-$d =~ s/O1/22/g;
-$d =~ s/O2/23/g;
-$d =~ s/O3/24/g;
-$d =~ s/O4/25/g;
-$d =~ s/A1/20/g;
-$d =~ s/A2/21/g;
-$d =~ s/A3/27/g;
-$d =~ s/A4/28/g;
-if (0){
-}
-
-print $d;
-
-sub data
- {
- local($data)=<<'EOF';
-
-	# DEC Alpha assembler
- # The bn_div_words is actually gcc output but the other parts are hand done.
- # Thanks to tzeruch@ceddec.com for sending me the gcc output for
- # bn_div_words.
-	# I've gone back and re-done most of the routines.
-	# The key thing to remember for the 164 CPU is that while a
- # multiply operation takes 8 cycles, another one can only be issued
-	# after 4 cycles have elapsed. I've done modifications to help
- # improve this. Also, normally, a ld instruction will not be available
- # for about 3 cycles.
- .file 1 "bn_asm.c"
- .set noat
-gcc2_compiled.:
-__gnu_compiled_c:
- .text
- .align 3
- .globl bn_mul_add_words
- .ent bn_mul_add_words
-bn_mul_add_words:
-bn_mul_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$CC
- blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
- ldq $A1,0($17) # 1 1
- ldq $R1,0($16) # 1 1
- .align 3
-$42:
- mulq $A1,$19,$L1 # 1 2 1 ######
- ldq $A2,8($17) # 2 1
- ldq $R2,8($16) # 2 1
- umulh $A1,$19,$A1 # 1 2 ######
- ldq $A3,16($17) # 3 1
- ldq $R3,16($16) # 3 1
- mulq $A2,$19,$L2 # 2 2 1 ######
- ldq $A4,24($17) # 4 1
- addq $R1,$L1,$R1 # 1 2 2
- ldq $R4,24($16) # 4 1
- umulh $A2,$19,$A2 # 2 2 ######
- cmpult $R1,$L1,$O1 # 1 2 3 1
- addq $A1,$O1,$A1 # 1 3 1
- addq $R1,$CC,$R1 # 1 2 3 1
- mulq $A3,$19,$L3 # 3 2 1 ######
- cmpult $R1,$CC,$CC # 1 2 3 2
- addq $R2,$L2,$R2 # 2 2 2
- addq $A1,$CC,$CC # 1 3 2
- cmpult $R2,$L2,$O2 # 2 2 3 1
- addq $A2,$O2,$A2 # 2 3 1
- umulh $A3,$19,$A3 # 3 2 ######
- addq $R2,$CC,$R2 # 2 2 3 1
- cmpult $R2,$CC,$CC # 2 2 3 2
- subq $18,4,$18
- mulq $A4,$19,$L4 # 4 2 1 ######
- addq $A2,$CC,$CC # 2 3 2
- addq $R3,$L3,$R3 # 3 2 2
- addq $16,32,$16
- cmpult $R3,$L3,$O3 # 3 2 3 1
- stq $R1,-32($16) # 1 2 4
- umulh $A4,$19,$A4 # 4 2 ######
- addq $A3,$O3,$A3 # 3 3 1
- addq $R3,$CC,$R3 # 3 2 3 1
- stq $R2,-24($16) # 2 2 4
- cmpult $R3,$CC,$CC # 3 2 3 2
- stq $R3,-16($16) # 3 2 4
- addq $R4,$L4,$R4 # 4 2 2
- addq $A3,$CC,$CC # 3 3 2
- cmpult $R4,$L4,$O4 # 4 2 3 1
- addq $17,32,$17
- addq $A4,$O4,$A4 # 4 3 1
- addq $R4,$CC,$R4 # 4 2 3 1
- cmpult $R4,$CC,$CC # 4 2 3 2
- stq $R4,-8($16) # 4 2 4
- addq $A4,$CC,$CC # 4 3 2
- blt $18,$43
-
- ldq $A1,0($17) # 1 1
- ldq $R1,0($16) # 1 1
-
- br $42
-
- .align 4
-$45:
- ldq $A1,0($17) # 4 1
- ldq $R1,0($16) # 4 1
- mulq $A1,$19,$L1 # 4 2 1
- subq $18,1,$18
- addq $16,8,$16
- addq $17,8,$17
- umulh $A1,$19,$A1 # 4 2
- addq $R1,$L1,$R1 # 4 2 2
- cmpult $R1,$L1,$O1 # 4 2 3 1
- addq $A1,$O1,$A1 # 4 3 1
- addq $R1,$CC,$R1 # 4 2 3 1
- cmpult $R1,$CC,$CC # 4 2 3 2
- addq $A1,$CC,$CC # 4 3 2
- stq $R1,-8($16) # 4 2 4
- bgt $18,$45
- ret $31,($26),1 # else exit
-
- .align 4
-$43:
- addq $18,4,$18
- bgt $18,$45 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_add_words
- .align 3
- .globl bn_mul_words
- .ent bn_mul_words
-bn_mul_words:
-bn_mul_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$CC
- blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
- ldq $A1,0($17) # 1 1
- .align 3
-$142:
-
- mulq $A1,$19,$L1 # 1 2 1 #####
- ldq $A2,8($17) # 2 1
- ldq $A3,16($17) # 3 1
- umulh $A1,$19,$A1 # 1 2 #####
- ldq $A4,24($17) # 4 1
- mulq $A2,$19,$L2 # 2 2 1 #####
- addq $L1,$CC,$L1 # 1 2 3 1
- subq $18,4,$18
- cmpult $L1,$CC,$CC # 1 2 3 2
- umulh $A2,$19,$A2 # 2 2 #####
- addq $A1,$CC,$CC # 1 3 2
- addq $17,32,$17
- addq $L2,$CC,$L2 # 2 2 3 1
- mulq $A3,$19,$L3 # 3 2 1 #####
- cmpult $L2,$CC,$CC # 2 2 3 2
- addq $A2,$CC,$CC # 2 3 2
- addq $16,32,$16
- umulh $A3,$19,$A3 # 3 2 #####
- stq $L1,-32($16) # 1 2 4
- mulq $A4,$19,$L4 # 4 2 1 #####
- addq $L3,$CC,$L3 # 3 2 3 1
- stq $L2,-24($16) # 2 2 4
- cmpult $L3,$CC,$CC # 3 2 3 2
- umulh $A4,$19,$A4 # 4 2 #####
- addq $A3,$CC,$CC # 3 3 2
- stq $L3,-16($16) # 3 2 4
- addq $L4,$CC,$L4 # 4 2 3 1
- cmpult $L4,$CC,$CC # 4 2 3 2
-
- addq $A4,$CC,$CC # 4 3 2
-
- stq $L4,-8($16) # 4 2 4
-
- blt $18,$143
-
- ldq $A1,0($17) # 1 1
-
- br $142
-
- .align 4
-$145:
- ldq $A1,0($17) # 4 1
- mulq $A1,$19,$L1 # 4 2 1
- subq $18,1,$18
- umulh $A1,$19,$A1 # 4 2
- addq $L1,$CC,$L1 # 4 2 3 1
- addq $16,8,$16
- cmpult $L1,$CC,$CC # 4 2 3 2
- addq $17,8,$17
- addq $A1,$CC,$CC # 4 3 2
- stq $L1,-8($16) # 4 2 4
-
- bgt $18,$145
- ret $31,($26),1 # else exit
-
- .align 4
-$143:
- addq $18,4,$18
- bgt $18,$145 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_words
- .align 3
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
-bn_sqr_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $18,4,$18
- blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
- ldq $A1,0($17) # 1 1
- .align 3
-$542:
- mulq $A1,$A1,$L1 ######
- ldq $A2,8($17) # 1 1
- subq $18,4
- umulh $A1,$A1,$R1 ######
- ldq $A3,16($17) # 1 1
- mulq $A2,$A2,$L2 ######
- ldq $A4,24($17) # 1 1
- stq $L1,0($16) # r[0]
- umulh $A2,$A2,$R2 ######
- stq $R1,8($16) # r[1]
- mulq $A3,$A3,$L3 ######
- stq $L2,16($16) # r[0]
- umulh $A3,$A3,$R3 ######
- stq $R2,24($16) # r[1]
- mulq $A4,$A4,$L4 ######
- stq $L3,32($16) # r[0]
- umulh $A4,$A4,$R4 ######
- stq $R3,40($16) # r[1]
-
- addq $16,64,$16
- addq $17,32,$17
- stq $L4,-16($16) # r[0]
- stq $R4,-8($16) # r[1]
-
- blt $18,$543
- ldq $A1,0($17) # 1 1
- br $542
-
-$442:
- ldq $A1,0($17) # a[0]
- mulq $A1,$A1,$L1 # a[0]*w low part r2
- addq $16,16,$16
- addq $17,8,$17
- subq $18,1,$18
- umulh $A1,$A1,$R1 # a[0]*w high part r3
- stq $L1,-16($16) # r[0]
- stq $R1,-8($16) # r[1]
-
- bgt $18,$442
- ret $31,($26),1 # else exit
-
- .align 4
-$543:
- addq $18,4,$18
- bgt $18,$442 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sqr_words
-
- .align 3
- .globl bn_add_words
- .ent bn_add_words
-bn_add_words:
-bn_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19,4,$19
- bis $31,$31,$CC # carry = 0
- blt $19,$900
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[1]
- .align 3
-$901:
- addq $R1,$L1,$R1 # r=a+b;
- ldq $L2,8($17) # a[1]
- cmpult $R1,$L1,$O1 # did we overflow?
- ldq $R2,8($18) # b[1]
- addq $R1,$CC,$R1 # c+= overflow
- ldq $L3,16($17) # a[2]
- cmpult $R1,$CC,$CC # overflow?
- ldq $R3,16($18) # b[2]
- addq $CC,$O1,$CC
- ldq $L4,24($17) # a[3]
- addq $R2,$L2,$R2 # r=a+b;
- ldq $R4,24($18) # b[3]
- cmpult $R2,$L2,$O2 # did we overflow?
- addq $R3,$L3,$R3 # r=a+b;
- addq $R2,$CC,$R2 # c+= overflow
- cmpult $R3,$L3,$O3 # did we overflow?
- cmpult $R2,$CC,$CC # overflow?
- addq $R4,$L4,$R4 # r=a+b;
- addq $CC,$O2,$CC
- cmpult $R4,$L4,$O4 # did we overflow?
- addq $R3,$CC,$R3 # c+= overflow
- stq $R1,0($16) # r[0]=c
- cmpult $R3,$CC,$CC # overflow?
- stq $R2,8($16) # r[1]=c
- addq $CC,$O3,$CC
- stq $R3,16($16) # r[2]=c
- addq $R4,$CC,$R4 # c+= overflow
- subq $19,4,$19 # loop--
- cmpult $R4,$CC,$CC # overflow?
- addq $17,32,$17 # a++
- addq $CC,$O4,$CC
- stq $R4,24($16) # r[3]=c
- addq $18,32,$18 # b++
- addq $16,32,$16 # r++
-
- blt $19,$900
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[1]
- br $901
- .align 4
-$945:
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[1]
- addq $R1,$L1,$R1 # r=a+b;
- subq $19,1,$19 # loop--
- addq $R1,$CC,$R1 # c+= overflow
- addq $17,8,$17 # a++
- cmpult $R1,$L1,$O1 # did we overflow?
- cmpult $R1,$CC,$CC # overflow?
- addq $18,8,$18 # b++
- stq $R1,0($16) # r[0]=c
- addq $CC,$O1,$CC
- addq $16,8,$16 # r++
-
- bgt $19,$945
- ret $31,($26),1 # else exit
-
-$900:
- addq $19,4,$19
- bgt $19,$945 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_add_words
-
- .align 3
- .globl bn_sub_words
- .ent bn_sub_words
-bn_sub_words:
-bn_sub_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19,4,$19
- bis $31,$31,$CC # carry = 0
- br $800
- blt $19,$800
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[1]
- .align 3
-$801:
- addq $R1,$L1,$R1 # r=a+b;
- ldq $L2,8($17) # a[1]
- cmpult $R1,$L1,$O1 # did we overflow?
- ldq $R2,8($18) # b[1]
- addq $R1,$CC,$R1 # c+= overflow
- ldq $L3,16($17) # a[2]
- cmpult $R1,$CC,$CC # overflow?
- ldq $R3,16($18) # b[2]
- addq $CC,$O1,$CC
- ldq $L4,24($17) # a[3]
- addq $R2,$L2,$R2 # r=a+b;
- ldq $R4,24($18) # b[3]
- cmpult $R2,$L2,$O2 # did we overflow?
- addq $R3,$L3,$R3 # r=a+b;
- addq $R2,$CC,$R2 # c+= overflow
- cmpult $R3,$L3,$O3 # did we overflow?
- cmpult $R2,$CC,$CC # overflow?
- addq $R4,$L4,$R4 # r=a+b;
- addq $CC,$O2,$CC
- cmpult $R4,$L4,$O4 # did we overflow?
- addq $R3,$CC,$R3 # c+= overflow
- stq $R1,0($16) # r[0]=c
- cmpult $R3,$CC,$CC # overflow?
- stq $R2,8($16) # r[1]=c
- addq $CC,$O3,$CC
- stq $R3,16($16) # r[2]=c
- addq $R4,$CC,$R4 # c+= overflow
- subq $19,4,$19 # loop--
- cmpult $R4,$CC,$CC # overflow?
- addq $17,32,$17 # a++
- addq $CC,$O4,$CC
- stq $R4,24($16) # r[3]=c
- addq $18,32,$18 # b++
- addq $16,32,$16 # r++
-
- blt $19,$800
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[1]
- br $801
- .align 4
-$845:
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[1]
- cmpult $L1,$R1,$O1 # will we borrow?
- subq $L1,$R1,$R1 # r=a-b;
- subq $19,1,$19 # loop--
- cmpult $R1,$CC,$O2 # will we borrow?
- subq $R1,$CC,$R1 # c+= overflow
- addq $17,8,$17 # a++
- addq $18,8,$18 # b++
- stq $R1,0($16) # r[0]=c
- addq $O2,$O1,$CC
- addq $16,8,$16 # r++
-
- bgt $19,$845
- ret $31,($26),1 # else exit
-
-$800:
- addq $19,4,$19
- bgt $19,$845 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sub_words
-
- #
- # What follows was taken directly from the C compiler with a few
-	# hacks to redo the labels.
- #
-.text
- .align 3
- .globl bn_div_words
- .ent bn_div_words
-bn_div_words:
- ldgp $29,0($27)
-bn_div_words..ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$119
- lda $0,-1
- br $31,$136
- .align 4
-$119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$126
- zapnot $7,15,$27
- br $31,$127
- .align 4
-$126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$127:
- srl $10,32,$4
- .align 5
-$128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$129
- subq $27,1,$27
- br $31,$128
- .align 4
-$129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$134
- addq $9,$11,$9
- subq $27,1,$27
-$134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$123
- .align 4
-$124:
- bis $13,$27,$0
-$136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div_words
-EOF
- return($data);
- }
-
diff --git a/lib/libcrypto/bn/asm/ca.pl b/lib/libcrypto/bn/asm/ca.pl
deleted file mode 100644
index c1ce67a6b4d..00000000000
--- a/lib/libcrypto/bn/asm/ca.pl
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/local/bin/perl
-# I have this in perl so I can use more useful register names and then convert
-# them into alpha registers.
-#
-
-push(@INC,"perlasm","../../perlasm");
-require "alpha.pl";
-require "alpha/mul_add.pl";
-require "alpha/mul.pl";
-require "alpha/sqr.pl";
-require "alpha/add.pl";
-require "alpha/sub.pl";
-require "alpha/mul_c8.pl";
-require "alpha/mul_c4.pl";
-require "alpha/sqr_c4.pl";
-require "alpha/sqr_c8.pl";
-require "alpha/div.pl";
-
-&asm_init($ARGV[0],$0);
-
-&bn_mul_words("bn_mul_words");
-&bn_sqr_words("bn_sqr_words");
-&bn_mul_add_words("bn_mul_add_words");
-&bn_add_words("bn_add_words");
-&bn_sub_words("bn_sub_words");
-&bn_div_words("bn_div_words");
-&bn_mul_comba8("bn_mul_comba8");
-&bn_mul_comba4("bn_mul_comba4");
-&bn_sqr_comba4("bn_sqr_comba4");
-&bn_sqr_comba8("bn_sqr_comba8");
-
-&asm_finish();
-
diff --git a/lib/libcrypto/bn/asm/co-586.pl b/lib/libcrypto/bn/asm/co-586.pl
index 5d962cb957d..57101a6bd77 100644
--- a/lib/libcrypto/bn/asm/co-586.pl
+++ b/lib/libcrypto/bn/asm/co-586.pl
@@ -1,6 +1,7 @@
#!/usr/local/bin/perl
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],$0);
diff --git a/lib/libcrypto/bn/asm/co-alpha.pl b/lib/libcrypto/bn/asm/co-alpha.pl
deleted file mode 100644
index 67dad3e3d5f..00000000000
--- a/lib/libcrypto/bn/asm/co-alpha.pl
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/local/bin/perl
-# I have this in perl so I can use more useful register names and then convert
-# them into alpha registers.
-#
-
-push(@INC,"perlasm","../../perlasm");
-require "alpha.pl";
-
-&asm_init($ARGV[0],$0);
-
-print &bn_sub_words("bn_sub_words");
-
-&asm_finish();
-
-sub bn_sub_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- $cc="r0";
- $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13";
- $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14";
- $a2="r3"; $b2="r7"; $r2="r11";
- $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15";
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- &blt($count,&label("finish"));
-
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
-
- &ld($a1,&QWPw(1,$ap));
- &cmpult($a0,$b0,$tmp); # will we borrow?
- &ld($b1,&QWPw(1,$bp));
- &sub($a0,$b0,$a0); # do the subtract
- &ld($a2,&QWPw(2,$ap));
- &cmpult($a0,$cc,$b0); # will we borrow?
- &ld($b2,&QWPw(2,$bp));
- &sub($a0,$cc,$a0); # will we borrow?
- &ld($a3,&QWPw(3,$ap));
- &add($b0,$tmp,$cc); # add the borrows
-
- &cmpult($a1,$b1,$t1); # will we borrow?
- &sub($a1,$b1,$a1); # do the subtract
- &ld($b3,&QWPw(3,$bp));
- &cmpult($a1,$cc,$b1); # will we borrow?
- &sub($a1,$cc,$a1); # will we borrow?
- &add($b1,$t1,$cc); # add the borrows
-
- &cmpult($a2,$b2,$tmp); # will we borrow?
- &sub($a2,$b2,$a2); # do the subtract
- &st($a0,&QWPw(0,$rp)); # save
- &cmpult($a2,$cc,$b2); # will we borrow?
- &sub($a2,$cc,$a2); # will we borrow?
- &add($b2,$tmp,$cc); # add the borrows
-
- &cmpult($a3,$b3,$t3); # will we borrow?
- &sub($a3,$b3,$a3); # do the subtract
- &st($a1,&QWPw(1,$rp)); # save
- &cmpult($a3,$cc,$b3); # will we borrow?
- &sub($a3,$cc,$a3); # will we borrow?
- &add($b3,$t3,$cc); # add the borrows
-
- &st($a2,&QWPw(2,$rp)); # save
- &sub($count,4,$count); # count-=4
- &st($a3,&QWPw(3,$rp)); # save
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &cmpult($a0,$b0,$tmp); # will we borrow?
- &sub($a0,$b0,$a0); # do the subtract
- &cmpult($a0,$cc,$b0); # will we borrow?
- &sub($a0,$cc,$a0); # will we borrow?
- &st($a0,&QWPw(0,$rp)); # save
- &add($b0,$tmp,$cc); # add the borrows
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
- }
-
diff --git a/lib/libcrypto/bn/asm/mips1.s b/lib/libcrypto/bn/asm/mips1.s
deleted file mode 100644
index 44fa1254c76..00000000000
--- a/lib/libcrypto/bn/asm/mips1.s
+++ /dev/null
@@ -1,539 +0,0 @@
-/* This assembler is for R2000/R3000 machines, or higher ones that do
- * not want to do any 64-bit arithmetic.
- * Make sure that the SSLeay bignum library is compiled with
- * THIRTY_TWO_BIT set.
- * This must either be compiled with the system CC, or, if you use GNU gas,
- * cc -E mips1.s|gas -o mips1.o
- */
- .set reorder
- .set noat
-
-#define R1 $1
-#define CC $2
-#define R2 $3
-#define R3 $8
-#define R4 $9
-#define L1 $10
-#define L2 $11
-#define L3 $12
-#define L4 $13
-#define H1 $14
-#define H2 $15
-#define H3 $24
-#define H4 $25
-
-#define P1 $4
-#define P2 $5
-#define P3 $6
-#define P4 $7
-
- .align 2
- .ent bn_mul_add_words
- .globl bn_mul_add_words
-.text
-bn_mul_add_words:
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- #blt P3,4,$lab34
-
- subu R1,P3,4
- move CC,$0
- bltz R1,$lab34
-$lab2:
- lw R1,0(P1)
- lw L1,0(P2)
- lw R2,4(P1)
- lw L2,4(P2)
- lw R3,8(P1)
- lw L3,8(P2)
- lw R4,12(P1)
- lw L4,12(P2)
- multu L1,P4
- addu R1,R1,CC
- mflo L1
- sltu CC,R1,CC
- addu R1,R1,L1
- mfhi H1
- sltu L1,R1,L1
- sw R1,0(P1)
- addu CC,CC,L1
- multu L2,P4
- addu CC,H1,CC
- mflo L2
- addu R2,R2,CC
- sltu CC,R2,CC
- mfhi H2
- addu R2,R2,L2
- addu P2,P2,16
- sltu L2,R2,L2
- sw R2,4(P1)
- addu CC,CC,L2
- multu L3,P4
- addu CC,H2,CC
- mflo L3
- addu R3,R3,CC
- sltu CC,R3,CC
- mfhi H3
- addu R3,R3,L3
- addu P1,P1,16
- sltu L3,R3,L3
- sw R3,-8(P1)
- addu CC,CC,L3
- multu L4,P4
- addu CC,H3,CC
- mflo L4
- addu R4,R4,CC
- sltu CC,R4,CC
- mfhi H4
- addu R4,R4,L4
- subu P3,P3,4
- sltu L4,R4,L4
- addu CC,CC,L4
- addu CC,H4,CC
-
- subu R1,P3,4
- sw R4,-4(P1) # delay slot
- bgez R1,$lab2
-
- bleu P3,0,$lab3
- .align 2
-$lab33:
- lw L1,0(P2)
- lw R1,0(P1)
- multu L1,P4
- addu R1,R1,CC
- sltu CC,R1,CC
- addu P1,P1,4
- mflo L1
- mfhi H1
- addu R1,R1,L1
- addu P2,P2,4
- sltu L1,R1,L1
- subu P3,P3,1
- addu CC,CC,L1
- sw R1,-4(P1)
- addu CC,H1,CC
- bgtz P3,$lab33
- j $31
- .align 2
-$lab3:
- j $31
- .align 2
-$lab34:
- bgt P3,0,$lab33
- j $31
- .end bn_mul_add_words
-
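The bn_mul_add_words routine above is the THIRTY_TWO_BIT multiply-accumulate the file header refers to: multu/mflo/mfhi produce the 64-bit product and the sltu instructions propagate the carries. A minimal C sketch of the same word loop, assuming 32-bit words and a 64-bit intermediate in place of the explicit carry bookkeeping (the function name is illustrative):

#include <stddef.h>
#include <stdint.h>

/* rp[i] += ap[i] * w, returning the final carry word. */
static uint32_t mul_add_words_sketch(uint32_t *rp, const uint32_t *ap,
    size_t num, uint32_t w)
{
	uint32_t carry = 0;

	while (num--) {
		uint64_t t = (uint64_t)*ap++ * w;	/* multu: hi/lo product */
		t += *rp;				/* add the existing word */
		t += carry;				/* add the incoming carry */
		*rp++ = (uint32_t)t;			/* store the low word */
		carry = (uint32_t)(t >> 32);		/* high word carries on */
	}
	return carry;
}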
- .align 2
- # Program Unit: bn_mul_words
- .ent bn_mul_words
- .globl bn_mul_words
-.text
-bn_mul_words:
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- subu P3,P3,4
- move CC,$0
- bltz P3,$lab45
-$lab44:
- lw L1,0(P2)
- lw L2,4(P2)
- lw L3,8(P2)
- lw L4,12(P2)
- multu L1,P4
- subu P3,P3,4
- mflo L1
- mfhi H1
- addu L1,L1,CC
- multu L2,P4
- sltu CC,L1,CC
- sw L1,0(P1)
- addu CC,H1,CC
- mflo L2
- mfhi H2
- addu L2,L2,CC
- multu L3,P4
- sltu CC,L2,CC
- sw L2,4(P1)
- addu CC,H2,CC
- mflo L3
- mfhi H3
- addu L3,L3,CC
- multu L4,P4
- sltu CC,L3,CC
- sw L3,8(P1)
- addu CC,H3,CC
- mflo L4
- mfhi H4
- addu L4,L4,CC
- addu P1,P1,16
- sltu CC,L4,CC
- addu P2,P2,16
- addu CC,H4,CC
- sw L4,-4(P1)
-
- bgez P3,$lab44
- b $lab45
-$lab46:
- lw L1,0(P2)
- addu P1,P1,4
- multu L1,P4
- addu P2,P2,4
- mflo L1
- mfhi H1
- addu L1,L1,CC
- subu P3,P3,1
- sltu CC,L1,CC
- sw L1,-4(P1)
- addu CC,H1,CC
- bgtz P3,$lab46
- j $31
-$lab45:
- addu P3,P3,4
- bgtz P3,$lab46
- j $31
- .align 2
- .end bn_mul_words
-
- # Program Unit: bn_sqr_words
- .ent bn_sqr_words
- .globl bn_sqr_words
-.text
-bn_sqr_words:
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- subu P3,P3,4
- bltz P3,$lab55
-$lab54:
- lw L1,0(P2)
- lw L2,4(P2)
- lw L3,8(P2)
- lw L4,12(P2)
-
- multu L1,L1
- subu P3,P3,4
- mflo L1
- mfhi H1
- sw L1,0(P1)
- sw H1,4(P1)
-
- multu L2,L2
- addu P1,P1,32
- mflo L2
- mfhi H2
- sw L2,-24(P1)
- sw H2,-20(P1)
-
- multu L3,L3
- addu P2,P2,16
- mflo L3
- mfhi H3
- sw L3,-16(P1)
- sw H3,-12(P1)
-
- multu L4,L4
-
- mflo L4
- mfhi H4
- sw L4,-8(P1)
- sw H4,-4(P1)
-
- bgtz P3,$lab54
- b $lab55
-$lab56:
- lw L1,0(P2)
- addu P1,P1,8
- multu L1,L1
- addu P2,P2,4
- subu P3,P3,1
- mflo L1
- mfhi H1
- sw L1,-8(P1)
- sw H1,-4(P1)
-
- bgtz P3,$lab56
- j $31
-$lab55:
- addu P3,P3,4
- bgtz P3,$lab56
- j $31
- .align 2
- .end bn_sqr_words
-
- # Program Unit: bn_add_words
- .ent bn_add_words
- .globl bn_add_words
-.text
-bn_add_words: # 0x590
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- subu P4,P4,4
- move CC,$0
- bltz P4,$lab65
-$lab64:
- lw L1,0(P2)
- lw R1,0(P3)
- lw L2,4(P2)
- lw R2,4(P3)
-
- addu L1,L1,CC
- lw L3,8(P2)
- sltu CC,L1,CC
- addu L1,L1,R1
- sltu R1,L1,R1
- lw R3,8(P3)
- addu CC,CC,R1
- lw L4,12(P2)
-
- addu L2,L2,CC
- lw R4,12(P3)
- sltu CC,L2,CC
- addu L2,L2,R2
- sltu R2,L2,R2
- sw L1,0(P1)
- addu CC,CC,R2
- addu P1,P1,16
- addu L3,L3,CC
- sw L2,-12(P1)
-
- sltu CC,L3,CC
- addu L3,L3,R3
- sltu R3,L3,R3
- addu P2,P2,16
- addu CC,CC,R3
-
- addu L4,L4,CC
- addu P3,P3,16
- sltu CC,L4,CC
- addu L4,L4,R4
- subu P4,P4,4
- sltu R4,L4,R4
- sw L3,-8(P1)
- addu CC,CC,R4
- sw L4,-4(P1)
-
- bgtz P4,$lab64
- b $lab65
-$lab66:
- lw L1,0(P2)
- lw R1,0(P3)
- addu L1,L1,CC
- addu P1,P1,4
- sltu CC,L1,CC
- addu P2,P2,4
- addu P3,P3,4
- addu L1,L1,R1
- subu P4,P4,1
- sltu R1,L1,R1
- sw L1,-4(P1)
- addu CC,CC,R1
-
- bgtz P4,$lab66
- j $31
-$lab65:
- addu P4,P4,4
- bgtz P4,$lab66
- j $31
- .end bn_add_words
-
- # Program Unit: bn_div64
- .set at
- .set reorder
- .text
- .align 2
- .globl bn_div64
- # 321 {
- .ent bn_div64 2
-bn_div64:
- subu $sp, 64
- sw $31, 56($sp)
- sw $16, 48($sp)
- .mask 0x80010000, -56
- .frame $sp, 64, $31
- move $9, $4
- move $12, $5
- move $16, $6
- # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t;
- move $31, $0
- # 323 int i,count=2;
- li $13, 2
- # 324
- # 325 if (d == 0) return(BN_MASK2);
- bne $16, 0, $80
- li $2, -1
- b $93
-$80:
- # 326
- # 327 i=BN_num_bits_word(d);
- move $4, $16
- sw $31, 16($sp)
- sw $9, 24($sp)
- sw $12, 32($sp)
- sw $13, 40($sp)
- .livereg 0x800ff0e,0xfff
- jal BN_num_bits_word
- li $4, 32
- lw $31, 16($sp)
- lw $9, 24($sp)
- lw $12, 32($sp)
- lw $13, 40($sp)
- move $3, $2
- # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
- beq $2, $4, $81
- li $14, 1
- sll $15, $14, $2
- bleu $9, $15, $81
- # 329 {
- # 330 #if !defined(NO_STDIO) && !defined(WIN16)
- # 331 fprintf(stderr,"Division would overflow (%d)\n",i);
- # 332 #endif
- # 333 abort();
- sw $3, 8($sp)
- sw $9, 24($sp)
- sw $12, 32($sp)
- sw $13, 40($sp)
- sw $31, 26($sp)
- .livereg 0xff0e,0xfff
- jal abort
- lw $3, 8($sp)
- li $4, 32
- lw $9, 24($sp)
- lw $12, 32($sp)
- lw $13, 40($sp)
- lw $31, 26($sp)
- # 334 }
-$81:
- # 335 i=BN_BITS2-i;
- subu $3, $4, $3
- # 336 if (h >= d) h-=d;
- bltu $9, $16, $82
- subu $9, $9, $16
-$82:
- # 337
- # 338 if (i)
- beq $3, 0, $83
- # 339 {
- # 340 d<<=i;
- sll $16, $16, $3
- # 341 h=(h<<i)|(l>>(BN_BITS2-i));
- sll $24, $9, $3
- subu $25, $4, $3
- srl $14, $12, $25
- or $9, $24, $14
- # 342 l<<=i;
- sll $12, $12, $3
- # 343 }
-$83:
- # 344 dh=(d&BN_MASK2h)>>BN_BITS4;
- # 345 dl=(d&BN_MASK2l);
- and $8, $16, -65536
- srl $8, $8, 16
- and $10, $16, 65535
- li $6, -65536
-$84:
- # 346 for (;;)
- # 347 {
- # 348 if ((h>>BN_BITS4) == dh)
- srl $15, $9, 16
- bne $8, $15, $85
- # 349 q=BN_MASK2l;
- li $5, 65535
- b $86
-$85:
- # 350 else
- # 351 q=h/dh;
- divu $5, $9, $8
-$86:
- # 352
- # 353 for (;;)
- # 354 {
- # 355 t=(h-q*dh);
- mul $4, $5, $8
- subu $2, $9, $4
- move $3, $2
- # 356 if ((t&BN_MASK2h) ||
- # 357 ((dl*q) <= (
- # 358 (t<<BN_BITS4)+
- # 359 ((l&BN_MASK2h)>>BN_BITS4))))
- and $25, $2, $6
- bne $25, $0, $87
- mul $24, $10, $5
- sll $14, $3, 16
- and $15, $12, $6
- srl $25, $15, 16
- addu $15, $14, $25
- bgtu $24, $15, $88
-$87:
- # 360 break;
- mul $3, $10, $5
- b $89
-$88:
- # 361 q--;
- addu $5, $5, -1
- # 362 }
- b $86
-$89:
- # 363 th=q*dh;
- # 364 tl=q*dl;
- # 365 t=(tl>>BN_BITS4);
- # 366 tl=(tl<<BN_BITS4)&BN_MASK2h;
- sll $14, $3, 16
- and $2, $14, $6
- move $11, $2
- # 367 th+=t;
- srl $25, $3, 16
- addu $7, $4, $25
- # 368
- # 369 if (l < tl) th++;
- bgeu $12, $2, $90
- addu $7, $7, 1
-$90:
- # 370 l-=tl;
- subu $12, $12, $11
- # 371 if (h < th)
- bgeu $9, $7, $91
- # 372 {
- # 373 h+=d;
- addu $9, $9, $16
- # 374 q--;
- addu $5, $5, -1
- # 375 }
-$91:
- # 376 h-=th;
- subu $9, $9, $7
- # 377
- # 378 if (--count == 0) break;
- addu $13, $13, -1
- beq $13, 0, $92
- # 379
- # 380 ret=q<<BN_BITS4;
- sll $31, $5, 16
- # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
- sll $24, $9, 16
- srl $15, $12, 16
- or $9, $24, $15
- # 382 l=(l&BN_MASK2l)<<BN_BITS4;
- and $12, $12, 65535
- sll $12, $12, 16
- # 383 }
- b $84
-$92:
- # 384 ret|=q;
- or $31, $31, $5
- # 385 return(ret);
- move $2, $31
-$93:
- lw $16, 48($sp)
- lw $31, 56($sp)
- addu $sp, 64
- j $31
- .end bn_div64
-
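The bn_div64 listing above is compiler output with the original C interleaved as '#' comments. Reassembled from those comments, the routine reads approximately as follows; this is a reconstruction, not the canonical source, with the BN_* constants spelled out for a 32-bit BN_ULONG.

#include <stdlib.h>

typedef unsigned int BN_ULONG;		/* 32-bit words (THIRTY_TWO_BIT) */
#define BN_BITS2	32
#define BN_BITS4	16
#define BN_MASK2	0xffffffffUL
#define BN_MASK2h	0xffff0000UL
#define BN_MASK2l	0x0000ffffUL

extern int BN_num_bits_word(BN_ULONG);	/* provided by the bignum library */

/* Divide the double word h:l by d, one BN_BITS4-sized quotient digit
 * per pass, after normalising d so its top bit is set. */
BN_ULONG bn_div64(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
	BN_ULONG dh, dl, q, ret = 0, th, tl, t;
	int i, count = 2;

	if (d == 0)
		return (BN_MASK2);

	i = BN_num_bits_word(d);
	if ((i != BN_BITS2) && (h > (BN_ULONG)1 << i))
		abort();		/* "Division would overflow" in the original */

	i = BN_BITS2 - i;
	if (h >= d)
		h -= d;

	if (i) {			/* normalise d and h:l */
		d <<= i;
		h = (h << i) | (l >> (BN_BITS2 - i));
		l <<= i;
	}
	dh = (d & BN_MASK2h) >> BN_BITS4;
	dl = (d & BN_MASK2l);
	for (;;) {
		if ((h >> BN_BITS4) == dh)
			q = BN_MASK2l;
		else
			q = h / dh;	/* estimate the quotient digit */

		for (;;) {		/* correct the estimate downwards */
			t = (h - q * dh);
			if ((t & BN_MASK2h) ||
			    ((dl * q) <= ((t << BN_BITS4) +
			    ((l & BN_MASK2h) >> BN_BITS4))))
				break;
			q--;
		}
		th = q * dh;		/* subtract q*d from h:l */
		tl = q * dl;
		t = (tl >> BN_BITS4);
		tl = (tl << BN_BITS4) & BN_MASK2h;
		th += t;

		if (l < tl)
			th++;
		l -= tl;
		if (h < th) {		/* went one too far: add d back */
			h += d;
			q--;
		}
		h -= th;

		if (--count == 0)
			break;

		ret = q << BN_BITS4;	/* shift in the next half word */
		h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
		l = (l & BN_MASK2l) << BN_BITS4;
	}
	ret |= q;
	return (ret);
}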
diff --git a/lib/libcrypto/bn/asm/mo-586.pl b/lib/libcrypto/bn/asm/mo-586.pl
deleted file mode 100644
index 0982293094d..00000000000
--- a/lib/libcrypto/bn/asm/mo-586.pl
+++ /dev/null
@@ -1,603 +0,0 @@
-#!/usr/bin/env perl
-
-# This is crypto/bn/asm/x86-mont.pl (with asciz from crypto/perlasm/x86asm.pl)
-# from OpenSSL 0.9.9-dev
-
-sub ::asciz
-{ my @str=unpack("C*",shift);
- push @str,0;
- while ($#str>15) {
- &data_byte(@str[0..15]);
- foreach (0..15) { shift @str; }
- }
- &data_byte(@str) if (@str);
-}
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# October 2005
-#
-# This is "teaser" code, as it can be improved in several ways...
-# First of all, a non-SSE2 path should be implemented (yes, for now it
-# performs Montgomery multiplication/convolution only on SSE2-capable
-# CPUs such as P4; others fall back to the original code). Then the inner loop
-# can be unrolled and modulo-scheduled to improve ILP and possibly
-# moved to 128-bit XMM register bank (though it would require input
-# rearrangement and/or increase bus bandwidth utilization). Dedicated
-# squaring procedure should give further performance improvement...
-# Yet, even as a draft, the code improves the rsa512 *sign* benchmark by
-# 110%(!), rsa1024 by 70% and rsa4096 by 20%:-)
-
-# December 2006
-#
-# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
-# Integer-only code [being equipped with dedicated squaring procedure]
-# gives ~40% on rsa512 sign benchmark...
-
-push(@INC,"perlasm","../../perlasm");
-require "x86asm.pl";
-
-&asm_init($ARGV[0],$0);
-
-$sse2=0;
-for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
-
-&external_label("OPENSSL_ia32cap_P") if ($sse2);
-
-&function_begin("bn_mul_mont");
-
-$i="edx";
-$j="ecx";
-$ap="esi"; $tp="esi"; # overlapping variables!!!
-$rp="edi"; $bp="edi"; # overlapping variables!!!
-$np="ebp";
-$num="ebx";
-
-$_num=&DWP(4*0,"esp"); # stack top layout
-$_rp=&DWP(4*1,"esp");
-$_ap=&DWP(4*2,"esp");
-$_bp=&DWP(4*3,"esp");
-$_np=&DWP(4*4,"esp");
-$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
-$_sp=&DWP(4*6,"esp");
-$_bpend=&DWP(4*7,"esp");
-$frame=32; # size of above frame rounded up to 16n
-
- &xor ("eax","eax");
- &mov ("edi",&wparam(5)); # int num
- &cmp ("edi",4);
- &jl (&label("just_leave"));
-
- &lea ("esi",&wparam(0)); # put aside pointer to argument block
- &lea ("edx",&wparam(1)); # load ap
- &mov ("ebp","esp"); # saved stack pointer!
- &add ("edi",2); # extra two words on top of tp
- &neg ("edi");
- &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
- &neg ("edi");
-
-	# minimize cache contention by arranging a 2K window between the stack
- # pointer and ap argument [np is also position sensitive vector,
- # but it's assumed to be near ap, as it's allocated at ~same
- # time].
- &mov ("eax","esp");
- &sub ("eax","edx");
- &and ("eax",2047);
- &sub ("esp","eax"); # this aligns sp and ap modulo 2048
-
- &xor ("edx","esp");
- &and ("edx",2048);
- &xor ("edx",2048);
- &sub ("esp","edx"); # this splits them apart modulo 4096
-
- &and ("esp",-64); # align to cache line
-
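The cache-contention comment above boils down to the following pointer arithmetic: bring the stack pointer to the same offset as ap modulo 2048, push the two into opposite 2K halves of a 4K span, then align to a 64-byte cache line. A sketch in C, assuming the scratch area was over-allocated enough to absorb the adjustments (the helper name is made up):

#include <stdint.h>

static unsigned char *place_scratch(unsigned char *sp, const void *ap)
{
	uintptr_t a = (uintptr_t)ap;

	sp -= ((uintptr_t)sp - a) & 2047;		/* same offset mod 2048 */
	sp -= (((uintptr_t)sp ^ a) & 2048) ^ 2048;	/* opposite 2K halves */
	return (unsigned char *)((uintptr_t)sp & ~(uintptr_t)63); /* cache-line align */
}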
- ################################# load argument block...
- &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
- &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
- &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
- &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
- &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
- #&mov ("edi",&DWP(5*4,"esi"));# int num
-
- &mov ("esi",&DWP(0,"esi")); # pull n0[0]
- &mov ($_rp,"eax"); # ... save a copy of argument block
- &mov ($_ap,"ebx");
- &mov ($_bp,"ecx");
- &mov ($_np,"edx");
- &mov ($_n0,"esi");
- &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
- #&mov ($_num,$num); # redundant as $num is not reused
- &mov ($_sp,"ebp"); # saved stack pointer!
-
-if($sse2) {
-$acc0="mm0"; # mmx register bank layout
-$acc1="mm1";
-$car0="mm2";
-$car1="mm3";
-$mul0="mm4";
-$mul1="mm5";
-$temp="mm6";
-$mask="mm7";
-
- &picmeup("eax","OPENSSL_ia32cap_P");
- &bt (&DWP(0,"eax"),26);
- &jnc (&label("non_sse2"));
-
- &mov ("eax",-1);
- &movd ($mask,"eax"); # mask 32 lower bits
-
- &mov ($ap,$_ap); # load input pointers
- &mov ($bp,$_bp);
- &mov ($np,$_np);
-
- &xor ($i,$i); # i=0
- &xor ($j,$j); # j=0
-
- &movd ($mul0,&DWP(0,$bp)); # bp[0]
- &movd ($mul1,&DWP(0,$ap)); # ap[0]
- &movd ($car1,&DWP(0,$np)); # np[0]
-
- &pmuludq($mul1,$mul0); # ap[0]*bp[0]
- &movq ($car0,$mul1);
- &movq ($acc0,$mul1); # I wish movd worked for
- &pand ($acc0,$mask); # inter-register transfers
-
- &pmuludq($mul1,$_n0q); # *=n0
-
- &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
- &paddq ($car1,$acc0);
-
- &movd ($acc1,&DWP(4,$np)); # np[1]
- &movd ($acc0,&DWP(4,$ap)); # ap[1]
-
- &psrlq ($car0,32);
- &psrlq ($car1,32);
-
- &inc ($j); # j++
-&set_label("1st",16);
- &pmuludq($acc0,$mul0); # ap[j]*bp[0]
- &pmuludq($acc1,$mul1); # np[j]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &pand ($acc0,$mask);
- &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
- &paddq ($car1,$acc0); # +=ap[j]*bp[0];
- &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
- &psrlq ($car0,32);
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
- &psrlq ($car1,32);
-
- &lea ($j,&DWP(1,$j));
- &cmp ($j,$num);
- &jl (&label("1st"));
-
- &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
- &pmuludq($acc1,$mul1); # np[num-1]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &pand ($acc0,$mask);
- &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
-
- &psrlq ($car0,32);
- &psrlq ($car1,32);
-
- &paddq ($car1,$car0);
- &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
-
- &inc ($i); # i++
-&set_label("outer");
- &xor ($j,$j); # j=0
-
- &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
- &movd ($mul1,&DWP(0,$ap)); # ap[0]
- &movd ($temp,&DWP($frame,"esp")); # tp[0]
- &movd ($car1,&DWP(0,$np)); # np[0]
- &pmuludq($mul1,$mul0); # ap[0]*bp[i]
-
- &paddq ($mul1,$temp); # +=tp[0]
- &movq ($acc0,$mul1);
- &movq ($car0,$mul1);
- &pand ($acc0,$mask);
-
- &pmuludq($mul1,$_n0q); # *=n0
-
- &pmuludq($car1,$mul1);
- &paddq ($car1,$acc0);
-
- &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
- &movd ($acc1,&DWP(4,$np)); # np[1]
- &movd ($acc0,&DWP(4,$ap)); # ap[1]
-
- &psrlq ($car0,32);
- &psrlq ($car1,32);
- &paddq ($car0,$temp); # +=tp[1]
-
- &inc ($j); # j++
- &dec ($num);
-&set_label("inner");
- &pmuludq($acc0,$mul0); # ap[j]*bp[i]
- &pmuludq($acc1,$mul1); # np[j]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
- &pand ($acc0,$mask);
- &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
- &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
- &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
- &psrlq ($car0,32);
- &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
- &psrlq ($car1,32);
- &paddq ($car0,$temp); # +=tp[j+1]
-
- &dec ($num);
- &lea ($j,&DWP(1,$j)); # j++
- &jnz (&label("inner"));
-
- &mov ($num,$j);
- &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
- &pmuludq($acc1,$mul1); # np[num-1]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &pand ($acc0,$mask);
- &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
- &psrlq ($car0,32);
- &psrlq ($car1,32);
-
- &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
- &paddq ($car1,$car0);
- &paddq ($car1,$temp);
- &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
-
- &lea ($i,&DWP(1,$i)); # i++
- &cmp ($i,$num);
- &jle (&label("outer"));
-
- &emms (); # done with mmx bank
- &jmp (&label("common_tail"));
-
-&set_label("non_sse2",16);
-}
-
-if (0) {
- &mov ("esp",$_sp);
- &xor ("eax","eax"); # signal "not fast enough [yet]"
- &jmp (&label("just_leave"));
-	# While the code below provides competitive performance for
-	# all key lengths on modern Intel cores, it's still more
-	# than 10% slower for 4096-bit keys elsewhere:-( "Competitive"
- # means compared to the original integer-only assembler.
- # 512-bit RSA sign is better by ~40%, but that's about all
- # one can say about all CPUs...
-} else {
-$inp="esi"; # integer path uses these registers differently
-$word="edi";
-$carry="ebp";
-
- &mov ($inp,$_ap);
- &lea ($carry,&DWP(1,$num));
- &mov ($word,$_bp);
- &xor ($j,$j); # j=0
- &mov ("edx",$inp);
- &and ($carry,1); # see if num is even
- &sub ("edx",$word); # see if ap==bp
- &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
- &or ($carry,"edx");
- &mov ($word,&DWP(0,$word)); # bp[0]
- &jz (&label("bn_sqr_mont"));
- &mov ($_bpend,"eax");
- &mov ("eax",&DWP(0,$inp));
- &xor ("edx","edx");
-
-&set_label("mull",16);
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*bp[0]
- &add ($carry,"eax");
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
- &cmp ($j,$num);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("mull"));
-
- &mov ($carry,"edx");
- &mul ($word); # ap[num-1]*bp[0]
- &mov ($word,$_n0);
- &add ("eax",$carry);
- &mov ($inp,$_np);
- &adc ("edx",0);
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
-
- &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
- &xor ($j,$j);
- &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
- &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
-
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &mov ("eax",&DWP(4,$inp)); # np[1]
- &adc ("edx",0);
- &inc ($j);
-
- &jmp (&label("2ndmadd"));
-
-&set_label("1stmadd",16);
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*bp[i]
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
- &adc ("edx",0);
- &cmp ($j,$num);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("1stmadd"));
-
- &mov ($carry,"edx");
- &mul ($word); # ap[num-1]*bp[i]
- &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
- &mov ($word,$_n0);
- &adc ("edx",0);
- &mov ($inp,$_np);
- &add ($carry,"eax");
- &adc ("edx",0);
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
-
- &xor ($j,$j);
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
- &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
- &adc ($j,0);
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
- &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
-
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &mov ("eax",&DWP(4,$inp)); # np[1]
- &adc ("edx",0);
- &mov ($j,1);
-
-&set_label("2ndmadd",16);
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
- &adc ("edx",0);
- &cmp ($j,$num);
- &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
- &jl (&label("2ndmadd"));
-
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
- &adc ("edx",0);
- &add ($carry,"eax");
- &adc ("edx",0);
- &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
-
- &xor ("eax","eax");
- &mov ($j,$_bp); # &bp[i]
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
- &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
- &lea ($j,&DWP(4,$j));
- &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
- &cmp ($j,$_bpend);
- &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
- &je (&label("common_tail"));
-
- &mov ($word,&DWP(0,$j)); # bp[i+1]
- &mov ($inp,$_ap);
- &mov ($_bp,$j); # &bp[++i]
- &xor ($j,$j);
- &xor ("edx","edx");
- &mov ("eax",&DWP(0,$inp));
- &jmp (&label("1stmadd"));
-
-&set_label("bn_sqr_mont",16);
-$sbit=$num;
- &mov ($_num,$num);
- &mov ($_bp,$j); # i=0
-
- &mov ("eax",$word); # ap[0]
- &mul ($word); # ap[0]*ap[0]
- &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
- &mov ($sbit,"edx");
- &shr ("edx",1);
- &and ($sbit,1);
- &inc ($j);
-&set_label("sqr",16);
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*ap[0]
- &add ("eax",$carry);
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &lea ($carry,&DWP(0,$sbit,"eax",2));
- &shr ("eax",31);
- &cmp ($j,$_num);
- &mov ($sbit,"eax");
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("sqr"));
-
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
- &mov ($carry,"edx");
- &mul ($word); # ap[num-1]*ap[0]
- &add ("eax",$carry);
- &mov ($word,$_n0);
- &adc ("edx",0);
- &mov ($inp,$_np);
- &lea ($carry,&DWP(0,$sbit,"eax",2));
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
- &shr ("eax",31);
- &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
-
- &lea ($carry,&DWP(0,"eax","edx",2));
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &shr ("edx",31);
- &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
- &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
-
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &mov ($num,$j);
- &adc ("edx",0);
- &mov ("eax",&DWP(4,$inp)); # np[1]
- &mov ($j,1);
-
-&set_label("3rdmadd",16);
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
- &adc ("edx",0);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
-
- &mov ($carry,"edx");
- &mul ($word); # np[j+1]*m
- &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
- &lea ($j,&DWP(2,$j));
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
- &adc ("edx",0);
- &cmp ($j,$num);
- &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("3rdmadd"));
-
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
- &adc ("edx",0);
- &add ($carry,"eax");
- &adc ("edx",0);
- &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
-
- &mov ($j,$_bp); # i
- &xor ("eax","eax");
- &mov ($inp,$_ap);
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
- &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
- &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
- &cmp ($j,$num);
- &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
- &je (&label("common_tail"));
-
- &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
- &lea ($j,&DWP(1,$j));
- &mov ("eax",$word);
- &mov ($_bp,$j); # ++i
- &mul ($word); # ap[i]*ap[i]
- &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
- &adc ("edx",0);
- &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
- &xor ($carry,$carry);
- &cmp ($j,$num);
- &lea ($j,&DWP(1,$j));
- &je (&label("sqrlast"));
-
- &mov ($sbit,"edx"); # zaps $num
- &shr ("edx",1);
- &and ($sbit,1);
-&set_label("sqradd",16);
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*ap[i]
- &add ("eax",$carry);
- &lea ($carry,&DWP(0,"eax","eax"));
- &adc ("edx",0);
- &shr ("eax",31);
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &lea ($j,&DWP(1,$j));
- &adc ("eax",0);
- &add ($carry,$sbit);
- &adc ("eax",0);
- &cmp ($j,$_num);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &mov ($sbit,"eax");
- &jle (&label("sqradd"));
-
- &mov ($carry,"edx");
- &lea ("edx",&DWP(0,$sbit,"edx",2));
- &shr ($carry,31);
-&set_label("sqrlast");
- &mov ($word,$_n0);
- &mov ($inp,$_np);
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
-
- &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &adc ($carry,0);
- &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
- &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
-
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &lea ($num,&DWP(-1,$j));
- &adc ("edx",0);
- &mov ($j,1);
- &mov ("eax",&DWP(4,$inp)); # np[1]
-
- &jmp (&label("3rdmadd"));
-}
-
-&set_label("common_tail",16);
- &mov ($np,$_np); # load modulus pointer
- &mov ($rp,$_rp); # load result pointer
- &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
-
- &mov ("eax",&DWP(0,$tp)); # tp[0]
- &mov ($j,$num); # j=num-1
- &xor ($i,$i); # i=0 and clear CF!
-
-&set_label("sub",16);
- &sbb ("eax",&DWP(0,$np,$i,4));
- &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
- &dec ($j); # doesn't affect CF!
- &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
- &lea ($i,&DWP(1,$i)); # i++
- &jge (&label("sub"));
-
- &sbb ("eax",0); # handle upmost overflow bit
- &and ($tp,"eax");
- &not ("eax");
- &mov ($np,$rp);
- &and ($np,"eax");
- &or ($tp,$np); # tp=carry?tp:rp
-
-&set_label("copy",16); # copy or in-place refresh
- &mov ("eax",&DWP(0,$tp,$num,4));
- &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
- &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
- &dec ($num);
- &jge (&label("copy"));
-
- &mov ("esp",$_sp); # pull saved stack pointer
- &mov ("eax",1);
-&set_label("just_leave");
-&function_end("bn_mul_mont");
-
-&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
-&asm_finish();
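For orientation, both the SSE2 and the integer paths above implement the same word-level Montgomery multiplication; in C it looks roughly like the sketch below. It assumes 32-bit words, ap and bp already reduced modulo np, and n0 == -np[0]^-1 mod 2^32; the interface (separate tp scratch, void return) is illustrative rather than OpenSSL's bn_mul_mont.

#include <stdint.h>
#include <string.h>

static void mont_mul_sketch(uint32_t *rp, const uint32_t *ap,
    const uint32_t *bp, const uint32_t *np, uint32_t n0, int num,
    uint32_t *tp)				/* tp: num+2 words of scratch */
{
	int i, j;

	memset(tp, 0, (num + 2) * sizeof(*tp));

	for (i = 0; i < num; i++) {
		uint64_t c = 0;
		uint32_t m;

		for (j = 0; j < num; j++) {	/* tp += ap[] * bp[i] */
			c += (uint64_t)ap[j] * bp[i] + tp[j];
			tp[j] = (uint32_t)c;
			c >>= 32;
		}
		c += tp[num];
		tp[num] = (uint32_t)c;
		tp[num + 1] = (uint32_t)(c >> 32);

		m = tp[0] * n0;			/* makes tp[0] + m*np[0] == 0 mod 2^32 */
		c = ((uint64_t)np[0] * m + tp[0]) >> 32;
		for (j = 1; j < num; j++) {	/* tp = (tp + m*np[]) >> 32 */
			c += (uint64_t)np[j] * m + tp[j];
			tp[j - 1] = (uint32_t)c;
			c >>= 32;
		}
		c += tp[num];
		tp[num - 1] = (uint32_t)c;
		tp[num] = tp[num + 1] + (uint32_t)(c >> 32);
	}

	/* common_tail: rp = tp - np if that does not borrow, else rp = tp */
	{
		uint64_t b = 0;

		for (j = 0; j < num; j++) {
			uint64_t t = (uint64_t)tp[j] - np[j] - b;
			rp[j] = (uint32_t)t;
			b = (t >> 32) & 1;
		}
		if (((uint64_t)tp[num] - b) >> 63)	/* tp < np: keep tp */
			memcpy(rp, tp, num * sizeof(*rp));
	}
}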
diff --git a/lib/libcrypto/bn/asm/pa-risc.s b/lib/libcrypto/bn/asm/pa-risc.s
deleted file mode 100644
index 775130a1912..00000000000
--- a/lib/libcrypto/bn/asm/pa-risc.s
+++ /dev/null
@@ -1,710 +0,0 @@
- .SPACE $PRIVATE$
- .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
- .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
- .SPACE $TEXT$
- .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
- .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
- .IMPORT $global$,DATA
- .IMPORT $$dyncall,MILLICODE
-; gcc_compiled.:
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
-bn_mul_add_words
- .PROC
- .CALLINFO FRAME=0,CALLS,SAVE_RP
- .ENTRY
- stw %r2,-20(0,%r30)
- ldi 0,%r28
- extru %r23,31,16,%r2
- stw %r2,-16(0,%r30)
- extru %r23,15,16,%r23
- ldil L'65536,%r31
- fldws -16(0,%r30),%fr11R
- stw %r23,-16(0,%r30)
- ldo 12(%r25),%r29
- ldo 12(%r26),%r23
- fldws -16(0,%r30),%fr11L
-L$0002
- ldw 0(0,%r25),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0005
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw 0(0,%r26),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,0(0,%r26)
- ldw -8(0,%r29),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0010
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw -8(0,%r23),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,-8(0,%r23)
- ldw -4(0,%r29),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0015
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw -4(0,%r23),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,-4(0,%r23)
- ldw 0(0,%r29),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0020
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw 0(0,%r23),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,0(0,%r23)
- ldo 16(%r29),%r29
- ldo 16(%r25),%r25
- ldo 16(%r23),%r23
- bl L$0002,0
- ldo 16(%r26),%r26
-L$0003
- ldw -20(0,%r30),%r2
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
-bn_mul_words
- .PROC
- .CALLINFO FRAME=0,CALLS,SAVE_RP
- .ENTRY
- stw %r2,-20(0,%r30)
- ldi 0,%r28
- extru %r23,31,16,%r2
- stw %r2,-16(0,%r30)
- extru %r23,15,16,%r23
- ldil L'65536,%r31
- fldws -16(0,%r30),%fr11R
- stw %r23,-16(0,%r30)
- ldo 12(%r26),%r29
- ldo 12(%r25),%r23
- fldws -16(0,%r30),%fr11L
-L$0026
- ldw 0(0,%r25),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0029
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,0(0,%r26)
- ldw -8(0,%r23),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0033
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,-8(0,%r29)
- ldw -4(0,%r23),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0037
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,-4(0,%r29)
- ldw 0(0,%r23),%r19
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0041
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,0(0,%r29)
- ldo 16(%r23),%r23
- ldo 16(%r25),%r25
- ldo 16(%r29),%r29
- bl L$0026,0
- ldo 16(%r26),%r26
-L$0027
- ldw -20(0,%r30),%r2
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
-bn_sqr_words
- .PROC
- .CALLINFO FRAME=0,NO_CALLS
- .ENTRY
- ldo 28(%r26),%r23
- ldo 12(%r25),%r28
-L$0046
- ldw 0(0,%r25),%r21
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,0(0,%r26)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,-24(0,%r23)
- ldw -8(0,%r28),%r21
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,-20(0,%r23)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,-16(0,%r23)
- ldw -4(0,%r28),%r21
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,-12(0,%r23)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,-8(0,%r23)
- ldw 0(0,%r28),%r21
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,-4(0,%r23)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,0(0,%r23)
- ldo 16(%r28),%r28
- ldo 16(%r25),%r25
- ldo 32(%r23),%r23
- bl L$0046,0
- ldo 32(%r26),%r26
-L$0057
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .IMPORT BN_num_bits_word,CODE
- .IMPORT fprintf,CODE
- .IMPORT __iob,DATA
- .SPACE $TEXT$
- .SUBSPA $LIT$
-
- .align 4
-L$C0000
- .STRING "Division would overflow\x0a\x00"
- .IMPORT abort,CODE
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
-bn_div64
- .PROC
- .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
- .ENTRY
- stw %r2,-20(0,%r30)
- stwm %r8,128(0,%r30)
- stw %r7,-124(0,%r30)
- stw %r4,-112(0,%r30)
- stw %r3,-108(0,%r30)
- copy %r26,%r3
- copy %r25,%r4
- stw %r6,-120(0,%r30)
- ldi 0,%r7
- stw %r5,-116(0,%r30)
- movb,<> %r24,%r5,L$0059
- ldi 2,%r6
- bl L$0076,0
- ldi -1,%r28
-L$0059
- .CALL ARGW0=GR
- bl BN_num_bits_word,%r2
- copy %r5,%r26
- ldi 32,%r19
- comb,= %r19,%r28,L$0060
- subi 31,%r28,%r19
- mtsar %r19
- zvdepi 1,32,%r19
- comb,>>= %r19,%r3,L$0060
- addil LR'__iob-$global$+32,%r27
- ldo RR'__iob-$global$+32(%r1),%r26
- ldil LR'L$C0000,%r25
- .CALL ARGW0=GR,ARGW1=GR
- bl fprintf,%r2
- ldo RR'L$C0000(%r25),%r25
- .CALL
- bl abort,%r2
- nop
-L$0060
- comb,>> %r5,%r3,L$0061
- subi 32,%r28,%r28
- sub %r3,%r5,%r3
-L$0061
- comib,= 0,%r28,L$0062
- subi 31,%r28,%r19
- mtsar %r19
- zvdep %r5,32,%r5
- zvdep %r3,32,%r21
- subi 32,%r28,%r20
- mtsar %r20
- vshd 0,%r4,%r20
- or %r21,%r20,%r3
- mtsar %r19
- zvdep %r4,32,%r4
-L$0062
- extru %r5,15,16,%r23
- extru %r5,31,16,%r28
-L$0063
- extru %r3,15,16,%r19
- comb,<> %r23,%r19,L$0066
- copy %r3,%r26
- bl L$0067,0
- zdepi -1,31,16,%r29
-L$0066
- .IMPORT $$divU,MILLICODE
- bl $$divU,%r31
- copy %r23,%r25
-L$0067
- stw %r29,-16(0,%r30)
- fldws -16(0,%r30),%fr10L
- stw %r28,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r23,-16(0,%r30)
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr10L,%fr10R,%fr9
- ldw -16(0,%r30),%r8
- fstws %fr9R,-16(0,%r30)
- copy %r8,%r22
- ldw -16(0,%r30),%r8
- extru %r4,15,16,%r24
- copy %r8,%r21
-L$0068
- sub %r3,%r21,%r20
- copy %r20,%r19
- depi 0,31,16,%r19
- comib,<> 0,%r19,L$0069
- zdep %r20,15,16,%r19
- addl %r19,%r24,%r19
- comb,>>= %r19,%r22,L$0069
- sub %r22,%r28,%r22
- sub %r21,%r23,%r21
- bl L$0068,0
- ldo -1(%r29),%r29
-L$0069
- stw %r29,-16(0,%r30)
- fldws -16(0,%r30),%fr10L
- stw %r28,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r8
- stw %r23,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- copy %r8,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- extru %r19,15,16,%r20
- ldw -16(0,%r30),%r8
- zdep %r19,15,16,%r19
- addl %r8,%r20,%r20
- comclr,<<= %r19,%r4,0
- addi 1,%r20,%r20
- comb,<<= %r20,%r3,L$0074
- sub %r4,%r19,%r4
- addl %r3,%r5,%r3
- ldo -1(%r29),%r29
-L$0074
- addib,= -1,%r6,L$0064
- sub %r3,%r20,%r3
- zdep %r29,15,16,%r7
- shd %r3,%r4,16,%r3
- bl L$0063,0
- zdep %r4,15,16,%r4
-L$0064
- or %r7,%r29,%r28
-L$0076
- ldw -148(0,%r30),%r2
- ldw -124(0,%r30),%r7
- ldw -120(0,%r30),%r6
- ldw -116(0,%r30),%r5
- ldw -112(0,%r30),%r4
- ldw -108(0,%r30),%r3
- bv 0(%r2)
- ldwm -128(0,%r30),%r8
- .EXIT
- .PROCEND
diff --git a/lib/libcrypto/bn/asm/r3000.s b/lib/libcrypto/bn/asm/r3000.s
deleted file mode 100644
index e95269afa38..00000000000
--- a/lib/libcrypto/bn/asm/r3000.s
+++ /dev/null
@@ -1,646 +0,0 @@
- .file 1 "../bn_mulw.c"
- .set nobopt
- .option pic2
-
- # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C
-
- # Cc1 defaults:
- # -mabicalls
-
- # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1):
- # -quiet -dumpbase -O2 -o
-
-gcc2_compiled.:
-__gnu_compiled_c:
- .rdata
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
- .byte 0x0
- .text
- .align 2
- .globl bn_mul_add_words
- .ent bn_mul_add_words
-bn_mul_add_words:
- .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- move $12,$4
- move $14,$5
- move $9,$6
- move $13,$7
- move $8,$0
- addu $10,$12,12
- addu $11,$14,12
-$L2:
- lw $6,0($14)
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,0($12)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,0($12)
- .set macro
- .set reorder
-
- lw $6,-8($11)
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,-8($10)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,-8($10)
- .set macro
- .set reorder
-
- lw $6,-4($11)
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,-4($10)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,-4($10)
- .set macro
- .set reorder
-
- lw $6,0($11)
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,0($10)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,0($10)
- .set macro
- .set reorder
-
- addu $11,$11,16
- addu $14,$14,16
- addu $10,$10,16
- .set noreorder
- .set nomacro
- j $L2
- addu $12,$12,16
- .set macro
- .set reorder
-
-$L3:
- .set noreorder
- .set nomacro
- j $31
- move $2,$8
- .set macro
- .set reorder
-
- .end bn_mul_add_words
- .align 2
- .globl bn_mul_words
- .ent bn_mul_words
-bn_mul_words:
- .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- move $11,$4
- move $12,$5
- move $8,$6
- move $6,$0
- addu $10,$11,12
- addu $9,$12,12
-$L10:
- lw $4,0($12)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,0($11)
- .set macro
- .set reorder
-
- lw $4,-8($9)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,-8($10)
- .set macro
- .set reorder
-
- lw $4,-4($9)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,-4($10)
- .set macro
- .set reorder
-
- lw $4,0($9)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,0($10)
- .set macro
- .set reorder
-
- addu $9,$9,16
- addu $12,$12,16
- addu $10,$10,16
- .set noreorder
- .set nomacro
- j $L10
- addu $11,$11,16
- .set macro
- .set reorder
-
-$L11:
- .set noreorder
- .set nomacro
- j $31
- move $2,$6
- .set macro
- .set reorder
-
- .end bn_mul_words
- .align 2
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
- .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- move $9,$4
- addu $7,$9,28
- addu $8,$5,12
-$L18:
- lw $2,0($5)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,0($9)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,-24($7)
- .set macro
- .set reorder
-
- lw $2,-8($8)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,-20($7)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,-16($7)
- .set macro
- .set reorder
-
- lw $2,-4($8)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,-12($7)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,-8($7)
- .set macro
- .set reorder
-
- lw $2,0($8)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,-4($7)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,0($7)
- .set macro
- .set reorder
-
- addu $8,$8,16
- addu $5,$5,16
- addu $7,$7,32
- .set noreorder
- .set nomacro
- j $L18
- addu $9,$9,32
- .set macro
- .set reorder
-
-$L19:
- j $31
- .end bn_sqr_words
- .rdata
- .align 2
-$LC0:
-
- .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e
- .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f
- .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa
- .byte 0x0
- .text
- .align 2
- .globl bn_div64
- .ent bn_div64
-bn_div64:
- .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8
- .mask 0x901f0000,-8
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- subu $sp,$sp,56
- .cprestore 16
- sw $16,24($sp)
- move $16,$4
- sw $17,28($sp)
- move $17,$5
- sw $18,32($sp)
- move $18,$6
- sw $20,40($sp)
- move $20,$0
- sw $19,36($sp)
- li $19,0x00000002 # 2
- sw $31,48($sp)
- .set noreorder
- .set nomacro
- bne $18,$0,$L26
- sw $28,44($sp)
- .set macro
- .set reorder
-
- .set noreorder
- .set nomacro
- j $L43
- li $2,-1 # 0xffffffff
- .set macro
- .set reorder
-
-$L26:
- move $4,$18
- jal BN_num_bits_word
- move $4,$2
- li $2,0x00000020 # 32
- .set noreorder
- .set nomacro
- beq $4,$2,$L27
- li $2,0x00000001 # 1
- .set macro
- .set reorder
-
- sll $2,$2,$4
- sltu $2,$2,$16
- .set noreorder
- .set nomacro
- beq $2,$0,$L44
- li $5,0x00000020 # 32
- .set macro
- .set reorder
-
- la $4,__iob+32
- la $5,$LC0
- jal fprintf
- jal abort
-$L27:
- li $5,0x00000020 # 32
-$L44:
- sltu $2,$16,$18
- .set noreorder
- .set nomacro
- bne $2,$0,$L28
- subu $4,$5,$4
- .set macro
- .set reorder
-
- subu $16,$16,$18
-$L28:
- .set noreorder
- .set nomacro
- beq $4,$0,$L29
- li $10,-65536 # 0xffff0000
- .set macro
- .set reorder
-
- sll $18,$18,$4
- sll $3,$16,$4
- subu $2,$5,$4
- srl $2,$17,$2
- or $16,$3,$2
- sll $17,$17,$4
-$L29:
- srl $7,$18,16
- andi $9,$18,0xffff
-$L30:
- srl $2,$16,16
- .set noreorder
- .set nomacro
- beq $2,$7,$L34
- li $6,0x0000ffff # 65535
- .set macro
- .set reorder
-
- divu $6,$16,$7
-$L34:
- mult $6,$9
- mflo $5
- #nop
- #nop
- mult $6,$7
- and $2,$17,$10
- srl $8,$2,16
- mflo $4
-$L35:
- subu $3,$16,$4
- and $2,$3,$10
- .set noreorder
- .set nomacro
- bne $2,$0,$L36
- sll $2,$3,16
- .set macro
- .set reorder
-
- addu $2,$2,$8
- sltu $2,$2,$5
- .set noreorder
- .set nomacro
- beq $2,$0,$L36
- subu $5,$5,$9
- .set macro
- .set reorder
-
- subu $4,$4,$7
- .set noreorder
- .set nomacro
- j $L35
- addu $6,$6,-1
- .set macro
- .set reorder
-
-$L36:
- mult $6,$7
- mflo $5
- #nop
- #nop
- mult $6,$9
- mflo $4
- #nop
- #nop
- srl $3,$4,16
- sll $2,$4,16
- and $4,$2,$10
- sltu $2,$17,$4
- .set noreorder
- .set nomacro
- beq $2,$0,$L40
- addu $5,$5,$3
- .set macro
- .set reorder
-
- addu $5,$5,1
-$L40:
- sltu $2,$16,$5
- .set noreorder
- .set nomacro
- beq $2,$0,$L41
- subu $17,$17,$4
- .set macro
- .set reorder
-
- addu $16,$16,$18
- addu $6,$6,-1
-$L41:
- addu $19,$19,-1
- .set noreorder
- .set nomacro
- beq $19,$0,$L31
- subu $16,$16,$5
- .set macro
- .set reorder
-
- sll $20,$6,16
- sll $3,$16,16
- srl $2,$17,16
- or $16,$3,$2
- .set noreorder
- .set nomacro
- j $L30
- sll $17,$17,16
- .set macro
- .set reorder
-
-$L31:
- or $2,$20,$6
-$L43:
- lw $31,48($sp)
- lw $20,40($sp)
- lw $19,36($sp)
- lw $18,32($sp)
- lw $17,28($sp)
- lw $16,24($sp)
- addu $sp,$sp,56
- j $31
- .end bn_div64
-
- .globl abort .text
- .globl fprintf .text
- .globl BN_num_bits_word .text
diff --git a/lib/libcrypto/bn/asm/sparcv8plus.S b/lib/libcrypto/bn/asm/sparcv8plus.S
index 8c56e2e7e7c..63de1860f28 100644
--- a/lib/libcrypto/bn/asm/sparcv8plus.S
+++ b/lib/libcrypto/bn/asm/sparcv8plus.S
@@ -144,6 +144,19 @@
* }
*/
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+ /* They've said -xarch=v9 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME_SIZE -192
+#elif defined(__GNUC__) && defined(__arch64__)
+ /* They've said -m64 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME_SIZE -192
+#else
+# define FRAME_SIZE -96
+#endif
/*
* GNU assembler can't stand stuw:-(
*/
@@ -619,8 +632,6 @@ bn_sub_words:
* Andy.
*/
-#define FRAME_SIZE -96
-
/*
* Here is register usage map for *all* routines below.
*/