summaryrefslogtreecommitdiff
path: root/lib/libssl/crypto
diff options
context:
space:
mode:
authorMarkus Friedl <markus@cvs.openbsd.org>2003-11-18 12:39:06 +0000
committerMarkus Friedl <markus@cvs.openbsd.org>2003-11-18 12:39:06 +0000
commit64b184aae012bcb0f2c9f75300747843237611be (patch)
treec850b42f1cb31e38e93bc95f09b062b5550c3fc3 /lib/libssl/crypto
parentcef08ea4027194e2bc3ebf65715bf77a84b1342e (diff)
use bn_asm_vax.S (from netbsd); test + ok by miod
use asm code for i386, except for the CBC code, because it is not clean PIC code. add <machine/asm.h> support to x86unix.pl tested by: nick (on 30386), henning, djm, tedu, jmc and more; no shlib minor crank necessary, only internal symbols changed.
Diffstat (limited to 'lib/libssl/crypto')
-rw-r--r--lib/libssl/crypto/Makefile47
-rw-r--r--lib/libssl/crypto/arch/vax/bn_asm_vax.S436
2 files changed, 478 insertions, 5 deletions
diff --git a/lib/libssl/crypto/Makefile b/lib/libssl/crypto/Makefile
index dead23398f6..18c744d706c 100644
--- a/lib/libssl/crypto/Makefile
+++ b/lib/libssl/crypto/Makefile
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile,v 1.31 2003/06/02 08:45:58 markus Exp $
+# $OpenBSD: Makefile,v 1.32 2003/11/18 12:39:05 markus Exp $
LIB= crypto
@@ -53,17 +53,17 @@ SRCS+= cbc_cksm.c cbc_enc.c cfb64enc.c cfb_enc.c \
des_old.c read2pwd.c
SRCS+= rc2_ecb.c rc2_skey.c rc2_cbc.c rc2cfb64.c
SRCS+= rc2ofb64.c
-SRCS+= rc4_skey.c rc4_enc.c
+SRCS+= rc4_skey.c
#SRCS+= rc5_skey.c rc5_ecb.c rc5cfb64.c rc5cfb64.c
#SRCS+= rc5ofb64.c rc5_enc.c
#SRCS+= i_cbc.c i_cfb64.c i_ofb64.c i_ecb.c
#SRCS+= i_skey.c
-SRCS+= bf_skey.c bf_ecb.c bf_cfb64.c bf_ofb64.c bf_enc.c
+SRCS+= bf_skey.c bf_ecb.c bf_cfb64.c bf_ofb64.c
SRCS+= c_skey.c c_ecb.c c_cfb64.c c_ofb64.c c_enc.c
SRCS+= bn_add.c bn_div.c bn_exp.c bn_lib.c
SRCS+= bn_mul.c bn_print.c bn_rand.c bn_shift.c
SRCS+= bn_word.c bn_blind.c bn_gcd.c bn_prime.c bn_err.c
-SRCS+= bn_sqr.c bn_recp.c bn_mont.c bn_mpi.c bn_asm.c bn_mod.c
+SRCS+= bn_sqr.c bn_recp.c bn_mont.c bn_mpi.c bn_mod.c
SRCS+= bn_exp2.c bn_ctx.c
SRCS+= bn_sqrt.c bn_kron.c
SRCS+= rsa_eay.c rsa_gen.c rsa_lib.c rsa_sign.c
@@ -151,7 +151,8 @@ SRCS+= ec_err.c ec_mult.c ecp_nist.c ecp_smpl.c
${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/x509v3 ${LCRYPTO_SRC}/pkcs12 \
${LCRYPTO_SRC}/comp ${LCRYPTO_SRC}/txt_db ${LCRYPTO_SRC}/md4 \
${LCRYPTO_SRC}/engine ${LCRYPTO_SRC}/dso ${LCRYPTO_SRC}/ui \
- ${LCRYPTO_SRC}/ocsp ${LCRYPTO_SRC}/ec ${LCRYPTO_SRC}/aes ${LCRYPTO_SRC}
+ ${LCRYPTO_SRC}/ocsp ${LCRYPTO_SRC}/ec ${LCRYPTO_SRC}/aes ${LCRYPTO_SRC} \
+ ${.CURDIR}/arch/${MACHINE_ARCH}
HDRS=\
crypto/aes/aes.h \
@@ -260,6 +261,42 @@ des_enc.po:
${CC} ${CFLAGS} -O1 ${CPPFLAGS} -c ${.IMPSRC} -o $@
.endif
+# Per-architecture sources: perlasm-generated .S files on i386,
+# bn_asm_vax.S on vax, and the C implementations everywhere else.
+.if ${MACHINE_ARCH} == "i386"
+CFLAGS+= -DMD5_ASM -DSHA1_ASM -DRMD160_ASM
+CFLAGS+= -DOPENBSD_CAST_ASM -DOPENBSD_DES_ASM
+# "<dir> <name>" pairs: ${LCRYPTO_SRC}/<dir>/asm/<name>.pl writes <name>.S
+SSLASM=\
+	bf bf-586 \
+	bn bn-586 \
+	bn co-586 \
+	cast cast-586 \
+	des des-586 \
+	md5 md5-586 \
+	rc4 rc4-586 \
+	ripemd rmd-586 \
+	sha sha1-586
+.for dir f in ${SSLASM}
+SRCS+= ${f}.S
+GENERATED+=${f}.S
+${f}.S: ${LCRYPTO_SRC}/${dir}/asm/${f}.pl ${LCRYPTO_SRC}/perlasm/x86unix.pl
+	/usr/bin/perl -I${LCRYPTO_SRC}/perlasm:${LCRYPTO_SRC}/${dir}/asm \
+	    ${LCRYPTO_SRC}/${dir}/asm/${f}.pl openbsd-elf 386 > ${.TARGET}
+.endfor
+# Blowfish CBC is still built from C on i386.
+SRCS+= bf_cbc.c
+.else
+# Outside i386 only the bignum primitives are architecture specific.
+SRCS+= bf_enc.c
+.if ${MACHINE_ARCH} == "vax"
+SRCS+= bn_asm_vax.S
+.else
+SRCS+= bn_asm.c
+.endif
+SRCS+= rc4_enc.c
+.endif
+
all beforedepend: ${GENERATED}
.include <bsd.lib.mk>
diff --git a/lib/libssl/crypto/arch/vax/bn_asm_vax.S b/lib/libssl/crypto/arch/vax/bn_asm_vax.S
new file mode 100644
index 00000000000..bd067a55d36
--- /dev/null
+++ b/lib/libssl/crypto/arch/vax/bn_asm_vax.S
@@ -0,0 +1,436 @@
+# $OpenBSD: bn_asm_vax.S,v 1.1 2003/11/18 12:39:05 markus Exp $
+# $NetBSD: bn_asm_vax.S,v 1.1 2003/11/03 10:22:28 ragge Exp $
+
+#include <machine/asm.h>
+
+# w.j.m. 15-jan-1999
+#
+# EMUL is a signed 32*32+32=>64 multiply-add, so every operand with its top
+# bit set needs a fixup of the high word.  Returns 0 at once when n <= 0.
+# ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
+# ULONG c = 0;
+# int i;
+# for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
+# return c;
+# }
+
+ENTRY(bn_mul_add_words,R6)
+ movl 4(ap),r2 # *r
+ movl 8(ap),r3 # *a
+ movl 12(ap),r4 # n
+ movl 16(ap),r5 # w
+ clrl r6 # return value ("carry")
+ tstl r4 # n <= 0: skip the loop, which would
+ bleq 2f # otherwise touch r[0] and a[0] once
+0: emul r5,(r3),(r2),r0 # w * a[0] + r[0] -> r0
+ # fixup for "negative" r[] (the addend was sign-extended)
+ tstl (r2)
+ bgeq 1f
+ incl r1 # add 1 to highword
+
+1: # add saved carry to result
+ addl2 r6,r0
+ adwc $0,r1
+
+ # combined fixup for "negative" w, a[]
+ tstl r5 # if w is negative...
+ bgeq 1f
+ addl2 (r3),r1 # ...add a[0] again to highword
+1: tstl (r3) # if a[0] is negative...
+ bgeq 1f
+ addl2 r5,r1 # ...add w again to highword
+1:
+ movl r0,(r2)+ # save low word in dest & advance *r
+ addl2 $4,r3 # advance *a
+ movl r1,r6 # high word in r6 for return value
+
+ sobgtr r4,0b # loop?
+
+2: movl r6,r0
+ ret
+
+# .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
+#;
+#; w.j.m. 15-jan-1999
+#;
+#; EMUL multiplies signed, so the high word is fixed up for every operand
+#; with its top bit set.  Returns 0 without touching r[] when n <= 0.
+#; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
+#; ULONG c = 0;
+#; int i;
+#; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
+#; return(c);
+#; }
+#
+
+ENTRY(bn_mul_words,R6)
+ movl 4(ap),r2 # *r
+ movl 8(ap),r3 # *a
+ movl 12(ap),r4 # n
+ movl 16(ap),r5 # w
+ clrl r6 # carry
+ tstl r4 # n <= 0: skip the loop, which would
+ bleq 2f # otherwise touch r[0] and a[0] once
+0: emul r5,(r3),r6,r0 # w * a[0] + carry -> r0
+ # fixup for "negative" carry (the addend was sign-extended)
+ tstl r6
+ bgeq 1f
+ incl r1
+
+1: # combined fixup for "negative" w, a[]
+ tstl r5 # if w is negative...
+ bgeq 1f
+ addl2 (r3),r1 # ...add a[0] again to highword
+1: tstl (r3) # if a[0] is negative...
+ bgeq 1f
+ addl2 r5,r1 # ...add w again to highword
+
+1: movl r0,(r2)+ # save low word in dest & advance *r
+ addl2 $4,r3 # advance *a
+ movl r1,r6 # high word is the carry into the next word
+
+ sobgtr r4,0b
+
+2: movl r6,r0
+ ret
+
+
+
+# .title vax_bn_sqr_words unsigned square, 32*32=>64
+#;
+#; w.j.m. 15-jan-1999
+#;
+#; EMUL squares a[i] as signed; a set top bit is fixed up in the high word.
+#; Does nothing when n <= 0.
+#; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
+#; int i;
+#; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
+#; }
+#
+
+ENTRY(bn_sqr_words,0)
+ movl 4(ap),r2 # r
+ movl 8(ap),r3 # a
+ movl 12(ap),r4 # n
+ tstl r4 # n <= 0: skip the loop, which would
+ bleq 2f # otherwise square a[0] into r[0..1] once
+0: movl (r3)+,r5 # r5 = a[] & advance
+ emul r5,r5,$0,r0 # a[0] * a[0] + 0 -> r0
+
+ # fixup for "negative" a[]: add a[0] to the highword twice
+ tstl r5
+ bgeq 1f
+ addl2 r5,r1
+ addl2 r5,r1
+
+1: movq r0,(r2)+ # store 64-bit result
+
+ sobgtr r4,0b # loop
+
+2: ret
+
+
+# .title vax_bn_div_words unsigned divide
+#;
+#; Richard Levitte 20-Nov-2000
+#;
+#; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
+#; {
+#; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
+#; }
+#;
+#; Using EDIV would be very easy, if it didn't do signed calculations.
+#; Any time any of the input numbers are signed, there are problems,
+#; usually with integer overflow, at which point it returns useless
+#; data (the quotient gets the value of l, and the remainder becomes 0).
+#;
+#; If it was just for the dividend, it would be very easy, just divide
+#; it by 2 (unsigned), do the division, multiply the resulting quotient
+#; and remainder by 2, add the bit that was dropped when dividing by 2
+#; to the remainder, and do some adjustment so the remainder doesn't
+#; end up larger than the divisor. For some cases when the divisor is
+#; negative (from EDIV's point of view, i.e. when the highest bit is set),
+#; dividing the dividend by 2 isn't enough, and since some operations
+#; might generate integer overflows even when the dividend is divided by
+#; 4 (when the high part of the shifted down dividend ends up being exactly
+#; half of the divisor, the result is the quotient 0x80000000, which is
+#; negative...) it needs to be divided by 8. Furthermore, the divisor needs
+#; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
+#; In this case, a little extra fiddling with the remainder is required.
+#;
+#; So, the simplest way to handle this is always to divide the dividend
+#; by 8, and to divide the divisor by 2 if its highest bit is set.
+#; After EDIV has been used, the quotient gets multiplied by 8 if the
+#; original divisor was positive, otherwise 4. The remainder, oddly
+#; enough, is *always* multiplied by 8.
+#; NOTE: in the case mentioned above, where the high part of the shifted
+#; down dividend ends up being exactly half the shifted down divisor, we
+#; end up with a 33 bit quotient. That's no problem however, it usually
+#; means we have ended up with a too large remainder as well, and the
+#; problem is fixed by the last part of the algorithm (next paragraph).
+#;
+#; The routine ends with comparing the resulting remainder with the
+#; original divisor and if the remainder is larger, subtract the
+#; original divisor from it, and increase the quotient by 1. This is
+#; done until the remainder is smaller than the divisor.
+#;
+#; The complete algorithm looks like this:
+#;
+#; d' = d
+#; l' = l & 7
+#; [h,l] = [h,l] >> 3
+#; [q,r] = floor([h,l] / d) # This is the EDIV operation
+#; if (q < 0) q = -q # I doubt this is necessary any more
+#;
+#; r' = r >> 29
+#; if (d' >= 0)
+#; q' = q >> 29
+#; q = q << 3
+#; else
+#; q' = q >> 30
+#; q = q << 2
+#; r = (r << 3) + l'
+#;
+#; if (d' < 0 && (d' & 1))
+#; {
+#; [r',r] = [r',r] - [q',q]
+#; while ([r',r] < 0)
+#; {
+#; [r',r] = [r',r] + d'
+#; [q',q] = [q',q] - 1
+#; }
+#; }
+#;
+#; while ([r',r] >= d')
+#; {
+#; [r',r] = [r',r] - d'
+#; [q',q] = [q',q] + 1
+#; }
+#;
+#; return q
+#
+#;r2 = l, q
+#;r3 = h, r
+#;r4 = d
+#;r5 = l'
+#;r6 = r'
+#;r7 = d'
+#;r8 = q'
+#
+
+ENTRY(bn_div_words,R6|R7|R8)
+ movl 4(ap),r3 # h
+ movl 8(ap),r2 # l
+ movl 12(ap),r4 # d
+ # Unsigned [h,l] / d on top of signed EDIV: scale down, divide, fix up.
+ bicl3 $-8,r2,r5 # l' = l & 7
+ bicl3 $7,r2,r2 # clear the low 3 bits of l
+
+ bicl3 $-8,r3,r6 # r6 = h & 7, the bits that move into l
+ bicl3 $7,r3,r3 # clear the low 3 bits of h
+
+ addl2 r6,r2 # park them in the cleared low bits of l
+ # (the rotate below carries them to the top of l)
+ rotl $-3,r2,r2 # l = l >> 3
+ rotl $-3,r3,r3 # h = h >> 3
+
+ movl r4,r7 # d' = d
+
+ clrl r6 # r' = 0
+ clrl r8 # q' = 0
+
+ tstl r4
+ beql 0f # Uh-oh, the divisor is 0...
+ bgtr 1f # a positive d is used unchanged
+ rotl $-1,r4,r4 # If d is negative, shift it right.
+ bicl2 $0x80000000,r4 # Since d is then a large number, the
+ # lowest bit is insignificant
+ # (contradict that, and I'll fix the problem!)
+1:
+ ediv r4,r2,r2,r3 # Do the actual division: q -> r2, r -> r3
+
+ tstl r2
+ bgeq 1f
+ mnegl r2,r2 # if q < 0, negate it
+1:
+ tstl r7 # if (d' >= 0)
+ blss 1f
+ rotl $3,r2,r2 # q = q << 3
+ bicl3 $-8,r2,r8 # q' gets the high bits from q
+ bicl3 $7,r2,r2 # q keeps the rest
+ brb 2f
+
+1: # else
+ rotl $2,r2,r2 # q = q << 2
+ bicl3 $-4,r2,r8 # q' gets the high bits from q
+ bicl3 $3,r2,r2 # q keeps the rest
+2:
+ rotl $3,r3,r3 # r = r << 3
+ bicl3 $-8,r3,r6 # r' gets the high bits from r
+ bicl3 $7,r3,r3 # r keeps the rest
+ addl2 r5,r3 # r = r + l'
+
+ tstl r7
+ bgeq 5f
+ bitl $1,r7
+ beql 5f # if d' < 0 && d' & 1
+ subl2 r2,r3 # [r',r] = [r',r] - [q',q]
+ sbwc r8,r6 # high words, with the borrow from above
+3:
+ bgeq 5f # while [r',r] < 0 (sign of r' from sbwc/adwc)
+ decl r2 # [q',q] = [q',q] - 1
+ sbwc $0,r8 # propagate the borrow into q'
+ addl2 r7,r3 # [r',r] = [r',r] + d'
+ adwc $0,r6 # propagate the carry into r'
+ brb 3b
+
+# The return points are placed in the middle to keep a short distance from
+# all the branch points
+1: # normal exit: return q
+# movl r3,r1
+ movl r2,r0
+ ret
+0: # d == 0
+ movl $-1,r0 # return all ones
+ ret
+5:
+ tstl r6 # r' != 0: [r',r] is certainly >= d'
+ bneq 6f
+ cmpl r3,r7 # otherwise compare r with d' (unsigned)
+ blssu 1b # while [r',r] >= d'
+6:
+ subl2 r7,r3 # [r',r] = [r',r] - d'
+ sbwc $0,r6 # propagate the borrow into r'
+ incl r2 # [q',q] = [q',q] + 1
+ adwc $0,r8 # propagate the carry into q'
+ brb 5b
+
+
+
+# .title vax_bn_add_words unsigned add of two arrays
+#;
+#; Richard Levitte 20-Nov-2000
+#;
+#; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+#; ULONG c = 0;
+#; int i;
+#; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
+#; return(c);
+#; }
+#
+
+ENTRY(bn_add_words,0)
+ movl 4(ap),r2 # r
+ movl 8(ap),r3 # a
+ movl 12(ap),r4 # b
+ movl 16(ap),r5 # n
+ clrl r0 # return value stays 0 when n <= 0
+
+ tstl r5 # also clears C ahead of the first adwc
+ bleq 1f # n <= 0: nothing to add
+
+0: movl (r3)+,r1 # carry untouched
+ adwc (r4)+,r1 # carry used and touched
+ movl r1,(r2)+ # carry untouched
+ sobgtr r5,0b # carry untouched
+
+ adwc $0,r0 # r0 = carry out of the last word
+1: ret
+
+#;
+#; Richard Levitte 20-Nov-2000
+#;
+#; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
+#; ULONG c = 0;
+#; int i;
+#; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
+#; return(c);
+#; }
+#
+
+ENTRY(bn_sub_words,R6)
+ movl 4(ap),r2 # r
+ movl 8(ap),r3 # a
+ movl 12(ap),r4 # b
+ movl 16(ap),r5 # n
+ clrl r0 # return value starts at 0
+
+ tstl r5 # also clears C ahead of the first sbwc
+ bleq 1f # n <= 0: skip the loop, C is clear
+
+0: movl (r3)+,r6 # carry untouched
+ sbwc (r4)+,r6 # carry used and touched
+ movl r6,(r2)+ # carry untouched
+ sobgtr r5,0b # carry untouched
+
+1: adwc $0,r0 # r0 = borrow out of the last word
+ ret
+
+#
+# Ragge 20-Sep-2003
+#
+# Multiply a vector of 4/8 longword by another.
+# Uses two loops and 16/64 emuls.
+#
+
+ENTRY(bn_mul_comba4,R6|R7|R8|R9)
+ movl $4,r9 # 4*4
+ brb 6f # share the argument loading below
+
+ENTRY(bn_mul_comba8,R6|R7|R8|R9)
+ movl $8,r9 # 8*8
+
+6: movl 8(ap),r3 # a[]
+ movl 12(ap),r7 # b[]
+ brb 5f # on to the common multiply loops
+
+ENTRY(bn_sqr_comba4,R6|R7|R8|R9)
+ movl $4,r9 # 4*4
+ brb 0f # share the argument loading below
+
+ENTRY(bn_sqr_comba8,R6|R7|R8|R9)
+ movl $8,r9 # 8*8
+
+0: # squaring is a[] times a[]
+ movl 8(ap),r3 # a[]
+ movl r3,r7 # a[]
+
+5: movl 4(ap),r5 # r[]
+ movl r9,r8 # outer loop count
+
+ clrq (r5) # clear destination, for add.
+ clrq 8(r5)
+ clrq 16(r5) # these only needed for comba8
+ clrq 24(r5)
+
+2: clrl r4 # carry
+ movl r9,r6 # inner loop count
+ movl (r7)+,r2 # value to multiply with
+
+1: emul r2,(r3),r4,r0 # r2 * a[] word + carry -> r0 (low), r1 (high)
+ tstl r4 # fixup for "negative" carry
+ bgeq 3f
+ incl r1
+3: tstl r2 # fixup for "negative" multiplier
+ bgeq 3f
+ addl2 (r3),r1
+3: tstl (r3) # fixup for "negative" a[] word
+ bgeq 3f
+ addl2 r2,r1
+
+3: addl2 r0,(r5)+ # add to destination
+ adwc $0,r1 # remember carry
+ movl r1,r4 # add carry in next emul
+ addl2 $4,r3 # next a[] word
+ sobgtr r6,1b
+
+ movl r4,(r5) # save highest add result
+
+ ashl $2,r9,r4 # r4 = 4 * word count, the bytes just walked
+ subl2 r4,r3 # rewind a[] to its first word
+ subl2 $4,r4
+ subl2 r4,r5 # r[] restarts one word after the previous pass
+
+ sobgtr r8,2b # next multiplier word
+
+ ret