src - OpenBSD base system

diff options


context:
space:
mode:

author	Markus Friedl <markus@cvs.openbsd.org>	2003-11-18 12:39:06 +0000
committer	Markus Friedl <markus@cvs.openbsd.org>	2003-11-18 12:39:06 +0000
commit	64b184aae012bcb0f2c9f75300747843237611be (patch)
tree	c850b42f1cb31e38e93bc95f09b062b5550c3fc3 /lib/libssl/crypto
parent	cef08ea4027194e2bc3ebf65715bf77a84b1342e (diff)

use bn_asm_vax.S (from netbsd); test + ok by miod

use asm code for i386, except for the CBC code, because it is not clean PIC code. add <machine/asm.h> support to x86unix.pl tested by: nick (on 30386), henning, djm, tedu, jmc and more; no shlib minor crank necessary, only internal symbols changed.

Diffstat (limited to 'lib/libssl/crypto')

-rw-r--r--

lib/libssl/crypto/Makefile

-rw-r--r--

lib/libssl/crypto/arch/vax/bn_asm_vax.S

436

2 files changed, 478 insertions, 5 deletions

diff --git a/lib/libssl/crypto/Makefile b/lib/libssl/crypto/Makefile
index dead23398f6..18c744d706c 100644
--- a/lib/libssl/crypto/Makefile
+++ b/lib/libssl/crypto/Makefile

@@ -1,4 +1,4 @@

-# $OpenBSD: Makefile,v 1.31 2003/06/02 08:45:58 markus Exp $

+# $OpenBSD: Makefile,v 1.32 2003/11/18 12:39:05 markus Exp $

LIB= crypto

@@ -53,17 +53,17 @@ SRCS+= cbc_cksm.c cbc_enc.c cfb64enc.c cfb_enc.c \

des_old.c read2pwd.c

SRCS+= rc2_ecb.c rc2_skey.c rc2_cbc.c rc2cfb64.c

SRCS+= rc2ofb64.c

-SRCS+= rc4_skey.c rc4_enc.c

+SRCS+= rc4_skey.c

#SRCS+= rc5_skey.c rc5_ecb.c rc5cfb64.c rc5cfb64.c

#SRCS+= rc5ofb64.c rc5_enc.c

#SRCS+= i_cbc.c i_cfb64.c i_ofb64.c i_ecb.c

#SRCS+= i_skey.c

-SRCS+= bf_skey.c bf_ecb.c bf_cfb64.c bf_ofb64.c bf_enc.c

+SRCS+= bf_skey.c bf_ecb.c bf_cfb64.c bf_ofb64.c

SRCS+= c_skey.c c_ecb.c c_cfb64.c c_ofb64.c c_enc.c

SRCS+= bn_add.c bn_div.c bn_exp.c bn_lib.c

SRCS+= bn_mul.c bn_print.c bn_rand.c bn_shift.c

SRCS+= bn_word.c bn_blind.c bn_gcd.c bn_prime.c bn_err.c

-SRCS+= bn_sqr.c bn_recp.c bn_mont.c bn_mpi.c bn_asm.c bn_mod.c

+SRCS+= bn_sqr.c bn_recp.c bn_mont.c bn_mpi.c bn_mod.c

SRCS+= bn_exp2.c bn_ctx.c

SRCS+= bn_sqrt.c bn_kron.c

SRCS+= rsa_eay.c rsa_gen.c rsa_lib.c rsa_sign.c

@@ -151,7 +151,8 @@ SRCS+= ec_err.c ec_mult.c ecp_nist.c ecp_smpl.c

${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/x509v3 ${LCRYPTO_SRC}/pkcs12 \

${LCRYPTO_SRC}/comp ${LCRYPTO_SRC}/txt_db ${LCRYPTO_SRC}/md4 \

${LCRYPTO_SRC}/engine ${LCRYPTO_SRC}/dso ${LCRYPTO_SRC}/ui \

- ${LCRYPTO_SRC}/ocsp ${LCRYPTO_SRC}/ec ${LCRYPTO_SRC}/aes ${LCRYPTO_SRC}

+ ${LCRYPTO_SRC}/ocsp ${LCRYPTO_SRC}/ec ${LCRYPTO_SRC}/aes ${LCRYPTO_SRC} \

+ ${.CURDIR}/arch/${MACHINE_ARCH}

HDRS=\

crypto/aes/aes.h \

@@ -260,6 +261,42 @@ des_enc.po:

${CC} ${CFLAGS} -O1 ${CPPFLAGS} -c ${.IMPSRC} -o $@

.endif

+# Per-architecture selection of the low-level crypto primitives.

+# i386: assembler generated from the perlasm scripts; bf_cbc.c stays in C.

+# vax: hand-written bn_asm_vax.S replaces bn_asm.c; the rest stays in C.

+# everything else: the portable C implementations.

+.if (${MACHINE_ARCH} == "i386")

+CFLAGS+= -DMD5_ASM

+CFLAGS+= -DSHA1_ASM

+CFLAGS+= -DRMD160_ASM

+CFLAGS+= -DOPENBSD_CAST_ASM

+CFLAGS+= -DOPENBSD_DES_ASM

+# SSLASM is a list of "directory script" pairs consumed by the .for below.

+SSLASM=\

+	bf bf-586 \

+	bn bn-586 \

+	bn co-586 \

+	cast cast-586 \

+	des des-586 \

+	md5 md5-586 \

+	rc4 rc4-586 \

+	ripemd rmd-586 \

+	sha sha1-586

+# For each pair, generate ${f}.S by running ${dir}/asm/${f}.pl.

+.for dir f in ${SSLASM}

+SRCS+= ${f}.S

+GENERATED+=${f}.S

+${f}.S: ${LCRYPTO_SRC}/${dir}/asm/${f}.pl ${LCRYPTO_SRC}/perlasm/x86unix.pl

+	/usr/bin/perl -I${LCRYPTO_SRC}/perlasm:${LCRYPTO_SRC}/${dir}/asm \

+		${LCRYPTO_SRC}/${dir}/asm/${f}.pl openbsd-elf 386 > ${.TARGET}

+.endfor

+SRCS+= bf_cbc.c

+.else

+.if (${MACHINE_ARCH} == "vax")

+SRCS+= bf_enc.c

+SRCS+= bn_asm_vax.S

+SRCS+= rc4_enc.c

+.else

+SRCS+= bf_enc.c

+SRCS+= bn_asm.c

+SRCS+= rc4_enc.c

+.endif

+# closes the outer i386 conditional; the .endif above only closes the vax one

+.endif

all beforedepend: ${GENERATED}

.include <bsd.lib.mk>

diff --git a/lib/libssl/crypto/arch/vax/bn_asm_vax.S b/lib/libssl/crypto/arch/vax/bn_asm_vax.S
new file mode 100644
index 00000000000..bd067a55d36
--- /dev/null
+++ b/lib/libssl/crypto/arch/vax/bn_asm_vax.S

@@ -0,0 +1,436 @@

+# $OpenBSD: bn_asm_vax.S,v 1.1 2003/11/18 12:39:05 markus Exp $

+# $NetBSD: bn_asm_vax.S,v 1.1 2003/11/03 10:22:28 ragge Exp $

+#include <machine/asm.h>

+# w.j.m. 15-jan-1999

+# it's magic ...

+# ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {

+# ULONG c = 0;

+# int i;

+# for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;

+# return c;

+# }

+# Register use: r2 = r, r3 = a, r4 = n (loop counter), r5 = w,

+# r6 = running carry (preserved for the caller by the entry mask),

+# r1:r0 = 64-bit emul result.

+# Returns 0 and leaves r[] untouched when n <= 0, like the C loop above.

+# emul is a signed multiply-and-add, so every operand with its top bit

+# set needs a correction of the high word; see the fixups in the loop.

+ENTRY(bn_mul_add_words,R6)

+ movl 4(ap),r2 # *r

+ movl 8(ap),r3 # *a

+ movl 12(ap),r4 # n

+ movl 16(ap),r5 # w

+ clrl r6 # return value ("carry")

+ tstl r4 # sobgtr runs the body at least once, so

+ bleq 2f # n <= 0 must bypass the loop (carry stays 0)

+0: emul r5,(r3),(r2),r0 # w * a[0] + r[0] -> r0

+ # fixup for "negative" r[]

+ tstl (r2) # r[0] was sign-extended by emul

+ bgeq 1f

+ incl r1 # add 1 to highword

+1: # add saved carry to result

+ addl2 r6,r0

+ adwc $0,r1 # carry out of the low word goes to the high word

+ # combined fixup for "negative" w, a[]

+ tstl r5 # if w is negative...

+ bgeq 1f

+ addl2 (r3),r1 # ...add a[0] again to highword

+1: tstl (r3) # if a[0] is negative...

+ bgeq 1f

+ addl2 r5,r1 # ...add w again to highword

+1:

+ movl r0,(r2)+ # save low word in dest & advance *r

+ addl2 $4,r3 # advance *a

+ movl r1,r6 # high word in r6 for return value

+ sobgtr r4,0b # loop while --n > 0

+2: movl r6,r0 # return the final carry

+ ret

+# .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64

+#;

+#; w.j.m. 15-jan-1999

+#;

+#; it's magic ...

+#;

+#; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {

+#; ULONG c = 0;

+#; int i;

+#; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;

+#; return(c);

+#; }

+#; Register use: r2 = r, r3 = a, r4 = n (loop counter), r5 = w,

+#; r6 = running carry (preserved for the caller by the entry mask),

+#; r1:r0 = 64-bit emul result.

+#; Returns 0 and leaves r[] untouched when n <= 0, like the C loop above.

+ENTRY(bn_mul_words,R6)

+ movl 4(ap),r2 # *r

+ movl 8(ap),r3 # *a

+ movl 12(ap),r4 # n

+ movl 16(ap),r5 # w

+ clrl r6 # carry

+ tstl r4 # sobgtr runs the body at least once, so

+ bleq 2f # n <= 0 must bypass the loop (carry stays 0)

+0: emul r5,(r3),r6,r0 # w * a[0] + carry -> r0

+ # fixup for "negative" carry

+ tstl r6 # the carry was sign-extended by emul

+ bgeq 1f

+ incl r1 # add 1 to highword

+1: # combined fixup for "negative" w, a[]

+ tstl r5 # w has its top bit set:

+ bgeq 1f

+ addl2 (r3),r1 # add a[0] again to highword

+1: tstl (r3) # a[0] has its top bit set:

+ bgeq 1f

+ addl2 r5,r1 # add w again to highword

+1: movl r0,(r2)+ # save low word in dest & advance *r

+ addl2 $4,r3 # advance *a

+ movl r1,r6 # high word is the carry into the next word

+ sobgtr r4,0b # loop while --n > 0

+2: movl r6,r0 # return the final carry

+ ret

+# .title vax_bn_sqr_words unsigned square, 32*32=>64

+#;

+#; w.j.m. 15-jan-1999

+#;

+#; it's magic ...

+#;

+#; void bn_sqr_words(ULONG r[],ULONG a[],int n) {

+#; int i;

+#; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;

+#; }

+#; Register use: r2 = r, r3 = a, r4 = n (loop counter), r5 = a[i],

+#; r1:r0 = 64-bit emul result. Only r0-r5 are used, so the entry mask is 0.

+#; Does nothing when n <= 0, like the C loop above.

+ENTRY(bn_sqr_words,0)

+ movl 4(ap),r2 # r

+ movl 8(ap),r3 # a

+ movl 12(ap),r4 # n

+ tstl r4 # sobgtr runs the body at least once, so

+ bleq 2f # n <= 0 must bypass the loop

+0: movl (r3)+,r5 # r5 = a[] & advance

+ emul r5,r5,$0,r0 # a[0] * a[0] + 0 -> r0

+ # fixup for "negative" a[]

+ tstl r5 # both factors are a[0], so one test covers both

+ bgeq 1f

+ addl2 r5,r1 # add a[0] to highword once per factor

+ addl2 r5,r1

+1: movq r0,(r2)+ # store 64-bit result

+ sobgtr r4,0b # loop while --n > 0

+2: ret

+# .title vax_bn_div_words unsigned divide

+#;

+#; Richard Levitte 20-Nov-2000

+#;

+#; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)

+#; {

+#; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);

+#; }

+#;

+#; Using EDIV would be very easy, if it didn't do signed calculations.

+#; Any time any of the input numbers are signed, there are problems,

+#; usually with integer overflow, at which point it returns useless

+#; data (the quotient gets the value of l, and the remainder becomes 0).

+#;

+#; If it was just for the dividend, it would be very easy, just divide

+#; it by 2 (unsigned), do the division, multiply the resulting quotient

+#; and remainder by 2, add the bit that was dropped when dividing by 2

+#; to the remainder, and do some adjustment so the remainder doesn't

+#; end up larger than the divisor. For some cases when the divisor is

+#; negative (from EDIV's point of view, i.e. when the highest bit is set),

+#; dividing the dividend by 2 isn't enough, and since some operations

+#; might generate integer overflows even when the dividend is divided by

+#; 4 (when the high part of the shifted down dividend ends up being exactly

+#; half of the divisor, the result is the quotient 0x80000000, which is

+#; negative...) it needs to be divided by 8. Furthermore, the divisor needs

+#; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.

+#; In this case, a little extra fiddling with the remainder is required.

+#;

+#; So, the simplest way to handle this is always to divide the dividend

+#; by 8, and to divide the divisor by 2 if its highest bit is set.

+#; After EDIV has been used, the quotient gets multiplied by 8 if the

+#; original divisor was positive, otherwise 4. The remainder, oddly

+#; enough, is *always* multiplied by 8.

+#; NOTE: in the case mentioned above, where the high part of the shifted

+#; down dividend ends up being exactly half the shifted down divisor, we

+#; end up with a 33 bit quotient. That's no problem however, it usually

+#; means we have ended up with a too large remainder as well, and the

+#; problem is fixed by the last part of the algorithm (next paragraph).

+#;

+#; The routine ends with comparing the resulting remainder with the

+#; original divisor and if the remainder is larger, subtract the

+#; original divisor from it, and increase the quotient by 1. This is

+#; done until the remainder is smaller than the divisor.

+#;

+#; The complete algorithm looks like this:

+#;

+#; d' = d

+#; l' = l & 7

+#; [h,l] = [h,l] >> 3

+#; [q,r] = floor([h,l] / d) # This is the EDIV operation

+#; if (q < 0) q = -q # I doubt this is necessary any more

+#;

+#; r' = r >> 29

+#; if (d' >= 0)

+#; q' = q >> 29

+#; q = q << 3

+#; else

+#; q' = q >> 30

+#; q = q << 2

+#; r = (r << 3) + l'

+#;

+#; if (d' < 0 && (d' & 1))

+#; {

+#; [r',r] = [r',r] - [q',q]

+#; while ([r',r] < 0)

+#; {

+#; [r',r] = [r',r] + d'

+#; [q',q] = [q',q] - 1

+#; }

+#; }

+#;

+#; while ([r',r] >= d')

+#; {

+#; [r',r] = [r',r] - d'

+#; [q',q] = [q',q] + 1

+#; }

+#;

+#; return q

+#;r2 = l, q

+#;r3 = h, r

+#;r4 = d

+#;r5 = l'

+#;r6 = r'

+#;r7 = d'

+#;r8 = q'

+ENTRY(bn_div_words,R6|R7|R8)

+ movl 4(ap),r3 # h (high word of the dividend)

+ movl 8(ap),r2 # l (low word of the dividend)

+ movl 12(ap),r4 # d (divisor)

+ bicl3 $-8,r2,r5 # l' = l & 7 (the bits the shift below drops)

+ bicl3 $7,r2,r2 # clear the low 3 bits of l

+ bicl3 $-8,r3,r6 # r6 = h & 7, the bits that move from h into l

+ bicl3 $7,r3,r3 # clear the low 3 bits of h

+ addl2 r6,r2 # park the low 3 bits of h in the free bits of l

+ rotl $-3,r2,r2 # l = l >> 3 (the parked bits rotate in on top)

+ rotl $-3,r3,r3 # h = h >> 3

+ movl r4,r7 # d' = d

+ clrl r6 # r' = 0

+ clrl r8 # q' = 0

+ tstl r4 # classify d: zero, positive, or top bit set

+ beql 0f # Uh-oh, the divisor is 0...

+ bgtr 1f # a positive divisor is used unchanged

+ rotl $-1,r4,r4 # If d is negative, shift it right.

+ bicl2 $0x80000000,r4 # Since d is then a large number, the

+ # lowest bit is insignificant

+ # (contradict that, and I'll fix the problem!)

+1:

+ ediv r4,r2,r2,r3 # Do the actual division: q -> r2, r -> r3

+ tstl r2

+ bgeq 1f

+ mnegl r2,r2 # if q < 0, negate it

+1:

+ tstl r7 # was the original divisor "negative"?

+ blss 1f

+ rotl $3,r2,r2 # q = q << 3

+ bicl3 $-8,r2,r8 # q' gets the high bits from q

+ bicl3 $7,r2,r2 # and q loses the bits that wrapped around

+ brb 2f

+1: # else

+ rotl $2,r2,r2 # q = q << 2

+ bicl3 $-4,r2,r8 # q' gets the high bits from q

+ bicl3 $3,r2,r2 # and q loses the bits that wrapped around

+2:

+ rotl $3,r3,r3 # r = r << 3

+ bicl3 $-8,r3,r6 # r' gets the high bits from r

+ bicl3 $7,r3,r3 # and r loses the bits that wrapped around

+ addl2 r5,r3 # r = r + l'

+ tstl r7

+ bgeq 5f # d' >= 0: d was not halved, nothing to correct

+ bitl $1,r7

+ beql 5f # if d' < 0 && d' & 1

+ subl2 r2,r3 # [r',r] = [r',r] - [q',q]

+ sbwc r8,r6 # high half, with the borrow from the low half

+3:

+ bgeq 5f # while [r',r] < 0 (sign of the last result in r')

+ decl r2 # [q',q] = [q',q] - 1

+ sbwc $0,r8 # propagate the borrow into q'

+ addl2 r7,r3 # [r',r] = [r',r] + d'

+ adwc $0,r6 # propagate the carry into r'

+ brb 3b

+# The return points are placed in the middle to keep a short distance from

+# all the branch points

+1:

+# movl r3,r1

+ movl r2,r0 # return the low 32 bits of the quotient

+ ret

+0:

+ movl $-1,r0 # divisor was 0: return all ones

+ ret

+5:

+ tstl r6

+ bneq 6f # r' != 0: remainder still too large, keep reducing

+ cmpl r3,r7

+ blssu 1b # r < d' (unsigned): done; else [r',r] >= d'

+6:

+ subl2 r7,r3 # [r',r] = [r',r] - d'

+ sbwc $0,r6 # propagate the borrow into r'

+ incl r2 # [q',q] = [q',q] + 1

+ adwc $0,r8 # propagate the carry into q'

+ brb 5b

+# .title vax_bn_add_words unsigned add of two arrays

+#;

+#; Richard Levitte 20-Nov-2000

+#;

+#; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {

+#; ULONG c = 0;

+#; int i;

+#; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;

+#; return(c);

+#; }

+ENTRY(bn_add_words,0)

+ movl 4(ap),r2 # r

+ movl 8(ap),r3 # a

+ movl 12(ap),r4 # b

+ movl 16(ap),r5 # n

+ clrl r0 # return value (carry out) starts at 0

+ tstl r5 # n <= 0? tstl also clears C for the first adwc

+ bleq 1f # nothing to add: return 0

+0: movl (r3)+,r1 # r1 = a[i]; carry untouched

+ adwc (r4)+,r1 # r1 += b[i] + C; carry used and touched

+ movl r1,(r2)+ # r[i] = r1; carry untouched

+ sobgtr r5,0b # loop while --n > 0; carry untouched

+ adwc $0,r0 # r0 = C, the carry out of the last word

+1: ret

+#;

+#; Richard Levitte 20-Nov-2000

+#;

+#; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {

+#; ULONG c = 0;

+#; int i;

+#; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;

+#; return(c);

+#; }

+ENTRY(bn_sub_words,R6)

+ movl 4(ap),r2 # r

+ movl 8(ap),r3 # a

+ movl 12(ap),r4 # b

+ movl 16(ap),r5 # n

+ clrl r0 # return value (borrow out) starts at 0

+ tstl r5 # n <= 0? tstl also clears C for the first sbwc

+ bleq 1f # nothing to subtract: C is clear, so r0 stays 0

+0: movl (r3)+,r6 # r6 = a[i]; carry untouched

+ sbwc (r4)+,r6 # r6 -= b[i] + C; carry used and touched

+ movl r6,(r2)+ # r[i] = r6; carry untouched

+ sobgtr r5,0b # loop while --n > 0; carry untouched

+1: adwc $0,r0 # r0 = C, the borrow out of the last word

+ ret

+# Ragge 20-Sep-2003

+# Multiply a vector of 4/8 longwords by another.

+# Uses two loops and 16/64 emuls.

+ENTRY(bn_mul_comba4,R6|R7|R8|R9)

+ movl $4,r9 # 4*4

+ brb 6f # share the argument loading of bn_mul_comba8

+ENTRY(bn_mul_comba8,R6|R7|R8|R9)

+ movl $8,r9 # 8*8

+6: movl 8(ap),r3 # a[]

+ movl 12(ap),r7 # b[]

+ brb 5f # join the common multiply loop

+ENTRY(bn_sqr_comba4,R6|R7|R8|R9)

+ movl $4,r9 # 4*4

+ brb 0f # share the argument loading of bn_sqr_comba8

+ENTRY(bn_sqr_comba8,R6|R7|R8|R9)

+ movl $8,r9 # 8*8

+0:

+ movl 8(ap),r3 # a[]

+ movl r3,r7 # a[] again: squaring multiplies a[] by itself

+5: movl 4(ap),r5 # r[]

+ movl r9,r8 # outer loop count

+ clrq (r5) # clear destination, for add.

+ clrq 8(r5)

+ clrq 16(r5) # these only needed for comba8

+ clrq 24(r5)

+2: clrl r4 # carry

+ movl r9,r6 # inner loop count

+ movl (r7)+,r2 # value to multiply with

+1: emul r2,(r3),r4,r0 # r2 * a[i] + carry -> r1:r0 (signed)

+ tstl r4 # fixup for "negative" carry

+ bgeq 3f

+ incl r1

+3: tstl r2 # fixup for "negative" multiplier

+ bgeq 3f

+ addl2 (r3),r1

+3: tstl (r3) # fixup for "negative" a[i]

+ bgeq 3f

+ addl2 r2,r1

+3: addl2 r0,(r5)+ # add to destination

+ adwc $0,r1 # remember carry

+ movl r1,r4 # add carry in next emul

+ addl2 $4,r3 # advance to the next a[i]

+ sobgtr r6,1b # inner loop while --count > 0

+ movl r4,(r5) # save highest add result

+ ashl $2,r9,r4 # r4 = 4 * count, the bytes the inner loop advanced

+ subl2 r4,r3 # rewind a[] to its first word

+ subl2 $4,r4

+ subl2 r4,r5 # rewind r[] to one word past the start of this pass

+ sobgtr r8,2b # outer loop while --count > 0

+ ret