src - OpenBSD base system

diff options


context:
space:
mode:

author	Todd C. Miller <millert@cvs.openbsd.org>	2003-01-08 19:54:00 +0000
committer	Todd C. Miller <millert@cvs.openbsd.org>	2003-01-08 19:54:00 +0000
commit	386ce957be10fcd8d5d5e8db04c325f88de7b6ad (patch)
tree	70f5105dc2063e061dba333498e6422e6b753184 /sys
parent	088589c8494cf3359bba01a446a61f8d32840065 (diff)

Move the rounds into separate functions on sparc64 so gcc's optimizer doesn't blow up. This is a hack but is better than compiling sha1.c with -O0 on sparc64. From NetBSD (mrg). deraadt@ OK

Diffstat (limited to 'sys')

-rw-r--r-- sys/arch/sparc64/conf/Makefile.sparc64

-rw-r--r-- sys/crypto/sha1.c

2 files changed, 69 insertions, 10 deletions

diff --git a/sys/arch/sparc64/conf/Makefile.sparc64 b/sys/arch/sparc64/conf/Makefile.sparc64
index b91c72e02aa..ae52a993d41 100644
--- a/sys/arch/sparc64/conf/Makefile.sparc64
+++ b/sys/arch/sparc64/conf/Makefile.sparc64

@@ -158,7 +158,4 @@ install-kernel-${MACHINE_NAME}:

mv /nbsd /bsd

.endif

-sha1.o: $S/crypto/sha1.c

- ${NORMAL_C} -O0

%RULES

diff --git a/sys/crypto/sha1.c b/sys/crypto/sha1.c
index 69fd63088b9..73f04e0a2bf 100644
--- a/sys/crypto/sha1.c
+++ b/sys/crypto/sha1.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: sha1.c,v 1.2 2000/06/04 16:37:02 deraadt Exp $ */

+/* $OpenBSD: sha1.c,v 1.3 2003/01/08 19:53:59 millert Exp $ */

* SHA-1 in C

@@ -44,21 +44,74 @@

#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);

#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);

+typedef union {

+ u_char c[64];

+ u_int l[16];

+} CHAR64LONG16;

+#ifdef __sparc_v9__

+static void do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);

+static void do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);

+static void do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);

+static void do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *);

+#define nR0(v,w,x,y,z,i) R0(*v,*w,*x,*y,*z,i)

+#define nR1(v,w,x,y,z,i) R1(*v,*w,*x,*y,*z,i)

+#define nR2(v,w,x,y,z,i) R2(*v,*w,*x,*y,*z,i)

+#define nR3(v,w,x,y,z,i) R3(*v,*w,*x,*y,*z,i)

+#define nR4(v,w,x,y,z,i) R4(*v,*w,*x,*y,*z,i)

+static void

+do_R01(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)

+ nR0(a,b,c,d,e, 0); nR0(e,a,b,c,d, 1); nR0(d,e,a,b,c, 2); nR0(c,d,e,a,b, 3);

+ nR0(b,c,d,e,a, 4); nR0(a,b,c,d,e, 5); nR0(e,a,b,c,d, 6); nR0(d,e,a,b,c, 7);

+ nR0(c,d,e,a,b, 8); nR0(b,c,d,e,a, 9); nR0(a,b,c,d,e,10); nR0(e,a,b,c,d,11);

+ nR0(d,e,a,b,c,12); nR0(c,d,e,a,b,13); nR0(b,c,d,e,a,14); nR0(a,b,c,d,e,15);

+ nR1(e,a,b,c,d,16); nR1(d,e,a,b,c,17); nR1(c,d,e,a,b,18); nR1(b,c,d,e,a,19);

+static void

+do_R2(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)

+ nR2(a,b,c,d,e,20); nR2(e,a,b,c,d,21); nR2(d,e,a,b,c,22); nR2(c,d,e,a,b,23);

+ nR2(b,c,d,e,a,24); nR2(a,b,c,d,e,25); nR2(e,a,b,c,d,26); nR2(d,e,a,b,c,27);

+ nR2(c,d,e,a,b,28); nR2(b,c,d,e,a,29); nR2(a,b,c,d,e,30); nR2(e,a,b,c,d,31);

+ nR2(d,e,a,b,c,32); nR2(c,d,e,a,b,33); nR2(b,c,d,e,a,34); nR2(a,b,c,d,e,35);

+ nR2(e,a,b,c,d,36); nR2(d,e,a,b,c,37); nR2(c,d,e,a,b,38); nR2(b,c,d,e,a,39);

+static void

+do_R3(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)

+ nR3(a,b,c,d,e,40); nR3(e,a,b,c,d,41); nR3(d,e,a,b,c,42); nR3(c,d,e,a,b,43);

+ nR3(b,c,d,e,a,44); nR3(a,b,c,d,e,45); nR3(e,a,b,c,d,46); nR3(d,e,a,b,c,47);

+ nR3(c,d,e,a,b,48); nR3(b,c,d,e,a,49); nR3(a,b,c,d,e,50); nR3(e,a,b,c,d,51);

+ nR3(d,e,a,b,c,52); nR3(c,d,e,a,b,53); nR3(b,c,d,e,a,54); nR3(a,b,c,d,e,55);

+ nR3(e,a,b,c,d,56); nR3(d,e,a,b,c,57); nR3(c,d,e,a,b,58); nR3(b,c,d,e,a,59);

+static void

+do_R4(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d, u_int32_t *e, CHAR64LONG16 *block)

+ nR4(a,b,c,d,e,60); nR4(e,a,b,c,d,61); nR4(d,e,a,b,c,62); nR4(c,d,e,a,b,63);

+ nR4(b,c,d,e,a,64); nR4(a,b,c,d,e,65); nR4(e,a,b,c,d,66); nR4(d,e,a,b,c,67);

+ nR4(c,d,e,a,b,68); nR4(b,c,d,e,a,69); nR4(a,b,c,d,e,70); nR4(e,a,b,c,d,71);

+ nR4(d,e,a,b,c,72); nR4(c,d,e,a,b,73); nR4(b,c,d,e,a,74); nR4(a,b,c,d,e,75);

+ nR4(e,a,b,c,d,76); nR4(d,e,a,b,c,77); nR4(c,d,e,a,b,78); nR4(b,c,d,e,a,79);

+#endif

/* Hash a single 512-bit block. This is the core of the algorithm. */

void SHA1Transform(u_int32_t state[5], unsigned char buffer[64])

{

u_int32_t a, b, c, d, e;

-typedef union {

- unsigned char c[64];

- unsigned int l[16];

-} CHAR64LONG16;

CHAR64LONG16* block;

#ifdef SHA1HANDSOFF

- static unsigned char workspace[64];

+ static CHAR64LONG16 workspace;

- block = (CHAR64LONG16 *)workspace;

+ block = &workspace;

bcopy(buffer, block, 64);

#else

block = (CHAR64LONG16 *)buffer;

@@ -69,6 +122,13 @@ CHAR64LONG16* block;

c = state[2];

d = state[3];

e = state[4];

+#ifdef __sparc_v9__

+ do_R01(&a, &b, &c, &d, &e, block);

+ do_R2(&a, &b, &c, &d, &e, block);

+ do_R3(&a, &b, &c, &d, &e, block);

+ do_R4(&a, &b, &c, &d, &e, block);

+#else

/* 4 rounds of 20 operations each. Loop unrolled. */

R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);

R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);

@@ -90,6 +150,8 @@ CHAR64LONG16* block;

R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);

R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);

R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);

+#endif

/* Add the working vars back into context.state[] */

state[0] += a;

state[1] += b;