From 1c6bc079ab33d7b342870c7cdf6a9e344ff5936f Mon Sep 17 00:00:00 2001 From: Miod Vallat Date: Mon, 28 Apr 2014 21:14:51 +0000 Subject: Remove WIN32, WIN64 and MINGW32 tentacles. Also check for _LP64 rather than __arch64__ (the former being more reliable than __LP64__ or __arch64__) to tell 64-bit int platforms apart from 32-bit int platforms. Loosely based upon a diff from Martijn van Duren on tech@ --- lib/libssl/src/crypto/aes/aes_x86core.c | 5 +- lib/libssl/src/crypto/bn/asm/x86_64-gcc.c | 83 ++++++++++++++++++++---------- lib/libssl/src/crypto/bn/bn_exp.c | 7 +-- lib/libssl/src/crypto/cast/cast_lcl.h | 5 -- lib/libssl/src/crypto/des/enc_read.c | 8 --- lib/libssl/src/crypto/des/enc_writ.c | 4 -- lib/libssl/src/crypto/engine/eng_aesni.c | 4 -- lib/libssl/src/crypto/engine/eng_padlock.c | 7 +-- lib/libssl/src/crypto/engine/eng_rsax.c | 4 -- lib/libssl/src/crypto/modes/modes_lcl.h | 6 +-- lib/libssl/src/crypto/sha/sha.h | 5 +- lib/libssl/src/crypto/srp/srp_lib.c | 4 +- lib/libssl/src/crypto/ts/ts.h | 5 -- lib/libssl/src/crypto/whrlpool/wp_block.c | 4 +- 14 files changed, 62 insertions(+), 89 deletions(-) diff --git a/lib/libssl/src/crypto/aes/aes_x86core.c b/lib/libssl/src/crypto/aes/aes_x86core.c index 295ea22bb46..8b3b29e28c6 100644 --- a/lib/libssl/src/crypto/aes/aes_x86core.c +++ b/lib/libssl/src/crypto/aes/aes_x86core.c @@ -79,10 +79,7 @@ prefetch256(const void *table) #undef GETU32 #define GETU32(p) (*((u32*)(p))) -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -typedef unsigned __int64 u64; -#define U64(C) C##UI64 -#elif defined(__arch64__) +#if defined(_LP64) typedef unsigned long u64; #define U64(C) C##UL #else diff --git a/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c b/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c index b97b394661a..6a7fa4aea95 100644 --- a/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c +++ b/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c @@ -1,3 +1,7 @@ +#include "../bn_lcl.h" +#if !(defined(__GNUC__) && __GNUC__>=2) +# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ +#else /* * x86_64 BIGNUM accelerator version 0.1, December 2002. * @@ -13,24 +17,50 @@ * A. Well, that's because this code is basically a quick-n-dirty * proof-of-concept hack. As you can see it's implemented with * inline assembler, which means that you're bound to GCC and that - * there must be a room for fine-tuning. + * there might be enough room for further improvement. * * Q. Why inline assembler? - * A. x86_64 features own ABI I'm not familiar with. Which is why - * I decided to let the compiler take care of subroutine - * prologue/epilogue as well as register allocation. + * A. x86_64 features own ABI which I'm not familiar with. This is + * why I decided to let the compiler take care of subroutine + * prologue/epilogue as well as register allocation. For reference. + * Win64 implements different ABI for AMD64, different from Linux. * * Q. How much faster does it get? - * A. Unfortunately people sitting on x86_64 hardware are prohibited - * to disclose the performance numbers, so they (SuSE labs to be - * specific) wouldn't tell me. However! Very similar coding technique - * (reaching out for 128-bit result from 64x64-bit multiplication) - * results in >3 times performance improvement on MIPS and I see no - * reason why gain on x86_64 would be so much different:-) + * A. 'apps/openssl speed rsa dsa' output with no-asm: + * + * sign verify sign/s verify/s + * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2 + * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0 + * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8 + * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6 + * sign verify sign/s verify/s + * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3 + * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2 + * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0 + * + * 'apps/openssl speed rsa dsa' output with this module: + * + * sign verify sign/s verify/s + * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9 + * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7 + * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0 + * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8 + * sign verify sign/s verify/s + * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3 + * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4 + * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6 + * + * For the reference. IA-32 assembler implementation performs + * very much like 64-bit code compiled with no-asm on the same + * machine. */ #define BN_ULONG unsigned long +#undef mul +#undef mul_add +#undef sqr + /* * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; * "g"(0) let the compiler to decide where does it @@ -72,7 +102,7 @@ : "a"(a) \ : "cc"); -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; @@ -96,7 +126,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c1); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; @@ -119,7 +149,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c1); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { if (n <= 0) return; @@ -142,7 +172,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { BN_ULONG ret,waste; - asm ("divq %3" + asm ("divq %4" : "=a"(ret),"=d"(waste) : "a"(l),"d"(h),"g"(d) : "cc"); @@ -150,21 +180,21 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) return ret; } -BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) -{ BN_ULONG ret,i; +BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) +{ BN_ULONG ret=0,i=0; if (n <= 0) return 0; asm ( " subq %2,%2 \n" - ".align 16 \n" + ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" " leaq 1(%2),%2 \n" " loop 1b \n" " sbbq %0,%0 \n" - : "+a"(ret),"+c"(n),"+r"(i) + : "=&a"(ret),"+c"(n),"=&r"(i) : "r"(rp),"r"(ap),"r"(bp) : "cc" ); @@ -173,21 +203,21 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) } #ifndef SIMICS -BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) -{ BN_ULONG ret,i; +BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) +{ BN_ULONG ret=0,i=0; if (n <= 0) return 0; asm ( " subq %2,%2 \n" - ".align 16 \n" + ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" " leaq 1(%2),%2 \n" " loop 1b \n" " sbbq %0,%0 \n" - : "+a"(ret),"+c"(n),"+r"(i) + : "=&a"(ret),"+c"(n),"=&r"(i) : "r"(rp),"r"(ap),"r"(bp) : "cc" ); @@ -318,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -423,7 +452,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -462,9 +490,8 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[7]=c2; } -void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -539,9 +566,8 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) r[15]=c1; } -void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { - BN_ULONG bl,bh; BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -573,3 +599,4 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) r[6]=c1; r[7]=c2; } +#endif diff --git a/lib/libssl/src/crypto/bn/bn_exp.c b/lib/libssl/src/crypto/bn/bn_exp.c index 2047e1cc3f0..22ef643c02c 100644 --- a/lib/libssl/src/crypto/bn/bn_exp.c +++ b/lib/libssl/src/crypto/bn/bn_exp.c @@ -114,12 +114,7 @@ #include "bn_lcl.h" #include -#ifdef _WIN32 -# include -# ifndef alloca -# define alloca _alloca -# endif -#elif defined(__GNUC__) +#if defined(__GNUC__) # ifndef alloca # define alloca(s) __builtin_alloca((s)) # endif diff --git a/lib/libssl/src/crypto/cast/cast_lcl.h b/lib/libssl/src/crypto/cast/cast_lcl.h index ec14804fbbd..61facd09d8a 100644 --- a/lib/libssl/src/crypto/cast/cast_lcl.h +++ b/lib/libssl/src/crypto/cast/cast_lcl.h @@ -56,11 +56,6 @@ * [including the GNU Public Licence.] */ -#ifdef OPENSSL_SYS_WIN32 -#include -#endif - - #undef c2l #define c2l(c,l) (l =((unsigned long)(*((c)++))) , \ l|=((unsigned long)(*((c)++)))<< 8L, \ diff --git a/lib/libssl/src/crypto/des/enc_read.c b/lib/libssl/src/crypto/des/enc_read.c index 23ad458dcf6..e1ac04c5b39 100644 --- a/lib/libssl/src/crypto/des/enc_read.c +++ b/lib/libssl/src/crypto/des/enc_read.c @@ -150,11 +150,7 @@ int DES_enc_read(int fd, void *buf, int len, DES_key_schedule *sched, /* first - get the length */ while (net_num < HDRSIZE) { -#ifndef OPENSSL_SYS_WIN32 i=read(fd,(void *)&(net[net_num]),HDRSIZE-net_num); -#else - i=_read(fd,(void *)&(net[net_num]),HDRSIZE-net_num); -#endif #ifdef EINTR if ((i == -1) && (errno == EINTR)) continue; #endif @@ -176,11 +172,7 @@ int DES_enc_read(int fd, void *buf, int len, DES_key_schedule *sched, net_num=0; while (net_num < rnum) { -#ifndef OPENSSL_SYS_WIN32 i=read(fd,(void *)&(net[net_num]),rnum-net_num); -#else - i=_read(fd,(void *)&(net[net_num]),rnum-net_num); -#endif #ifdef EINTR if ((i == -1) && (errno == EINTR)) continue; #endif diff --git a/lib/libssl/src/crypto/des/enc_writ.c b/lib/libssl/src/crypto/des/enc_writ.c index 8f6b033c877..18562310eed 100644 --- a/lib/libssl/src/crypto/des/enc_writ.c +++ b/lib/libssl/src/crypto/des/enc_writ.c @@ -156,11 +156,7 @@ int DES_enc_write(int fd, const void *_buf, int len, { /* eay 26/08/92 I was not doing writing from where we * got up to. */ -#ifndef _WIN32 i=write(fd,(void *)&(outbuf[j]),outnum-j); -#else - i=_write(fd,(void *)&(outbuf[j]),outnum-j); -#endif if (i == -1) { #ifdef EINTR diff --git a/lib/libssl/src/crypto/engine/eng_aesni.c b/lib/libssl/src/crypto/engine/eng_aesni.c index 5fdb33bfded..d547d7f4656 100644 --- a/lib/libssl/src/crypto/engine/eng_aesni.c +++ b/lib/libssl/src/crypto/engine/eng_aesni.c @@ -309,11 +309,7 @@ static void aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out, } /* ===== Engine "management" functions ===== */ -#if defined(_WIN32) -typedef unsigned __int64 IA32CAP; -#else typedef unsigned long long IA32CAP; -#endif /* Prepare the ENGINE structure for registration */ static int diff --git a/lib/libssl/src/crypto/engine/eng_padlock.c b/lib/libssl/src/crypto/engine/eng_padlock.c index c27181ba758..5a80b2b16da 100644 --- a/lib/libssl/src/crypto/engine/eng_padlock.c +++ b/lib/libssl/src/crypto/engine/eng_padlock.c @@ -129,12 +129,7 @@ void ENGINE_load_padlock (void) /* We do these includes here to avoid header problems on platforms that do not have the VIA padlock anyway... */ #include -#ifdef _WIN32 -# include -# ifndef alloca -# define alloca _alloca -# endif -#elif defined(__GNUC__) +#if defined(__GNUC__) # ifndef alloca # define alloca(s) __builtin_alloca(s) # endif diff --git a/lib/libssl/src/crypto/engine/eng_rsax.c b/lib/libssl/src/crypto/engine/eng_rsax.c index c0f6851601c..1b15b6f1a3f 100644 --- a/lib/libssl/src/crypto/engine/eng_rsax.c +++ b/lib/libssl/src/crypto/engine/eng_rsax.c @@ -217,11 +217,7 @@ static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void)) #ifndef OPENSSL_NO_RSA -#ifdef _WIN32 -typedef unsigned __int64 UINT64; -#else typedef unsigned long long UINT64; -#endif typedef unsigned short UINT16; /* Table t is interleaved in the following manner: diff --git a/lib/libssl/src/crypto/modes/modes_lcl.h b/lib/libssl/src/crypto/modes/modes_lcl.h index 2fc81382734..68c0e355ad9 100644 --- a/lib/libssl/src/crypto/modes/modes_lcl.h +++ b/lib/libssl/src/crypto/modes/modes_lcl.h @@ -9,11 +9,7 @@ #include -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -typedef __int64 i64; -typedef unsigned __int64 u64; -#define U64(C) C##UI64 -#elif defined(__arch64__) +#if defined(_LP64) typedef long i64; typedef unsigned long u64; #define U64(C) C##UL diff --git a/lib/libssl/src/crypto/sha/sha.h b/lib/libssl/src/crypto/sha/sha.h index 435352c2080..c0c3c16c08a 100644 --- a/lib/libssl/src/crypto/sha/sha.h +++ b/lib/libssl/src/crypto/sha/sha.h @@ -154,10 +154,7 @@ void SHA256_Transform(SHA256_CTX *c, const unsigned char *data); #define SHA512_CBLOCK (SHA_LBLOCK*8) /* SHA-512 treats input data as a * contiguous array of 64 bit * wide big-endian values. */ -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -#define SHA_LONG64 unsigned __int64 -#define U64(C) C##UI64 -#elif defined(__arch64__) +#if defined(_LP64) #define SHA_LONG64 unsigned long #define U64(C) C##UL #else diff --git a/lib/libssl/src/crypto/srp/srp_lib.c b/lib/libssl/src/crypto/srp/srp_lib.c index 8cc94f51db8..a3a67eda2e2 100644 --- a/lib/libssl/src/crypto/srp/srp_lib.c +++ b/lib/libssl/src/crypto/srp/srp_lib.c @@ -63,9 +63,7 @@ #include #if (BN_BYTES == 8) -# if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -# define bn_pack4(a1,a2,a3,a4) ((a1##UI64<<48)|(a2##UI64<<32)|(a3##UI64<<16)|a4##UI64) -# elif defined(__arch64__) +# if defined(_LP64) # define bn_pack4(a1,a2,a3,a4) ((a1##UL<<48)|(a2##UL<<32)|(a3##UL<<16)|a4##UL) # else # define bn_pack4(a1,a2,a3,a4) ((a1##ULL<<48)|(a2##ULL<<32)|(a3##ULL<<16)|a4##ULL) diff --git a/lib/libssl/src/crypto/ts/ts.h b/lib/libssl/src/crypto/ts/ts.h index 3c5ab727db5..085e062b96c 100644 --- a/lib/libssl/src/crypto/ts/ts.h +++ b/lib/libssl/src/crypto/ts/ts.h @@ -89,11 +89,6 @@ extern "C" { #endif -#ifdef WIN32 -/* Under Win32 this is defined in wincrypt.h */ -#undef X509_NAME -#endif - #include #include diff --git a/lib/libssl/src/crypto/whrlpool/wp_block.c b/lib/libssl/src/crypto/whrlpool/wp_block.c index 11164e50f56..9c194f968b1 100644 --- a/lib/libssl/src/crypto/whrlpool/wp_block.c +++ b/lib/libssl/src/crypto/whrlpool/wp_block.c @@ -40,9 +40,7 @@ #include typedef unsigned char u8; -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32) -typedef unsigned __int64 u64; -#elif defined(__arch64__) +#if defined(_LP64) typedef unsigned long u64; #else typedef unsigned long long u64; -- cgit v1.2.3