summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiod Vallat <miod@cvs.openbsd.org>2014-04-28 21:14:51 +0000
committerMiod Vallat <miod@cvs.openbsd.org>2014-04-28 21:14:51 +0000
commit1c6bc079ab33d7b342870c7cdf6a9e344ff5936f (patch)
treec6a2c6b9c1bc952796e62fba1675de4f4ab995dc
parent41eb3a1b440b53f7e6167fb1be91fb11ccf95a18 (diff)
Remove WIN32, WIN64 and MINGW32 tentacles.
Also check for _LP64 rather than __arch64__ (the former being more reliable than __LP64__ or __arch64__) to tell platforms with a 64-bit long apart from platforms with a 32-bit long. Loosely based upon a diff from Martijn van Duren on tech@
-rw-r--r--lib/libssl/src/crypto/aes/aes_x86core.c5
-rw-r--r--lib/libssl/src/crypto/bn/asm/x86_64-gcc.c83
-rw-r--r--lib/libssl/src/crypto/bn/bn_exp.c7
-rw-r--r--lib/libssl/src/crypto/cast/cast_lcl.h5
-rw-r--r--lib/libssl/src/crypto/des/enc_read.c8
-rw-r--r--lib/libssl/src/crypto/des/enc_writ.c4
-rw-r--r--lib/libssl/src/crypto/engine/eng_aesni.c4
-rw-r--r--lib/libssl/src/crypto/engine/eng_padlock.c7
-rw-r--r--lib/libssl/src/crypto/engine/eng_rsax.c4
-rw-r--r--lib/libssl/src/crypto/modes/modes_lcl.h6
-rw-r--r--lib/libssl/src/crypto/sha/sha.h5
-rw-r--r--lib/libssl/src/crypto/srp/srp_lib.c4
-rw-r--r--lib/libssl/src/crypto/ts/ts.h5
-rw-r--r--lib/libssl/src/crypto/whrlpool/wp_block.c4
14 files changed, 62 insertions, 89 deletions
diff --git a/lib/libssl/src/crypto/aes/aes_x86core.c b/lib/libssl/src/crypto/aes/aes_x86core.c
index 295ea22bb46..8b3b29e28c6 100644
--- a/lib/libssl/src/crypto/aes/aes_x86core.c
+++ b/lib/libssl/src/crypto/aes/aes_x86core.c
@@ -79,10 +79,7 @@ prefetch256(const void *table)
#undef GETU32
#define GETU32(p) (*((u32*)(p)))
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
-typedef unsigned __int64 u64;
-#define U64(C) C##UI64
-#elif defined(__arch64__)
+#if defined(_LP64)
typedef unsigned long u64;
#define U64(C) C##UL
#else
diff --git a/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c b/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c
index b97b394661a..6a7fa4aea95 100644
--- a/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c
+++ b/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c
@@ -1,3 +1,7 @@
+#include "../bn_lcl.h"
+#if !(defined(__GNUC__) && __GNUC__>=2)
+# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
+#else
/*
* x86_64 BIGNUM accelerator version 0.1, December 2002.
*
@@ -13,24 +17,50 @@
* A. Well, that's because this code is basically a quick-n-dirty
* proof-of-concept hack. As you can see it's implemented with
* inline assembler, which means that you're bound to GCC and that
- * there must be a room for fine-tuning.
+ * there might be enough room for further improvement.
*
* Q. Why inline assembler?
- * A. x86_64 features own ABI I'm not familiar with. Which is why
- * I decided to let the compiler take care of subroutine
- * prologue/epilogue as well as register allocation.
+ * A. x86_64 has its own ABI, which I'm not familiar with. This is
+ * why I decided to let the compiler take care of subroutine
+ * prologue/epilogue as well as register allocation. For reference,
+ * Win64 implements a different ABI for AMD64 than Linux does.
*
* Q. How much faster does it get?
- * A. Unfortunately people sitting on x86_64 hardware are prohibited
- * to disclose the performance numbers, so they (SuSE labs to be
- * specific) wouldn't tell me. However! Very similar coding technique
- * (reaching out for 128-bit result from 64x64-bit multiplication)
- * results in >3 times performance improvement on MIPS and I see no
- * reason why gain on x86_64 would be so much different:-)
+ * A. 'apps/openssl speed rsa dsa' output with no-asm:
+ *
+ * sign verify sign/s verify/s
+ * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
+ * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
+ * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
+ * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
+ * sign verify sign/s verify/s
+ * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
+ * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
+ * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
+ *
+ * 'apps/openssl speed rsa dsa' output with this module:
+ *
+ * sign verify sign/s verify/s
+ * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
+ * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
+ * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
+ * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
+ * sign verify sign/s verify/s
+ * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
+ * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
+ * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
+ *
+ * For reference, the IA-32 assembler implementation performs
+ * very much like 64-bit code compiled with no-asm on the same
+ * machine.
*/
#define BN_ULONG unsigned long
+#undef mul
+#undef mul_add
+#undef sqr
+
/*
* "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
* "g"(0) let the compiler to decide where does it
@@ -72,7 +102,7 @@
: "a"(a) \
: "cc");
-BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
BN_ULONG c1=0;
@@ -96,7 +126,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
return(c1);
}
-BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
BN_ULONG c1=0;
@@ -119,7 +149,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
return(c1);
}
-void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
+void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
if (n <= 0) return;
@@ -142,7 +172,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{ BN_ULONG ret,waste;
- asm ("divq %3"
+ asm ("divq %4"
: "=a"(ret),"=d"(waste)
: "a"(l),"d"(h),"g"(d)
: "cc");
@@ -150,21 +180,21 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
return ret;
}
-BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
-{ BN_ULONG ret,i;
+BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
+{ BN_ULONG ret=0,i=0;
if (n <= 0) return 0;
asm (
" subq %2,%2 \n"
- ".align 16 \n"
+ ".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n"
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" leaq 1(%2),%2 \n"
" loop 1b \n"
" sbbq %0,%0 \n"
- : "+a"(ret),"+c"(n),"+r"(i)
+ : "=&a"(ret),"+c"(n),"=&r"(i)
: "r"(rp),"r"(ap),"r"(bp)
: "cc"
);
@@ -173,21 +203,21 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
}
#ifndef SIMICS
-BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
-{ BN_ULONG ret,i;
+BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
+{ BN_ULONG ret=0,i=0;
if (n <= 0) return 0;
asm (
" subq %2,%2 \n"
- ".align 16 \n"
+ ".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n"
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" leaq 1(%2),%2 \n"
" loop 1b \n"
" sbbq %0,%0 \n"
- : "+a"(ret),"+c"(n),"+r"(i)
+ : "=&a"(ret),"+c"(n),"=&r"(i)
: "r"(rp),"r"(ap),"r"(bp)
: "cc"
);
@@ -318,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
- BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
@@ -423,7 +452,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
- BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
@@ -462,9 +490,8 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
r[7]=c2;
}
-void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
- BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
@@ -539,9 +566,8 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
r[15]=c1;
}
-void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
- BN_ULONG bl,bh;
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
@@ -573,3 +599,4 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
r[6]=c1;
r[7]=c2;
}
+#endif
diff --git a/lib/libssl/src/crypto/bn/bn_exp.c b/lib/libssl/src/crypto/bn/bn_exp.c
index 2047e1cc3f0..22ef643c02c 100644
--- a/lib/libssl/src/crypto/bn/bn_exp.c
+++ b/lib/libssl/src/crypto/bn/bn_exp.c
@@ -114,12 +114,7 @@
#include "bn_lcl.h"
#include <stdlib.h>
-#ifdef _WIN32
-# include <malloc.h>
-# ifndef alloca
-# define alloca _alloca
-# endif
-#elif defined(__GNUC__)
+#if defined(__GNUC__)
# ifndef alloca
# define alloca(s) __builtin_alloca((s))
# endif
diff --git a/lib/libssl/src/crypto/cast/cast_lcl.h b/lib/libssl/src/crypto/cast/cast_lcl.h
index ec14804fbbd..61facd09d8a 100644
--- a/lib/libssl/src/crypto/cast/cast_lcl.h
+++ b/lib/libssl/src/crypto/cast/cast_lcl.h
@@ -56,11 +56,6 @@
* [including the GNU Public Licence.]
*/
-#ifdef OPENSSL_SYS_WIN32
-#include <stdlib.h>
-#endif
-
-
#undef c2l
#define c2l(c,l) (l =((unsigned long)(*((c)++))) , \
l|=((unsigned long)(*((c)++)))<< 8L, \
diff --git a/lib/libssl/src/crypto/des/enc_read.c b/lib/libssl/src/crypto/des/enc_read.c
index 23ad458dcf6..e1ac04c5b39 100644
--- a/lib/libssl/src/crypto/des/enc_read.c
+++ b/lib/libssl/src/crypto/des/enc_read.c
@@ -150,11 +150,7 @@ int DES_enc_read(int fd, void *buf, int len, DES_key_schedule *sched,
/* first - get the length */
while (net_num < HDRSIZE)
{
-#ifndef OPENSSL_SYS_WIN32
i=read(fd,(void *)&(net[net_num]),HDRSIZE-net_num);
-#else
- i=_read(fd,(void *)&(net[net_num]),HDRSIZE-net_num);
-#endif
#ifdef EINTR
if ((i == -1) && (errno == EINTR)) continue;
#endif
@@ -176,11 +172,7 @@ int DES_enc_read(int fd, void *buf, int len, DES_key_schedule *sched,
net_num=0;
while (net_num < rnum)
{
-#ifndef OPENSSL_SYS_WIN32
i=read(fd,(void *)&(net[net_num]),rnum-net_num);
-#else
- i=_read(fd,(void *)&(net[net_num]),rnum-net_num);
-#endif
#ifdef EINTR
if ((i == -1) && (errno == EINTR)) continue;
#endif
diff --git a/lib/libssl/src/crypto/des/enc_writ.c b/lib/libssl/src/crypto/des/enc_writ.c
index 8f6b033c877..18562310eed 100644
--- a/lib/libssl/src/crypto/des/enc_writ.c
+++ b/lib/libssl/src/crypto/des/enc_writ.c
@@ -156,11 +156,7 @@ int DES_enc_write(int fd, const void *_buf, int len,
{
/* eay 26/08/92 I was not doing writing from where we
* got up to. */
-#ifndef _WIN32
i=write(fd,(void *)&(outbuf[j]),outnum-j);
-#else
- i=_write(fd,(void *)&(outbuf[j]),outnum-j);
-#endif
if (i == -1)
{
#ifdef EINTR
diff --git a/lib/libssl/src/crypto/engine/eng_aesni.c b/lib/libssl/src/crypto/engine/eng_aesni.c
index 5fdb33bfded..d547d7f4656 100644
--- a/lib/libssl/src/crypto/engine/eng_aesni.c
+++ b/lib/libssl/src/crypto/engine/eng_aesni.c
@@ -309,11 +309,7 @@ static void aesni_ofb128_encrypt(const unsigned char *in, unsigned char *out,
}
/* ===== Engine "management" functions ===== */
-#if defined(_WIN32)
-typedef unsigned __int64 IA32CAP;
-#else
typedef unsigned long long IA32CAP;
-#endif
/* Prepare the ENGINE structure for registration */
static int
diff --git a/lib/libssl/src/crypto/engine/eng_padlock.c b/lib/libssl/src/crypto/engine/eng_padlock.c
index c27181ba758..5a80b2b16da 100644
--- a/lib/libssl/src/crypto/engine/eng_padlock.c
+++ b/lib/libssl/src/crypto/engine/eng_padlock.c
@@ -129,12 +129,7 @@ void ENGINE_load_padlock (void)
/* We do these includes here to avoid header problems on platforms that
do not have the VIA padlock anyway... */
#include <stdlib.h>
-#ifdef _WIN32
-# include <malloc.h>
-# ifndef alloca
-# define alloca _alloca
-# endif
-#elif defined(__GNUC__)
+#if defined(__GNUC__)
# ifndef alloca
# define alloca(s) __builtin_alloca(s)
# endif
diff --git a/lib/libssl/src/crypto/engine/eng_rsax.c b/lib/libssl/src/crypto/engine/eng_rsax.c
index c0f6851601c..1b15b6f1a3f 100644
--- a/lib/libssl/src/crypto/engine/eng_rsax.c
+++ b/lib/libssl/src/crypto/engine/eng_rsax.c
@@ -217,11 +217,7 @@ static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void))
#ifndef OPENSSL_NO_RSA
-#ifdef _WIN32
-typedef unsigned __int64 UINT64;
-#else
typedef unsigned long long UINT64;
-#endif
typedef unsigned short UINT16;
/* Table t is interleaved in the following manner:
diff --git a/lib/libssl/src/crypto/modes/modes_lcl.h b/lib/libssl/src/crypto/modes/modes_lcl.h
index 2fc81382734..68c0e355ad9 100644
--- a/lib/libssl/src/crypto/modes/modes_lcl.h
+++ b/lib/libssl/src/crypto/modes/modes_lcl.h
@@ -9,11 +9,7 @@
#include <machine/endian.h>
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
-typedef __int64 i64;
-typedef unsigned __int64 u64;
-#define U64(C) C##UI64
-#elif defined(__arch64__)
+#if defined(_LP64)
typedef long i64;
typedef unsigned long u64;
#define U64(C) C##UL
diff --git a/lib/libssl/src/crypto/sha/sha.h b/lib/libssl/src/crypto/sha/sha.h
index 435352c2080..c0c3c16c08a 100644
--- a/lib/libssl/src/crypto/sha/sha.h
+++ b/lib/libssl/src/crypto/sha/sha.h
@@ -154,10 +154,7 @@ void SHA256_Transform(SHA256_CTX *c, const unsigned char *data);
#define SHA512_CBLOCK (SHA_LBLOCK*8) /* SHA-512 treats input data as a
* contiguous array of 64 bit
* wide big-endian values. */
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
-#define SHA_LONG64 unsigned __int64
-#define U64(C) C##UI64
-#elif defined(__arch64__)
+#if defined(_LP64)
#define SHA_LONG64 unsigned long
#define U64(C) C##UL
#else
diff --git a/lib/libssl/src/crypto/srp/srp_lib.c b/lib/libssl/src/crypto/srp/srp_lib.c
index 8cc94f51db8..a3a67eda2e2 100644
--- a/lib/libssl/src/crypto/srp/srp_lib.c
+++ b/lib/libssl/src/crypto/srp/srp_lib.c
@@ -63,9 +63,7 @@
#include <openssl/evp.h>
#if (BN_BYTES == 8)
-# if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
-# define bn_pack4(a1,a2,a3,a4) ((a1##UI64<<48)|(a2##UI64<<32)|(a3##UI64<<16)|a4##UI64)
-# elif defined(__arch64__)
+# if defined(_LP64)
# define bn_pack4(a1,a2,a3,a4) ((a1##UL<<48)|(a2##UL<<32)|(a3##UL<<16)|a4##UL)
# else
# define bn_pack4(a1,a2,a3,a4) ((a1##ULL<<48)|(a2##ULL<<32)|(a3##ULL<<16)|a4##ULL)
diff --git a/lib/libssl/src/crypto/ts/ts.h b/lib/libssl/src/crypto/ts/ts.h
index 3c5ab727db5..085e062b96c 100644
--- a/lib/libssl/src/crypto/ts/ts.h
+++ b/lib/libssl/src/crypto/ts/ts.h
@@ -89,11 +89,6 @@
extern "C" {
#endif
-#ifdef WIN32
-/* Under Win32 this is defined in wincrypt.h */
-#undef X509_NAME
-#endif
-
#include <openssl/x509.h>
#include <openssl/x509v3.h>
diff --git a/lib/libssl/src/crypto/whrlpool/wp_block.c b/lib/libssl/src/crypto/whrlpool/wp_block.c
index 11164e50f56..9c194f968b1 100644
--- a/lib/libssl/src/crypto/whrlpool/wp_block.c
+++ b/lib/libssl/src/crypto/whrlpool/wp_block.c
@@ -40,9 +40,7 @@
#include <machine/endian.h>
typedef unsigned char u8;
-#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32)
-typedef unsigned __int64 u64;
-#elif defined(__arch64__)
+#if defined(_LP64)
typedef unsigned long u64;
#else
typedef unsigned long long u64;