diff options
author | Miod Vallat <miod@cvs.openbsd.org> | 2015-09-12 09:01:46 +0000 |
---|---|---|
committer | Miod Vallat <miod@cvs.openbsd.org> | 2015-09-12 09:01:46 +0000 |
commit | bfe7b374bf5e6d2fdb3c097acddca30069ebd808 (patch) | |
tree | ff7299b17e6b62aeb57256c59d168412494cec32 | |
parent | 01b5ceabcbb057a98bc8280c9a9b613358bcc67d (diff) |
Remove horribly old and outdated `documentation' for the assembly code.
-rw-r--r-- | lib/libssl/src/crypto/bf/asm/bf-686.pl | 127 | ||||
-rw-r--r-- | lib/libssl/src/crypto/bf/asm/readme | 10 | ||||
-rw-r--r-- | lib/libssl/src/crypto/bn/asm/README | 27 | ||||
-rw-r--r-- | lib/libssl/src/crypto/camellia/asm/BSD_license.txt | 24 | ||||
-rw-r--r-- | lib/libssl/src/crypto/cast/asm/readme | 7 | ||||
-rw-r--r-- | lib/libssl/src/crypto/des/asm/readme | 131 |
6 files changed, 0 insertions, 326 deletions
diff --git a/lib/libssl/src/crypto/bf/asm/bf-686.pl b/lib/libssl/src/crypto/bf/asm/bf-686.pl deleted file mode 100644 index 8e4c25f5984..00000000000 --- a/lib/libssl/src/crypto/bf/asm/bf-686.pl +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/local/bin/perl - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; -require "cbc.pl"; - -&asm_init($ARGV[0],"bf-686.pl"); - -$BF_ROUNDS=16; -$BF_OFF=($BF_ROUNDS+2)*4; -$L="ecx"; -$R="edx"; -$P="edi"; -$tot="esi"; -$tmp1="eax"; -$tmp2="ebx"; -$tmp3="ebp"; - -&des_encrypt("BF_encrypt",1); -&des_encrypt("BF_decrypt",0); -&cbc("BF_cbc_encrypt","BF_encrypt","BF_decrypt",1,4,5,3,-1,-1); - -&asm_finish(); - -&file_end(); - -sub des_encrypt - { - local($name,$enc)=@_; - - &function_begin($name,""); - - &comment(""); - &comment("Load the 2 words"); - &mov("eax",&wparam(0)); - &mov($L,&DWP(0,"eax","",0)); - &mov($R,&DWP(4,"eax","",0)); - - &comment(""); - &comment("P pointer, s and enc flag"); - &mov($P,&wparam(1)); - - &xor( $tmp1, $tmp1); - &xor( $tmp2, $tmp2); - - # encrypting part - - if ($enc) - { - &xor($L,&DWP(0,$P,"",0)); - for ($i=0; $i<$BF_ROUNDS; $i+=2) - { - &comment(""); - &comment("Round $i"); - &BF_ENCRYPT($i+1,$R,$L,$P,$tot,$tmp1,$tmp2,$tmp3); - - &comment(""); - &comment("Round ".sprintf("%d",$i+1)); - &BF_ENCRYPT($i+2,$L,$R,$P,$tot,$tmp1,$tmp2,$tmp3); - } - &xor($R,&DWP(($BF_ROUNDS+1)*4,$P,"",0)); - - &mov("eax",&wparam(0)); - &mov(&DWP(0,"eax","",0),$R); - &mov(&DWP(4,"eax","",0),$L); - &function_end_A($name); - } - else - { - &xor($L,&DWP(($BF_ROUNDS+1)*4,$P,"",0)); - for ($i=$BF_ROUNDS; $i>0; $i-=2) - { - &comment(""); - &comment("Round $i"); - &BF_ENCRYPT($i,$R,$L,$P,$tot,$tmp1,$tmp2,$tmp3); - &comment(""); - &comment("Round ".sprintf("%d",$i-1)); - &BF_ENCRYPT($i-1,$L,$R,$P,$tot,$tmp1,$tmp2,$tmp3); - } - &xor($R,&DWP(0,$P,"",0)); - - &mov("eax",&wparam(0)); - &mov(&DWP(0,"eax","",0),$R); - &mov(&DWP(4,"eax","",0),$L); - &function_end_A($name); - } - - &function_end_B($name); - } - -sub BF_ENCRYPT - { - local($i,$L,$R,$P,$tot,$tmp1,$tmp2,$tmp3)=@_; - - &rotr( $R, 16); - &mov( $tot, &DWP(&n2a($i*4),$P,"",0)); - - &movb( &LB($tmp1), &HB($R)); - &movb( &LB($tmp2), &LB($R)); - - &rotr( $R, 16); - &xor( $L, $tot); - - &mov( $tot, &DWP(&n2a($BF_OFF+0x0000),$P,$tmp1,4)); - &mov( $tmp3, &DWP(&n2a($BF_OFF+0x0400),$P,$tmp2,4)); - - &movb( &LB($tmp1), &HB($R)); - &movb( &LB($tmp2), &LB($R)); - - &add( $tot, $tmp3); - &mov( $tmp1, &DWP(&n2a($BF_OFF+0x0800),$P,$tmp1,4)); # delay - - &xor( $tot, $tmp1); - &mov( $tmp3, &DWP(&n2a($BF_OFF+0x0C00),$P,$tmp2,4)); - - &add( $tot, $tmp3); - &xor( $tmp1, $tmp1); - - &xor( $L, $tot); - # delay - } - -sub n2a - { - sprintf("%d",$_[0]); - } - diff --git a/lib/libssl/src/crypto/bf/asm/readme b/lib/libssl/src/crypto/bf/asm/readme deleted file mode 100644 index 2385fa3812c..00000000000 --- a/lib/libssl/src/crypto/bf/asm/readme +++ /dev/null @@ -1,10 +0,0 @@ -There are blowfish assembler generation scripts. -bf-586.pl version is for the pentium and -bf-686.pl is my original version, which is faster on the pentium pro. - -When using a bf-586.pl, the pentium pro/II is %8 slower than using -bf-686.pl. When using a bf-686.pl, the pentium is %16 slower -than bf-586.pl - -So the default is bf-586.pl - diff --git a/lib/libssl/src/crypto/bn/asm/README b/lib/libssl/src/crypto/bn/asm/README deleted file mode 100644 index 323d1a06b92..00000000000 --- a/lib/libssl/src/crypto/bn/asm/README +++ /dev/null @@ -1,27 +0,0 @@ -<OBSOLETE> - -All assembler in this directory are just version of the file -crypto/bn/bn_asm.c. - -Quite a few of these files are just the assembler output from gcc since on -quite a few machines they are 2 times faster than the system compiler. - -For the x86, I have hand written assembler because of the bad job all -compilers seem to do on it. This normally gives a 2 time speed up in the RSA -routines. - -For the DEC alpha, I also hand wrote the assembler (except the division which -is just the output from the C compiler pasted on the end of the file). -On the 2 alpha C compilers I had access to, it was not possible to do -64b x 64b -> 128b calculations (both long and the long long data types -were 64 bits). So the hand assembler gives access to the 128 bit result and -a 2 times speedup :-). - -There are 3 versions of assembler for the HP PA-RISC. - -pa-risc.s is the origional one which works fine and generated using gcc :-) - -pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations -by Chris Ruemmler from HP (with some help from the HP C compiler). - -</OBSOLETE> diff --git a/lib/libssl/src/crypto/camellia/asm/BSD_license.txt b/lib/libssl/src/crypto/camellia/asm/BSD_license.txt deleted file mode 100644 index 591975cb98b..00000000000 --- a/lib/libssl/src/crypto/camellia/asm/BSD_license.txt +++ /dev/null @@ -1,24 +0,0 @@ -Camellia assembler implementation.
-
-Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer as
- the first lines of this file unmodified.
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY Andy Polyakov ``AS IS'' AND ANY EXPRESS OR
-IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL NTT BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/libssl/src/crypto/cast/asm/readme b/lib/libssl/src/crypto/cast/asm/readme deleted file mode 100644 index fbcd76289e2..00000000000 --- a/lib/libssl/src/crypto/cast/asm/readme +++ /dev/null @@ -1,7 +0,0 @@ -There is a ppro flag in cast-586 which turns on/off -generation of pentium pro/II friendly code - -This flag makes the inner loop one cycle longer, but generates -code that runs %30 faster on the pentium pro/II, while only %7 slower -on the pentium. By default, this flag is on. - diff --git a/lib/libssl/src/crypto/des/asm/readme b/lib/libssl/src/crypto/des/asm/readme deleted file mode 100644 index 1beafe253b1..00000000000 --- a/lib/libssl/src/crypto/des/asm/readme +++ /dev/null @@ -1,131 +0,0 @@ -First up, let me say I don't like writing in assembler. It is not portable, -dependant on the particular CPU architecture release and is generally a pig -to debug and get right. Having said that, the x86 architecture is probably -the most important for speed due to number of boxes and since -it appears to be the worst architecture to to get -good C compilers for. So due to this, I have lowered myself to do -assembler for the inner DES routines in libdes :-). - -The file to implement in assembler is des_enc.c. Replace the following -4 functions -des_encrypt1(DES_LONG data[2],des_key_schedule ks, int encrypt); -des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt); -des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); -des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); - -They encrypt/decrypt the 64 bits held in 'data' using -the 'ks' key schedules. The only difference between the 4 functions is that -des_encrypt2() does not perform IP() or FP() on the data (this is an -optimization for when doing triple DES and des_encrypt3() and des_decrypt3() -perform triple des. The triple DES routines are in here because it does -make a big difference to have them located near the des_encrypt2 function -at link time.. - -Now as we all know, there are lots of different operating systems running on -x86 boxes, and unfortunately they normally try to make sure their assembler -formating is not the same as the other peoples. -The 4 main formats I know of are -Microsoft Windows 95/Windows NT -Elf Includes Linux and FreeBSD(?). -a.out The older Linux. -Solaris Same as Elf but different comments :-(. - -Now I was not overly keen to write 4 different copies of the same code, -so I wrote a few perl routines to output the correct assembler, given -a target assembler type. This code is ugly and is just a hack. -The libraries are x86unix.pl and x86ms.pl. -des586.pl, des686.pl and des-som[23].pl are the programs to actually -generate the assembler. - -So to generate elf assembler -perl des-som3.pl elf >dx86-elf.s -For Windows 95/NT -perl des-som2.pl win32 >win32.asm - -[ update 4 Jan 1996 ] -I have added another way to do things. -perl des-som3.pl cpp >dx86-cpp.s -generates a file that will be included by dx86unix.cpp when it is compiled. -To build for elf, a.out, solaris, bsdi etc, -cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o -cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o -cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o -cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o -This was done to cut down the number of files in the distribution. - -Now the ugly part. I acquired my copy of Intels -"Optimization's For Intel's 32-Bit Processors" and found a few interesting -things. First, the aim of the exersize is to 'extract' one byte at a time -from a word and do an array lookup. This involves getting the byte from -the 4 locations in the word and moving it to a new word and doing the lookup. -The most obvious way to do this is -xor eax, eax # clear word -movb al, cl # get low byte -xor edi DWORD PTR 0x100+des_SP[eax] # xor in word -movb al, ch # get next byte -xor edi DWORD PTR 0x300+des_SP[eax] # xor in word -shr ecx 16 -which seems ok. For the pentium, this system appears to be the best. -One has to do instruction interleaving to keep both functional units -operating, but it is basically very efficient. - -Now the crunch. When a full register is used after a partial write, eg. -mov al, cl -xor edi, DWORD PTR 0x100+des_SP[eax] -386 - 1 cycle stall -486 - 1 cycle stall -586 - 0 cycle stall -686 - at least 7 cycle stall (page 22 of the above mentioned document). - -So the technique that produces the best results on a pentium, according to -the documentation, will produce hideous results on a pentium pro. - -To get around this, des686.pl will generate code that is not as fast on -a pentium, should be very good on a pentium pro. -mov eax, ecx # copy word -shr ecx, 8 # line up next byte -and eax, 0fch # mask byte -xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup -mov eax, ecx # get word -shr ecx 8 # line up next byte -and eax, 0fch # mask byte -xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup - -Due to the execution units in the pentium, this actually works quite well. -For a pentium pro it should be very good. This is the type of output -Visual C++ generates. - -There is a third option. instead of using -mov al, ch -which is bad on the pentium pro, one may be able to use -movzx eax, ch -which may not incur the partial write penalty. On the pentium, -this instruction takes 4 cycles so is not worth using but on the -pentium pro it appears it may be worth while. I need access to one to -experiment :-). - -eric (20 Oct 1996) - -22 Nov 1996 - I have asked people to run the 2 different version on pentium -pros and it appears that the intel documentation is wrong. The -mov al,bh is still faster on a pentium pro, so just use the des586.pl -install des686.pl - -3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these -functions into des_enc.c because it does make a massive performance -difference on some boxes to have the functions code located close to -the des_encrypt2() function. - -9 Jan 1997 - des-som2.pl is now the correct perl script to use for -pentiums. It contains an inner loop from -Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at -273,000 per second. He had a previous version at 250,000 and the best -I was able to get was 203,000. The content has not changed, this is all -due to instruction sequencing (and actual instructions choice) which is able -to keep both functional units of the pentium going. -We may have lost the ugly register usage restrictions when x86 went 32 bit -but for the pentium it has been replaced by evil instruction ordering tricks. - -13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf. -raw DES at 281,000 per second on a pentium 100. - |