diff options
author | Joel Sing <jsing@cvs.openbsd.org> | 2024-03-27 12:59:13 +0000 |
---|---|---|
committer | Joel Sing <jsing@cvs.openbsd.org> | 2024-03-27 12:59:13 +0000 |
commit | 6d6dae00f9449be00d737531668025a77b926bfb (patch) | |
tree | 3f9125cb183aa1d884a41615b70aa06115caf4c3 | |
parent | 3b1350973d0a9a83de7c77e28779bb9ade9e854d (diff) |
Remove unused rc4 parisc assembly.
This is already disabled since it is "about 35% slower than C code".
-rw-r--r-- | lib/libcrypto/arch/hppa/Makefile.inc | 6 | ||||
-rw-r--r-- | lib/libcrypto/rc4/asm/rc4-parisc.pl | 294 |
2 files changed, 1 insertion, 299 deletions
diff --git a/lib/libcrypto/arch/hppa/Makefile.inc b/lib/libcrypto/arch/hppa/Makefile.inc index 75bb288c91d..92f18cc6b8a 100644 --- a/lib/libcrypto/arch/hppa/Makefile.inc +++ b/lib/libcrypto/arch/hppa/Makefile.inc @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile.inc,v 1.16 2024/03/27 12:54:42 jsing Exp $ +# $OpenBSD: Makefile.inc,v 1.17 2024/03/27 12:59:12 jsing Exp $ # hppa-specific libcrypto build rules @@ -17,11 +17,7 @@ SRCS+= des_enc.c fcrypt_b.c CFLAGS+= -DGHASH_ASM SSLASM+= modes ghash-parisc ghash-parisc # rc4 -.if 0 # about 35% slower than C code -SSLASM+= rc4 rc4-parisc rc4-parisc -.else SRCS+= rc4.c -.endif # sha CFLAGS+= -DSHA1_ASM SSLASM+= sha sha1-parisc sha1-parisc diff --git a/lib/libcrypto/rc4/asm/rc4-parisc.pl b/lib/libcrypto/rc4/asm/rc4-parisc.pl deleted file mode 100644 index 6a1a2aad77b..00000000000 --- a/lib/libcrypto/rc4/asm/rc4-parisc.pl +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# RC4 for PA-RISC. - -# June 2009. -# -# Performance is 33% better than gcc 3.2 generated code on PA-7100LC. -# For reference, [4x] unrolled loop is >40% faster than folded one. -# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement -# is believed to be not sufficient to justify the effort... -# -# Special thanks to polarhome.com for providing HP-UX account. 
- -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; -} else { - $LEVEL ="1.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; -} - -$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker - # [+ argument transfer] -$SZ=1; # defaults to RC4_CHAR -if (open CONF,"<${dir}../../opensslconf.h") { - while(<CONF>) { - if (m/#\s*define\s+RC4_INT\s+(.*)/) { - $SZ = ($1=~/char$/) ? 1 : 4; - last; - } - } - close CONF; -} - -if ($SZ==1) { # RC4_CHAR - $LD="ldb"; - $LDX="ldbx"; - $MKX="addl"; - $ST="stb"; -} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC) - $LD="ldw"; - $LDX="ldwx,s"; - $MKX="sh2addl"; - $ST="stw"; -} - -$key="%r26"; -$len="%r25"; -$inp="%r24"; -$out="%r23"; - -@XX=("%r19","%r20"); -@TX=("%r21","%r22"); -$YY="%r28"; -$TY="%r29"; - -$acc="%r1"; -$ix="%r2"; -$iy="%r3"; -$dat0="%r4"; -$dat1="%r5"; -$rem="%r6"; -$mask="%r31"; - -sub unrolledloopbody { -for ($i=0;$i<4;$i++) { -$code.=<<___; - ldo 1($XX[0]),$XX[1] - `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)` - and $mask,$XX[1],$XX[1] - $LDX $YY($key),$TY - $MKX $YY,$key,$ix - $LDX $XX[1]($key),$TX[1] - $MKX $XX[0],$key,$iy - $ST $TX[0],0($ix) - comclr,<> $XX[1],$YY,%r0 ; conditional - copy $TX[0],$TX[1] ; move - `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)` - $ST $TY,0($iy) - addl $TX[0],$TY,$TY - addl $TX[1],$YY,$YY - and $mask,$TY,$TY - and $mask,$YY,$YY -___ -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers -} } - -sub foldedloop { -my ($label,$count)=@_; -$code.=<<___; -$label - $MKX $YY,$key,$iy - $LDX $YY($key),$TY - $MKX $XX[0],$key,$ix - $ST $TX[0],0($iy) - ldo 1($XX[0]),$XX[0] - $ST $TY,0($ix) - addl $TX[0],$TY,$TY - ldbx $inp($out),$dat1 - and $mask,$TY,$TY 
- and $mask,$XX[0],$XX[0] - $LDX $TY($key),$acc - $LDX $XX[0]($key),$TX[0] - ldo 1($out),$out - xor $dat1,$acc,$acc - addl $TX[0],$YY,$YY - stb $acc,-1($out) - addib,<> -1,$count,$label ; $count is always small - and $mask,$YY,$YY -___ -} - -$code=<<___; - .LEVEL $LEVEL - .text - - .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR -RC4 - .PROC - .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - - cmpib,*= 0,$len,L\$abort - sub $inp,$out,$inp ; distance between $inp and $out - - $LD `0*$SZ`($key),$XX[0] - $LD `1*$SZ`($key),$YY - ldo `2*$SZ`($key),$key - - ldi 0xff,$mask - ldi 3,$dat0 - - ldo 1($XX[0]),$XX[0] ; warm up loop - and $mask,$XX[0],$XX[0] - $LDX $XX[0]($key),$TX[0] - addl $TX[0],$YY,$YY - cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother? - and $mask,$YY,$YY - - and,<> $out,$dat0,$rem ; is $out aligned? - b L\$alignedout - subi 4,$rem,$rem - sub $len,$rem,$len -___ -&foldedloop("L\$alignout",$rem); # process till $out is aligned - -$code.=<<___; -L\$alignedout ; $len is at least 4 here - and,<> $inp,$dat0,$acc ; is $inp aligned? 
- b L\$oop4 - sub $inp,$acc,$rem ; align $inp - - sh3addl $acc,%r0,$acc - subi 32,$acc,$acc - mtctl $acc,%cr11 ; load %sar with vshd align factor - ldwx $rem($out),$dat0 - ldo 4($rem),$rem -L\$oop4misalignedinp -___ -&unrolledloopbody(); -$code.=<<___; - $LDX $TY($key),$ix - ldwx $rem($out),$dat1 - ldo -4($len),$len - or $ix,$acc,$acc ; last piece, no need to dep - vshd $dat0,$dat1,$iy ; align data - copy $dat1,$dat0 - xor $iy,$acc,$acc - stw $acc,0($out) - cmpib,*<< 3,$len,L\$oop4misalignedinp - ldo 4($out),$out - cmpib,*= 0,$len,L\$done - nop - b L\$oop1 - nop - - .ALIGN 8 -L\$oop4 -___ -&unrolledloopbody(); -$code.=<<___; - $LDX $TY($key),$ix - ldwx $inp($out),$dat0 - ldo -4($len),$len - or $ix,$acc,$acc ; last piece, no need to dep - xor $dat0,$acc,$acc - stw $acc,0($out) - cmpib,*<< 3,$len,L\$oop4 - ldo 4($out),$out - cmpib,*= 0,$len,L\$done - nop -___ -&foldedloop("L\$oop1",$len); -$code.=<<___; -L\$done - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 - ldo -1($XX[0]),$XX[0] ; chill out loop - sub $YY,$TX[0],$YY - and $mask,$XX[0],$XX[0] - and $mask,$YY,$YY - $ST $XX[0],`-2*$SZ`($key) - $ST $YY,`-1*$SZ`($key) - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 -L\$abort - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND -___ - -$code.=<<___; - - .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR - .ALIGN 8 -RC4_set_key - .PROC - .CALLINFO NO_CALLS - .ENTRY - $ST %r0,`0*$SZ`($key) - $ST %r0,`1*$SZ`($key) - ldo `2*$SZ`($key),$key - copy %r0,@XX[0] -L\$1st - $ST @XX[0],0($key) - ldo 1(@XX[0]),@XX[0] - bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256 - ldo $SZ($key),$key - - ldo `-256*$SZ`($key),$key ; rewind $key - addl $len,$inp,$inp ; $inp to point at the end - sub %r0,$len,%r23 ; inverse index - copy %r0,@XX[0] - copy %r0,@XX[1] - ldi 0xff,$mask - -L\$2nd - $LDX @XX[0]($key),@TX[0] - ldbx %r23($inp),@TX[1] - addi,nuv 1,%r23,%r23 ; increment and conditional - sub %r0,$len,%r23 ; inverse index - addl 
@TX[0],@XX[1],@XX[1] - addl @TX[1],@XX[1],@XX[1] - and $mask,@XX[1],@XX[1] - $MKX @XX[0],$key,$TY - $LDX @XX[1]($key),@TX[1] - $MKX @XX[1],$key,$YY - ldo 1(@XX[0]),@XX[0] - $ST @TX[0],0($YY) - bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256 - $ST @TX[1],0($TY) - - bv,n (%r2) - .EXIT - nop - .PROCEND -___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); -$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); - -print $code; -close STDOUT; |