author      Damien Miller <djm@cvs.openbsd.org>    2006-06-27 05:05:41 +0000
committer   Damien Miller <djm@cvs.openbsd.org>    2006-06-27 05:05:41 +0000
commit      ecc645c71513728e7357c84aa8997b4dc2301936 (patch)
tree        9ac84a2d60cae724af19457cc8f9f91644b23aea /lib/libcrypto/rc4
parent      70ff9835c415e893131dd8dbe643c6f5e4873c53 (diff)
import of openssl-0.9.7j
Diffstat (limited to 'lib/libcrypto/rc4')
-rwxr-xr-x   lib/libcrypto/rc4/asm/rc4-x86_64.pl   150
1 file changed, 150 insertions, 0 deletions
diff --git a/lib/libcrypto/rc4/asm/rc4-x86_64.pl b/lib/libcrypto/rc4/asm/rc4-x86_64.pl
new file mode 100755
index 00000000000..b628daca705
--- /dev/null
+++ b/lib/libcrypto/rc4/asm/rc4-x86_64.pl
@@ -0,0 +1,150 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. Rights for redistribution and usage in source and binary
+# forms are granted according to the OpenSSL license.
+# ====================================================================
+#
+# Unlike 0.9.7f this code expects RC4_CHAR back in config line! See
+# commentary section in corresponding script in development branch
+# for background information about this option carousel. For those
+# who don't have energy to figure out these gory details, here is
+# basis in form of performance matrix relative to the original
+# 0.9.7e C code-base:
+#
+#              0.9.7e  0.9.7f  this
+# AMD64        1x      3.3x    2.4x
+# EM64T        1x      0.8x    1.5x
+#
+# In other words idea is to trade -25% AMD64 performance to compensate
+# for deterioration and gain +90% on EM64T core. Development branch
+# maintains best performance for either target, i.e. 3.3x for AMD64
+# and 1.5x for EM64T.
+
+$output=shift;
+
+open STDOUT,">$output" || die "can't open $output: $!";
+
+$dat="%rdi";   # arg1
+$len="%rsi";   # arg2
+$inp="%rdx";   # arg3
+$out="%rcx";   # arg4
+
+@XX=("%r8","%r10");
+@TX=("%r9","%r11");
+$YY="%r12";
+$TY="%r13";
+
+$code=<<___;
+.text
+
+.globl RC4
+.type  RC4,\@function
+.align 16
+RC4:   or      $len,$len
+       jne     .Lentry
+       repret
+.Lentry:
+       push    %r12
+       push    %r13
+
+       add     \$2,$dat
+       movzb   -2($dat),$XX[0]#d
+       movzb   -1($dat),$YY#d
+
+       add     \$1,$XX[0]#b
+       movzb   ($dat,$XX[0]),$TX[0]#d
+       test    \$-8,$len
+       jz      .Lcloop1
+       push    %rbx
+.align 16                              # incidentally aligned already
+.Lcloop8:
+       mov     ($inp),%eax
+       mov     4($inp),%ebx
+___
+# unroll 2x4-wise, because 64-bit rotates kill Intel P4...
+for ($i=0;$i<4;$i++) {
+$code.=<<___;
+       add     $TX[0]#b,$YY#b
+       lea     1($XX[0]),$XX[1]
+       movzb   ($dat,$YY),$TY#d
+       movzb   $XX[1]#b,$XX[1]#d
+       movzb   ($dat,$XX[1]),$TX[1]#d
+       movb    $TX[0]#b,($dat,$YY)
+       cmp     $XX[1],$YY
+       movb    $TY#b,($dat,$XX[0])
+       jne     .Lcmov$i                # Intel cmov is sloooow...
+       mov     $TX[0],$TX[1]
+.Lcmov$i:
+       add     $TX[0]#b,$TY#b
+       xor     ($dat,$TY),%al
+       ror     \$8,%eax
+___
+push(@TX,shift(@TX)); push(@XX,shift(@XX));    # "rotate" registers
+}
+for ($i=4;$i<8;$i++) {
+$code.=<<___;
+       add     $TX[0]#b,$YY#b
+       lea     1($XX[0]),$XX[1]
+       movzb   ($dat,$YY),$TY#d
+       movzb   $XX[1]#b,$XX[1]#d
+       movzb   ($dat,$XX[1]),$TX[1]#d
+       movb    $TX[0]#b,($dat,$YY)
+       cmp     $XX[1],$YY
+       movb    $TY#b,($dat,$XX[0])
+       jne     .Lcmov$i                # Intel cmov is sloooow...
+       mov     $TX[0],$TX[1]
+.Lcmov$i:
+       add     $TX[0]#b,$TY#b
+       xor     ($dat,$TY),%bl
+       ror     \$8,%ebx
+___
+push(@TX,shift(@TX)); push(@XX,shift(@XX));    # "rotate" registers
+}
+$code.=<<___;
+       lea     -8($len),$len
+       mov     %eax,($out)
+       lea     8($inp),$inp
+       mov     %ebx,4($out)
+       lea     8($out),$out
+
+       test    \$-8,$len
+       jnz     .Lcloop8
+       pop     %rbx
+       cmp     \$0,$len
+       jne     .Lcloop1
+.Lexit:
+       sub     \$1,$XX[0]#b
+       movb    $XX[0]#b,-2($dat)
+       movb    $YY#b,-1($dat)
+
+       pop     %r13
+       pop     %r12
+       repret
+
+.align 16
+.Lcloop1:
+       add     $TX[0]#b,$YY#b
+       movzb   ($dat,$YY),$TY#d
+       movb    $TX[0]#b,($dat,$YY)
+       movb    $TY#b,($dat,$XX[0])
+       add     $TX[0]#b,$TY#b
+       add     \$1,$XX[0]#b
+       movzb   ($dat,$TY),$TY#d
+       movzb   ($dat,$XX[0]),$TX[0]#d
+       xorb    ($inp),$TY#b
+       lea     1($inp),$inp
+       movb    $TY#b,($out)
+       lea     1($out),$out
+       sub     \$1,$len
+       jnz     .Lcloop1
+       jmp     .Lexit
+.size  RC4,.-RC4
___

$code =~ s/#([bwd])/$1/gm;

$code =~ s/repret/.byte\t0xF3,0xC3/gm;

print $code;
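
For readers checking the assembly against the cipher it implements, here is a minimal C sketch of the byte-at-a-time path (.Lcloop1). It assumes the RC4_CHAR state layout implied by "add \$2,$dat" and the movzb -2($dat) / -1($dat) loads: the first two bytes of the key buffer hold the indices x and y, followed by the 256-byte S array. The helper name rc4_char_sketch and the raw unsigned char * state argument are illustrative assumptions, not OpenSSL's API; the generated code exports the RC4 symbol and takes its four arguments in the %rdi, %rsi, %rdx, %rcx registers annotated at the top of the script.

    #include <stddef.h>

    /* Minimal sketch, assuming the RC4_CHAR layout above: d[0] = x,
     * d[1] = y, d[2..257] = S.  Models one .Lcloop1 iteration per byte. */
    static void rc4_char_sketch(unsigned char *d, size_t len,
                                const unsigned char *in, unsigned char *out)
    {
            unsigned char *S = d + 2;
            unsigned char x = d[0], y = d[1];

            while (len--) {
                    unsigned char tx, ty;
                    x = (unsigned char)(x + 1);   /* add   $1,$XX[0]#b       */
                    tx = S[x];                    /* movzb ($dat,$XX[0]),... */
                    y = (unsigned char)(y + tx);  /* add   $TX[0]#b,$YY#b    */
                    ty = S[y];                    /* movzb ($dat,$YY),$TY#d  */
                    S[y] = tx;                    /* swap S[x] and S[y]      */
                    S[x] = ty;
                    /* keystream byte is S[(tx + ty) mod 256]; xor into data */
                    *out++ = (unsigned char)(*in++ ^ S[(unsigned char)(tx + ty)]);
            }
            d[0] = x;     /* write the indices back, as .Lexit does */
            d[1] = y;
    }

Two quirks of the script are worth spelling out. The #b/#d suffixes are not assembler syntax: the closing s/#([bwd])/$1/gm splices them onto the 64-bit register name, so $XX[0]#d expands to %r8d (the 32-bit subregister) and $YY#b to %r12b. Likewise repret becomes .byte 0xF3,0xC3, i.e. "rep ret", the two-byte return form AMD's optimization guidance recommends over a bare ret at a branch target. The script prints the generated assembly to the file named by its first argument, e.g. perl rc4-x86_64.pl rc4-x86_64.s.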