summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorMiod Vallat <miod@cvs.openbsd.org>2023-02-01 20:45:05 +0000
committerMiod Vallat <miod@cvs.openbsd.org>2023-02-01 20:45:05 +0000
commit5f59efbdf609ac302476ee5a23a3557285be5b19 (patch)
treef3f554845a427741edd72210976671586d424801 /lib
parent461a7552b676e4b3aa94480b628e8a3776759fff (diff)
Move all data blocks from .text to .rodata, and clean up and homogenize the code
responsible for getting the proper address of those blocks. ok tb@ jsing@
Diffstat (limited to 'lib')
-rw-r--r--lib/libcrypto/aes/asm/aes-586.pl45
-rw-r--r--lib/libcrypto/aes/asm/aesni-x86.pl1
-rw-r--r--lib/libcrypto/aes/asm/vpaes-x86.pl34
-rw-r--r--lib/libcrypto/bn/asm/bn-586.pl9
-rw-r--r--lib/libcrypto/bn/asm/x86-gf2m.pl5
-rwxr-xr-xlib/libcrypto/bn/asm/x86-mont.pl5
-rw-r--r--lib/libcrypto/camellia/asm/cmll-x86.pl40
-rw-r--r--lib/libcrypto/des/asm/des-586.pl9
-rw-r--r--lib/libcrypto/modes/asm/ghash-x86.pl66
-rw-r--r--lib/libcrypto/perlasm/cbc.pl82
-rw-r--r--lib/libcrypto/perlasm/x86gas.pl92
-rw-r--r--lib/libcrypto/rc4/asm/rc4-586.pl26
-rw-r--r--lib/libcrypto/sha/asm/sha1-586.pl26
-rw-r--r--lib/libcrypto/sha/asm/sha256-586.pl66
-rw-r--r--lib/libcrypto/sha/asm/sha512-586.pl18
-rw-r--r--lib/libcrypto/whrlpool/asm/wp-mmx.pl12
16 files changed, 273 insertions, 263 deletions
diff --git a/lib/libcrypto/aes/asm/aes-586.pl b/lib/libcrypto/aes/asm/aes-586.pl
index c5ae3f6903e..4e0f34cba31 100644
--- a/lib/libcrypto/aes/asm/aes-586.pl
+++ b/lib/libcrypto/aes/asm/aes-586.pl
@@ -950,8 +950,10 @@ sub enclast()
&xor ($s3,&DWP(12,$key));
&ret ();
+&function_end_B("_x86_AES_encrypt");
-&set_label("AES_Te",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("AES_Te",64);
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
&_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
@@ -1154,7 +1156,7 @@ sub enclast()
&data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
&data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000);
&data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000);
-&function_end_B("_x86_AES_encrypt");
+ &previous();
# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
&function_begin("AES_encrypt");
@@ -1174,11 +1176,9 @@ sub enclast()
&add ("esp",4); # 4 is reserved for caller's return address
&mov ($_esp,$s0); # save stack pointer
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
# pick Te4 copy which can't "overlap" with stack frame or key schedule
&lea ($s1,&DWP(768-4,"esp"));
@@ -1744,8 +1744,10 @@ sub declast()
&xor ($s3,&DWP(12,$key));
&ret ();
+&function_end_B("_x86_AES_decrypt");
-&set_label("AES_Td",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("AES_Td",64);
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
&_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
@@ -1943,7 +1945,7 @@ sub declast()
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-&function_end_B("_x86_AES_decrypt");
+ &previous();
# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
&function_begin("AES_decrypt");
@@ -1963,11 +1965,9 @@ sub declast()
&add ("esp",4); # 4 is reserved for caller's return address
&mov ($_esp,$s0); # save stack pointer
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
- &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Td"), $tbl);
# pick Td4 copy which can't "overlap" with stack frame or key schedule
&lea ($s1,&DWP(768-4,"esp"));
@@ -2034,13 +2034,10 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
&cmp ($s2,0);
&je (&label("drop_out"));
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
-
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
&cmp (&wparam(5),0);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
&jne (&label("picked_te"));
&lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl));
&set_label("picked_te");
@@ -2659,10 +2656,9 @@ sub enckey()
&test ("edi",-1);
&jz (&label("badpointer"));
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
+
&lea ($tbl,&DWP(2048+128,$tbl));
# prefetch Te4
@@ -2975,6 +2971,5 @@ sub deckey()
&xor ("eax","eax"); # return success
&function_end("AES_set_decrypt_key");
-&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/lib/libcrypto/aes/asm/aesni-x86.pl b/lib/libcrypto/aes/asm/aesni-x86.pl
index 8c1d0b5bed2..ff444156113 100644
--- a/lib/libcrypto/aes/asm/aesni-x86.pl
+++ b/lib/libcrypto/aes/asm/aesni-x86.pl
@@ -2184,6 +2184,5 @@ if ($PREFIX eq "aesni") {
&set_label("dec_key_ret");
&ret ();
&function_end_B("${PREFIX}_set_decrypt_key");
-&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/lib/libcrypto/aes/asm/vpaes-x86.pl b/lib/libcrypto/aes/asm/vpaes-x86.pl
index 1533e2c3042..38cef61733c 100644
--- a/lib/libcrypto/aes/asm/vpaes-x86.pl
+++ b/lib/libcrypto/aes/asm/vpaes-x86.pl
@@ -57,6 +57,7 @@ $PREFIX="vpaes";
my ($round, $base, $magic, $key, $const, $inp, $out)=
("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
+ &rodataseg();
&static_label("_vpaes_consts");
&static_label("_vpaes_schedule_low_round");
@@ -153,8 +154,7 @@ $k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
$k_dsbo=0x2c0; # decryption sbox final output
&data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
-&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
-&align (64);
+ &previous();
&function_begin_B("_vpaes_preheat");
- &add ($const,&DWP(0,"esp"));
+ # $const is already absolute (callers use picsetup/picsymbol): adding the return address would corrupt it
@@ -762,9 +762,11 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov ($magic,0x30);
&mov ($out,0);
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const));	# $const = _vpaes_consts+0x30; the ';' keeps this from fusing with the &call below
+
&call ("_vpaes_schedule_core");
-&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
@@ -792,18 +794,22 @@ $k_dsbo=0x2c0; # decryption sbox final output
&and ($magic,32);
&xor ($magic,32); # nbist==192?0:32;
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const));	# $const = _vpaes_consts+0x30; the ';' keeps this from fusing with the &call below
+
&call ("_vpaes_schedule_core");
-&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
&function_end("${PREFIX}_set_decrypt_key");
&function_begin("${PREFIX}_encrypt");
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const));	# $const = _vpaes_consts+0x30; the ';' keeps this from fusing with the &call below
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
@@ -820,9 +826,11 @@ $k_dsbo=0x2c0; # decryption sbox final output
&function_end("${PREFIX}_encrypt");
&function_begin("${PREFIX}_decrypt");
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const));	# $const = _vpaes_consts+0x30; the ';' keeps this from fusing with the &call below
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
@@ -859,9 +867,11 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov (&DWP(8,"esp"),$const); # save ivp
&mov ($out,$round); # $out works as $len
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const));	# $const = _vpaes_consts+0x30; the ';' keeps this from fusing with the &call below
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&cmp ($magic,0);
&je (&label("cbc_dec_loop"));
&jmp (&label("cbc_enc_loop"));
diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl
index c4e2baa6c5a..b502fe60ee2 100644
--- a/lib/libcrypto/bn/asm/bn-586.pl
+++ b/lib/libcrypto/bn/asm/bn-586.pl
@@ -32,7 +32,8 @@ sub bn_mul_add_words
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("maw_non_sse2"));
@@ -218,7 +219,8 @@ sub bn_mul_words
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("mw_non_sse2"));
@@ -329,7 +331,8 @@ sub bn_sqr_words
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("sqr_non_sse2"));
diff --git a/lib/libcrypto/bn/asm/x86-gf2m.pl b/lib/libcrypto/bn/asm/x86-gf2m.pl
index 9715b2158fd..cb2f2a5c305 100644
--- a/lib/libcrypto/bn/asm/x86-gf2m.pl
+++ b/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -200,7 +200,8 @@ $R="mm0";
# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
&function_begin_B("bn_GF2m_mul_2x2");
if (!$x86only) {
- &picmeup("edx","OPENSSL_ia32cap_P");
+ &picsetup("edx");
+ &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");
&mov ("eax",&DWP(0,"edx"));
&mov ("edx",&DWP(4,"edx"));
&test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit
@@ -308,6 +309,4 @@ if ($sse2) {
&ret ();
&function_end_B("bn_GF2m_mul_2x2");
-&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
&asm_finish();
diff --git a/lib/libcrypto/bn/asm/x86-mont.pl b/lib/libcrypto/bn/asm/x86-mont.pl
index e6c04739b1d..65246517485 100755
--- a/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/lib/libcrypto/bn/asm/x86-mont.pl
@@ -113,7 +113,8 @@ $mul1="mm5";
$temp="mm6";
$mask="mm7";
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("non_sse2"));
@@ -588,6 +589,4 @@ $sbit=$num;
&set_label("just_leave");
&function_end("bn_mul_mont");
-&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
&asm_finish();
diff --git a/lib/libcrypto/camellia/asm/cmll-x86.pl b/lib/libcrypto/camellia/asm/cmll-x86.pl
index 027302ac869..a4ab11e54d6 100644
--- a/lib/libcrypto/camellia/asm/cmll-x86.pl
+++ b/lib/libcrypto/camellia/asm/cmll-x86.pl
@@ -141,10 +141,8 @@ my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4];
&mov ($_esp,"ebx"); # save %esp
&mov ($_end,"eax"); # save keyEnd
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load plaintext
&mov (@T[1],&DWP(4,$idx));
@@ -206,10 +204,8 @@ if ($OPENSSL) {
&mov ($_esp,"ebx"); # save %esp
&mov ($_end,"eax"); # save keyEnd
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load plaintext
&mov (@T[1],&DWP(4,$idx));
@@ -316,10 +312,8 @@ if ($OPENSSL) {
&lea ($key,&DWP(0,$key,"eax"));
&mov (&DWP(5*4,"esp"),"ebx");# save %esp
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load ciphertext
&mov (@T[1],&DWP(4,$idx));
@@ -381,10 +375,8 @@ if ($OPENSSL) {
&lea ($key,&DWP(0,$key,"eax"));
&mov (&DWP(5*4,"esp"),"ebx");# save %esp
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load ciphertext
&mov (@T[1],&DWP(4,$idx));
@@ -594,10 +586,8 @@ my $bias=int(@T[0])?shift(@T):0;
&xor (@T[3],&DWP(1*8+4,$key));
&set_label("1st128",4);
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl));
&mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0]
@@ -786,6 +776,7 @@ sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<<
sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; }
sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; }
+ &rodataseg();
&set_label("Camellia_SIGMA",64);
&data_word(
0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2,
@@ -796,6 +787,7 @@ sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<
# tables are interleaved, remember?
for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }
+ &previous();
# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out,
# size_t length, const CAMELLIA_KEY *key,
@@ -856,10 +848,8 @@ my ($s0,$s1,$s2,$s3) = @T;
&mov ($_key,$s3); # save copy of key
&mov ($_ivp,$Tbl); # save copy of ivp
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov ($idx,32);
&set_label("prefetch_sbox",4);
@@ -1133,6 +1123,4 @@ my ($s0,$s1,$s2,$s3) = @T;
&function_end("Camellia_cbc_encrypt");
}
-&asciz("Camellia for x86 by <appro\@openssl.org>");
-
&asm_finish();
diff --git a/lib/libcrypto/des/asm/des-586.pl b/lib/libcrypto/des/asm/des-586.pl
index 5b5f39cebd1..e11b2ef80ff 100644
--- a/lib/libcrypto/des/asm/des-586.pl
+++ b/lib/libcrypto/des/asm/des-586.pl
@@ -154,11 +154,8 @@ sub DES_encrypt
&rotl($L,3);
}
- # PIC-ification:-)
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($trans);
- &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
+ &picsetup($trans);
+ &picsymbol($trans, &label("DES_SPtrans"), $trans);
&mov( "ecx", &wparam(1) );
@@ -314,6 +311,7 @@ sub FP_new
sub DES_SPtrans
{
+ &rodataseg();
&set_label("DES_SPtrans",64);
&data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
&data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
@@ -450,4 +448,5 @@ sub DES_SPtrans
&data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000);
&data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000);
&data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080);
+ &previous();
}
diff --git a/lib/libcrypto/modes/asm/ghash-x86.pl b/lib/libcrypto/modes/asm/ghash-x86.pl
index 27492597adb..5e868a43ff2 100644
--- a/lib/libcrypto/modes/asm/ghash-x86.pl
+++ b/lib/libcrypto/modes/asm/ghash-x86.pl
@@ -411,10 +411,8 @@ $S=12; # shift factor for rem_4bit
&mov ($inp,&wparam(0)); # load Xi
&mov ($Htbl,&wparam(1)); # load Htable
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&movz ($Zll,&BP(15,$inp));
@@ -436,10 +434,8 @@ $S=12; # shift factor for rem_4bit
&mov ($inp,&wparam(2)); # load in
&mov ($Zlh,&wparam(3)); # load len
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&add ($Zlh,$inp);
&mov (&wparam(3),$Zlh); # len to point at the end of input
@@ -584,10 +580,8 @@ sub mmx_loop() {
&mov ($inp,&wparam(0)); # load Xi
&mov ($Htbl,&wparam(1)); # load Htable
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&movz ($Zll,&BP(15,$inp));
@@ -618,10 +612,9 @@ sub mmx_loop() {
&mov ("ecx",&wparam(2)); # inp
&mov ("edx",&wparam(3)); # len
&mov ("ebp","esp"); # original %esp
- &call (&label("pic_point"));
- &set_label ("pic_point");
- &blindpop ($rem_8bit);
- &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit));
+
+ &picsetup($rem_8bit);
+ &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit);
&sub ("esp",512+16+16); # allocate stack frame...
&and ("esp",-64); # ...and align it
@@ -910,10 +903,8 @@ my ($Xhi,$Xi) = @_;
&mov ($Htbl,&wparam(0));
&mov ($Xip,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Hkey,&QWP(0,$Xip));
&pshufd ($Hkey,$Hkey,0b01001110);# dword swap
@@ -947,10 +938,8 @@ my ($Xhi,$Xi) = @_;
&mov ($Xip,&wparam(0));
&mov ($Htbl,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
@@ -972,10 +961,8 @@ my ($Xhi,$Xi) = @_;
&mov ($inp,&wparam(2));
&mov ($len,&wparam(3));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
@@ -1138,10 +1125,8 @@ my ($Xhi,$Xi)=@_;
&mov ($Htbl,&wparam(0));
&mov ($Xip,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Hkey,&QWP(0,$Xip));
&pshufd ($Hkey,$Hkey,0b01001110);# dword swap
@@ -1161,10 +1146,8 @@ my ($Xhi,$Xi)=@_;
&mov ($Xip,&wparam(0));
&mov ($Htbl,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($Xn,&QWP(0,$const));
@@ -1186,10 +1169,8 @@ my ($Xhi,$Xi)=@_;
&mov ($inp,&wparam(2));
&mov ($len,&wparam(3));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
@@ -1270,11 +1251,14 @@ my ($Xhi,$Xi)=@_;
}
+ &rodataseg();
&set_label("bswap",64);
&data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
&data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial
+ &previous();
}} # $sse2
+ &rodataseg();
&set_label("rem_4bit",64);
&data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
&data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
@@ -1313,9 +1297,9 @@ my ($Xhi,$Xi)=@_;
&data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E);
&data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE);
&data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE);
+ &previous();
}}} # !$x86only
-&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
# A question was risen about choice of vanilla MMX. Or rather why wasn't
diff --git a/lib/libcrypto/perlasm/cbc.pl b/lib/libcrypto/perlasm/cbc.pl
index 24561e759ab..392f23e145e 100644
--- a/lib/libcrypto/perlasm/cbc.pl
+++ b/lib/libcrypto/perlasm/cbc.pl
@@ -34,6 +34,15 @@ sub cbc
# p1,p2,p3 are the offsets for parameters to be passed to the
# underlying calls.
+&static_label("cbc_enc_jmp_table_".$name);
+&static_label("ej1_".$name);
+&static_label("ej2_".$name);
+&static_label("ej3_".$name);
+&static_label("ej4_".$name);
+&static_label("ej5_".$name);
+&static_label("ej6_".$name);
+&static_label("ej7_".$name);
+
&function_begin_B($name,"");
&comment("");
@@ -146,33 +155,32 @@ sub cbc
&mov($count, &wparam(2)); # length
&and($count, 7);
&jz(&label("finish"));
- &call(&label("PIC_point"));
-&set_label("PIC_point");
- &blindpop("edx");
- &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
+
+ &picsetup("edx");	# edx = PIC base, reused by picadjust below
+ &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx");	# ecx = address of the jump table; ';' ends the statement before &mov
&mov($count,&DWP(0,"ecx",$count,4));
- &add($count,"edx");
+ &picadjust($count, "edx");
+
&xor("ecx","ecx");
&xor("edx","edx");
- #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
&jmp_ptr($count);
-&set_label("ej7");
+&set_label("ej7_".$name);
&movb(&HB("edx"), &BP(6,$in,"",0));
&shl("edx",8);
-&set_label("ej6");
+&set_label("ej6_".$name);
&movb(&HB("edx"), &BP(5,$in,"",0));
-&set_label("ej5");
+&set_label("ej5_".$name);
&movb(&LB("edx"), &BP(4,$in,"",0));
-&set_label("ej4");
+&set_label("ej4_".$name);
&mov("ecx", &DWP(0,$in,"",0));
&jmp(&label("ejend"));
-&set_label("ej3");
+&set_label("ej3_".$name);
&movb(&HB("ecx"), &BP(2,$in,"",0));
&shl("ecx",8);
-&set_label("ej2");
+&set_label("ej2_".$name);
&movb(&HB("ecx"), &BP(1,$in,"",0));
-&set_label("ej1");
+&set_label("ej1_".$name);
&movb(&LB("ecx"), &BP(0,$in,"",0));
&set_label("ejend");
@@ -279,30 +287,14 @@ sub cbc
&mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
&mov("ebx", &DWP(4,$in,"",0)); # next iv actually
-&set_label("dj7");
&rotr("edx", 16);
&movb(&BP(6,$out,"",0), &LB("edx"));
&shr("edx",16);
-&set_label("dj6");
&movb(&BP(5,$out,"",0), &HB("edx"));
-&set_label("dj5");
&movb(&BP(4,$out,"",0), &LB("edx"));
-&set_label("dj4");
&mov(&DWP(0,$out,"",0), "ecx");
- &jmp(&label("djend"));
-&set_label("dj3");
- &rotr("ecx", 16);
- &movb(&BP(2,$out,"",0), &LB("ecx"));
- &shl("ecx",16);
-&set_label("dj2");
- &movb(&BP(1,$in,"",0), &HB("ecx"));
-&set_label("dj1");
- &movb(&BP(0,$in,"",0), &LB("ecx"));
-&set_label("djend");
# final iv is still in eax:ebx
- &jmp(&label("finish"));
-
############################ FINISH #######################3
&set_label("finish",1);
@@ -319,31 +311,21 @@ sub cbc
&mov(&DWP(4,"ecx","",0), "ebx"); # save iv
&function_end_A($name);
+ &function_end_B($name);
+ &rodataseg();
&align(64);
- &set_label("cbc_enc_jmp_table");
+ &set_label("cbc_enc_jmp_table_".$name);
&data_word("0");
- &data_word(&label("ej1")."-".&label("PIC_point"));
- &data_word(&label("ej2")."-".&label("PIC_point"));
- &data_word(&label("ej3")."-".&label("PIC_point"));
- &data_word(&label("ej4")."-".&label("PIC_point"));
- &data_word(&label("ej5")."-".&label("PIC_point"));
- &data_word(&label("ej6")."-".&label("PIC_point"));
- &data_word(&label("ej7")."-".&label("PIC_point"));
- # not used
- #&set_label("cbc_dec_jmp_table",1);
- #&data_word("0");
- #&data_word(&label("dj1")."-".&label("PIC_point"));
- #&data_word(&label("dj2")."-".&label("PIC_point"));
- #&data_word(&label("dj3")."-".&label("PIC_point"));
- #&data_word(&label("dj4")."-".&label("PIC_point"));
- #&data_word(&label("dj5")."-".&label("PIC_point"));
- #&data_word(&label("dj6")."-".&label("PIC_point"));
- #&data_word(&label("dj7")."-".&label("PIC_point"));
- &align(64);
+ &data_word(&code_sym(&label("ej1_".$name)));
+ &data_word(&code_sym(&label("ej2_".$name)));
+ &data_word(&code_sym(&label("ej3_".$name)));
+ &data_word(&code_sym(&label("ej4_".$name)));
+ &data_word(&code_sym(&label("ej5_".$name)));
+ &data_word(&code_sym(&label("ej6_".$name)));
+ &data_word(&code_sym(&label("ej7_".$name)));
+ &previous();
- &function_end_B($name);
-
}
1;
diff --git a/lib/libcrypto/perlasm/x86gas.pl b/lib/libcrypto/perlasm/x86gas.pl
index ca644ba5534..f28a590549b 100644
--- a/lib/libcrypto/perlasm/x86gas.pl
+++ b/lib/libcrypto/perlasm/x86gas.pl
@@ -177,34 +177,52 @@ sub ::align
push(@out,".align\t$val\n");
}
-sub ::picmeup
-{ my($dst,$sym,$base,$reflabel)=@_;
-
- if ($::openbsd)
- { &::emitraw("#if defined(PIC) || defined(__PIC__)");
- &::emitraw("PIC_PROLOGUE");
- &::mov($dst, &::DWP("PIC_GOT($sym)"));
- &::emitraw("PIC_EPILOGUE");
- &::emitraw("#else /* PIC */");
- &::lea($dst,&::DWP($sym));
- &::emitraw("#endif /* PIC */");
- }
- elsif (($::pic && ($::elf || $::aout)) || $::macosx)
- { if (!defined($base))
- { &::call(&::label("PIC_me_up"));
- &::set_label("PIC_me_up");
- &::blindpop($dst);
- $base=$dst;
- $reflabel=&::label("PIC_me_up");
- }
+#
+# PIC data access wrappers
+#
+# Usage:
+# picsetup($base)
+# - only allowed once per function (because of hardcoded label name),
+# sets up pic access, uses $base register as temporary
+# picsymbol($dst, $sym, $base)
+# - loads the address of symbol $sym into $dst with the help of $base
+# initialized by picsetup
+# picadjust($sym, $base)
+# - adjusts a code pointer read from a code_sym table with the help of
+# $base initialized by picsetup
+# code_sym($sym)
+# - emits a pointer to the given code symbol, relative to the GOT if
+# PIC. This pointer will need to be adjusted with picadjust above
+# before use.
+
+sub ::picsetup	# emits the call/label/pop sequence whose label (PIC_setup) picsymbol and picadjust reference
+{ my($base)=@_;	# NOTE(review): only $base is taken here, yet the macosx branch below reads $sym - confirm where $sym is meant to come from
+
+ if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+ {
+ &::call(&::label("PIC_setup"));	# call the label that the next statement defines
+ &::set_label("PIC_setup");
+ &::blindpop($base);	# presumably pops the pushed return address (address of PIC_setup) into $base - confirm against blindpop
if ($::macosx)
{ my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr");
- &::mov($dst,&::DWP("$indirect-$reflabel",$base));
$non_lazy_ptr{"$nmdecor$sym"}=$indirect;
}
+ }
+}
+
+sub ::picsymbol
+{ my($dst,$sym,$base)=@_;
+
+ if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+ {
+ my $reflabel=&::label("PIC_setup");
+ if ($::macosx)
+ { my $indirect=$non_lazy_ptr{"$nmdecor$sym"};
+ &::mov($dst,&::DWP("$indirect-$reflabel",$base));
+ }
else
{ &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
- $base));
+ $base));
&::mov($dst,&::DWP("$sym\@GOT",$dst));
}
}
@@ -212,6 +230,30 @@ sub ::picmeup
{ &::lea($dst,&::DWP($sym)); }
}
+sub ::picadjust
+{ my($sym,$base)=@_;
+ my $pic=(($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx);
+
+ # Rebase a code_sym table entry held in $sym using $base from picsetup; non-PIC builds emit nothing.
+ if ($pic)
+ { my $gotexpr="_GLOBAL_OFFSET_TABLE_+[.-".&::label("PIC_setup")."]";
+ &::lea($sym,&::DWP($gotexpr,$base,$sym));
+ }
+}
+
+# code_sym($sym)
+# - returns the assembler expression to store in a table of code
+# pointers for label $sym.
+sub ::code_sym
+{ my($sym)=@_;
+ my $pic=(($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx);
+
+ # PIC: the entry is the @GOTOFF form of the symbol and has to go
+ # through picadjust before it is used as a jump target.
+ # Non-PIC: the bare symbol is stored and used as is.
+ return $pic ? $sym."\@GOTOFF" : $sym;
+}
+
sub ::initseg
{ my $f=$nmdecor.shift;
@@ -264,4 +306,10 @@ ___
sub ::dataseg
{ push(@out,".data\n"); }
+sub ::rodataseg	# switch assembler output to the read-only data section
+{ push(@out,".section\t.rodata\n"); }	# spelled out: a bare ".rodata" is not a directive GNU as recognises
+
+sub ::previous	# emit .previous, which makes the assembler swap back to the section selected before the last switch
+{ push @out, ".previous\n"; }
+
1;
diff --git a/lib/libcrypto/rc4/asm/rc4-586.pl b/lib/libcrypto/rc4/asm/rc4-586.pl
index f3c3e117bc3..4991c37c2c1 100644
--- a/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -188,7 +188,8 @@ if ($alt=0) {
&mov (&wparam(3),$out); # $out as accumulator in these loops
&jz (&label("go4loop4"));
- &picmeup($out,"OPENSSL_ia32cap_P");
+ &picsetup($out);
+ &picsymbol($out, "OPENSSL_ia32cap_P", $out);
# check SSE2 bit [could have been MMX]
&bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("go4loop4"));
@@ -305,7 +306,9 @@ $idx="edx";
&mov ($out,&wparam(0)); # load key
&mov ($idi,&wparam(1)); # load len
&mov ($inp,&wparam(2)); # load data
- &picmeup($idx,"OPENSSL_ia32cap_P");
+
+ &picsetup($idx);
+ &picsymbol($idx, "OPENSSL_ia32cap_P", $idx);
&lea ($out,&DWP(2*4,$out)); # &key->data
&lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end
@@ -382,12 +385,12 @@ $idx="edx";
&function_end("RC4_set_key");
# const char *RC4_options(void);
+&static_label("opts");
&function_begin_B("RC4_options");
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
- &picmeup("edx","OPENSSL_ia32cap_P");
+ &picsetup("edx");	# edx = PIC base
+ &picsymbol("eax", &label("opts"), "edx");	# eax = address of the option strings
+ &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");	# must come last: it overwrites the base register edx
+
&mov ("edx",&DWP(0,"edx"));
&bt ("edx","\$IA32CAP_BIT0_INTELP4");
&jc (&label("1xchar"));
@@ -399,13 +402,14 @@ $idx="edx";
&add ("eax",12);
&set_label("ret");
&ret ();
-&set_label("opts",64);
+&function_end_B("RC4_options");
+
+ &rodataseg();
+&set_label("opts");
&asciz ("rc4(4x,int)");
&asciz ("rc4(1x,char)");
&asciz ("rc4(8x,mmx)");
-&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>");
-&align (64);
-&function_end_B("RC4_options");
+ &previous();
&asm_finish();
diff --git a/lib/libcrypto/sha/asm/sha1-586.pl b/lib/libcrypto/sha/asm/sha1-586.pl
index 1de5e2650e1..5928e083c1f 100644
--- a/lib/libcrypto/sha/asm/sha1-586.pl
+++ b/lib/libcrypto/sha/asm/sha1-586.pl
@@ -295,11 +295,9 @@ if ($xmm) {
&static_label("avx_shortcut") if ($ymm);
&static_label("K_XX_XX");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
&mov ($A,&DWP(0,$T));
&mov ($D,&DWP(4,$T));
@@ -419,10 +417,9 @@ my $_rol=sub { &rol(@_) };
my $_ror=sub { &ror(@_) };
&function_begin("_sha1_block_data_order_ssse3");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
&set_label("ssse3_shortcut");
&movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
@@ -861,10 +858,9 @@ my $_rol=sub { &shld(@_[0],@_) };
my $_ror=sub { &shrd(@_[0],@_) };
&function_begin("_sha1_block_data_order_avx");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
&set_label("avx_shortcut");
&vzeroall();
@@ -1213,13 +1209,15 @@ sub Xtail_avx()
&mov (&DWP(16,@T[1]),$E);
&function_end("_sha1_block_data_order_avx");
}
+
+ &rodataseg();
&set_label("K_XX_XX",64);
&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
&data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39
&data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59
&data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79
&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask
+ &previous();
}
-&asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/lib/libcrypto/sha/asm/sha256-586.pl b/lib/libcrypto/sha/asm/sha256-586.pl
index ecc8b69c75d..2b05c960634 100644
--- a/lib/libcrypto/sha/asm/sha256-586.pl
+++ b/lib/libcrypto/sha/asm/sha256-586.pl
@@ -14,8 +14,8 @@
# Pentium PIII P4 AMD K8 Core2
# gcc 46 36 41 27 26
# icc 57 33 38 25 23
-# x86 asm 40 30 35 20 20
-# x86_64 asm(*) - - 21 15.8 16.5
+# x86 asm 40 30 33 20 18
+# x86_64 asm(*) - - 21 16 16
#
# (*) x86_64 assembler performance is presented for reference
# purposes.
@@ -48,20 +48,19 @@ sub BODY_00_15() {
my $in_16_63=shift;
&mov ("ecx",$E);
- &add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7]
- &ror ("ecx",6);
- &mov ("edi",$E);
- &ror ("edi",11);
+ &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2])
+ &ror ("ecx",25-11);
&mov ("esi",$Foff);
- &xor ("ecx","edi");
- &ror ("edi",25-11);
+ &xor ("ecx",$E);
+ &ror ("ecx",11-6);
&mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
- &xor ("ecx","edi"); # Sigma1(e)
+ &xor ("ecx",$E);
+ &ror ("ecx",6); # Sigma1(e)
&mov ("edi",$Goff);
&add ($T,"ecx"); # T += Sigma1(e)
- &mov ($Eoff,$E); # modulo-scheduled
&xor ("esi","edi");
+ &mov ($Eoff,$E); # modulo-scheduled
&mov ("ecx",$A);
&and ("esi",$E);
&mov ($E,$Doff); # e becomes d, which is e in next iteration
@@ -69,14 +68,14 @@ sub BODY_00_15() {
&mov ("edi",$A);
&add ($T,"esi"); # T += Ch(e,f,g)
- &ror ("ecx",2);
+ &ror ("ecx",22-13);
&add ($T,$Hoff); # T += h
- &ror ("edi",13);
+ &xor ("ecx",$A);
+ &ror ("ecx",13-2);
&mov ("esi",$Boff);
- &xor ("ecx","edi");
- &ror ("edi",22-13);
+ &xor ("ecx",$A);
+ &ror ("ecx",2); # Sigma0(a)
&add ($E,$T); # d += T
- &xor ("ecx","edi"); # Sigma0(a)
&mov ("edi",$Coff);
&add ($T,"ecx"); # T += Sigma0(a)
@@ -97,16 +96,15 @@ sub BODY_00_15() {
&add ($A,"esi"); # h += K256[i]
}
+&static_label("K256");
&function_begin("sha256_block_data_order");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K256);
- &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));
+ &picsetup($K256);
+ &picsymbol($K256, &label("K256"), $K256);
&sub ("esp",16);
&and ("esp",-64);
@@ -168,23 +166,22 @@ sub BODY_00_15() {
&set_label("16_63",16);
&mov ("esi",$T);
&mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
- &shr ($T,3);
- &ror ("esi",7);
- &xor ($T,"esi");
&ror ("esi",18-7);
&mov ("edi","ecx");
- &xor ($T,"esi"); # T = sigma0(X[-15])
+ &xor ("esi",$T);
+ &ror ("esi",7);
+ &shr ($T,3);
- &shr ("ecx",10);
- &mov ("esi",&DWP(4*(8+15+16),"esp"));
- &ror ("edi",17);
- &xor ("ecx","edi");
&ror ("edi",19-17);
- &add ($T,"esi"); # T += X[-16]
- &xor ("edi","ecx") # sigma1(X[-2])
+ &xor ($T,"esi"); # T = sigma0(X[-15])
+ &xor ("edi","ecx");
+ &ror ("edi",17);
+ &shr ("ecx",10);
+ &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16]
+ &xor ("edi","ecx"); # sigma1(X[-2])
- &add ($T,"edi"); # T += sigma1(X[-2])
- # &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1)
+ &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
+ # &add ($T,"edi"); # T += sigma1(X[-2])
# &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
&BODY_00_15(1);
@@ -227,8 +224,10 @@ sub BODY_00_15() {
&mov ("esp",&DWP(12,"esp")); # restore sp
&function_end_A();
+&function_end_B("sha256_block_data_order");
-&set_label("K256",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("K256",64);
&data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5);
&data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5);
&data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3);
@@ -245,7 +244,6 @@ sub BODY_00_15() {
&data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3);
&data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208);
&data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2);
-&function_end_B("sha256_block_data_order");
-&asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+ &previous();
&asm_finish();
diff --git a/lib/libcrypto/sha/asm/sha512-586.pl b/lib/libcrypto/sha/asm/sha512-586.pl
index 163361ebe9d..c1d0684e92b 100644
--- a/lib/libcrypto/sha/asm/sha512-586.pl
+++ b/lib/libcrypto/sha/asm/sha512-586.pl
@@ -261,16 +261,18 @@ sub BODY_00_15_x86 {
}
+&static_label("K512");
&function_begin("sha512_block_data_order");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K512);
- &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
+ &picsetup($K512);
+if ($sse2) {
+ &picsymbol("edx", "OPENSSL_ia32cap_P", $K512);
+}
+ &picsymbol($K512, &label("K512"), $K512);
&sub ("esp",16);
&and ("esp",-64);
@@ -283,7 +285,6 @@ sub BODY_00_15_x86 {
&mov (&DWP(12,"esp"),"ebx"); # saved sp
if ($sse2) {
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
&bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("loop_x86"));
@@ -556,8 +557,10 @@ if ($sse2) {
&mov ("esp",&DWP(12,"esp")); # restore sp
&function_end_A();
+&function_end_B("sha512_block_data_order");
-&set_label("K512",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("K512",64);
&data_word(0xd728ae22,0x428a2f98); # u64
&data_word(0x23ef65cd,0x71374491); # u64
&data_word(0xec4d3b2f,0xb5c0fbcf); # u64
@@ -638,7 +641,6 @@ if ($sse2) {
&data_word(0xfc657e2a,0x597f299c); # u64
&data_word(0x3ad6faec,0x5fcb6fab); # u64
&data_word(0x4a475817,0x6c44198c); # u64
-&function_end_B("sha512_block_data_order");
-&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+ &previous();
&asm_finish();
diff --git a/lib/libcrypto/whrlpool/asm/wp-mmx.pl b/lib/libcrypto/whrlpool/asm/wp-mmx.pl
index 0ff8e5b6121..a54d702c3fc 100644
--- a/lib/libcrypto/whrlpool/asm/wp-mmx.pl
+++ b/lib/libcrypto/whrlpool/asm/wp-mmx.pl
@@ -77,6 +77,8 @@ sub row()
$tbl="ebp";
@mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7");
+&static_label("table");
+
&function_begin_B("whirlpool_block_mmx");
&push ("ebp");
&push ("ebx");
@@ -97,10 +99,8 @@ $tbl="ebp";
&mov (&DWP(8,"ebx"),"ebp");
&mov (&DWP(16,"ebx"),"eax"); # saved stack pointer
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($tbl, &label("table"), $tbl);
&xor ("ecx","ecx");
&xor ("edx","edx");
@@ -218,7 +218,9 @@ for($i=0;$i<8;$i++) {
&pop ("ebx");
&pop ("ebp");
&ret ();
+&function_end_B("whirlpool_block_mmx");
+ &rodataseg();
&align(64);
&set_label("table");
&LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8);
@@ -488,6 +490,6 @@ for($i=0;$i<8;$i++) {
&L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8);
&L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e);
&L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33);
+ &previous();
-&function_end_B("whirlpool_block_mmx");
&asm_finish();