summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiod Vallat <miod@cvs.openbsd.org>2023-02-01 20:45:05 +0000
committerMiod Vallat <miod@cvs.openbsd.org>2023-02-01 20:45:05 +0000
commit5f59efbdf609ac302476ee5a23a3557285be5b19 (patch)
treef3f554845a427741edd72210976671586d424801
parent461a7552b676e4b3aa94480b628e8a3776759fff (diff)
Move all data blocks from .text to .rodata, and clean up and homogenize the code
responsible for getting the proper address of those blocks. ok tb@ jsing@
-rw-r--r--lib/libcrypto/aes/asm/aes-586.pl45
-rw-r--r--lib/libcrypto/aes/asm/aesni-x86.pl1
-rw-r--r--lib/libcrypto/aes/asm/vpaes-x86.pl34
-rw-r--r--lib/libcrypto/bn/asm/bn-586.pl9
-rw-r--r--lib/libcrypto/bn/asm/x86-gf2m.pl5
-rwxr-xr-xlib/libcrypto/bn/asm/x86-mont.pl5
-rw-r--r--lib/libcrypto/camellia/asm/cmll-x86.pl40
-rw-r--r--lib/libcrypto/des/asm/des-586.pl9
-rw-r--r--lib/libcrypto/modes/asm/ghash-x86.pl66
-rw-r--r--lib/libcrypto/perlasm/cbc.pl82
-rw-r--r--lib/libcrypto/perlasm/x86gas.pl92
-rw-r--r--lib/libcrypto/rc4/asm/rc4-586.pl26
-rw-r--r--lib/libcrypto/sha/asm/sha1-586.pl26
-rw-r--r--lib/libcrypto/sha/asm/sha256-586.pl66
-rw-r--r--lib/libcrypto/sha/asm/sha512-586.pl18
-rw-r--r--lib/libcrypto/whrlpool/asm/wp-mmx.pl12
16 files changed, 273 insertions, 263 deletions
diff --git a/lib/libcrypto/aes/asm/aes-586.pl b/lib/libcrypto/aes/asm/aes-586.pl
index c5ae3f6903e..4e0f34cba31 100644
--- a/lib/libcrypto/aes/asm/aes-586.pl
+++ b/lib/libcrypto/aes/asm/aes-586.pl
@@ -950,8 +950,10 @@ sub enclast()
&xor ($s3,&DWP(12,$key));
&ret ();
+&function_end_B("_x86_AES_encrypt");
-&set_label("AES_Te",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("AES_Te",64);
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
&_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
@@ -1154,7 +1156,7 @@ sub enclast()
&data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
&data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000);
&data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000);
-&function_end_B("_x86_AES_encrypt");
+ &previous();
# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
&function_begin("AES_encrypt");
@@ -1174,11 +1176,9 @@ sub enclast()
&add ("esp",4); # 4 is reserved for caller's return address
&mov ($_esp,$s0); # save stack pointer
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
# pick Te4 copy which can't "overlap" with stack frame or key schedule
&lea ($s1,&DWP(768-4,"esp"));
@@ -1744,8 +1744,10 @@ sub declast()
&xor ($s3,&DWP(12,$key));
&ret ();
+&function_end_B("_x86_AES_decrypt");
-&set_label("AES_Td",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("AES_Td",64);
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
&_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
@@ -1943,7 +1945,7 @@ sub declast()
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-&function_end_B("_x86_AES_decrypt");
+ &previous();
# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
&function_begin("AES_decrypt");
@@ -1963,11 +1965,9 @@ sub declast()
&add ("esp",4); # 4 is reserved for caller's return address
&mov ($_esp,$s0); # save stack pointer
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
- &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Td"), $tbl);
# pick Td4 copy which can't "overlap" with stack frame or key schedule
&lea ($s1,&DWP(768-4,"esp"));
@@ -2034,13 +2034,10 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
&cmp ($s2,0);
&je (&label("drop_out"));
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
-
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
&cmp (&wparam(5),0);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
&jne (&label("picked_te"));
&lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl));
&set_label("picked_te");
@@ -2659,10 +2656,9 @@ sub enckey()
&test ("edi",-1);
&jz (&label("badpointer"));
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
+
&lea ($tbl,&DWP(2048+128,$tbl));
# prefetch Te4
@@ -2975,6 +2971,5 @@ sub deckey()
&xor ("eax","eax"); # return success
&function_end("AES_set_decrypt_key");
-&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/lib/libcrypto/aes/asm/aesni-x86.pl b/lib/libcrypto/aes/asm/aesni-x86.pl
index 8c1d0b5bed2..ff444156113 100644
--- a/lib/libcrypto/aes/asm/aesni-x86.pl
+++ b/lib/libcrypto/aes/asm/aesni-x86.pl
@@ -2184,6 +2184,5 @@ if ($PREFIX eq "aesni") {
&set_label("dec_key_ret");
&ret ();
&function_end_B("${PREFIX}_set_decrypt_key");
-&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/lib/libcrypto/aes/asm/vpaes-x86.pl b/lib/libcrypto/aes/asm/vpaes-x86.pl
index 1533e2c3042..38cef61733c 100644
--- a/lib/libcrypto/aes/asm/vpaes-x86.pl
+++ b/lib/libcrypto/aes/asm/vpaes-x86.pl
@@ -57,6 +57,7 @@ $PREFIX="vpaes";
my ($round, $base, $magic, $key, $const, $inp, $out)=
("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
+ &rodataseg();
&static_label("_vpaes_consts");
&static_label("_vpaes_schedule_low_round");
@@ -153,8 +154,7 @@ $k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
$k_dsbo=0x2c0; # decryption sbox final output
&data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
-&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
-&align (64);
+ &previous();
&function_begin_B("_vpaes_preheat");
&add ($const,&DWP(0,"esp"));
@@ -762,9 +762,11 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov ($magic,0x30);
&mov ($out,0);
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const)); # $const = _vpaes_consts+0x30; the ';' keeps the next '&call' from parsing as a bitwise AND
+
&call ("_vpaes_schedule_core");
-&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
@@ -792,18 +794,22 @@ $k_dsbo=0x2c0; # decryption sbox final output
&and ($magic,32);
&xor ($magic,32); # nbist==192?0:32;
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const)); # $const = _vpaes_consts+0x30; the ';' keeps the next '&call' from parsing as a bitwise AND
+
&call ("_vpaes_schedule_core");
-&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
&function_end("${PREFIX}_set_decrypt_key");
&function_begin("${PREFIX}_encrypt");
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const)); # $const = _vpaes_consts+0x30; the ';' keeps the next '&call' from parsing as a bitwise AND
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
@@ -820,9 +826,11 @@ $k_dsbo=0x2c0; # decryption sbox final output
&function_end("${PREFIX}_encrypt");
&function_begin("${PREFIX}_decrypt");
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const)); # $const = _vpaes_consts+0x30; the ';' keeps the next '&call' from parsing as a bitwise AND
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
@@ -859,9 +867,11 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov (&DWP(8,"esp"),$const); # save ivp
&mov ($out,$round); # $out works as $len
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ &lea ($const,&DWP(0x30,$const)); # $const = _vpaes_consts+0x30; the ';' keeps the next '&call' from parsing as a bitwise AND
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&cmp ($magic,0);
&je (&label("cbc_dec_loop"));
&jmp (&label("cbc_enc_loop"));
diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl
index c4e2baa6c5a..b502fe60ee2 100644
--- a/lib/libcrypto/bn/asm/bn-586.pl
+++ b/lib/libcrypto/bn/asm/bn-586.pl
@@ -32,7 +32,8 @@ sub bn_mul_add_words
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("maw_non_sse2"));
@@ -218,7 +219,8 @@ sub bn_mul_words
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("mw_non_sse2"));
@@ -329,7 +331,8 @@ sub bn_sqr_words
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("sqr_non_sse2"));
diff --git a/lib/libcrypto/bn/asm/x86-gf2m.pl b/lib/libcrypto/bn/asm/x86-gf2m.pl
index 9715b2158fd..cb2f2a5c305 100644
--- a/lib/libcrypto/bn/asm/x86-gf2m.pl
+++ b/lib/libcrypto/bn/asm/x86-gf2m.pl
@@ -200,7 +200,8 @@ $R="mm0";
# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
&function_begin_B("bn_GF2m_mul_2x2");
if (!$x86only) {
- &picmeup("edx","OPENSSL_ia32cap_P");
+ &picsetup("edx");
+ &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");
&mov ("eax",&DWP(0,"edx"));
&mov ("edx",&DWP(4,"edx"));
&test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit
@@ -308,6 +309,4 @@ if ($sse2) {
&ret ();
&function_end_B("bn_GF2m_mul_2x2");
-&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
&asm_finish();
diff --git a/lib/libcrypto/bn/asm/x86-mont.pl b/lib/libcrypto/bn/asm/x86-mont.pl
index e6c04739b1d..65246517485 100755
--- a/lib/libcrypto/bn/asm/x86-mont.pl
+++ b/lib/libcrypto/bn/asm/x86-mont.pl
@@ -113,7 +113,8 @@ $mul1="mm5";
$temp="mm6";
$mask="mm7";
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("non_sse2"));
@@ -588,6 +589,4 @@ $sbit=$num;
&set_label("just_leave");
&function_end("bn_mul_mont");
-&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
&asm_finish();
diff --git a/lib/libcrypto/camellia/asm/cmll-x86.pl b/lib/libcrypto/camellia/asm/cmll-x86.pl
index 027302ac869..a4ab11e54d6 100644
--- a/lib/libcrypto/camellia/asm/cmll-x86.pl
+++ b/lib/libcrypto/camellia/asm/cmll-x86.pl
@@ -141,10 +141,8 @@ my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4];
&mov ($_esp,"ebx"); # save %esp
&mov ($_end,"eax"); # save keyEnd
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load plaintext
&mov (@T[1],&DWP(4,$idx));
@@ -206,10 +204,8 @@ if ($OPENSSL) {
&mov ($_esp,"ebx"); # save %esp
&mov ($_end,"eax"); # save keyEnd
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load plaintext
&mov (@T[1],&DWP(4,$idx));
@@ -316,10 +312,8 @@ if ($OPENSSL) {
&lea ($key,&DWP(0,$key,"eax"));
&mov (&DWP(5*4,"esp"),"ebx");# save %esp
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load ciphertext
&mov (@T[1],&DWP(4,$idx));
@@ -381,10 +375,8 @@ if ($OPENSSL) {
&lea ($key,&DWP(0,$key,"eax"));
&mov (&DWP(5*4,"esp"),"ebx");# save %esp
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load ciphertext
&mov (@T[1],&DWP(4,$idx));
@@ -594,10 +586,8 @@ my $bias=int(@T[0])?shift(@T):0;
&xor (@T[3],&DWP(1*8+4,$key));
&set_label("1st128",4);
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl));
&mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0]
@@ -786,6 +776,7 @@ sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<<
sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; }
sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; }
+ &rodataseg();
&set_label("Camellia_SIGMA",64);
&data_word(
0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2,
@@ -796,6 +787,7 @@ sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<
# tables are interleaved, remember?
for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }
+ &previous();
# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out,
# size_t length, const CAMELLIA_KEY *key,
@@ -856,10 +848,8 @@ my ($s0,$s1,$s2,$s3) = @T;
&mov ($_key,$s3); # save copy of key
&mov ($_ivp,$Tbl); # save copy of ivp
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov ($idx,32);
&set_label("prefetch_sbox",4);
@@ -1133,6 +1123,4 @@ my ($s0,$s1,$s2,$s3) = @T;
&function_end("Camellia_cbc_encrypt");
}
-&asciz("Camellia for x86 by <appro\@openssl.org>");
-
&asm_finish();
diff --git a/lib/libcrypto/des/asm/des-586.pl b/lib/libcrypto/des/asm/des-586.pl
index 5b5f39cebd1..e11b2ef80ff 100644
--- a/lib/libcrypto/des/asm/des-586.pl
+++ b/lib/libcrypto/des/asm/des-586.pl
@@ -154,11 +154,8 @@ sub DES_encrypt
&rotl($L,3);
}
- # PIC-ification:-)
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($trans);
- &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
+ &picsetup($trans);
+ &picsymbol($trans, &label("DES_SPtrans"), $trans);
&mov( "ecx", &wparam(1) );
@@ -314,6 +311,7 @@ sub FP_new
sub DES_SPtrans
{
+ &rodataseg();
&set_label("DES_SPtrans",64);
&data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
&data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
@@ -450,4 +448,5 @@ sub DES_SPtrans
&data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000);
&data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000);
&data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080);
+ &previous();
}
diff --git a/lib/libcrypto/modes/asm/ghash-x86.pl b/lib/libcrypto/modes/asm/ghash-x86.pl
index 27492597adb..5e868a43ff2 100644
--- a/lib/libcrypto/modes/asm/ghash-x86.pl
+++ b/lib/libcrypto/modes/asm/ghash-x86.pl
@@ -411,10 +411,8 @@ $S=12; # shift factor for rem_4bit
&mov ($inp,&wparam(0)); # load Xi
&mov ($Htbl,&wparam(1)); # load Htable
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&movz ($Zll,&BP(15,$inp));
@@ -436,10 +434,8 @@ $S=12; # shift factor for rem_4bit
&mov ($inp,&wparam(2)); # load in
&mov ($Zlh,&wparam(3)); # load len
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&add ($Zlh,$inp);
&mov (&wparam(3),$Zlh); # len to point at the end of input
@@ -584,10 +580,8 @@ sub mmx_loop() {
&mov ($inp,&wparam(0)); # load Xi
&mov ($Htbl,&wparam(1)); # load Htable
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&movz ($Zll,&BP(15,$inp));
@@ -618,10 +612,9 @@ sub mmx_loop() {
&mov ("ecx",&wparam(2)); # inp
&mov ("edx",&wparam(3)); # len
&mov ("ebp","esp"); # original %esp
- &call (&label("pic_point"));
- &set_label ("pic_point");
- &blindpop ($rem_8bit);
- &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit));
+
+ &picsetup($rem_8bit);
+ &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit);
&sub ("esp",512+16+16); # allocate stack frame...
&and ("esp",-64); # ...and align it
@@ -910,10 +903,8 @@ my ($Xhi,$Xi) = @_;
&mov ($Htbl,&wparam(0));
&mov ($Xip,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Hkey,&QWP(0,$Xip));
&pshufd ($Hkey,$Hkey,0b01001110);# dword swap
@@ -947,10 +938,8 @@ my ($Xhi,$Xi) = @_;
&mov ($Xip,&wparam(0));
&mov ($Htbl,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
@@ -972,10 +961,8 @@ my ($Xhi,$Xi) = @_;
&mov ($inp,&wparam(2));
&mov ($len,&wparam(3));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
@@ -1138,10 +1125,8 @@ my ($Xhi,$Xi)=@_;
&mov ($Htbl,&wparam(0));
&mov ($Xip,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Hkey,&QWP(0,$Xip));
&pshufd ($Hkey,$Hkey,0b01001110);# dword swap
@@ -1161,10 +1146,8 @@ my ($Xhi,$Xi)=@_;
&mov ($Xip,&wparam(0));
&mov ($Htbl,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($Xn,&QWP(0,$const));
@@ -1186,10 +1169,8 @@ my ($Xhi,$Xi)=@_;
&mov ($inp,&wparam(2));
&mov ($len,&wparam(3));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
@@ -1270,11 +1251,14 @@ my ($Xhi,$Xi)=@_;
}
+ &rodataseg();
&set_label("bswap",64);
&data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
&data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial
+ &previous();
}} # $sse2
+ &rodataseg();
&set_label("rem_4bit",64);
&data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
&data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
@@ -1313,9 +1297,9 @@ my ($Xhi,$Xi)=@_;
&data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E);
&data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE);
&data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE);
+ &previous();
}}} # !$x86only
-&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
# A question was risen about choice of vanilla MMX. Or rather why wasn't
diff --git a/lib/libcrypto/perlasm/cbc.pl b/lib/libcrypto/perlasm/cbc.pl
index 24561e759ab..392f23e145e 100644
--- a/lib/libcrypto/perlasm/cbc.pl
+++ b/lib/libcrypto/perlasm/cbc.pl
@@ -34,6 +34,15 @@ sub cbc
# p1,p2,p3 are the offsets for parameters to be passed to the
# underlying calls.
+&static_label("cbc_enc_jmp_table_".$name);
+&static_label("ej1_".$name);
+&static_label("ej2_".$name);
+&static_label("ej3_".$name);
+&static_label("ej4_".$name);
+&static_label("ej5_".$name);
+&static_label("ej6_".$name);
+&static_label("ej7_".$name);
+
&function_begin_B($name,"");
&comment("");
@@ -146,33 +155,32 @@ sub cbc
&mov($count, &wparam(2)); # length
&and($count, 7);
&jz(&label("finish"));
- &call(&label("PIC_point"));
-&set_label("PIC_point");
- &blindpop("edx");
- &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
+
+ &picsetup("edx");
+ &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx"); # ecx = address of the jump table; ';' added so the next '&mov' is its own statement
&mov($count,&DWP(0,"ecx",$count,4));
- &add($count,"edx");
+ &picadjust($count, "edx"); # turn the code_sym table entry into a usable code address
+
&xor("ecx","ecx");
&xor("edx","edx");
- #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
&jmp_ptr($count);
-&set_label("ej7");
+&set_label("ej7_".$name);
&movb(&HB("edx"), &BP(6,$in,"",0));
&shl("edx",8);
-&set_label("ej6");
+&set_label("ej6_".$name);
&movb(&HB("edx"), &BP(5,$in,"",0));
-&set_label("ej5");
+&set_label("ej5_".$name);
&movb(&LB("edx"), &BP(4,$in,"",0));
-&set_label("ej4");
+&set_label("ej4_".$name);
&mov("ecx", &DWP(0,$in,"",0));
&jmp(&label("ejend"));
-&set_label("ej3");
+&set_label("ej3_".$name);
&movb(&HB("ecx"), &BP(2,$in,"",0));
&shl("ecx",8);
-&set_label("ej2");
+&set_label("ej2_".$name);
&movb(&HB("ecx"), &BP(1,$in,"",0));
-&set_label("ej1");
+&set_label("ej1_".$name);
&movb(&LB("ecx"), &BP(0,$in,"",0));
&set_label("ejend");
@@ -279,30 +287,14 @@ sub cbc
&mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
&mov("ebx", &DWP(4,$in,"",0)); # next iv actually
-&set_label("dj7");
&rotr("edx", 16);
&movb(&BP(6,$out,"",0), &LB("edx"));
&shr("edx",16);
-&set_label("dj6");
&movb(&BP(5,$out,"",0), &HB("edx"));
-&set_label("dj5");
&movb(&BP(4,$out,"",0), &LB("edx"));
-&set_label("dj4");
&mov(&DWP(0,$out,"",0), "ecx");
- &jmp(&label("djend"));
-&set_label("dj3");
- &rotr("ecx", 16);
- &movb(&BP(2,$out,"",0), &LB("ecx"));
- &shl("ecx",16);
-&set_label("dj2");
- &movb(&BP(1,$in,"",0), &HB("ecx"));
-&set_label("dj1");
- &movb(&BP(0,$in,"",0), &LB("ecx"));
-&set_label("djend");
# final iv is still in eax:ebx
- &jmp(&label("finish"));
-
############################ FINISH #######################3
&set_label("finish",1);
@@ -319,31 +311,21 @@ sub cbc
&mov(&DWP(4,"ecx","",0), "ebx"); # save iv
&function_end_A($name);
+ &function_end_B($name);
+ &rodataseg();
&align(64);
- &set_label("cbc_enc_jmp_table");
+ &set_label("cbc_enc_jmp_table_".$name);
&data_word("0");
- &data_word(&label("ej1")."-".&label("PIC_point"));
- &data_word(&label("ej2")."-".&label("PIC_point"));
- &data_word(&label("ej3")."-".&label("PIC_point"));
- &data_word(&label("ej4")."-".&label("PIC_point"));
- &data_word(&label("ej5")."-".&label("PIC_point"));
- &data_word(&label("ej6")."-".&label("PIC_point"));
- &data_word(&label("ej7")."-".&label("PIC_point"));
- # not used
- #&set_label("cbc_dec_jmp_table",1);
- #&data_word("0");
- #&data_word(&label("dj1")."-".&label("PIC_point"));
- #&data_word(&label("dj2")."-".&label("PIC_point"));
- #&data_word(&label("dj3")."-".&label("PIC_point"));
- #&data_word(&label("dj4")."-".&label("PIC_point"));
- #&data_word(&label("dj5")."-".&label("PIC_point"));
- #&data_word(&label("dj6")."-".&label("PIC_point"));
- #&data_word(&label("dj7")."-".&label("PIC_point"));
- &align(64);
+ &data_word(&code_sym(&label("ej1_".$name)));
+ &data_word(&code_sym(&label("ej2_".$name)));
+ &data_word(&code_sym(&label("ej3_".$name)));
+ &data_word(&code_sym(&label("ej4_".$name)));
+ &data_word(&code_sym(&label("ej5_".$name)));
+ &data_word(&code_sym(&label("ej6_".$name)));
+ &data_word(&code_sym(&label("ej7_".$name)));
+ &previous();
- &function_end_B($name);
-
}
1;
diff --git a/lib/libcrypto/perlasm/x86gas.pl b/lib/libcrypto/perlasm/x86gas.pl
index ca644ba5534..f28a590549b 100644
--- a/lib/libcrypto/perlasm/x86gas.pl
+++ b/lib/libcrypto/perlasm/x86gas.pl
@@ -177,34 +177,52 @@ sub ::align
push(@out,".align\t$val\n");
}
-sub ::picmeup
-{ my($dst,$sym,$base,$reflabel)=@_;
-
- if ($::openbsd)
- { &::emitraw("#if defined(PIC) || defined(__PIC__)");
- &::emitraw("PIC_PROLOGUE");
- &::mov($dst, &::DWP("PIC_GOT($sym)"));
- &::emitraw("PIC_EPILOGUE");
- &::emitraw("#else /* PIC */");
- &::lea($dst,&::DWP($sym));
- &::emitraw("#endif /* PIC */");
- }
- elsif (($::pic && ($::elf || $::aout)) || $::macosx)
- { if (!defined($base))
- { &::call(&::label("PIC_me_up"));
- &::set_label("PIC_me_up");
- &::blindpop($dst);
- $base=$dst;
- $reflabel=&::label("PIC_me_up");
- }
+#
+# PIC data access wrappers
+#
+# Usage:
+# picsetup($base)
+# - only allowed once per function (because of hardcoded label name),
+# sets up pic access, uses $base register as temporary
+# picsymbol($dst, $sym, $base)
+# - loads the address of symbol $sym into $dst with the help of $base
+# initialized by picsetup
+# picadjust($sym, $base)
+# - adjusts a code pointer read from a code_sym table with the help of
+# $base initialized by picsetup
+# code_sym($sym)
+# - emits a pointer to the given code symbol, relative to the GOT if
+# PIC. This pointer will need to be adjusted with picadjust above
+# before use.
+
+sub ::picsetup
+{ my($base)=@_;
+
+ if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+ {
+ &::call(&::label("PIC_setup"));
+ &::set_label("PIC_setup");
+ &::blindpop($base);
+ # call/pop pair: $base is now the reference point (PIC_setup)
+ # that picsymbol and picadjust compute their offsets from.
- &::mov($dst,&::DWP("$indirect-$reflabel",$base));
+ # No symbol name is passed to picsetup, so no per-symbol
+ # (non-lazy pointer) bookkeeping can be done at this point.
+ }
+}
+
+sub ::picsymbol
+{ my($dst,$sym,$base)=@_;
+
+ if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+ {
+ my $reflabel=&::label("PIC_setup");
+ if ($::macosx) # load $sym's address through its non-lazy pointer, registering the pointer on first use
+ { my $indirect=($non_lazy_ptr{"$nmdecor$sym"}||=&::static_label("$nmdecor$sym\$non_lazy_ptr"));
+ &::mov($dst,&::DWP("$indirect-$reflabel",$base));
+ }
else
{ &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
- $base));
+ $base));
&::mov($dst,&::DWP("$sym\@GOT",$dst));
}
}
@@ -212,6 +230,30 @@ sub ::picmeup
{ &::lea($dst,&::DWP($sym)); }
}
+# Rebase, in place, the code_sym table entry held in register $sym using
+# the $base register initialized by picsetup.  Emits nothing if not PIC.
+sub ::picadjust
+{ my($sym,$base)=@_;
+ my $pic=($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx;
+ if ($pic)
+ { my $got="_GLOBAL_OFFSET_TABLE_+[.-".&::label("PIC_setup")."]";
+ &::lea($sym,&::DWP($got,$base,$sym));
+ }
+}
+
+# Spell code symbol $sym the way an entry of a code pointer table needs
+# it: with a @GOTOFF suffix when generating PIC, verbatim otherwise.
+# A PIC entry has to be rebased with picadjust before it is used.
+sub ::code_sym
+{ my($sym)=@_;
+ my $pic=($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx;
+
+ if ($pic)
+ { return $sym."\@GOTOFF"; }
+
+ return $sym;
+}
+
sub ::initseg
{ my $f=$nmdecor.shift;
@@ -264,4 +306,10 @@ ___
sub ::dataseg
{ push(@out,".data\n"); }
+sub ::rodataseg # switch output to read-only data; spelled via .section because a bare .rodata directive is not accepted by every assembler (NOTE(review): confirm for the toolchains in use)
+{ push(@out,".section\t.rodata\n"); }
+
+sub ::previous # emits .previous; callers pair it with rodataseg, presumably to return to the section that was active before -- confirm against the assembler manual
+{ push(@out,".previous\n"); }
+
1;
diff --git a/lib/libcrypto/rc4/asm/rc4-586.pl b/lib/libcrypto/rc4/asm/rc4-586.pl
index f3c3e117bc3..4991c37c2c1 100644
--- a/lib/libcrypto/rc4/asm/rc4-586.pl
+++ b/lib/libcrypto/rc4/asm/rc4-586.pl
@@ -188,7 +188,8 @@ if ($alt=0) {
&mov (&wparam(3),$out); # $out as accumulator in these loops
&jz (&label("go4loop4"));
- &picmeup($out,"OPENSSL_ia32cap_P");
+ &picsetup($out);
+ &picsymbol($out, "OPENSSL_ia32cap_P", $out);
# check SSE2 bit [could have been MMX]
&bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("go4loop4"));
@@ -305,7 +306,9 @@ $idx="edx";
&mov ($out,&wparam(0)); # load key
&mov ($idi,&wparam(1)); # load len
&mov ($inp,&wparam(2)); # load data
- &picmeup($idx,"OPENSSL_ia32cap_P");
+
+ &picsetup($idx);
+ &picsymbol($idx, "OPENSSL_ia32cap_P", $idx);
&lea ($out,&DWP(2*4,$out)); # &key->data
&lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end
@@ -382,12 +385,12 @@ $idx="edx";
&function_end("RC4_set_key");
# const char *RC4_options(void);
+&static_label("opts");
&function_begin_B("RC4_options");
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
- &picmeup("edx","OPENSSL_ia32cap_P");
+ &picsetup("edx");
+ &picsymbol("eax", &label("opts"), "edx"); # eax = address of opts
+ &picsymbol("edx", "OPENSSL_ia32cap_P", "edx"); # overwrites the PIC base in edx, so this lookup must come last
+
&mov ("edx",&DWP(0,"edx"));
&bt ("edx","\$IA32CAP_BIT0_INTELP4");
&jc (&label("1xchar"));
@@ -399,13 +402,14 @@ $idx="edx";
&add ("eax",12);
&set_label("ret");
&ret ();
-&set_label("opts",64);
+&function_end_B("RC4_options");
+
+ &rodataseg();
+&set_label("opts");
&asciz ("rc4(4x,int)");
&asciz ("rc4(1x,char)");
&asciz ("rc4(8x,mmx)");
-&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>");
-&align (64);
-&function_end_B("RC4_options");
+ &previous();
&asm_finish();
diff --git a/lib/libcrypto/sha/asm/sha1-586.pl b/lib/libcrypto/sha/asm/sha1-586.pl
index 1de5e2650e1..5928e083c1f 100644
--- a/lib/libcrypto/sha/asm/sha1-586.pl
+++ b/lib/libcrypto/sha/asm/sha1-586.pl
@@ -295,11 +295,9 @@ if ($xmm) {
&static_label("avx_shortcut") if ($ymm);
&static_label("K_XX_XX");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
&mov ($A,&DWP(0,$T));
&mov ($D,&DWP(4,$T));
@@ -419,10 +417,9 @@ my $_rol=sub { &rol(@_) };
my $_ror=sub { &ror(@_) };
&function_begin("_sha1_block_data_order_ssse3");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
&set_label("ssse3_shortcut");
&movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
@@ -861,10 +858,9 @@ my $_rol=sub { &shld(@_[0],@_) };
my $_ror=sub { &shrd(@_[0],@_) };
&function_begin("_sha1_block_data_order_avx");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
&set_label("avx_shortcut");
&vzeroall();
@@ -1213,13 +1209,15 @@ sub Xtail_avx()
&mov (&DWP(16,@T[1]),$E);
&function_end("_sha1_block_data_order_avx");
}
+
+ &rodataseg();
&set_label("K_XX_XX",64);
&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
&data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39
&data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59
&data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79
&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask
+ &previous();
}
-&asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
diff --git a/lib/libcrypto/sha/asm/sha256-586.pl b/lib/libcrypto/sha/asm/sha256-586.pl
index ecc8b69c75d..2b05c960634 100644
--- a/lib/libcrypto/sha/asm/sha256-586.pl
+++ b/lib/libcrypto/sha/asm/sha256-586.pl
@@ -14,8 +14,8 @@
# Pentium PIII P4 AMD K8 Core2
# gcc 46 36 41 27 26
# icc 57 33 38 25 23
-# x86 asm 40 30 35 20 20
-# x86_64 asm(*) - - 21 15.8 16.5
+# x86 asm 40 30 33 20 18
+# x86_64 asm(*) - - 21 16 16
#
# (*) x86_64 assembler performance is presented for reference
# purposes.
@@ -48,20 +48,19 @@ sub BODY_00_15() {
my $in_16_63=shift;
&mov ("ecx",$E);
- &add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7]
- &ror ("ecx",6);
- &mov ("edi",$E);
- &ror ("edi",11);
+ &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2])
+ &ror ("ecx",25-11);
&mov ("esi",$Foff);
- &xor ("ecx","edi");
- &ror ("edi",25-11);
+ &xor ("ecx",$E);
+ &ror ("ecx",11-6);
&mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
- &xor ("ecx","edi"); # Sigma1(e)
+ &xor ("ecx",$E);
+ &ror ("ecx",6); # Sigma1(e)
&mov ("edi",$Goff);
&add ($T,"ecx"); # T += Sigma1(e)
- &mov ($Eoff,$E); # modulo-scheduled
&xor ("esi","edi");
+ &mov ($Eoff,$E); # modulo-scheduled
&mov ("ecx",$A);
&and ("esi",$E);
&mov ($E,$Doff); # e becomes d, which is e in next iteration
@@ -69,14 +68,14 @@ sub BODY_00_15() {
&mov ("edi",$A);
&add ($T,"esi"); # T += Ch(e,f,g)
- &ror ("ecx",2);
+ &ror ("ecx",22-13);
&add ($T,$Hoff); # T += h
- &ror ("edi",13);
+ &xor ("ecx",$A);
+ &ror ("ecx",13-2);
&mov ("esi",$Boff);
- &xor ("ecx","edi");
- &ror ("edi",22-13);
+ &xor ("ecx",$A);
+ &ror ("ecx",2); # Sigma0(a)
&add ($E,$T); # d += T
- &xor ("ecx","edi"); # Sigma0(a)
&mov ("edi",$Coff);
&add ($T,"ecx"); # T += Sigma0(a)
@@ -97,16 +96,15 @@ sub BODY_00_15() {
&add ($A,"esi"); # h += K256[i]
}
+&static_label("K256");
&function_begin("sha256_block_data_order");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K256);
- &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));
+ &picsetup($K256);
+ &picsymbol($K256, &label("K256"), $K256);
&sub ("esp",16);
&and ("esp",-64);
@@ -168,23 +166,22 @@ sub BODY_00_15() {
&set_label("16_63",16);
&mov ("esi",$T);
&mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
- &shr ($T,3);
- &ror ("esi",7);
- &xor ($T,"esi");
&ror ("esi",18-7);
&mov ("edi","ecx");
- &xor ($T,"esi"); # T = sigma0(X[-15])
+ &xor ("esi",$T);
+ &ror ("esi",7);
+ &shr ($T,3);
- &shr ("ecx",10);
- &mov ("esi",&DWP(4*(8+15+16),"esp"));
- &ror ("edi",17);
- &xor ("ecx","edi");
&ror ("edi",19-17);
- &add ($T,"esi"); # T += X[-16]
- &xor ("edi","ecx") # sigma1(X[-2])
+ &xor ($T,"esi"); # T = sigma0(X[-15])
+ &xor ("edi","ecx");
+ &ror ("edi",17);
+ &shr ("ecx",10);
+ &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16]
+ &xor ("edi","ecx"); # sigma1(X[-2])
- &add ($T,"edi"); # T += sigma1(X[-2])
- # &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1)
+ &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
+ # &add ($T,"edi"); # T += sigma1(X[-2])
# &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
&BODY_00_15(1);
@@ -227,8 +224,10 @@ sub BODY_00_15() {
&mov ("esp",&DWP(12,"esp")); # restore sp
&function_end_A();
+&function_end_B("sha256_block_data_order");
-&set_label("K256",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("K256",64);
&data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5);
&data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5);
&data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3);
@@ -245,7 +244,6 @@ sub BODY_00_15() {
&data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3);
&data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208);
&data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2);
-&function_end_B("sha256_block_data_order");
-&asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+ &previous();
&asm_finish();
diff --git a/lib/libcrypto/sha/asm/sha512-586.pl b/lib/libcrypto/sha/asm/sha512-586.pl
index 163361ebe9d..c1d0684e92b 100644
--- a/lib/libcrypto/sha/asm/sha512-586.pl
+++ b/lib/libcrypto/sha/asm/sha512-586.pl
@@ -261,16 +261,18 @@ sub BODY_00_15_x86 {
}
+&static_label("K512");
&function_begin("sha512_block_data_order");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K512);
- &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
+ &picsetup($K512);
+if ($sse2) {
+ &picsymbol("edx", "OPENSSL_ia32cap_P", $K512);
+}
+ &picsymbol($K512, &label("K512"), $K512);
&sub ("esp",16);
&and ("esp",-64);
@@ -283,7 +285,6 @@ sub BODY_00_15_x86 {
&mov (&DWP(12,"esp"),"ebx"); # saved sp
if ($sse2) {
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
&bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("loop_x86"));
@@ -556,8 +557,10 @@ if ($sse2) {
&mov ("esp",&DWP(12,"esp")); # restore sp
&function_end_A();
+&function_end_B("sha512_block_data_order");
-&set_label("K512",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("K512",64);
&data_word(0xd728ae22,0x428a2f98); # u64
&data_word(0x23ef65cd,0x71374491); # u64
&data_word(0xec4d3b2f,0xb5c0fbcf); # u64
@@ -638,7 +641,6 @@ if ($sse2) {
&data_word(0xfc657e2a,0x597f299c); # u64
&data_word(0x3ad6faec,0x5fcb6fab); # u64
&data_word(0x4a475817,0x6c44198c); # u64
-&function_end_B("sha512_block_data_order");
-&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+ &previous();
&asm_finish();
diff --git a/lib/libcrypto/whrlpool/asm/wp-mmx.pl b/lib/libcrypto/whrlpool/asm/wp-mmx.pl
index 0ff8e5b6121..a54d702c3fc 100644
--- a/lib/libcrypto/whrlpool/asm/wp-mmx.pl
+++ b/lib/libcrypto/whrlpool/asm/wp-mmx.pl
@@ -77,6 +77,8 @@ sub row()
$tbl="ebp";
@mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7");
+&static_label("table");
+
&function_begin_B("whirlpool_block_mmx");
&push ("ebp");
&push ("ebx");
@@ -97,10 +99,8 @@ $tbl="ebp";
&mov (&DWP(8,"ebx"),"ebp");
&mov (&DWP(16,"ebx"),"eax"); # saved stack pointer
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($tbl, &label("table"), $tbl);
&xor ("ecx","ecx");
&xor ("edx","edx");
@@ -218,7 +218,9 @@ for($i=0;$i<8;$i++) {
&pop ("ebx");
&pop ("ebp");
&ret ();
+&function_end_B("whirlpool_block_mmx");
+ &rodataseg();
&align(64);
&set_label("table");
&LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8);
@@ -488,6 +490,6 @@ for($i=0;$i<8;$i++) {
&L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8);
&L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e);
&L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33);
+ &previous();
-&function_end_B("whirlpool_block_mmx");
&asm_finish();