From 5f59efbdf609ac302476ee5a23a3557285be5b19 Mon Sep 17 00:00:00 2001 From: Miod Vallat Date: Wed, 1 Feb 2023 20:45:05 +0000 Subject: Move all data blocks from .text to .rodata and clean up and homogenize code responsible for getting the proper address of those blocks. ok tb@ jsing@ --- lib/libcrypto/aes/asm/aes-586.pl | 45 ++++++++--------- lib/libcrypto/aes/asm/aesni-x86.pl | 1 - lib/libcrypto/aes/asm/vpaes-x86.pl | 34 ++++++++----- lib/libcrypto/bn/asm/bn-586.pl | 9 ++-- lib/libcrypto/bn/asm/x86-gf2m.pl | 5 +- lib/libcrypto/bn/asm/x86-mont.pl | 5 +- lib/libcrypto/camellia/asm/cmll-x86.pl | 40 ++++++--------- lib/libcrypto/des/asm/des-586.pl | 9 ++-- lib/libcrypto/modes/asm/ghash-x86.pl | 66 +++++++++--------------- lib/libcrypto/perlasm/cbc.pl | 82 ++++++++++++------------------ lib/libcrypto/perlasm/x86gas.pl | 92 ++++++++++++++++++++++++++-------- lib/libcrypto/rc4/asm/rc4-586.pl | 26 ++++++---- lib/libcrypto/sha/asm/sha1-586.pl | 26 +++++----- lib/libcrypto/sha/asm/sha256-586.pl | 66 ++++++++++++------------ lib/libcrypto/sha/asm/sha512-586.pl | 18 ++++--- lib/libcrypto/whrlpool/asm/wp-mmx.pl | 12 +++-- 16 files changed, 273 insertions(+), 263 deletions(-) diff --git a/lib/libcrypto/aes/asm/aes-586.pl b/lib/libcrypto/aes/asm/aes-586.pl index c5ae3f6903e..4e0f34cba31 100644 --- a/lib/libcrypto/aes/asm/aes-586.pl +++ b/lib/libcrypto/aes/asm/aes-586.pl @@ -950,8 +950,10 @@ sub enclast() &xor ($s3,&DWP(12,$key)); &ret (); +&function_end_B("_x86_AES_encrypt"); -&set_label("AES_Te",64); # Yes! I keep it in the code segment! 
+ &rodataseg(); +&set_label("AES_Te",64); &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); @@ -1154,7 +1156,7 @@ sub enclast() &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000); &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000); -&function_end_B("_x86_AES_encrypt"); + &previous(); # void AES_encrypt (const void *inp,void *out,const AES_KEY *key); &function_begin("AES_encrypt"); @@ -1174,11 +1176,9 @@ sub enclast() &add ("esp",4); # 4 is reserved for caller's return address &mov ($_esp,$s0); # save stack pointer - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tbl); - &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only); - &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); + &picsetup($tbl); + &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); + &picsymbol($tbl, &label("AES_Te"), $tbl); # pick Te4 copy which can't "overlap" with stack frame or key schedule &lea ($s1,&DWP(768-4,"esp")); @@ -1744,8 +1744,10 @@ sub declast() &xor ($s3,&DWP(12,$key)); &ret (); +&function_end_B("_x86_AES_decrypt"); -&set_label("AES_Td",64); # Yes! I keep it in the code segment! 
+ &rodataseg(); +&set_label("AES_Td",64); &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); @@ -1943,7 +1945,7 @@ sub declast() &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); -&function_end_B("_x86_AES_decrypt"); + &previous(); # void AES_decrypt (const void *inp,void *out,const AES_KEY *key); &function_begin("AES_decrypt"); @@ -1963,11 +1965,9 @@ sub declast() &add ("esp",4); # 4 is reserved for caller's return address &mov ($_esp,$s0); # save stack pointer - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tbl); - &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); - &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl)); + &picsetup($tbl); + &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); + &picsymbol($tbl, &label("AES_Td"), $tbl); # pick Td4 copy which can't "overlap" with stack frame or key schedule &lea ($s1,&DWP(768-4,"esp")); @@ -2034,13 +2034,10 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &cmp ($s2,0); &je (&label("drop_out")); - &call (&label("pic_point")); # make it PIC! 
- &set_label("pic_point"); - &blindpop($tbl); - &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); - + &picsetup($tbl); + &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); + &picsymbol($tbl, &label("AES_Te"), $tbl); &cmp (&wparam(5),0); - &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); &jne (&label("picked_te")); &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl)); &set_label("picked_te"); @@ -2659,10 +2656,9 @@ sub enckey() &test ("edi",-1); &jz (&label("badpointer")); - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($tbl); - &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); + &picsetup($tbl); + &picsymbol($tbl, &label("AES_Te"), $tbl); + &lea ($tbl,&DWP(2048+128,$tbl)); # prefetch Te4 @@ -2975,6 +2971,5 @@ sub deckey() &xor ("eax","eax"); # return success &function_end("AES_set_decrypt_key"); -&asciz("AES for x86, CRYPTOGAMS by "); &asm_finish(); diff --git a/lib/libcrypto/aes/asm/aesni-x86.pl b/lib/libcrypto/aes/asm/aesni-x86.pl index 8c1d0b5bed2..ff444156113 100644 --- a/lib/libcrypto/aes/asm/aesni-x86.pl +++ b/lib/libcrypto/aes/asm/aesni-x86.pl @@ -2184,6 +2184,5 @@ if ($PREFIX eq "aesni") { &set_label("dec_key_ret"); &ret (); &function_end_B("${PREFIX}_set_decrypt_key"); -&asciz("AES for Intel AES-NI, CRYPTOGAMS by "); &asm_finish(); diff --git a/lib/libcrypto/aes/asm/vpaes-x86.pl b/lib/libcrypto/aes/asm/vpaes-x86.pl index 1533e2c3042..38cef61733c 100644 --- a/lib/libcrypto/aes/asm/vpaes-x86.pl +++ b/lib/libcrypto/aes/asm/vpaes-x86.pl @@ -57,6 +57,7 @@ $PREFIX="vpaes"; my ($round, $base, $magic, $key, $const, $inp, $out)= ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); + &rodataseg(); &static_label("_vpaes_consts"); &static_label("_vpaes_schedule_low_round"); @@ -153,8 +154,7 @@ $k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t $k_dsbo=0x2c0; # decryption sbox final output &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); 
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); -&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"); -&align (64); + &previous(); &function_begin_B("_vpaes_preheat"); &add ($const,&DWP(0,"esp")); @@ -762,9 +762,11 @@ $k_dsbo=0x2c0; # decryption sbox final output &mov ($magic,0x30); &mov ($out,0); - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &picsetup($const); + &picsymbol($const, &label("_vpaes_consts"), $const); + &lea ($const,&DWP(0x30,$const)); + &call ("_vpaes_schedule_core"); -&set_label("pic_point"); &mov ("esp",&DWP(48,"esp")); &xor ("eax","eax"); @@ -792,18 +794,22 @@ $k_dsbo=0x2c0; # decryption sbox final output &and ($magic,32); &xor ($magic,32); # nbist==192?0:32; - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &picsetup($const); + &picsymbol($const, &label("_vpaes_consts"), $const); + &lea ($const,&DWP(0x30,$const)); + &call ("_vpaes_schedule_core"); -&set_label("pic_point"); &mov ("esp",&DWP(48,"esp")); &xor ("eax","eax"); &function_end("${PREFIX}_set_decrypt_key"); &function_begin("${PREFIX}_encrypt"); - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &picsetup($const); + &picsymbol($const, &label("_vpaes_consts"), $const); + &lea ($const,&DWP(0x30,$const)); + &call ("_vpaes_preheat"); -&set_label("pic_point"); &mov ($inp,&wparam(0)); # inp &lea ($base,&DWP(-56,"esp")); &mov ($out,&wparam(1)); # out @@ -820,9 +826,11 @@ $k_dsbo=0x2c0; # decryption sbox final output &function_end("${PREFIX}_encrypt"); &function_begin("${PREFIX}_decrypt"); - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &picsetup($const); + &picsymbol($const, &label("_vpaes_consts"), $const); + &lea ($const,&DWP(0x30,$const)); + &call ("_vpaes_preheat"); -&set_label("pic_point"); &mov ($inp,&wparam(0)); # inp &lea ($base,&DWP(-56,"esp")); &mov ($out,&wparam(1)); # out @@ -859,9 +867,11 @@ $k_dsbo=0x2c0; # decryption sbox final 
output &mov (&DWP(8,"esp"),$const); # save ivp &mov ($out,$round); # $out works as $len - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); + &picsetup($const); + &picsymbol($const, &label("_vpaes_consts"), $const); + &lea ($const,&DWP(0x30,$const)); + &call ("_vpaes_preheat"); -&set_label("pic_point"); &cmp ($magic,0); &je (&label("cbc_dec_loop")); &jmp (&label("cbc_enc_loop")); diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl index c4e2baa6c5a..b502fe60ee2 100644 --- a/lib/libcrypto/bn/asm/bn-586.pl +++ b/lib/libcrypto/bn/asm/bn-586.pl @@ -32,7 +32,8 @@ sub bn_mul_add_words $c="ecx"; if ($sse2) { - &picmeup("eax","OPENSSL_ia32cap_P"); + &picsetup("eax"); + &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc(&label("maw_non_sse2")); @@ -218,7 +219,8 @@ sub bn_mul_words $c="ecx"; if ($sse2) { - &picmeup("eax","OPENSSL_ia32cap_P"); + &picsetup("eax"); + &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc(&label("mw_non_sse2")); @@ -329,7 +331,8 @@ sub bn_sqr_words $c="ecx"; if ($sse2) { - &picmeup("eax","OPENSSL_ia32cap_P"); + &picsetup("eax"); + &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc(&label("sqr_non_sse2")); diff --git a/lib/libcrypto/bn/asm/x86-gf2m.pl b/lib/libcrypto/bn/asm/x86-gf2m.pl index 9715b2158fd..cb2f2a5c305 100644 --- a/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/lib/libcrypto/bn/asm/x86-gf2m.pl @@ -200,7 +200,8 @@ $R="mm0"; # void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); &function_begin_B("bn_GF2m_mul_2x2"); if (!$x86only) { - &picmeup("edx","OPENSSL_ia32cap_P"); + &picsetup("edx"); + &picsymbol("edx", "OPENSSL_ia32cap_P", "edx"); &mov ("eax",&DWP(0,"edx")); &mov ("edx",&DWP(4,"edx")); &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit @@ -308,6 +309,4 @@ if ($sse2) { &ret (); &function_end_B("bn_GF2m_mul_2x2"); -&asciz ("GF(2^m) 
Multiplication for x86, CRYPTOGAMS by "); - &asm_finish(); diff --git a/lib/libcrypto/bn/asm/x86-mont.pl b/lib/libcrypto/bn/asm/x86-mont.pl index e6c04739b1d..65246517485 100755 --- a/lib/libcrypto/bn/asm/x86-mont.pl +++ b/lib/libcrypto/bn/asm/x86-mont.pl @@ -113,7 +113,8 @@ $mul1="mm5"; $temp="mm6"; $mask="mm7"; - &picmeup("eax","OPENSSL_ia32cap_P"); + &picsetup("eax"); + &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); &jnc (&label("non_sse2")); @@ -588,6 +589,4 @@ $sbit=$num; &set_label("just_leave"); &function_end("bn_mul_mont"); -&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by "); - &asm_finish(); diff --git a/lib/libcrypto/camellia/asm/cmll-x86.pl b/lib/libcrypto/camellia/asm/cmll-x86.pl index 027302ac869..a4ab11e54d6 100644 --- a/lib/libcrypto/camellia/asm/cmll-x86.pl +++ b/lib/libcrypto/camellia/asm/cmll-x86.pl @@ -141,10 +141,8 @@ my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4]; &mov ($_esp,"ebx"); # save %esp &mov ($_end,"eax"); # save keyEnd - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); + &picsetup($Tbl); + &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); &mov (@T[0],&DWP(0,$idx)); # load plaintext &mov (@T[1],&DWP(4,$idx)); @@ -206,10 +204,8 @@ if ($OPENSSL) { &mov ($_esp,"ebx"); # save %esp &mov ($_end,"eax"); # save keyEnd - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); + &picsetup($Tbl); + &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); &mov (@T[0],&DWP(0,$idx)); # load plaintext &mov (@T[1],&DWP(4,$idx)); @@ -316,10 +312,8 @@ if ($OPENSSL) { &lea ($key,&DWP(0,$key,"eax")); &mov (&DWP(5*4,"esp"),"ebx");# save %esp - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); + 
&picsetup($Tbl); + &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); &mov (@T[0],&DWP(0,$idx)); # load ciphertext &mov (@T[1],&DWP(4,$idx)); @@ -381,10 +375,8 @@ if ($OPENSSL) { &lea ($key,&DWP(0,$key,"eax")); &mov (&DWP(5*4,"esp"),"ebx");# save %esp - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); + &picsetup($Tbl); + &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); &mov (@T[0],&DWP(0,$idx)); # load ciphertext &mov (@T[1],&DWP(4,$idx)); @@ -594,10 +586,8 @@ my $bias=int(@T[0])?shift(@T):0; &xor (@T[3],&DWP(1*8+4,$key)); &set_label("1st128",4); - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); + &picsetup($Tbl); + &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] @@ -786,6 +776,7 @@ sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<< sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } + &rodataseg(); &set_label("Camellia_SIGMA",64); &data_word( 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, @@ -796,6 +787,7 @@ sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<< # tables are interleaved, remember? for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } + &previous(); # void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, # size_t length, const CAMELLIA_KEY *key, @@ -856,10 +848,8 @@ my ($s0,$s1,$s2,$s3) = @T; &mov ($_key,$s3); # save copy of key &mov ($_ivp,$Tbl); # save copy of ivp - &call (&label("pic_point")); # make it PIC! 
- &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); + &picsetup($Tbl); + &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); &mov ($idx,32); &set_label("prefetch_sbox",4); @@ -1133,6 +1123,4 @@ my ($s0,$s1,$s2,$s3) = @T; &function_end("Camellia_cbc_encrypt"); } -&asciz("Camellia for x86 by "); - &asm_finish(); diff --git a/lib/libcrypto/des/asm/des-586.pl b/lib/libcrypto/des/asm/des-586.pl index 5b5f39cebd1..e11b2ef80ff 100644 --- a/lib/libcrypto/des/asm/des-586.pl +++ b/lib/libcrypto/des/asm/des-586.pl @@ -154,11 +154,8 @@ sub DES_encrypt &rotl($L,3); } - # PIC-ification:-) - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($trans); - &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans)); + &picsetup($trans); + &picsymbol($trans, &label("DES_SPtrans"), $trans); &mov( "ecx", &wparam(1) ); @@ -314,6 +311,7 @@ sub FP_new sub DES_SPtrans { + &rodataseg(); &set_label("DES_SPtrans",64); &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); @@ -450,4 +448,5 @@ sub DES_SPtrans &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); + &previous(); } diff --git a/lib/libcrypto/modes/asm/ghash-x86.pl b/lib/libcrypto/modes/asm/ghash-x86.pl index 27492597adb..5e868a43ff2 100644 --- a/lib/libcrypto/modes/asm/ghash-x86.pl +++ b/lib/libcrypto/modes/asm/ghash-x86.pl @@ -411,10 +411,8 @@ $S=12; # shift factor for rem_4bit &mov ($inp,&wparam(0)); # load Xi &mov ($Htbl,&wparam(1)); # load Htable - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); + &picsetup("eax"); + &picsymbol("eax", &label("rem_4bit"), "eax"); &movz ($Zll,&BP(15,$inp)); @@ -436,10 +434,8 @@ $S=12; # shift factor for rem_4bit &mov 
($inp,&wparam(2)); # load in &mov ($Zlh,&wparam(3)); # load len - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); + &picsetup("eax"); + &picsymbol("eax", &label("rem_4bit"), "eax"); &add ($Zlh,$inp); &mov (&wparam(3),$Zlh); # len to point at the end of input @@ -584,10 +580,8 @@ sub mmx_loop() { &mov ($inp,&wparam(0)); # load Xi &mov ($Htbl,&wparam(1)); # load Htable - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); + &picsetup("eax"); + &picsymbol("eax", &label("rem_4bit"), "eax"); &movz ($Zll,&BP(15,$inp)); @@ -618,10 +612,9 @@ sub mmx_loop() { &mov ("ecx",&wparam(2)); # inp &mov ("edx",&wparam(3)); # len &mov ("ebp","esp"); # original %esp - &call (&label("pic_point")); - &set_label ("pic_point"); - &blindpop ($rem_8bit); - &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit)); + + &picsetup($rem_8bit); + &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit); &sub ("esp",512+16+16); # allocate stack frame... 
&and ("esp",-64); # ...and align it @@ -910,10 +903,8 @@ my ($Xhi,$Xi) = @_; &mov ($Htbl,&wparam(0)); &mov ($Xip,&wparam(1)); - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + &picsetup($const); + &picsymbol($const, &label("bswap"), $const); &movdqu ($Hkey,&QWP(0,$Xip)); &pshufd ($Hkey,$Hkey,0b01001110);# dword swap @@ -947,10 +938,8 @@ my ($Xhi,$Xi) = @_; &mov ($Xip,&wparam(0)); &mov ($Htbl,&wparam(1)); - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + &picsetup($const); + &picsymbol($const, &label("bswap"), $const); &movdqu ($Xi,&QWP(0,$Xip)); &movdqa ($T3,&QWP(0,$const)); @@ -972,10 +961,8 @@ my ($Xhi,$Xi) = @_; &mov ($inp,&wparam(2)); &mov ($len,&wparam(3)); - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + &picsetup($const); + &picsymbol($const, &label("bswap"), $const); &movdqu ($Xi,&QWP(0,$Xip)); &movdqa ($T3,&QWP(0,$const)); @@ -1138,10 +1125,8 @@ my ($Xhi,$Xi)=@_; &mov ($Htbl,&wparam(0)); &mov ($Xip,&wparam(1)); - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + &picsetup($const); + &picsymbol($const, &label("bswap"), $const); &movdqu ($Hkey,&QWP(0,$Xip)); &pshufd ($Hkey,$Hkey,0b01001110);# dword swap @@ -1161,10 +1146,8 @@ my ($Xhi,$Xi)=@_; &mov ($Xip,&wparam(0)); &mov ($Htbl,&wparam(1)); - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + &picsetup($const); + &picsymbol($const, &label("bswap"), $const); &movdqu ($Xi,&QWP(0,$Xip)); &movdqa ($Xn,&QWP(0,$const)); @@ -1186,10 +1169,8 @@ my ($Xhi,$Xi)=@_; &mov ($inp,&wparam(2)); &mov ($len,&wparam(3)); - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea 
($const,&DWP(&label("bswap")."-".&label("pic"),$const)); + &picsetup($const); + &picsymbol($const, &label("bswap"), $const); &movdqu ($Xi,&QWP(0,$Xip)); &movdqa ($T3,&QWP(0,$const)); @@ -1270,11 +1251,14 @@ my ($Xhi,$Xi)=@_; } + &rodataseg(); &set_label("bswap",64); &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial + &previous(); }} # $sse2 + &rodataseg(); &set_label("rem_4bit",64); &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); @@ -1313,9 +1297,9 @@ my ($Xhi,$Xi)=@_; &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); + &previous(); }}} # !$x86only -&asciz("GHASH for x86, CRYPTOGAMS by "); &asm_finish(); # A question was risen about choice of vanilla MMX. Or rather why wasn't diff --git a/lib/libcrypto/perlasm/cbc.pl b/lib/libcrypto/perlasm/cbc.pl index 24561e759ab..392f23e145e 100644 --- a/lib/libcrypto/perlasm/cbc.pl +++ b/lib/libcrypto/perlasm/cbc.pl @@ -34,6 +34,15 @@ sub cbc # p1,p2,p3 are the offsets for parameters to be passed to the # underlying calls. 
+&static_label("cbc_enc_jmp_table_".$name); +&static_label("ej1_".$name); +&static_label("ej2_".$name); +&static_label("ej3_".$name); +&static_label("ej4_".$name); +&static_label("ej5_".$name); +&static_label("ej6_".$name); +&static_label("ej7_".$name); + &function_begin_B($name,""); &comment(""); @@ -146,33 +155,32 @@ sub cbc &mov($count, &wparam(2)); # length &and($count, 7); &jz(&label("finish")); - &call(&label("PIC_point")); -&set_label("PIC_point"); - &blindpop("edx"); - &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); + + &picsetup("edx"); + &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx"); &mov($count,&DWP(0,"ecx",$count,4)); - &add($count,"edx"); + &picadjust($count, "edx"); + &xor("ecx","ecx"); &xor("edx","edx"); - #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); &jmp_ptr($count); -&set_label("ej7"); +&set_label("ej7_".$name); &movb(&HB("edx"), &BP(6,$in,"",0)); &shl("edx",8); -&set_label("ej6"); +&set_label("ej6_".$name); &movb(&HB("edx"), &BP(5,$in,"",0)); -&set_label("ej5"); +&set_label("ej5_".$name); &movb(&LB("edx"), &BP(4,$in,"",0)); -&set_label("ej4"); +&set_label("ej4_".$name); &mov("ecx", &DWP(0,$in,"",0)); &jmp(&label("ejend")); -&set_label("ej3"); +&set_label("ej3_".$name); &movb(&HB("ecx"), &BP(2,$in,"",0)); &shl("ecx",8); -&set_label("ej2"); +&set_label("ej2_".$name); &movb(&HB("ecx"), &BP(1,$in,"",0)); -&set_label("ej1"); +&set_label("ej1_".$name); &movb(&LB("ecx"), &BP(0,$in,"",0)); &set_label("ejend"); @@ -279,30 +287,14 @@ sub cbc &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, &mov("ebx", &DWP(4,$in,"",0)); # next iv actually -&set_label("dj7"); &rotr("edx", 16); &movb(&BP(6,$out,"",0), &LB("edx")); &shr("edx",16); -&set_label("dj6"); &movb(&BP(5,$out,"",0), &HB("edx")); -&set_label("dj5"); &movb(&BP(4,$out,"",0), &LB("edx")); -&set_label("dj4"); &mov(&DWP(0,$out,"",0), "ecx"); - &jmp(&label("djend")); -&set_label("dj3"); - &rotr("ecx", 16); - &movb(&BP(2,$out,"",0), &LB("ecx")); - 
&shl("ecx",16); -&set_label("dj2"); - &movb(&BP(1,$in,"",0), &HB("ecx")); -&set_label("dj1"); - &movb(&BP(0,$in,"",0), &LB("ecx")); -&set_label("djend"); # final iv is still in eax:ebx - &jmp(&label("finish")); - ############################ FINISH #######################3 &set_label("finish",1); @@ -319,31 +311,21 @@ sub cbc &mov(&DWP(4,"ecx","",0), "ebx"); # save iv &function_end_A($name); + &function_end_B($name); + &rodataseg(); &align(64); - &set_label("cbc_enc_jmp_table"); + &set_label("cbc_enc_jmp_table_".$name); &data_word("0"); - &data_word(&label("ej1")."-".&label("PIC_point")); - &data_word(&label("ej2")."-".&label("PIC_point")); - &data_word(&label("ej3")."-".&label("PIC_point")); - &data_word(&label("ej4")."-".&label("PIC_point")); - &data_word(&label("ej5")."-".&label("PIC_point")); - &data_word(&label("ej6")."-".&label("PIC_point")); - &data_word(&label("ej7")."-".&label("PIC_point")); - # not used - #&set_label("cbc_dec_jmp_table",1); - #&data_word("0"); - #&data_word(&label("dj1")."-".&label("PIC_point")); - #&data_word(&label("dj2")."-".&label("PIC_point")); - #&data_word(&label("dj3")."-".&label("PIC_point")); - #&data_word(&label("dj4")."-".&label("PIC_point")); - #&data_word(&label("dj5")."-".&label("PIC_point")); - #&data_word(&label("dj6")."-".&label("PIC_point")); - #&data_word(&label("dj7")."-".&label("PIC_point")); - &align(64); + &data_word(&code_sym(&label("ej1_".$name))); + &data_word(&code_sym(&label("ej2_".$name))); + &data_word(&code_sym(&label("ej3_".$name))); + &data_word(&code_sym(&label("ej4_".$name))); + &data_word(&code_sym(&label("ej5_".$name))); + &data_word(&code_sym(&label("ej6_".$name))); + &data_word(&code_sym(&label("ej7_".$name))); + &previous(); - &function_end_B($name); - } 1; diff --git a/lib/libcrypto/perlasm/x86gas.pl b/lib/libcrypto/perlasm/x86gas.pl index ca644ba5534..f28a590549b 100644 --- a/lib/libcrypto/perlasm/x86gas.pl +++ b/lib/libcrypto/perlasm/x86gas.pl @@ -177,34 +177,52 @@ sub ::align 
push(@out,".align\t$val\n"); } -sub ::picmeup -{ my($dst,$sym,$base,$reflabel)=@_; - - if ($::openbsd) - { &::emitraw("#if defined(PIC) || defined(__PIC__)"); - &::emitraw("PIC_PROLOGUE"); - &::mov($dst, &::DWP("PIC_GOT($sym)")); - &::emitraw("PIC_EPILOGUE"); - &::emitraw("#else /* PIC */"); - &::lea($dst,&::DWP($sym)); - &::emitraw("#endif /* PIC */"); - } - elsif (($::pic && ($::elf || $::aout)) || $::macosx) - { if (!defined($base)) - { &::call(&::label("PIC_me_up")); - &::set_label("PIC_me_up"); - &::blindpop($dst); - $base=$dst; - $reflabel=&::label("PIC_me_up"); - } +# +# PIC data access wrappers +# +# Usage: +# picsetup($base) +# - only allowed once per function (because of hardcoded label name), +# sets up pic access, uses $base register as temporary +# picsymbol($dst, $sym, $base) +# - loads the address of symbol $sym into $dst with the help of $base +# initialized by picsetup +# picadjust($sym, $base) +# - adjusts a code pointer read from a code_sym table with the help of +# $base initialized by picsetup +# code_sym($sym) +# - emits a pointer to the given code symbol, relative to the GOT if +# PIC. This pointer will need to be adjusted with picadjust above +# before use. 
+ +sub ::picsetup +{ my($base)=@_; + + if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) + { + &::call(&::label("PIC_setup")); + &::set_label("PIC_setup"); + &::blindpop($base); if ($::macosx) { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); - &::mov($dst,&::DWP("$indirect-$reflabel",$base)); $non_lazy_ptr{"$nmdecor$sym"}=$indirect; } + } +} + +sub ::picsymbol +{ my($dst,$sym,$base)=@_; + + if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) + { + my $reflabel=&::label("PIC_setup"); + if ($::macosx) + { my $indirect=$non_lazy_ptr{"$nmdecor$sym"}; + &::mov($dst,&::DWP("$indirect-$reflabel",$base)); + } else { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", - $base)); + $base)); &::mov($dst,&::DWP("$sym\@GOT",$dst)); } } @@ -212,6 +230,30 @@ sub ::picmeup { &::lea($dst,&::DWP($sym)); } } +sub ::picadjust +{ my($sym,$base)=@_; + + if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) + { + my $reflabel=&::label("PIC_setup"); + &::lea($sym,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", + $base,$sym)); + } +} + +sub ::code_sym +{ my($sym)=@_; + + if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) + { + $sym."\@GOTOFF"; + } + else + { + $sym; + } +} + sub ::initseg { my $f=$nmdecor.shift; @@ -264,4 +306,10 @@ ___ sub ::dataseg { push(@out,".data\n"); } +sub ::rodataseg +{ push(@out,".rodata\n"); } + +sub ::previous +{ push(@out,".previous\n"); } + 1; diff --git a/lib/libcrypto/rc4/asm/rc4-586.pl b/lib/libcrypto/rc4/asm/rc4-586.pl index f3c3e117bc3..4991c37c2c1 100644 --- a/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/lib/libcrypto/rc4/asm/rc4-586.pl @@ -188,7 +188,8 @@ if ($alt=0) { &mov (&wparam(3),$out); # $out as accumulator in these loops &jz (&label("go4loop4")); - &picmeup($out,"OPENSSL_ia32cap_P"); + &picsetup($out); + &picsymbol($out, "OPENSSL_ia32cap_P", $out); # check SSE2 bit [could have been MMX] &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); &jnc (&label("go4loop4")); @@ -305,7 +306,9 @@ $idx="edx"; 
&mov ($out,&wparam(0)); # load key &mov ($idi,&wparam(1)); # load len &mov ($inp,&wparam(2)); # load data - &picmeup($idx,"OPENSSL_ia32cap_P"); + + &picsetup($idx); + &picsymbol($idx, "OPENSSL_ia32cap_P", $idx); &lea ($out,&DWP(2*4,$out)); # &key->data &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end @@ -382,12 +385,12 @@ $idx="edx"; &function_end("RC4_set_key"); # const char *RC4_options(void); +&static_label("opts"); &function_begin_B("RC4_options"); - &call (&label("pic_point")); -&set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); - &picmeup("edx","OPENSSL_ia32cap_P"); + &picsetup("edx"); + &picsymbol("eax", &label("opts"), "edx"); + &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");; + &mov ("edx",&DWP(0,"edx")); &bt ("edx","\$IA32CAP_BIT0_INTELP4"); &jc (&label("1xchar")); @@ -399,13 +402,14 @@ $idx="edx"; &add ("eax",12); &set_label("ret"); &ret (); -&set_label("opts",64); +&function_end_B("RC4_options"); + + &rodataseg(); +&set_label("opts"); &asciz ("rc4(4x,int)"); &asciz ("rc4(1x,char)"); &asciz ("rc4(8x,mmx)"); -&asciz ("RC4 for x86, CRYPTOGAMS by "); -&align (64); -&function_end_B("RC4_options"); + &previous(); &asm_finish(); diff --git a/lib/libcrypto/sha/asm/sha1-586.pl b/lib/libcrypto/sha/asm/sha1-586.pl index 1de5e2650e1..5928e083c1f 100644 --- a/lib/libcrypto/sha/asm/sha1-586.pl +++ b/lib/libcrypto/sha/asm/sha1-586.pl @@ -295,11 +295,9 @@ if ($xmm) { &static_label("avx_shortcut") if ($ymm); &static_label("K_XX_XX"); - &call (&label("pic_point")); # make it PIC! 
- &set_label("pic_point"); - &blindpop($tmp1); - &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); + &picsetup($tmp1); + &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1); + &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); &mov ($A,&DWP(0,$T)); &mov ($D,&DWP(4,$T)); @@ -419,10 +417,9 @@ my $_rol=sub { &rol(@_) }; my $_ror=sub { &ror(@_) }; &function_begin("_sha1_block_data_order_ssse3"); - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tmp1); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); + &picsetup($tmp1); + &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); + &set_label("ssse3_shortcut"); &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 @@ -861,10 +858,9 @@ my $_rol=sub { &shld(@_[0],@_) }; my $_ror=sub { &shrd(@_[0],@_) }; &function_begin("_sha1_block_data_order_avx"); - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tmp1); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); + &picsetup($tmp1); + &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); + &set_label("avx_shortcut"); &vzeroall(); @@ -1213,13 +1209,15 @@ sub Xtail_avx() &mov (&DWP(16,@T[1]),$E); &function_end("_sha1_block_data_order_avx"); } + + &rodataseg(); &set_label("K_XX_XX",64); &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 &data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask + &previous(); } -&asciz("SHA1 block transform for x86, CRYPTOGAMS by "); &asm_finish(); diff --git a/lib/libcrypto/sha/asm/sha256-586.pl b/lib/libcrypto/sha/asm/sha256-586.pl index ecc8b69c75d..2b05c960634 100644 --- a/lib/libcrypto/sha/asm/sha256-586.pl +++ b/lib/libcrypto/sha/asm/sha256-586.pl @@ -14,8 +14,8 @@ # Pentium PIII 
P4 AMD K8 Core2 # gcc 46 36 41 27 26 # icc 57 33 38 25 23 -# x86 asm 40 30 35 20 20 -# x86_64 asm(*) - - 21 15.8 16.5 +# x86 asm 40 30 33 20 18 +# x86_64 asm(*) - - 21 16 16 # # (*) x86_64 assembler performance is presented for reference # purposes. @@ -48,20 +48,19 @@ sub BODY_00_15() { my $in_16_63=shift; &mov ("ecx",$E); - &add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7] - &ror ("ecx",6); - &mov ("edi",$E); - &ror ("edi",11); + &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2]) + &ror ("ecx",25-11); &mov ("esi",$Foff); - &xor ("ecx","edi"); - &ror ("edi",25-11); + &xor ("ecx",$E); + &ror ("ecx",11-6); &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0] - &xor ("ecx","edi"); # Sigma1(e) + &xor ("ecx",$E); + &ror ("ecx",6); # Sigma1(e) &mov ("edi",$Goff); &add ($T,"ecx"); # T += Sigma1(e) - &mov ($Eoff,$E); # modulo-scheduled &xor ("esi","edi"); + &mov ($Eoff,$E); # modulo-scheduled &mov ("ecx",$A); &and ("esi",$E); &mov ($E,$Doff); # e becomes d, which is e in next iteration @@ -69,14 +68,14 @@ sub BODY_00_15() { &mov ("edi",$A); &add ($T,"esi"); # T += Ch(e,f,g) - &ror ("ecx",2); + &ror ("ecx",22-13); &add ($T,$Hoff); # T += h - &ror ("edi",13); + &xor ("ecx",$A); + &ror ("ecx",13-2); &mov ("esi",$Boff); - &xor ("ecx","edi"); - &ror ("edi",22-13); + &xor ("ecx",$A); + &ror ("ecx",2); # Sigma0(a) &add ($E,$T); # d += T - &xor ("ecx","edi"); # Sigma0(a) &mov ("edi",$Coff); &add ($T,"ecx"); # T += Sigma0(a) @@ -97,16 +96,15 @@ sub BODY_00_15() { &add ($A,"esi"); # h += K256[i] } +&static_label("K256"); &function_begin("sha256_block_data_order"); &mov ("esi",wparam(0)); # ctx &mov ("edi",wparam(1)); # inp &mov ("eax",wparam(2)); # num &mov ("ebx","esp"); # saved sp - &call (&label("pic_point")); # make it PIC! 
-&set_label("pic_point"); - &blindpop($K256); - &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); + &picsetup($K256); + &picsymbol($K256, &label("K256"), $K256); &sub ("esp",16); &and ("esp",-64); @@ -168,23 +166,22 @@ sub BODY_00_15() { &set_label("16_63",16); &mov ("esi",$T); &mov ("ecx",&DWP(4*(8+15+16-14),"esp")); - &shr ($T,3); - &ror ("esi",7); - &xor ($T,"esi"); &ror ("esi",18-7); &mov ("edi","ecx"); - &xor ($T,"esi"); # T = sigma0(X[-15]) + &xor ("esi",$T); + &ror ("esi",7); + &shr ($T,3); - &shr ("ecx",10); - &mov ("esi",&DWP(4*(8+15+16),"esp")); - &ror ("edi",17); - &xor ("ecx","edi"); &ror ("edi",19-17); - &add ($T,"esi"); # T += X[-16] - &xor ("edi","ecx") # sigma1(X[-2]) + &xor ($T,"esi"); # T = sigma0(X[-15]) + &xor ("edi","ecx"); + &ror ("edi",17); + &shr ("ecx",10); + &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16] + &xor ("edi","ecx"); # sigma1(X[-2]) - &add ($T,"edi"); # T += sigma1(X[-2]) - # &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1) + &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7] + # &add ($T,"edi"); # T += sigma1(X[-2]) # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0] &BODY_00_15(1); @@ -227,8 +224,10 @@ sub BODY_00_15() { &mov ("esp",&DWP(12,"esp")); # restore sp &function_end_A(); +&function_end_B("sha256_block_data_order"); -&set_label("K256",64); # Yes! I keep it in the code segment! 
+ &rodataseg(); +&set_label("K256",64); &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); @@ -245,7 +244,6 @@ sub BODY_00_15() { &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); -&function_end_B("sha256_block_data_order"); -&asciz("SHA256 block transform for x86, CRYPTOGAMS by "); + &previous(); &asm_finish(); diff --git a/lib/libcrypto/sha/asm/sha512-586.pl b/lib/libcrypto/sha/asm/sha512-586.pl index 163361ebe9d..c1d0684e92b 100644 --- a/lib/libcrypto/sha/asm/sha512-586.pl +++ b/lib/libcrypto/sha/asm/sha512-586.pl @@ -261,16 +261,18 @@ sub BODY_00_15_x86 { } +&static_label("K512"); &function_begin("sha512_block_data_order"); &mov ("esi",wparam(0)); # ctx &mov ("edi",wparam(1)); # inp &mov ("eax",wparam(2)); # num &mov ("ebx","esp"); # saved sp - &call (&label("pic_point")); # make it PIC! -&set_label("pic_point"); - &blindpop($K512); - &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512)); + &picsetup($K512); +if ($sse2) { + &picsymbol("edx", "OPENSSL_ia32cap_P", $K512); +} + &picsymbol($K512, &label("K512"), $K512); &sub ("esp",16); &and ("esp",-64); @@ -283,7 +285,6 @@ sub BODY_00_15_x86 { &mov (&DWP(12,"esp"),"ebx"); # saved sp if ($sse2) { - &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); &jnc (&label("loop_x86")); @@ -556,8 +557,10 @@ if ($sse2) { &mov ("esp",&DWP(12,"esp")); # restore sp &function_end_A(); +&function_end_B("sha512_block_data_order"); -&set_label("K512",64); # Yes! I keep it in the code segment! 
+ &rodataseg(); +&set_label("K512",64); &data_word(0xd728ae22,0x428a2f98); # u64 &data_word(0x23ef65cd,0x71374491); # u64 &data_word(0xec4d3b2f,0xb5c0fbcf); # u64 @@ -638,7 +641,6 @@ if ($sse2) { &data_word(0xfc657e2a,0x597f299c); # u64 &data_word(0x3ad6faec,0x5fcb6fab); # u64 &data_word(0x4a475817,0x6c44198c); # u64 -&function_end_B("sha512_block_data_order"); -&asciz("SHA512 block transform for x86, CRYPTOGAMS by "); + &previous(); &asm_finish(); diff --git a/lib/libcrypto/whrlpool/asm/wp-mmx.pl b/lib/libcrypto/whrlpool/asm/wp-mmx.pl index 0ff8e5b6121..a54d702c3fc 100644 --- a/lib/libcrypto/whrlpool/asm/wp-mmx.pl +++ b/lib/libcrypto/whrlpool/asm/wp-mmx.pl @@ -77,6 +77,8 @@ sub row() $tbl="ebp"; @mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"); +&static_label("table"); + &function_begin_B("whirlpool_block_mmx"); &push ("ebp"); &push ("ebx"); @@ -97,10 +99,8 @@ $tbl="ebp"; &mov (&DWP(8,"ebx"),"ebp"); &mov (&DWP(16,"ebx"),"eax"); # saved stack pointer - &call (&label("pic_point")); -&set_label("pic_point"); - &blindpop($tbl); - &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl)); + &picsetup($tbl); + &picsymbol($tbl, &label("table"), $tbl); &xor ("ecx","ecx"); &xor ("edx","edx"); @@ -218,7 +218,9 @@ for($i=0;$i<8;$i++) { &pop ("ebx"); &pop ("ebp"); &ret (); +&function_end_B("whirlpool_block_mmx"); + &rodataseg(); &align(64); &set_label("table"); &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); @@ -488,6 +490,6 @@ for($i=0;$i<8;$i++) { &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); + &previous(); -&function_end_B("whirlpool_block_mmx"); &asm_finish(); -- cgit v1.2.3