summaryrefslogtreecommitdiff
path: root/gnu
diff options
context:
space:
mode:
author David Gwynne <dlg@cvs.openbsd.org> 2019-01-30 03:08:13 +0000
committer David Gwynne <dlg@cvs.openbsd.org> 2019-01-30 03:08:13 +0000
commit a5513b06c6efdd63199425cd8ca5bb985f960e31 (patch)
tree a5987ecc835c60a9feaa0d67935814ef2c56f3b8 /gnu
parent e2f84b0a9ace6c0051461915505e45d00d49d6e4 (diff)
implement -msave-args in clang/llvm, like Sun did for gcc
this is a bit different to gcc as gcc likes to use movs to move stuff on and off the stack, and directly updates the stack pointers with add and sub instructions. llvm prefers to use push and pop instructions, is a lot more careful about keeping track of how much stuff is currently on the stack, and generally pops the frame pointer rather than do maths on it.

-msave-args adds a bunch of pushes as the first thing a function prologue does. to keep the stack aligned, if there's an odd number of arguments to the function it pushes the first one again to put the frame back on a 16 byte boundary.

to undo the pushes, the frame pointer needs to be updated in function epilogues. clang emits a series of pops to fix up the registers on the way out, but popping saved arguments is a waste of time and harmful to actual data in the function. rather than add an offset to the stack pointer, -msave-args emits a leaveq operation to fix up the frame again. leaveq is effectively mov rbp,rsp; pop rbp, and is a single byte, meaning there's less potential for gadgets compared to a direct add to rsp, or an explicit mov rbp,rsp.

the only thing missing compared to the gcc implementation is adding the SUN_amd64_parmdump dwarf flag to affected functions. if someone can tell me how to add that from the frame lowering code, let me know.

when enabled in kernel builds again, this will provide useful arguments in ddb stack traces.
Diffstat (limited to 'gnu')
-rw-r--r-- gnu/llvm/include/llvm/BinaryFormat/Dwarf.def 93
-rw-r--r-- gnu/llvm/lib/Target/X86/X86.td 899
-rw-r--r-- gnu/llvm/lib/Target/X86/X86FrameLowering.cpp 97
-rw-r--r-- gnu/llvm/lib/Target/X86/X86FrameLowering.h 2
-rw-r--r-- gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h 23
-rw-r--r-- gnu/llvm/lib/Target/X86/X86Subtarget.h 5
-rw-r--r-- gnu/llvm/tools/clang/include/clang/Driver/Options.td 2
-rw-r--r-- gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp 216
-rw-r--r-- gnu/llvm/tools/clang/lib/Basic/Targets/X86.h 63
9 files changed, 1112 insertions, 288 deletions
diff --git a/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def b/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def
index 3df3300de46..41ed16d6f45 100644
--- a/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -12,15 +12,15 @@
//===----------------------------------------------------------------------===//
// TODO: Add other DW-based macros.
-#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_AT || \
- defined HANDLE_DW_FORM || defined HANDLE_DW_OP || \
- defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \
- defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
- defined HANDLE_DW_CC || defined HANDLE_DW_LNS || \
- defined HANDLE_DW_LNE || defined HANDLE_DW_LNCT || \
- defined HANDLE_DW_MACRO || defined HANDLE_DW_RLE || \
- defined HANDLE_DW_CFA || defined HANDLE_DW_APPLE_PROPERTY || \
- defined HANDLE_DW_UT)
+#if !( \
+ defined HANDLE_DW_TAG || defined HANDLE_DW_AT || defined HANDLE_DW_FORM || \
+ defined HANDLE_DW_OP || defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \
+ defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
+ defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \
+ defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \
+ defined HANDLE_DW_RLE || defined HANDLE_DW_CFA || \
+ defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \
+ defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX)
#error "Missing macro definition of HANDLE_DW*"
#endif
@@ -92,6 +92,14 @@
#define HANDLE_DW_UT(ID, NAME)
#endif
+#ifndef HANDLE_DWARF_SECTION
+#define HANDLE_DWARF_SECTION(ENUM_NAME, ELF_NAME, CMDLINE_NAME)
+#endif
+
+#ifndef HANDLE_DW_IDX
+#define HANDLE_DW_IDX(ID, NAME)
+#endif
+
HANDLE_DW_TAG(0x0000, null, 2, DWARF)
HANDLE_DW_TAG(0x0001, array_type, 2, DWARF)
HANDLE_DW_TAG(0x0002, class_type, 2, DWARF)
@@ -172,6 +180,8 @@ HANDLE_DW_TAG(0x4103, class_template, 0, GNU)
HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU)
HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU)
HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU)
+HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU)
+HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU)
HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE)
HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND)
HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND)
@@ -334,6 +344,8 @@ HANDLE_DW_AT(0x2106, body_end, 0, GNU)
HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU)
HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU)
HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU)
+HANDLE_DW_AT(0x2111, GNU_call_site_value, 0, GNU)
+HANDLE_DW_AT(0x2117, GNU_all_call_sites, 0, GNU)
HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU)
// Extensions for Fission proposal.
HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU)
@@ -343,6 +355,8 @@ HANDLE_DW_AT(0x2133, GNU_addr_base, 0, GNU)
HANDLE_DW_AT(0x2134, GNU_pubnames, 0, GNU)
HANDLE_DW_AT(0x2135, GNU_pubtypes, 0, GNU)
HANDLE_DW_AT(0x2136, GNU_discriminator, 0, GNU)
+// Sun Extension
+HANDLE_DW_AT(0x2224, SUN_amd64_parmdump, 0, GNU)
// Borland extensions.
HANDLE_DW_AT(0x3b11, BORLAND_property_read, 0, BORLAND)
HANDLE_DW_AT(0x3b12, BORLAND_property_write, 0, BORLAND)
@@ -696,6 +710,7 @@ HANDLE_DW_CC(0x03, nocall)
HANDLE_DW_CC(0x04, pass_by_reference)
HANDLE_DW_CC(0x05, pass_by_value)
// Vendor extensions:
+HANDLE_DW_CC(0x40, GNU_renesas_sh)
HANDLE_DW_CC(0x41, GNU_borland_fastcall_i386)
HANDLE_DW_CC(0xb0, BORLAND_safecall)
HANDLE_DW_CC(0xb1, BORLAND_stdcall)
@@ -705,6 +720,22 @@ HANDLE_DW_CC(0xb4, BORLAND_msreturn)
HANDLE_DW_CC(0xb5, BORLAND_thiscall)
HANDLE_DW_CC(0xb6, BORLAND_fastcall)
HANDLE_DW_CC(0xc0, LLVM_vectorcall)
+HANDLE_DW_CC(0xc1, LLVM_Win64)
+HANDLE_DW_CC(0xc2, LLVM_X86_64SysV)
+HANDLE_DW_CC(0xc3, LLVM_AAPCS)
+HANDLE_DW_CC(0xc4, LLVM_AAPCS_VFP)
+HANDLE_DW_CC(0xc5, LLVM_IntelOclBicc)
+HANDLE_DW_CC(0xc6, LLVM_SpirFunction)
+HANDLE_DW_CC(0xc7, LLVM_OpenCLKernel)
+HANDLE_DW_CC(0xc8, LLVM_Swift)
+HANDLE_DW_CC(0xc9, LLVM_PreserveMost)
+HANDLE_DW_CC(0xca, LLVM_PreserveAll)
+HANDLE_DW_CC(0xcb, LLVM_X86RegCall)
+// From GCC source code (include/dwarf2.h): This DW_CC_ value is not currently
+// generated by any toolchain. It is used internally to GDB to indicate OpenCL C
+// functions that have been compiled with the IBM XL C for OpenCL compiler and use
+// a non-platform calling convention for passing OpenCL C vector types.
+HANDLE_DW_CC(0xff, GDB_IBM_OpenCL)
// Line Number Extended Opcode Encodings
HANDLE_DW_LNE(0x01, end_sequence)
@@ -735,6 +766,9 @@ HANDLE_DW_LNCT(0x02, directory_index)
HANDLE_DW_LNCT(0x03, timestamp)
HANDLE_DW_LNCT(0x04, size)
HANDLE_DW_LNCT(0x05, MD5)
+// A vendor extension until http://dwarfstd.org/ShowIssue.php?issue=180201.1 is
+// accepted and incorporated into the next DWARF standard.
+HANDLE_DW_LNCT(0x2001, LLVM_source)
// DWARF v5 Macro information.
HANDLE_DW_MACRO(0x01, define)
@@ -819,6 +853,45 @@ HANDLE_DW_UT(0x04, skeleton)
HANDLE_DW_UT(0x05, split_compile)
HANDLE_DW_UT(0x06, split_type)
+// DWARF section types. (enum name, ELF name, cmdline name)
+// Note that these IDs don't mean anything.
+// TODO: Add Mach-O and COFF names.
+// Official DWARF sections.
+HANDLE_DWARF_SECTION(DebugAbbrev, ".debug_abbrev", "debug-abbrev")
+HANDLE_DWARF_SECTION(DebugAddr, ".debug_addr", "debug-addr")
+HANDLE_DWARF_SECTION(DebugAranges, ".debug_aranges", "debug-aranges")
+HANDLE_DWARF_SECTION(DebugInfo, ".debug_info", "debug-info")
+HANDLE_DWARF_SECTION(DebugTypes, ".debug_types", "debug-types")
+HANDLE_DWARF_SECTION(DebugLine, ".debug_line", "debug-line")
+HANDLE_DWARF_SECTION(DebugLineStr, ".debug_line_str", "debug-line-str")
+HANDLE_DWARF_SECTION(DebugLoc, ".debug_loc", "debug-loc")
+HANDLE_DWARF_SECTION(DebugFrame, ".debug_frame", "debug-frame")
+HANDLE_DWARF_SECTION(DebugMacro, ".debug_macro", "debug-macro")
+HANDLE_DWARF_SECTION(DebugNames, ".debug_names", "debug-names")
+HANDLE_DWARF_SECTION(DebugPubnames, ".debug_pubnames", "debug-pubnames")
+HANDLE_DWARF_SECTION(DebugPubtypes, ".debug_pubtypes", "debug-pubtypes")
+HANDLE_DWARF_SECTION(DebugGnuPubnames, ".debug_gnu_pubnames", "debug-gnu-pubnames")
+HANDLE_DWARF_SECTION(DebugGnuPubtypes, ".debug_gnu_pubtypes", "debug-gnu-pubtypes")
+HANDLE_DWARF_SECTION(DebugRanges, ".debug_ranges", "debug-ranges")
+HANDLE_DWARF_SECTION(DebugRnglists, ".debug_rnglists", "debug-rnglists")
+HANDLE_DWARF_SECTION(DebugStr, ".debug_str", "debug-str")
+HANDLE_DWARF_SECTION(DebugStrOffsets, ".debug_str_offsets", "debug-str-offsets")
+HANDLE_DWARF_SECTION(DebugCUIndex, ".debug_cu_index", "debug-cu-index")
+HANDLE_DWARF_SECTION(DebugTUIndex, ".debug_tu_index", "debug-tu-index")
+// Vendor extensions.
+HANDLE_DWARF_SECTION(AppleNames, ".apple_names", "apple-names")
+HANDLE_DWARF_SECTION(AppleTypes, ".apple_types", "apple-types")
+HANDLE_DWARF_SECTION(AppleNamespaces, ".apple_namespaces", "apple-namespaces")
+HANDLE_DWARF_SECTION(AppleObjC, ".apple_objc", "apple-objc")
+HANDLE_DWARF_SECTION(GdbIndex, ".gdb_index", "gdb-index")
+
+HANDLE_DW_IDX(0x01, compile_unit)
+HANDLE_DW_IDX(0x02, type_unit)
+HANDLE_DW_IDX(0x03, die_offset)
+HANDLE_DW_IDX(0x04, parent)
+HANDLE_DW_IDX(0x05, type_hash)
+
+
#undef HANDLE_DW_TAG
#undef HANDLE_DW_AT
#undef HANDLE_DW_FORM
@@ -836,3 +909,5 @@ HANDLE_DW_UT(0x06, split_type)
#undef HANDLE_DW_CFA
#undef HANDLE_DW_APPLE_PROPERTY
#undef HANDLE_DW_UT
+#undef HANDLE_DWARF_SECTION
+#undef HANDLE_DW_IDX
diff --git a/gnu/llvm/lib/Target/X86/X86.td b/gnu/llvm/lib/Target/X86/X86.td
index 8902a853425..3ce047a4920 100644
--- a/gnu/llvm/lib/Target/X86/X86.td
+++ b/gnu/llvm/lib/Target/X86/X86.td
@@ -31,6 +31,12 @@ def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
// X86 Subtarget features
//===----------------------------------------------------------------------===//
+def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
+ "Enable X87 float instructions">;
+
+def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
+ "Enable NOPL instruction">;
+
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
@@ -92,10 +98,10 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
-def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
- "Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
+def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+ "PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
@@ -113,18 +119,30 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
+def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
+ "Enable three-operand fused multiple-add",
+ [FeatureAVX]>;
+def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
+ "Support 16-bit floating point conversion instructions",
+ [FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
- [FeatureAVX2]>;
+ [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
+def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
+ "true", "Enable AVX-512 Population Count Instructions",
+ [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
+def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
+ "true",
+ "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
@@ -134,14 +152,32 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
+def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
+ "Enable AVX-512 Vector Byte Manipulation Instructions",
+ [FeatureBWI]>;
+def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
+ "Enable AVX-512 further Vector Byte Manipulation Instructions",
+ [FeatureBWI]>;
+def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
+ "Enable AVX-512 Integer Fused Multiple-Add",
+ [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
+def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
+ "Enable AVX-512 Vector Neural Network Instructions",
+ [FeatureAVX512]>;
+def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
+ "Enable AVX-512 Bit Algorithms",
+ [FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
-def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
- "Enable three-operand fused multiple-add",
- [FeatureAVX]>;
+def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
+ "Enable Galois Field Arithmetic Instructions",
+ [FeatureSSE2]>;
+def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
+ "Enable vpclmulqdq instructions",
+ [FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
@@ -154,15 +190,17 @@ def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
+def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
+ "Promote selected AES instructions to AVX512/AVX registers",
+ [FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
+def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
+ "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
-def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
- "Support 16-bit floating point conversion instructions",
- [FeatureAVX]>;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
@@ -173,19 +211,27 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
-def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
- "Support HLE">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
+def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
+ "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
+def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
+ "Enable MONITORX/MWAITX timer functionality">;
+def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
+ "Enable Cache Line Zero">;
+def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
+ "Enable Cache Demote">;
+def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
+ "Support ptwrite instruction">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -193,117 +239,320 @@ def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
-def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
- "Use 16-bit divide for positive values less than 65536">;
+ "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
-// TODO: This feature ought to be renamed.
-// What it really refers to are CPUs for which certain instructions
-// (which ones besides the example below?) are microcoded.
-// The best examples of this are the memory forms of CALL and PUSH
-// instructions, which should be avoided in favor of a MOV + register CALL/PUSH.
-def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
- "CallRegIndirect", "true",
- "Call register indirect">;
+def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
+ "Invalidate Process-Context Identifier">;
+def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
+ "Enable Software Guard Extensions">;
+def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
+ "Flush A Cache Line Optimized">;
+def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
+ "Cache Line Write Back">;
+def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
+ "Write Back No Invalidate">;
+def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
+ "Support RDPID instructions">;
+def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
+ "Wait and pause enhancements">;
+// On some processors, instructions that implicitly take two memory operands are
+// slow. In practice, this means that CALL, PUSH, and POP with memory operands
+// should be avoided in favor of a MOV + register CALL/PUSH/POP.
+def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
+ "SlowTwoMemOps", "true",
+ "Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
+ "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
+def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
+ "HasPOPCNTFalseDeps", "true",
+ "POPCNT has a false dependency on dest register">;
+def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
+ "HasLZCNTFalseDeps", "true",
+ "LZCNT/TZCNT have a false dependency on dest register">;
+def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
+ "platform configuration instruction">;
+def FeatureSaveArgs
+ : SubtargetFeature<"save-args", "SaveArgs", "true",
+ "Save register arguments on the stack.">;
+// On recent X86 (port bound) processors, it's preferable to combine into a
+// single shuffle using a variable mask over multiple fixed shuffles.
+def FeatureFastVariableShuffle
+ : SubtargetFeature<"fast-variable-shuffle",
+ "HasFastVariableShuffle",
+ "true", "Shuffles with variable masks are fast">;
+// On some X86 processors, there is no performance hazard to writing only the
+// lower parts of a YMM or ZMM register without clearing the upper part.
+def FeatureFastPartialYMMorZMMWrite
+ : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
+ "HasFastPartialYMMorZMMWrite",
+ "true", "Partial writes to YMM/ZMM registers are fast">;
+// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
+// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
+// vector FSQRT has higher throughput than the corresponding NR code.
+// The idea is that throughput bound code is likely to be vectorized, so for
+// vectorized code we should care about the throughput of SQRT operations.
+// But if the code is scalar that probably means that the code has some kind of
+// dependency and we should care more about reducing the latency.
+def FeatureFastScalarFSQRT
+ : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
+ "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
+def FeatureFastVectorFSQRT
+ : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
+ "true", "Vector SQRT is fast (disable Newton-Raphson)">;
+// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
+// be used to replace test/set sequences.
+def FeatureFastLZCNT
+ : SubtargetFeature<
+ "fast-lzcnt", "HasFastLZCNT", "true",
+ "LZCNT instructions are as fast as most simple integer ops">;
+// If the target can efficiently decode NOPs up to 11 bytes in length.
+def FeatureFast11ByteNOP
+ : SubtargetFeature<
+ "fast-11bytenop", "HasFast11ByteNOP", "true",
+ "Target can quickly decode up to 11 byte NOPs">;
+// If the target can efficiently decode NOPs up to 15 bytes in length.
+def FeatureFast15ByteNOP
+ : SubtargetFeature<
+ "fast-15bytenop", "HasFast15ByteNOP", "true",
+ "Target can quickly decode up to 15 byte NOPs">;
+// Sandy Bridge and newer processors can use SHLD with the same source on both
+// inputs to implement rotate to avoid the partial flag update of the normal
+// rotate instructions.
+def FeatureFastSHLDRotate
+ : SubtargetFeature<
+ "fast-shld-rotate", "HasFastSHLDRotate", "true",
+ "SHLD can be used as a faster rotate">;
+
+// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
+// "string operations"). See "REP String Enhancement" in the Intel Software
+// Development Manual. This feature essentially means that REP MOVSB will copy
+// using the largest available size instead of copying bytes one by one, making
+// it at least as fast as REPMOVS{W,D,Q}.
+def FeatureERMSB
+ : SubtargetFeature<
+ "ermsb", "HasERMSB", "true",
+ "REP MOVS/STOS are fast">;
+
+// Sandy Bridge and newer processors have many instructions that can be
+// fused with conditional branches and pass through the CPU as a single
+// operation.
+def FeatureMacroFusion
+ : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
+ "Various instructions can be fused with conditional branches">;
+
+// Gather is available since Haswell (AVX2 set). So technically, we can
+// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
+// Skylake Client processor has faster Gathers than HSW and performance is
+// similar to Skylake Server (AVX-512).
+def FeatureHasFastGather
+ : SubtargetFeature<"fast-gather", "HasFastGather", "true",
+ "Indicates if gather is reasonably fast.">;
+
+def FeaturePrefer256Bit
+ : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
+ "Prefer 256-bit AVX instructions">;
+
+// Enable mitigation of some aspects of speculative execution related
+// vulnerabilities by removing speculatable indirect branches. This disables
+// jump-table formation, rewrites explicit `indirectbr` instructions into
+// `switch` instructions, and uses a special construct called a "retpoline" to
+// prevent speculation of the remaining indirect branches (indirect calls and
+// tail calls).
+def FeatureRetpoline
+ : SubtargetFeature<"retpoline", "UseRetpoline", "true",
+ "Remove speculation of indirect branches from the "
+ "generated code, either by avoiding them entirely or "
+ "lowering them with a speculation blocking construct.">;
+
+// Rely on external thunks for the emitted retpoline calls. This allows users
+// to provide their own custom thunk definitions in highly specialized
+// environments such as a kernel that does boot-time hot patching.
+def FeatureRetpolineExternalThunk
+ : SubtargetFeature<
+ "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
+ "Enable retpoline, but with an externally provided thunk.",
+ [FeatureRetpoline]>;
+
+// Direct Move instructions.
+def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
+ "Support movdiri instruction">;
+def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
+ "Support movdir64b instruction">;
//===----------------------------------------------------------------------===//
-// X86 processors supported.
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+include "X86RegisterBanks.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "X86Schedule.td"
+include "X86InstrInfo.td"
+include "X86SchedPredicates.td"
+
+def X86InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+include "X86ScheduleAtom.td"
+include "X86SchedSandyBridge.td"
+include "X86SchedHaswell.td"
+include "X86SchedBroadwell.td"
+include "X86ScheduleSLM.td"
+include "X86ScheduleZnver1.td"
+include "X86ScheduleBtVer2.td"
+include "X86SchedSkylakeClient.td"
+include "X86SchedSkylakeServer.td"
def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
"Intel Atom processors">;
def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
"Intel Silvermont processors">;
+def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
+ "Intel Goldmont processors">;
+def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
+ "Intel Goldmont Plus processors">;
+def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
+ "Intel Tremont processors">;
+def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily",
+ "IntelHaswell", "Intel Haswell processors">;
+def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily",
+ "IntelBroadwell", "Intel Broadwell processors">;
+def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily",
+ "IntelSkylake", "Intel Skylake processors">;
+def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily",
+ "IntelKNL", "Intel Knights Landing processors">;
+def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily",
+ "IntelSKX", "Intel Skylake Server processors">;
+def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily",
+ "IntelCannonlake", "Intel Cannonlake processors">;
+def ProcIntelICL : SubtargetFeature<"icelake-client", "X86ProcFamily",
+ "IntelIcelakeClient", "Intel Icelake processors">;
+def ProcIntelICX : SubtargetFeature<"icelake-server", "X86ProcFamily",
+ "IntelIcelakeServer", "Intel Icelake Server processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-def : Proc<"generic", [FeatureSlowUAMem16]>;
-def : Proc<"i386", [FeatureSlowUAMem16]>;
-def : Proc<"i486", [FeatureSlowUAMem16]>;
-def : Proc<"i586", [FeatureSlowUAMem16]>;
-def : Proc<"pentium", [FeatureSlowUAMem16]>;
-def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"i686", [FeatureSlowUAMem16]>;
-def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
-def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV,
- FeatureFXSR]>;
-def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR]>;
-def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR, FeatureSlowBTMem]>;
-def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
- FeatureFXSR, FeatureSlowBTMem]>;
-def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
- FeatureFXSR]>;
-def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
- FeatureFXSR, FeatureSlowBTMem]>;
+def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
+
+def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
+def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
+ FeatureNOPL]>;
+
+def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+ FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
+
+foreach P = ["pentium3", "pentium3m"] in {
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
+ FeatureFXSR, FeatureNOPL]>;
+}
+
+// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
+// The intent is to enable it for pentium4 which is the current default
+// processor in a vanilla 32-bit clang compilation when no specific
+// architecture is specified. This generally gives a nice performance
+// increase on silvermont, with largely neutral behavior on other
+// contemporary large core processors.
+// pentium-m, pentium4m, prescott and nocona are included as a preventative
+// measure to avoid performance surprises, in case clang's default cpu
+// changes slightly.
+
+def : ProcessorModel<"pentium-m", GenericPostRAModel,
+ [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+ FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+
+foreach P = ["pentium4", "pentium4m"] in {
+ def : ProcessorModel<P, GenericPostRAModel,
+ [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+ FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+}
+
+// Intel Quark.
+def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR,
- FeatureSlowBTMem]>;
+ [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
+ FeatureFXSR, FeatureNOPL]>;
// NetBurst.
-def : Proc<"prescott",
- [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR,
- FeatureSlowBTMem]>;
-def : Proc<"nocona", [
+def : ProcessorModel<"prescott", GenericPostRAModel,
+ [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
+ FeatureFXSR, FeatureNOPL]>;
+def : ProcessorModel<"nocona", GenericPostRAModel, [
+ FeatureX87,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem
+ FeatureNOPL,
+ FeatureCMPXCHG16B
]>;
// Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel, [
+ FeatureX87,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
+ FeatureX87,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
+ FeatureX87,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
FeatureMOVBE,
- FeatureSlowBTMem,
FeatureLEAForSP,
FeatureSlowDivide32,
FeatureSlowDivide64,
- FeatureCallRegIndirect,
+ FeatureSlowTwoMemOps,
FeatureLEAUsesAG,
FeaturePadShortFunctions,
FeatureLAHFSAHF
@@ -313,34 +562,108 @@ def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
ProcIntelSLM,
+ FeatureX87,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
FeaturePCLMUL,
FeatureAES,
FeatureSlowDivide64,
- FeatureCallRegIndirect,
+ FeatureSlowTwoMemOps,
FeaturePRFCHW,
FeatureSlowLEA,
FeatureSlowIncDec,
- FeatureSlowBTMem,
- FeatureLAHFSAHF
+ FeatureSlowPMULLD,
+ FeatureRDRAND,
+ FeatureLAHFSAHF,
+ FeaturePOPCNTFalseDeps
]>;
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
+// Feature-list accumulator: Value is Inherited followed by NewFeatures, so a
+// processor's list can be built by extending another list's Value.
+class ProcessorFeatures<list<SubtargetFeature> Inherited,
+ list<SubtargetFeature> NewFeatures> {
+ list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
+}
+
+// ProcessorModel whose feature list is ProcFeatures followed by OtherFeatures,
+// so a shared feature list can be combined with per-processor additions.
+class ProcModel<string Name, SchedMachineModel Model,
+ list<SubtargetFeature> ProcFeatures,
+ list<SubtargetFeature> OtherFeatures> :
+ ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
+
+def GLMFeatures : ProcessorFeatures<[], [
+ FeatureX87,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeatureSlowTwoMemOps,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF,
+ FeatureMPX,
+ FeatureSHA,
+ FeatureRDRAND,
+ FeatureRDSEED,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT,
+ FeatureFSGSBase
+]>;
+
+class GoldmontProc<string Name> : ProcModel<Name, SLMModel,
+ GLMFeatures.Value, [
+ ProcIntelGLM,
+ FeaturePOPCNTFalseDeps
+]>;
+def : GoldmontProc<"goldmont">;
+
+def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [
+ FeaturePTWRITE,
+ FeatureRDPID,
+ FeatureSGX
+]>;
+
+class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel,
+ GLPFeatures.Value, [
+ ProcIntelGLP
+]>;
+def : GoldmontPlusProc<"goldmont-plus">;
+
+class TremontProc<string Name> : ProcModel<Name, SLMModel,
+ GLPFeatures.Value, [
+ ProcIntelTRM,
+ FeatureCLDEMOTE,
+ FeatureGFNI,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureWAITPKG
+]>;
+def : TremontProc<"tremont">;
+
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureX87,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeaturePOPCNT,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
@@ -348,244 +671,276 @@ def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureX87,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureMacroFusion
]>;
def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+def SNBFeatures : ProcessorFeatures<[], [
+ FeatureX87,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureSlowUAMem32,
FeaturePOPCNT,
FeatureAES,
+ FeatureSlowDivide64,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureSlow3OpsLEA,
+ FeatureFastScalarFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureSlowIncDec,
+ FeatureMacroFusion
+]>;
+
+class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
+ SNBFeatures.Value, [
+ FeatureSlowUAMem32,
+ FeaturePOPCNTFalseDeps
]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
-class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureMMX,
- FeatureAVX,
- FeatureFXSR,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureXSAVE,
- FeatureXSAVEOPT,
+def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
FeatureRDRAND,
FeatureF16C,
- FeatureFSGSBase,
- FeatureLAHFSAHF
+ FeatureFSGSBase
+]>;
+
+class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
+ IVBFeatures.Value, [
+ FeatureSlowUAMem32,
+ FeaturePOPCNTFalseDeps
]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
-class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureMMX,
+def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureAVX2,
- FeatureFXSR,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
+ FeatureERMSB,
FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureSlowIncDec,
- FeatureLAHFSAHF
+ FeatureINVPCID,
+ FeatureLZCNT,
+ FeatureMOVBE,
+ FeatureFastVariableShuffle
+]>;
+
+class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
+ HSWFeatures.Value, [
+ ProcIntelHSW,
+ FeaturePOPCNTFalseDeps,
+ FeatureLZCNTFalseDeps
]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
-class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureMMX,
- FeatureAVX2,
- FeatureFXSR,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
+def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
FeatureADX,
FeatureRDSEED,
- FeatureSlowIncDec,
- FeatureLAHFSAHF
+ FeaturePRFCHW
+]>;
+class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
+ BDWFeatures.Value, [
+ ProcIntelBDW,
+ FeaturePOPCNTFalseDeps,
+ FeatureLZCNTFalseDeps
]>;
def : BroadwellProc<"broadwell">;
-// FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureMMX,
+def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
+ FeatureMPX,
+ FeatureRTM,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT,
+ FeatureFastVectorFSQRT
+]>;
+
+class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
+ SKLFeatures.Value, [
+ ProcIntelSKL,
+ FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps,
+ FeatureSGX
+]>;
+def : SkylakeClientProc<"skylake">;
+
+def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureAVX512,
- FeatureFXSR,
FeatureERI,
FeatureCDI,
FeaturePFI,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
+ FeaturePREFETCHWT1,
+ FeatureADX,
+ FeatureRDSEED,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureSlowIncDec,
- FeatureMPX,
- FeatureLAHFSAHF
+ FeaturePRFCHW
+]>;
+
+// FIXME: define KNL model
+class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
+ KNLFeatures.Value, [
+ ProcIntelKNL,
+ FeatureSlowTwoMemOps,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureHasFastGather
]>;
def : KnightsLandingProc<"knl">;
-// FIXME: define SKX model
-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureMMX,
+class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
+ KNLFeatures.Value, [
+ ProcIntelKNL,
+ FeatureSlowTwoMemOps,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureHasFastGather,
+ FeatureVPOPCNTDQ
+]>;
+def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
+
+def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
FeatureAVX512,
- FeatureFXSR,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeaturePKU,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureADX,
- FeatureRDSEED,
- FeatureSlowIncDec,
- FeatureMPX,
- FeatureXSAVEC,
- FeatureXSAVES,
- FeatureLAHFSAHF
+ FeatureCLWB
+]>;
+
+class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ SKXFeatures.Value, [
+ ProcIntelSKX,
+ FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps
]>;
-def : SkylakeProc<"skylake">;
-def : SkylakeProc<"skx">; // Legacy alias.
+def : SkylakeServerProc<"skylake-avx512">;
+def : SkylakeServerProc<"skx">; // Legacy alias.
+def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
+ FeatureAVX512,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeaturePKU,
+ FeatureVBMI,
+ FeatureIFMA,
+ FeatureSHA,
+ FeatureSGX
+]>;
+
+class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ CNLFeatures.Value, [
+ ProcIntelCNL,
+ FeatureHasFastGather
+]>;
+def : CannonlakeProc<"cannonlake">;
+
+def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
+ FeatureBITALG,
+ FeatureVAES,
+ FeatureVBMI2,
+ FeatureVNNI,
+ FeatureVPCLMULQDQ,
+ FeatureVPOPCNTDQ,
+ FeatureGFNI,
+ FeatureCLWB,
+ FeatureRDPID
+]>;
+
+class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ ICLFeatures.Value, [
+ ProcIntelICL,
+ FeatureHasFastGather
+]>;
+def : IcelakeClientProc<"icelake-client">;
+
+class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ ICLFeatures.Value, [
+ ProcIntelICX,
+ FeaturePCONFIG,
+ FeatureWBNOINVD,
+ FeatureHasFastGather
+]>;
+def : IcelakeServerProc<"icelake-server">;
// AMD CPUs.
-def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
- FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
- FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
- FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
-def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
- FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
-def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
- FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
+def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+
+foreach P = ["athlon", "athlon-tbird"] in {
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
+ FeatureNOPL, FeatureSlowSHLD]>;
+}
+
+foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
+}
+
+foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>;
+}
+
+foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>;
+}
+
+foreach P = ["amdfam10", "barcelona"] in {
+ def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
+ FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
+ FeatureSlowSHLD, FeatureLAHFSAHF]>;
+}
// Bobcat
def : Proc<"btver1", [
+ FeatureX87,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureFXSR,
+ FeatureNOPL,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
- FeatureXSAVE,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFast15ByteNOP
]>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, [
+ FeatureX87,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
+ FeatureNOPL,
FeatureSSE4A,
FeatureCMPXCHG16B,
FeaturePRFCHW,
@@ -595,15 +950,19 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureF16C,
FeatureMOVBE,
FeatureLZCNT,
+ FeatureFastLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFast15ByteNOP,
+ FeatureFastPartialYMMorZMMWrite
]>;
// Bulldozer
def : Proc<"bdver1", [
+ FeatureX87,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -613,15 +972,20 @@ def : Proc<"bdver1", [
FeatureMMX,
FeatureAVX,
FeatureFXSR,
+ FeatureNOPL,
FeatureSSE4A,
FeatureLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
+ FeatureLWP,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFast11ByteNOP,
+ FeatureMacroFusion
]>;
// Piledriver
def : Proc<"bdver2", [
+ FeatureX87,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -631,6 +995,7 @@ def : Proc<"bdver2", [
FeatureMMX,
FeatureAVX,
FeatureFXSR,
+ FeatureNOPL,
FeatureSSE4A,
FeatureF16C,
FeatureLZCNT,
@@ -638,13 +1003,17 @@ def : Proc<"bdver2", [
FeatureXSAVE,
FeatureBMI,
FeatureTBM,
+ FeatureLWP,
FeatureFMA,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFast11ByteNOP,
+ FeatureMacroFusion
]>;
// Steamroller
def : Proc<"bdver3", [
+ FeatureX87,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -654,6 +1023,7 @@ def : Proc<"bdver3", [
FeatureMMX,
FeatureAVX,
FeatureFXSR,
+ FeatureNOPL,
FeatureSSE4A,
FeatureF16C,
FeatureLZCNT,
@@ -661,18 +1031,23 @@ def : Proc<"bdver3", [
FeatureXSAVE,
FeatureBMI,
FeatureTBM,
+ FeatureLWP,
FeatureFMA,
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureFSGSBase,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFast11ByteNOP,
+ FeatureMacroFusion
]>;
// Excavator
def : Proc<"bdver4", [
+ FeatureX87,
FeatureMMX,
FeatureAVX2,
FeatureFXSR,
+ FeatureNOPL,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -686,18 +1061,61 @@ def : Proc<"bdver4", [
FeatureBMI,
FeatureBMI2,
FeatureTBM,
+ FeatureLWP,
FeatureFMA,
FeatureXSAVEOPT,
+ FeatureSlowSHLD,
FeatureFSGSBase,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFast11ByteNOP,
+ FeatureMWAITX,
+ FeatureMacroFusion
]>;
-def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
+// Znver1
+def: ProcessorModel<"znver1", Znver1Model, [
+ FeatureADX,
+ FeatureAES,
+ FeatureAVX2,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureCLFLUSHOPT,
+ FeatureCLZERO,
+ FeatureCMPXCHG16B,
+ FeatureF16C,
+ FeatureFMA,
+ FeatureFSGSBase,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureFastLZCNT,
+ FeatureLAHFSAHF,
+ FeatureLZCNT,
+ FeatureFast15ByteNOP,
+ FeatureMacroFusion,
+ FeatureMMX,
+ FeatureMOVBE,
+ FeatureMWAITX,
+ FeaturePCLMUL,
+ FeaturePOPCNT,
+ FeaturePRFCHW,
+ FeatureRDRAND,
+ FeatureRDSEED,
+ FeatureSHA,
+ FeatureSSE4A,
+ FeatureSlowSHLD,
+ FeatureX87,
+ FeatureXSAVE,
+ FeatureXSAVEC,
+ FeatureXSAVEOPT,
+ FeatureXSAVES]>;
-def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>;
+def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
+ FeatureSSE1, FeatureFXSR]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -709,23 +1127,17 @@ def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>;
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
-def : ProcessorModel<"x86-64", SandyBridgeModel,
- [FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem ]>;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "X86RegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "X86InstrInfo.td"
-
-def X86InstrInfo : InstrInfo;
+def : ProcessorModel<"x86-64", SandyBridgeModel, [
+ FeatureX87,
+ FeatureMMX,
+ FeatureSSE2,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureSlow3OpsLEA,
+ FeatureSlowIncDec,
+ FeatureMacroFusion
+]>;
//===----------------------------------------------------------------------===//
// Calling Conventions
@@ -784,4 +1196,11 @@ def X86 : Target {
let InstructionSet = X86InstrInfo;
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+ let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "X86PfmCounters.td"
diff --git a/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp b/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp
index ea076e576ef..1a8d3b03836 100644
--- a/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -48,6 +48,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
StackPtr = TRI->getStackRegister();
+ SaveArgs = Is64Bit ? STI.getSaveArgs() : 0;
}
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -91,7 +92,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
- MFI.hasCopyImplyingStackAdjustment());
+ MFI.hasCopyImplyingStackAdjustment() ||
+ SaveArgs);
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -872,6 +874,24 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MI->getOperand(3).setIsDead();
}
+/// Return the 64-bit general purpose registers used to pass integer arguments
+/// under \p CallConv, in argument order: four registers when the convention is
+/// Win64, six otherwise. Only valid for 64-bit subtargets.
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+                                                const X86Subtarget &Subtarget) {
+  assert(Subtarget.is64Bit());
+
+  static const MCPhysReg Win64Regs[] = {
+    X86::RCX, X86::RDX, X86::R8, X86::R9
+  };
+  static const MCPhysReg DefaultRegs[] = {
+    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+  };
+
+  if (Subtarget.isCallingConvWin64(CallConv))
+    return makeArrayRef(Win64Regs);
+
+  return makeArrayRef(DefaultRegs);
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -1154,6 +1174,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
nullptr, DwarfFramePtr));
}
+ if (SaveArgs && !Fn.arg_empty()) {
+ ArrayRef<MCPhysReg> GPRs =
+ get64BitArgumentGPRs(Fn.getCallingConv(), STI);
+ unsigned arg_size = Fn.arg_size();
+ unsigned RI = 0;
+ int64_t SaveSize = 0;
+
+ if (Fn.hasStructRetAttr()) {
+ GPRs = GPRs.drop_front(1);
+ arg_size--;
+ }
+
+ for (MCPhysReg Reg : GPRs) {
+ if (++RI > arg_size)
+ break;
+
+ SaveSize += SlotSize;
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(Reg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Realign the stack. PUSHes are the most space efficient.
+ while (SaveSize % getStackAlignment()) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(GPRs.front())
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ SaveSize += SlotSize;
+ }
+
+ //dlg StackSize -= SaveSize;
+ //dlg MFI.setStackSize(StackSize);
+ X86FI->setSaveArgSize(SaveSize);
+ }
+
if (NeedsWinFPO) {
// .cv_fpo_setframe $FramePtr
HasWinCFI = true;
@@ -1619,20 +1676,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
uint64_t SEHStackAllocAmt = NumBytes;
- if (HasFP) {
- // Pop EBP.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
- MachineFramePtr)
- .setMIFlag(MachineInstr::FrameDestroy);
- if (NeedsDwarfCFI) {
- unsigned DwarfStackPtr =
- TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
- BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
- nullptr, DwarfStackPtr, -SlotSize));
- --MBBI;
- }
- }
-
MachineBasicBlock::iterator FirstCSPop = MBBI;
// Skip the callee-saved pop instructions.
while (MBBI != MBB.begin()) {
@@ -1702,6 +1745,28 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
+ if (HasFP) {
+ MBBI = Terminator;
+
+ if (X86FI->getSaveArgSize()) {
+ // LEAVE is effectively mov rbp,rsp; pop rbp
+ BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64), MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else {
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+ if (NeedsDwarfCFI) {
+ unsigned DwarfStackPtr =
+ TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
+ nullptr, DwarfStackPtr, -SlotSize));
+ --MBBI;
+ }
+ }
+
// Windows unwinder will not invoke function's exception handler if IP is
// either in prologue or in epilogue. This behavior causes a problem when a
// call immediately precedes an epilogue, because the return address points
@@ -1790,6 +1855,8 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
"FPDelta isn't aligned per the Win64 ABI!");
}
+ if (FI >= 0)
+ Offset -= X86FI->getSaveArgSize();
if (TRI->hasBasePointer(MF)) {
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
diff --git a/gnu/llvm/lib/Target/X86/X86FrameLowering.h b/gnu/llvm/lib/Target/X86/X86FrameLowering.h
index 430848d4d1d..a301056e89b 100644
--- a/gnu/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/gnu/llvm/lib/Target/X86/X86FrameLowering.h
@@ -37,6 +37,8 @@ public:
const X86RegisterInfo *TRI;
const X86ReturnProtectorLowering RPL;
+ /// True when prologues should push register arguments onto the stack.
+ /// Set in the constructor from the subtarget's getSaveArgs() and forced off
+ /// when not targeting 64-bit; when set, hasFP() reports a frame pointer.
+ bool SaveArgs;
+
unsigned SlotSize;
/// Is64Bit implies that x86_64 instructions are available.
diff --git a/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 00515dde556..393abe698db 100644
--- a/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -16,8 +16,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineValueType.h"
-#include <vector>
+#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -42,6 +41,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// stack frame in bytes.
unsigned CalleeSavedFrameSize = 0;
+ /// SaveArgSize - Size in bytes of the register arguments saved on the stack.
+ unsigned SaveArgSize = 0;
+
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
/// to the space used by the return address).
/// Used on windows platform for stdcall & fastcall name decoration
@@ -50,7 +52,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex = 0;
- /// \brief FrameIndex for return slot.
+ /// FrameIndex for return slot.
int FrameAddrIndex = 0;
/// TailCallReturnAddrDelta - The number of bytes by which return address
@@ -96,6 +98,12 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// copies.
bool IsSplitCSR = false;
+ /// True if this function uses the red zone.
+ bool UsesRedZone = false;
+
+ /// True if this function has WIN_ALLOCA instructions.
+ bool HasWinAlloca = false;
+
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
@@ -119,6 +127,9 @@ public:
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+ unsigned getSaveArgSize() const { return SaveArgSize; }
+ void setSaveArgSize(unsigned bytes) { SaveArgSize = bytes; }
+
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
@@ -167,6 +178,12 @@ public:
bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
+ bool getUsesRedZone() const { return UsesRedZone; }
+ void setUsesRedZone(bool V) { UsesRedZone = V; }
+
+ bool hasWinAlloca() const { return HasWinAlloca; }
+ void setHasWinAlloca(bool v) { HasWinAlloca = v; }
};
} // End llvm namespace
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 34143924b44..cf5439e42cf 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h
@@ -400,6 +400,9 @@ protected:
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment = 4;
+ /// Whether function prologues should save register arguments on the stack.
+ /// Explicitly zero-initialized, like the neighbouring members, so that
+ /// getSaveArgs() never returns an indeterminate value when nothing has
+ /// assigned it.
+ unsigned SaveArgs = 0;
+
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
// FIXME: this is a known good value for Yonah. How about others?
@@ -478,6 +481,8 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
+ unsigned getSaveArgs() const { return SaveArgs; }
+
/// Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
diff --git a/gnu/llvm/tools/clang/include/clang/Driver/Options.td b/gnu/llvm/tools/clang/include/clang/Driver/Options.td
index e8afeb469c5..b9b054606fb 100644
--- a/gnu/llvm/tools/clang/include/clang/Driver/Options.td
+++ b/gnu/llvm/tools/clang/include/clang/Driver/Options.td
@@ -2814,6 +2814,8 @@ def mretpoline : Flag<["-"], "mretpoline">, Group<m_x86_Features_Group>;
def mno_retpoline : Flag<["-"], "mno-retpoline">, Group<m_x86_Features_Group>;
def mretpoline_external_thunk : Flag<["-"], "mretpoline-external-thunk">, Group<m_x86_Features_Group>;
def mno_retpoline_external_thunk : Flag<["-"], "mno-retpoline-external-thunk">, Group<m_x86_Features_Group>;
+def msave_args : Flag<["-"], "msave-args">, Group<m_x86_Features_Group>;
+def mno_save_args : Flag<["-"], "mno-save-args">, Group<m_x86_Features_Group>;
// These are legacy user-facing driver-level option spellings. They are always
// aliases for options that are spelled using the more common Unix / GNU flag
diff --git a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp
index cfa6c571d6e..ffa9e0d2a87 100644
--- a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp
+++ b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp
@@ -15,8 +15,10 @@
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/TargetParser.h"
namespace clang {
namespace targets {
@@ -131,7 +133,11 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "mmx", true);
break;
- case CK_Icelake:
+ case CK_IcelakeServer:
+ setFeatureEnabledImpl(Features, "pconfig", true);
+ setFeatureEnabledImpl(Features, "wbnoinvd", true);
+ LLVM_FALLTHROUGH;
+ case CK_IcelakeClient:
setFeatureEnabledImpl(Features, "vaes", true);
setFeatureEnabledImpl(Features, "gfni", true);
setFeatureEnabledImpl(Features, "vpclmulqdq", true);
@@ -139,6 +145,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "avx512vnni", true);
setFeatureEnabledImpl(Features, "avx512vbmi2", true);
setFeatureEnabledImpl(Features, "avx512vpopcntdq", true);
+ setFeatureEnabledImpl(Features, "rdpid", true);
LLVM_FALLTHROUGH;
case CK_Cannonlake:
setFeatureEnabledImpl(Features, "avx512ifma", true);
@@ -159,7 +166,8 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "xsavec", true);
setFeatureEnabledImpl(Features, "xsaves", true);
setFeatureEnabledImpl(Features, "mpx", true);
- setFeatureEnabledImpl(Features, "sgx", true);
+ if (Kind != CK_SkylakeServer) // SKX inherits all SKL features, except SGX
+ setFeatureEnabledImpl(Features, "sgx", true);
setFeatureEnabledImpl(Features, "clflushopt", true);
setFeatureEnabledImpl(Features, "rtm", true);
LLVM_FALLTHROUGH;
@@ -174,6 +182,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "bmi", true);
setFeatureEnabledImpl(Features, "bmi2", true);
setFeatureEnabledImpl(Features, "fma", true);
+ setFeatureEnabledImpl(Features, "invpcid", true);
setFeatureEnabledImpl(Features, "movbe", true);
LLVM_FALLTHROUGH;
case CK_IvyBridge:
@@ -198,6 +207,7 @@ bool X86TargetInfo::initFeatureMap(
LLVM_FALLTHROUGH;
case CK_Core2:
setFeatureEnabledImpl(Features, "ssse3", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
LLVM_FALLTHROUGH;
case CK_Yonah:
case CK_Prescott:
@@ -216,9 +226,20 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "fxsr", true);
break;
+ case CK_Tremont:
+ setFeatureEnabledImpl(Features, "cldemote", true);
+ setFeatureEnabledImpl(Features, "movdiri", true);
+ setFeatureEnabledImpl(Features, "movdir64b", true);
+ setFeatureEnabledImpl(Features, "gfni", true);
+ setFeatureEnabledImpl(Features, "waitpkg", true);
+ LLVM_FALLTHROUGH;
+ case CK_GoldmontPlus:
+ setFeatureEnabledImpl(Features, "ptwrite", true);
+ setFeatureEnabledImpl(Features, "rdpid", true);
+ setFeatureEnabledImpl(Features, "sgx", true);
+ LLVM_FALLTHROUGH;
case CK_Goldmont:
setFeatureEnabledImpl(Features, "sha", true);
- setFeatureEnabledImpl(Features, "rdrnd", true);
setFeatureEnabledImpl(Features, "rdseed", true);
setFeatureEnabledImpl(Features, "xsave", true);
setFeatureEnabledImpl(Features, "xsaveopt", true);
@@ -229,6 +250,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "fsgsbase", true);
LLVM_FALLTHROUGH;
case CK_Silvermont:
+ setFeatureEnabledImpl(Features, "rdrnd", true);
setFeatureEnabledImpl(Features, "aes", true);
setFeatureEnabledImpl(Features, "pclmul", true);
setFeatureEnabledImpl(Features, "sse4.2", true);
@@ -239,6 +261,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "ssse3", true);
setFeatureEnabledImpl(Features, "fxsr", true);
setFeatureEnabledImpl(Features, "cx16", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
break;
case CK_KNM:
@@ -269,6 +292,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "xsaveopt", true);
setFeatureEnabledImpl(Features, "xsave", true);
setFeatureEnabledImpl(Features, "movbe", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
break;
case CK_K6_2:
@@ -282,6 +306,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "sse4a", true);
setFeatureEnabledImpl(Features, "lzcnt", true);
setFeatureEnabledImpl(Features, "popcnt", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
LLVM_FALLTHROUGH;
case CK_K8SSE3:
setFeatureEnabledImpl(Features, "sse3", true);
@@ -315,6 +340,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "prfchw", true);
setFeatureEnabledImpl(Features, "cx16", true);
setFeatureEnabledImpl(Features, "fxsr", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
break;
case CK_ZNVER1:
@@ -338,6 +364,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "prfchw", true);
setFeatureEnabledImpl(Features, "rdrnd", true);
setFeatureEnabledImpl(Features, "rdseed", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
setFeatureEnabledImpl(Features, "sha", true);
setFeatureEnabledImpl(Features, "sse4a", true);
setFeatureEnabledImpl(Features, "xsave", true);
@@ -372,6 +399,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "cx16", true);
setFeatureEnabledImpl(Features, "fxsr", true);
setFeatureEnabledImpl(Features, "xsave", true);
+ setFeatureEnabledImpl(Features, "sahf", true);
break;
}
if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec))
@@ -734,8 +762,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasMPX = true;
} else if (Feature == "+shstk") {
HasSHSTK = true;
- } else if (Feature == "+ibt") {
- HasIBT = true;
} else if (Feature == "+movbe") {
HasMOVBE = true;
} else if (Feature == "+sgx") {
@@ -760,14 +786,36 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasCLFLUSHOPT = true;
} else if (Feature == "+clwb") {
HasCLWB = true;
+ } else if (Feature == "+wbnoinvd") {
+ HasWBNOINVD = true;
} else if (Feature == "+prefetchwt1") {
HasPREFETCHWT1 = true;
} else if (Feature == "+clzero") {
HasCLZERO = true;
+ } else if (Feature == "+cldemote") {
+ HasCLDEMOTE = true;
+ } else if (Feature == "+rdpid") {
+ HasRDPID = true;
} else if (Feature == "+retpoline") {
HasRetpoline = true;
} else if (Feature == "+retpoline-external-thunk") {
HasRetpolineExternalThunk = true;
+ } else if (Feature == "+sahf") {
+ HasLAHFSAHF = true;
+ } else if (Feature == "+waitpkg") {
+ HasWAITPKG = true;
+ } else if (Feature == "+movdiri") {
+ HasMOVDIRI = true;
+ } else if (Feature == "+movdir64b") {
+ HasMOVDIR64B = true;
+ } else if (Feature == "+pconfig") {
+ HasPCONFIG = true;
+ } else if (Feature == "+ptwrite") {
+ HasPTWRITE = true;
+ } else if (Feature == "+invpcid") {
+ HasINVPCID = true;
+ } else if (Feature == "+save-args") {
+ HasSaveArgs = true;
}
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -888,6 +936,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_Goldmont:
defineCPUMacros(Builder, "goldmont");
break;
+ case CK_GoldmontPlus:
+ defineCPUMacros(Builder, "goldmont_plus");
+ break;
+ case CK_Tremont:
+ defineCPUMacros(Builder, "tremont");
+ break;
case CK_Nehalem:
case CK_Westmere:
case CK_SandyBridge:
@@ -897,7 +951,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_SkylakeClient:
case CK_SkylakeServer:
case CK_Cannonlake:
- case CK_Icelake:
+ case CK_IcelakeClient:
+ case CK_IcelakeServer:
// FIXME: Historically, we defined this legacy name, it would be nice to
// remove it at some point. We've never exposed fine-grained names for
// recent primary x86 CPUs, and we should keep it that way.
@@ -1093,12 +1148,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__XSAVES__");
if (HasPKU)
Builder.defineMacro("__PKU__");
- if (HasCX16)
- Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
if (HasCLFLUSHOPT)
Builder.defineMacro("__CLFLUSHOPT__");
if (HasCLWB)
Builder.defineMacro("__CLWB__");
+ if (HasWBNOINVD)
+ Builder.defineMacro("__WBNOINVD__");
if (HasMPX)
Builder.defineMacro("__MPX__");
if (HasSHSTK)
@@ -1109,6 +1164,22 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__PREFETCHWT1__");
if (HasCLZERO)
Builder.defineMacro("__CLZERO__");
+ if (HasRDPID)
+ Builder.defineMacro("__RDPID__");
+ if (HasCLDEMOTE)
+ Builder.defineMacro("__CLDEMOTE__");
+ if (HasWAITPKG)
+ Builder.defineMacro("__WAITPKG__");
+ if (HasMOVDIRI)
+ Builder.defineMacro("__MOVDIRI__");
+ if (HasMOVDIR64B)
+ Builder.defineMacro("__MOVDIR64B__");
+ if (HasPCONFIG)
+ Builder.defineMacro("__PCONFIG__");
+ if (HasPTWRITE)
+ Builder.defineMacro("__PTWRITE__");
+ if (HasINVPCID)
+ Builder.defineMacro("__INVPCID__");
// Each case falls through to the previous one here.
switch (SSELevel) {
@@ -1188,6 +1259,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
}
if (CPU >= CK_i586)
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+ if (HasCX16)
+ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
if (HasFloat128)
Builder.defineMacro("__SIZEOF_FLOAT128__", "16");
@@ -1216,6 +1289,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512ifma", true)
.Case("bmi", true)
.Case("bmi2", true)
+ .Case("cldemote", true)
.Case("clflushopt", true)
.Case("clwb", true)
.Case("clzero", true)
@@ -1226,20 +1300,27 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("fsgsbase", true)
.Case("fxsr", true)
.Case("gfni", true)
+ .Case("invpcid", true)
.Case("lwp", true)
.Case("lzcnt", true)
.Case("mmx", true)
.Case("movbe", true)
+ .Case("movdiri", true)
+ .Case("movdir64b", true)
.Case("mpx", true)
.Case("mwaitx", true)
.Case("pclmul", true)
+ .Case("pconfig", true)
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchwt1", true)
.Case("prfchw", true)
+ .Case("ptwrite", true)
+ .Case("rdpid", true)
.Case("rdrnd", true)
.Case("rdseed", true)
.Case("rtm", true)
+ .Case("sahf", true)
.Case("sgx", true)
.Case("sha", true)
.Case("shstk", true)
@@ -1254,6 +1335,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("tbm", true)
.Case("vaes", true)
.Case("vpclmulqdq", true)
+ .Case("wbnoinvd", true)
+ .Case("waitpkg", true)
.Case("x87", true)
.Case("xop", true)
.Case("xsave", true)
@@ -1284,6 +1367,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512ifma", HasAVX512IFMA)
.Case("bmi", HasBMI)
.Case("bmi2", HasBMI2)
+ .Case("cldemote", HasCLDEMOTE)
.Case("clflushopt", HasCLFLUSHOPT)
.Case("clwb", HasCLWB)
.Case("clzero", HasCLZERO)
@@ -1294,25 +1378,32 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("fsgsbase", HasFSGSBASE)
.Case("fxsr", HasFXSR)
.Case("gfni", HasGFNI)
- .Case("ibt", HasIBT)
+ .Case("invpcid", HasINVPCID)
.Case("lwp", HasLWP)
.Case("lzcnt", HasLZCNT)
.Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
.Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
.Case("mmx", MMX3DNowLevel >= MMX)
.Case("movbe", HasMOVBE)
+ .Case("movdiri", HasMOVDIRI)
+ .Case("movdir64b", HasMOVDIR64B)
.Case("mpx", HasMPX)
+ .Case("save-args", HasSaveArgs)
.Case("mwaitx", HasMWAITX)
.Case("pclmul", HasPCLMUL)
+ .Case("pconfig", HasPCONFIG)
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
+ .Case("ptwrite", HasPTWRITE)
+ .Case("rdpid", HasRDPID)
.Case("rdrnd", HasRDRND)
.Case("rdseed", HasRDSEED)
.Case("retpoline", HasRetpoline)
.Case("retpoline-external-thunk", HasRetpolineExternalThunk)
.Case("rtm", HasRTM)
+ .Case("sahf", HasLAHFSAHF)
.Case("sgx", HasSGX)
.Case("sha", HasSHA)
.Case("shstk", HasSHSTK)
@@ -1326,6 +1417,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("tbm", HasTBM)
.Case("vaes", HasVAES)
.Case("vpclmulqdq", HasVPCLMULQDQ)
+ .Case("wbnoinvd", HasWBNOINVD)
+ .Case("waitpkg", HasWAITPKG)
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
@@ -1349,6 +1442,95 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
.Default(false);
}
+static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
+ return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
+#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, llvm::X86::ENUM)
+#include "llvm/Support/X86TargetParser.def"
+ ;
+ // Note: no .Default() case is supplied, so callers must validate Name first;
+ // an unrecognized name makes the StringSwitch assert.
+}
+
+static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) {
+ enum class FeatPriority {
+#define FEATURE(FEAT) FEAT,
+#include "clang/Basic/X86Target.def"
+ };
+ switch (Feat) {
+#define FEATURE(FEAT) \
+ case llvm::X86::FEAT: \
+ return static_cast<unsigned>(FeatPriority::FEAT);
+#include "clang/Basic/X86Target.def"
+ default:
+ llvm_unreachable("No Feature Priority for non-CPUSupports Features");
+ }
+}
+
+unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
+ // A valid CPU name has a 'key feature'; the CPU ranks just above that key
+ // feature: (feature priority << 1) + 1.
+ CPUKind Kind = getCPUKind(Name);
+ if (Kind != CK_Generic) {
+ switch (Kind) {
+ default:
+ llvm_unreachable(
+ "CPU Type without a key feature used in 'target' attribute");
+#define PROC_WITH_FEAT(ENUM, STR, IS64, KEY_FEAT) \
+ case CK_##ENUM: \
+ return (getFeaturePriority(llvm::X86::KEY_FEAT) << 1) + 1;
+#include "clang/Basic/X86Target.def"
+ }
+ }
+
+ // Now we know we have a feature, so get its priority and shift it left by
+ // one bit, leaving the odd value above it free for the CPUs (above).
+ return getFeaturePriority(getFeature(Name)) << 1;
+}
+
+bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
+ return llvm::StringSwitch<bool>(Name)
+#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true)
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true)
+#include "clang/Basic/X86Target.def"
+ .Default(false);
+}
+
+static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) {
+ return llvm::StringSwitch<StringRef>(Name)
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME)
+#include "clang/Basic/X86Target.def"
+ .Default(Name);
+}
+
+char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
+ return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name))
+#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, MANGLING)
+#include "clang/Basic/X86Target.def"
+ .Default(0);
+}
+
+void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
+ StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const {
+ StringRef WholeList =
+ llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name))
+#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES)
+#include "clang/Basic/X86Target.def"
+ .Default("");
+ WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+}
+
+std::string X86TargetInfo::getCPUKindCanonicalName(CPUKind Kind) const {
+ switch (Kind) {
+ case CK_Generic:
+ return "";
+#define PROC(ENUM, STRING, IS64BIT) \
+ case CK_##ENUM: \
+ return STRING;
+#include "clang/Basic/X86Target.def"
+ }
+ llvm_unreachable("Invalid CPUKind");
+}
+
// We can't use a generic validation scheme for the cpus accepted here
// versus subtarget cpus accepted in the target attribute because the
// variables intitialized by the runtime only support the below currently
@@ -1434,7 +1616,7 @@ bool X86TargetInfo::validateAsmConstraint(
case 'y': // Any MMX register.
case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
case 'x': // Any SSE register.
- case 'k': // Any AVX512 mask register (same as Yk, additionaly allows k0
+ case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
// for intermideate k reg operations).
case 'Q': // Any register accessible as [r]h: a, b, c, and d.
case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
@@ -1562,8 +1744,6 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
bool X86TargetInfo::checkCPUKind(CPUKind Kind) const {
// Perform any per-CPU checks necessary to determine if this CPU is
// acceptable.
- // FIXME: This results in terrible diagnostics. Clang just says the CPU is
- // invalid without explaining *why*.
switch (Kind) {
case CK_Generic:
// No processor selected!
@@ -1576,6 +1756,18 @@ bool X86TargetInfo::checkCPUKind(CPUKind Kind) const {
llvm_unreachable("Unhandled CPU kind");
}
+void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
+#define PROC(ENUM, STRING, IS64BIT) \
+ if (IS64BIT || getTriple().getArch() == llvm::Triple::x86) \
+ Values.emplace_back(STRING);
+ // Go through CPUKind checking to ensure that the alias is de-aliased and
+ // 64 bit-ness is checked.
+#define PROC_ALIAS(ENUM, ALIAS) \
+ if (checkCPUKind(getCPUKind(ALIAS))) \
+ Values.emplace_back(ALIAS);
+#include "clang/Basic/X86Target.def"
+}
+
X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const {
return llvm::StringSwitch<CPUKind>(CPU)
#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM)
diff --git a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h
index 590531c1785..c476326102d 100644
--- a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h
+++ b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h
@@ -81,7 +81,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasSHA = false;
bool HasMPX = false;
bool HasSHSTK = false;
- bool HasIBT = false;
bool HasSGX = false;
bool HasCX16 = false;
bool HasFXSR = false;
@@ -91,15 +90,27 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasXSAVES = false;
bool HasMWAITX = false;
bool HasCLZERO = false;
+ bool HasCLDEMOTE = false;
+ bool HasPCONFIG = false;
bool HasPKU = false;
bool HasCLFLUSHOPT = false;
bool HasCLWB = false;
bool HasMOVBE = false;
bool HasPREFETCHWT1 = false;
+ bool HasRDPID = false;
bool HasRetpoline = false;
bool HasRetpolineExternalThunk = false;
-
- /// \brief Enumeration of all of the X86 CPUs supported by Clang.
+ bool HasLAHFSAHF = false;
+ bool HasWBNOINVD = false;
+ bool HasWAITPKG = false;
+ bool HasMOVDIRI = false;
+ bool HasMOVDIR64B = false;
+ bool HasPTWRITE = false;
+ bool HasINVPCID = false;
+ bool HasSaveArgs = false;
+
+protected:
+ /// Enumeration of all of the X86 CPUs supported by Clang.
///
/// Each enumeration represents a particular CPU supported by Clang. These
/// loosely correspond to the options passed to '-march' or '-mtune' flags.
@@ -113,6 +124,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
CPUKind getCPUKind(StringRef CPU) const;
+ std::string getCPUKindCanonicalName(CPUKind Kind) const;
+
enum FPMathKind { FP_Default, FP_SSE, FP_387 } FPMath = FP_Default;
public:
@@ -120,7 +133,7 @@ public:
: TargetInfo(Triple) {
LongDoubleFormat = &llvm::APFloat::x87DoubleExtended();
}
-
+
unsigned getFloatEvalMethod() const override {
// X87 evaluates with 80 bits "long double" precision.
return SSELevel == NoSSE ? 2 : 0;
@@ -138,6 +151,14 @@ public:
bool validateCpuIs(StringRef Name) const override;
+ bool validateCPUSpecificCPUDispatch(StringRef Name) const override;
+
+ char CPUSpecificManglingCharacter(StringRef Name) const override;
+
+ void getCPUSpecificCPUDispatchFeatures(
+ StringRef Name,
+ llvm::SmallVectorImpl<StringRef> &Features) const override;
+
bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &info) const override;
@@ -158,6 +179,17 @@ public:
bool validateInputSize(StringRef Constraint, unsigned Size) const override;
+ // Both control-flow-protection queries below unconditionally return true.
+ bool
+ checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override {
+ return true;
+ }
+
+ bool
+ checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const override {
+ return true;
+ }
+
virtual bool validateOperandSize(StringRef Constraint, unsigned Size) const;
std::string convertConstraint(const char *&Constraint) const override;
@@ -165,8 +197,8 @@ public:
return "~{dirflag},~{fpsr},~{flags}";
}
- StringRef getConstraintRegister(const StringRef &Constraint,
- const StringRef &Expression) const override {
+ StringRef getConstraintRegister(StringRef Constraint,
+ StringRef Expression) const override {
StringRef::iterator I, E;
for (I = Constraint.begin(), E = Constraint.end(); I != E; ++I) {
if (isalpha(*I))
@@ -207,7 +239,7 @@ public:
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
-
+
static void setSSELevel(llvm::StringMap<bool> &Features, X86SSEEnum Level,
bool Enabled);
@@ -254,10 +286,17 @@ public:
return checkCPUKind(getCPUKind(Name));
}
+ void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
+
bool setCPU(const std::string &Name) override {
return checkCPUKind(CPU = getCPUKind(Name));
}
+ bool supportsMultiVersioning() const override {
+ return getTriple().isOSBinFormatELF();
+ }
+ unsigned multiVersionSortPriority(StringRef Name) const override;
+
bool setFPMath(StringRef Name) override;
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
@@ -269,6 +308,7 @@ public:
case CC_X86VectorCall:
case CC_X86RegCall:
case CC_C:
+ case CC_PreserveMost:
case CC_Swift:
case CC_X86Pascal:
case CC_IntelOclBicc:
@@ -536,7 +576,7 @@ public:
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
}
-
+
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
X86_32TargetInfo::getTargetDefines(Opts, Builder);
@@ -623,7 +663,7 @@ public:
bool hasInt128Type() const override { return true; }
unsigned getUnwindWordWidth() const override { return 64; }
-
+
unsigned getRegisterWidth() const override { return 64; }
bool validateGlobalRegisterVariable(StringRef RegName, unsigned RegSize,
@@ -708,6 +748,11 @@ public:
Builder.defineMacro("_M_X64", "100");
Builder.defineMacro("_M_AMD64", "100");
}
+
+ TargetInfo::CallingConvKind
+ getCallingConvKind(bool ClangABICompat4) const override {
+ return CCK_MicrosoftWin64;
+ }
};
// x86-64 MinGW target