diff options
author | Philip Guenther <guenther@cvs.openbsd.org> | 2018-12-30 23:08:06 +0000 |
---|---|---|
committer | Philip Guenther <guenther@cvs.openbsd.org> | 2018-12-30 23:08:06 +0000 |
commit | 5a7ce81e74827aac04443e86ccb158d1121c7d8a (patch) | |
tree | 6a70196e1900e50492373a957e44c197a05a7e45 /gnu/llvm/lib | |
parent | 3d1ec8445e872d8b6365f39a103b51fdd64c20d8 (diff) |
Turn on -mretpoline by default in clang on amd64, but turn it off
explicitly in SMALL_KERNEL kernel builds.
tweaks from jsg@ and tb@
ok deraadt@ kettenis@
Diffstat (limited to 'gnu/llvm/lib')
-rw-r--r-- | gnu/llvm/lib/Target/X86/X86Subtarget.cpp | 315 | ||||
-rw-r--r-- | gnu/llvm/lib/Target/X86/X86Subtarget.h | 324 |
2 files changed, 452 insertions, 187 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp index 8ef08c960f0..195576bf546 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp @@ -11,19 +11,28 @@ // //===----------------------------------------------------------------------===// +#include "X86.h" + +#include "X86CallLowering.h" +#include "X86LegalizerInfo.h" +#include "X86RegisterBankInfo.h" #include "X86Subtarget.h" -#include "X86InstrInfo.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Host.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #if defined(_MSC_VER) #include <intrin.h> @@ -46,126 +55,127 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, /// Classify a blockaddress reference for the current subtarget according to how /// we should reference it in a non-pcrel context. -unsigned char X86Subtarget::ClassifyBlockAddressReference() const { - if (isPICStyleGOT()) // 32-bit ELF targets. - return X86II::MO_GOTOFF; - - if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. - return X86II::MO_PIC_BASE_OFFSET; - - // Direct static reference to label. - return X86II::MO_NO_FLAG; +unsigned char X86Subtarget::classifyBlockAddressReference() const { + return classifyLocalReference(nullptr); } /// Classify a global variable reference for the current subtarget according to /// how we should reference it in a non-pcrel context. -unsigned char X86Subtarget:: -ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { - // DLLImport only exists on windows, it is implemented as a load from a - // DLLIMPORT stub. - if (GV->hasDLLImportStorageClass()) - return X86II::MO_DLLIMPORT; +unsigned char +X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const { + return classifyGlobalReference(GV, *GV->getParent()); +} - bool isDef = GV->isStrongDefinitionForLinker(); - - // X86-64 in PIC mode. - if (isPICStyleRIPRel()) { - // Large model never uses stubs. - if (TM.getCodeModel() == CodeModel::Large) - return X86II::MO_NO_FLAG; - - if (isTargetDarwin()) { - // If symbol visibility is hidden, the extra load is not needed if - // target is x86-64 or the symbol is definitely defined in the current - // translation unit. - if (GV->hasDefaultVisibility() && !isDef) - return X86II::MO_GOTPCREL; - } else if (!isTargetWin64()) { - assert(isTargetELF() && "Unknown rip-relative target"); - - // Extra load is needed for all externally visible. - if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility()) - return X86II::MO_GOTPCREL; - } +unsigned char +X86Subtarget::classifyLocalReference(const GlobalValue *GV) const { + // 64 bits can use %rip addressing for anything local. + if (is64Bit()) + return X86II::MO_NO_FLAG; + // If this is for a position dependent executable, the static linker can + // figure it out. + if (!isPositionIndependent()) return X86II::MO_NO_FLAG; - } - if (isPICStyleGOT()) { // 32-bit ELF targets. - // Extra load is needed for all externally visible. - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return X86II::MO_GOTOFF; - return X86II::MO_GOT; - } + // The COFF dynamic linker just patches the executable sections. + if (isTargetCOFF()) + return X86II::MO_NO_FLAG; - if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. - // Determine whether we have a stub reference and/or whether the reference - // is relative to the PIC base or not. + if (isTargetDarwin()) { + // 32 bit macho has no relocation for a-b if a is undefined, even if + // b is in the section that is being relocated. + // This means we have to use o load even for GVs that are known to be + // local to the dso. + if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) + return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (isDef) - return X86II::MO_PIC_BASE_OFFSET; + return X86II::MO_PIC_BASE_OFFSET; + } - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return X86II::MO_DARWIN_NONLAZY_PIC_BASE; + return X86II::MO_GOTOFF; +} - // If symbol visibility is hidden, we have a stub for common symbol - // references and external declarations. - if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) { - // Hidden $non_lazy_ptr reference. - return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; - } +unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV, + const Module &M) const { + // Large model never uses stubs. + if (TM.getCodeModel() == CodeModel::Large) + return X86II::MO_NO_FLAG; - // Otherwise, no stub. - return X86II::MO_PIC_BASE_OFFSET; + // Absolute symbols can be referenced directly. + if (GV) { + if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) { + // See if we can use the 8-bit immediate form. Note that some instructions + // will sign extend the immediate operand, so to be conservative we only + // accept the range [0,128). + if (CR->getUnsignedMax().ult(128)) + return X86II::MO_ABS8; + else + return X86II::MO_NO_FLAG; + } } - if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. - // Determine whether we have a stub reference. + if (TM.shouldAssumeDSOLocal(M, GV)) + return classifyLocalReference(GV); - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (isDef) - return X86II::MO_NO_FLAG; + if (isTargetCOFF()) + return X86II::MO_DLLIMPORT; - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return X86II::MO_DARWIN_NONLAZY; + if (is64Bit()) + return X86II::MO_GOTPCREL; - // Otherwise, no stub. - return X86II::MO_NO_FLAG; + if (isTargetDarwin()) { + if (!isPositionIndependent()) + return X86II::MO_DARWIN_NONLAZY; + return X86II::MO_DARWIN_NONLAZY_PIC_BASE; } - // Direct static reference to global. - return X86II::MO_NO_FLAG; + return X86II::MO_GOT; +} + +unsigned char +X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const { + return classifyGlobalFunctionReference(GV, *GV->getParent()); } +unsigned char +X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV, + const Module &M) const { + if (TM.shouldAssumeDSOLocal(M, GV)) + return X86II::MO_NO_FLAG; -/// This function returns the name of a function which has an interface like -/// the non-standard bzero function, if such a function exists on the -/// current subtarget and it is considered preferable over memset with zero -/// passed as the second argument. Otherwise it returns null. -const char *X86Subtarget::getBZeroEntry() const { - // Darwin 10 has a __bzero entry point for this purpose. - if (getTargetTriple().isMacOSX() && - !getTargetTriple().isMacOSXVersionLT(10, 6)) - return "__bzero"; + if (isTargetCOFF()) { + assert(GV->hasDLLImportStorageClass() && + "shouldAssumeDSOLocal gave inconsistent answer"); + return X86II::MO_DLLIMPORT; + } - return nullptr; -} + const Function *F = dyn_cast_or_null<Function>(GV); + + if (isTargetELF()) { + if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv())) + // According to psABI, PLT stub clobbers XMM8-XMM15. + // In Regcall calling convention those registers are used for passing + // parameters. Thus we need to prevent lazy binding in Regcall. + return X86II::MO_GOTPCREL; + if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit()) + return X86II::MO_GOTPCREL; + return X86II::MO_PLT; + } + + if (is64Bit()) { + if (F && F->hasFnAttribute(Attribute::NonLazyBind)) + // If the function is marked as non-lazy, generate an indirect call + // which loads from the GOT directly. This avoids runtime overhead + // at the cost of eager binding (and one extra byte of encoding). + return X86II::MO_GOTPCREL; + return X86II::MO_NO_FLAG; + } -bool X86Subtarget::hasSinCos() const { - return getTargetTriple().isMacOSX() && - !getTargetTriple().isMacOSXVersionLT(10, 9) && - is64Bit(); + return X86II::MO_NO_FLAG; } /// Return true if the subtarget allows calls to immediate address. -bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { +bool X86Subtarget::isLegalToCallImmediateAddr() const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does, // the following check for Win32 should be removed. @@ -197,6 +207,13 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+sahf"; } + // OpenBSD/amd64 defaults to -mretpoline + if (isTargetOpenBSD() && In64BitMode) { + if (!FullFS.empty()) + FullFS = "+retpoline," + FullFS; + else + FullFS = "+retpoline"; + } // Parse features string and set the CPU. ParseSubtargetFeatures(CPUName, FullFS); @@ -227,33 +244,49 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both + // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || - In64BitMode) + isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; + + // Some CPUs have more overhead for gather. The specified overhead is relative + // to the Load operation. "2" is the number provided by Intel architects. This + // parameter is used for cost estimation of Gather Op and comparison with + // other alternatives. + // TODO: Remove the explicit hasAVX512()?, That would mean we would only + // enable gather with a -march. + if (hasAVX512() || (hasAVX2() && hasFastGather())) + GatherOverhead = 2; + if (hasAVX512()) + ScatterOverhead = 2; } void X86Subtarget::initializeEnvironment() { X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; + HasX87 = false; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; HasSSE4A = false; HasAES = false; + HasVAES = false; HasFXSR = false; HasXSAVE = false; HasXSAVEOPT = false; HasXSAVEC = false; HasXSAVES = false; HasPCLMUL = false; + HasVPCLMULQDQ = false; + HasGFNI = false; HasFMA = false; HasFMA4 = false; HasXOP = false; HasTBM = false; + HasLWP = false; HasMOVBE = false; HasRDRAND = false; HasF16C = false; @@ -261,39 +294,67 @@ void X86Subtarget::initializeEnvironment() { HasLZCNT = false; HasBMI = false; HasBMI2 = false; + HasVBMI = false; + HasVBMI2 = false; + HasIFMA = false; HasRTM = false; - HasHLE = false; HasERI = false; HasCDI = false; HasPFI = false; HasDQI = false; + HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; HasPKU = false; + HasVNNI = false; + HasBITALG = false; HasSHA = false; + HasPREFETCHWT1 = false; HasPRFCHW = false; HasRDSEED = false; HasLAHFSAHF = false; + HasMWAITX = false; + HasCLZERO = false; HasMPX = false; - IsBTMemSlow = false; + HasSHSTK = false; + HasIBT = false; + HasSGX = false; + HasCLFLUSHOPT = false; + HasCLWB = false; + UseRetpoline = false; + UseRetpolineExternalThunk = false; + IsPMULLDSlow = false; IsSHLDSlow = false; IsUAMem16Slow = false; IsUAMem32Slow = false; HasSSEUnalignedMem = false; HasCmpxchg16b = false; UseLeaForSP = false; + HasFastVariableShuffle = false; + HasFastPartialYMMorZMMWrite = false; + HasFastGather = false; + HasFastScalarFSQRT = false; + HasFastVectorFSQRT = false; + HasFastLZCNT = false; + HasFastSHLDRotate = false; + HasMacroFusion = false; + HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; - CallRegIndirect = false; + SlowTwoMemOps = false; LEAUsesAG = false; SlowLEA = false; + Slow3OpsLEA = false; SlowIncDec = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; UseSoftFloat = false; + X86ProcFamily = Others; + GatherOverhead = 1024; + ScatterOverhead = 1024; } X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, @@ -303,41 +364,55 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } -X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, const X86TargetMachine &TM, +X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + const X86TargetMachine &TM, unsigned StackAlignOverride) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), - PICStyle(PICStyles::None), TargetTriple(TT), + PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride), In64BitMode(TargetTriple.getArch() == Triple::x86_64), In32BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), - TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. - if (TM.getRelocationModel() == Reloc::Static) { - // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. + if (!isPositionIndependent()) setPICStyle(PICStyles::None); - } else if (is64Bit()) { - // PIC in 64 bit mode is always rip-rel. + else if (is64Bit()) setPICStyle(PICStyles::RIPRel); - } else if (isTargetCOFF()) { + else if (isTargetCOFF()) setPICStyle(PICStyles::None); - } else if (isTargetDarwin()) { - if (TM.getRelocationModel() == Reloc::PIC_) - setPICStyle(PICStyles::StubPIC); - else { - assert(TM.getRelocationModel() == Reloc::DynamicNoPIC); - setPICStyle(PICStyles::StubDynamicNoPIC); - } - } else if (isTargetELF()) { + else if (isTargetDarwin()) + setPICStyle(PICStyles::StubPIC); + else if (isTargetELF()) setPICStyle(PICStyles::GOT); - } + + CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering())); + Legalizer.reset(new X86LegalizerInfo(*this, TM)); + + auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); + RegBankInfo.reset(RBI); + InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); +} + +const CallLowering *X86Subtarget::getCallLowering() const { + return CallLoweringInfo.get(); +} + +const InstructionSelector *X86Subtarget::getInstructionSelector() const { + return InstSelector.get(); +} + +const LegalizerInfo *X86Subtarget::getLegalizerInfo() const { + return Legalizer.get(); +} + +const RegisterBankInfo *X86Subtarget::getRegBankInfo() const { + return RegBankInfo.get(); } bool X86Subtarget::enableEarlyIfConversion() const { return hasCMov() && X86EarlyIfConv; } - diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h index 13d1026dcaa..e6b486cdbeb 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.h +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h @@ -18,32 +18,53 @@ #include "X86ISelLowering.h" #include "X86InstrInfo.h" #include "X86SelectionDAGInfo.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/CallingConv.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include <string> +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetMachine.h" +#include <memory> #define GET_SUBTARGETINFO_HEADER #include "X86GenSubtargetInfo.inc" namespace llvm { + class GlobalValue; -class StringRef; -class TargetMachine; /// The X86 backend supports a number of different styles of PIC. /// namespace PICStyles { + enum Style { - StubPIC, // Used on i386-darwin in -fPIC mode. - StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode. - GOT, // Used on many 32-bit unices in -fPIC mode. - RIPRel, // Used on X86-64 when not in -static mode. - None // Set when in -static mode (not PIC or DynamicNoPIC mode). + StubPIC, // Used on i386-darwin in pic mode. + GOT, // Used on 32 bit elf on when in pic mode. + RIPRel, // Used on X86-64 when in pic mode. + None // Set when not in pic mode. }; -} + +} // end namespace PICStyles class X86Subtarget final : public X86GenSubtargetInfo { +public: + enum X86ProcFamilyEnum { + Others, + IntelAtom, + IntelSLM, + IntelGLM, + IntelHaswell, + IntelBroadwell, + IntelSkylake, + IntelKNL, + IntelSKX, + IntelCannonlake, + IntelIcelake, + }; protected: enum X86SSEEnum { @@ -54,22 +75,23 @@ protected: NoThreeDNow, MMX, ThreeDNow, ThreeDNowA }; - enum X86ProcFamilyEnum { - Others, IntelAtom, IntelSLM - }; - /// X86 processor family: Intel Atom, and others X86ProcFamilyEnum X86ProcFamily; /// Which PIC style to use PICStyles::Style PICStyle; + const TargetMachine &TM; + /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. X86SSEEnum X86SSELevel; /// MMX, 3DNow, 3DNow Athlon, or none supported. X863DNowEnum X863DNowLevel; + /// True if the processor supports X87 instructions. + bool HasX87; + /// True if this processor has conditional move instructions /// (generally pentium pro+). bool HasCMov; @@ -85,21 +107,29 @@ protected: /// Target has AES instructions bool HasAES; + bool HasVAES; /// Target has FXSAVE/FXRESTOR instructions bool HasFXSR; /// Target has XSAVE instructions bool HasXSAVE; + /// Target has XSAVEOPT instructions bool HasXSAVEOPT; + /// Target has XSAVEC instructions bool HasXSAVEC; + /// Target has XSAVES instructions bool HasXSAVES; /// Target has carry-less multiplication bool HasPCLMUL; + bool HasVPCLMULQDQ; + + /// Target has Galois Field Arithmetic instructions + bool HasGFNI; /// Target has 3-operand fused multiply-add bool HasFMA; @@ -113,6 +143,9 @@ protected: /// Target has TBM instructions. bool HasTBM; + /// Target has LWP instructions + bool HasLWP; + /// True if the processor has the MOVBE instruction. bool HasMOVBE; @@ -134,12 +167,18 @@ protected: /// Processor has BMI2 instructions. bool HasBMI2; + /// Processor has VBMI instructions. + bool HasVBMI; + + /// Processor has VBMI2 instructions. + bool HasVBMI2; + + /// Processor has Integer Fused Multiply Add + bool HasIFMA; + /// Processor has RTM instructions. bool HasRTM; - /// Processor has HLE. - bool HasHLE; - /// Processor has ADX instructions. bool HasADX; @@ -155,12 +194,22 @@ protected: /// Processor has LAHF/SAHF instructions. bool HasLAHFSAHF; - /// True if BT (bit test) of memory instructions are slow. - bool IsBTMemSlow; + /// Processor has MONITORX/MWAITX instructions. + bool HasMWAITX; + + /// Processor has Cache Line Zero instruction + bool HasCLZERO; + + /// Processor has Prefetch with intent to Write instruction + bool HasPREFETCHWT1; /// True if SHLD instructions are slow. bool IsSHLDSlow; + /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and + // PMULUDQ. + bool IsPMULLDSlow; + /// True if unaligned memory accesses of 16-bytes are slow. bool IsUAMem16Slow; @@ -179,21 +228,53 @@ protected: /// the stack pointer. This is an optimization for Intel Atom processors. bool UseLeaForSP; + /// True if its preferable to combine to a single shuffle using a variable + /// mask over multiple fixed shuffles. + bool HasFastVariableShuffle; + + /// True if there is no performance penalty to writing only the lower parts + /// of a YMM or ZMM register without clearing the upper part. + bool HasFastPartialYMMorZMMWrite; + + /// True if gather is reasonably fast. This is true for Skylake client and + /// all AVX-512 CPUs. + bool HasFastGather; + + /// True if hardware SQRTSS instruction is at least as fast (latency) as + /// RSQRTSS followed by a Newton-Raphson iteration. + bool HasFastScalarFSQRT; + + /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast + /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration. + bool HasFastVectorFSQRT; + /// True if 8-bit divisions are significantly faster than /// 32-bit divisions and should be used when possible. bool HasSlowDivide32; - /// True if 16-bit divides are significantly faster than + /// True if 32-bit divides are significantly faster than /// 64-bit divisions and should be used when possible. bool HasSlowDivide64; + /// True if LZCNT instruction is fast. + bool HasFastLZCNT; + + /// True if SHLD based rotate is fast. + bool HasFastSHLDRotate; + + /// True if the processor supports macrofusion. + bool HasMacroFusion; + + /// True if the processor has enhanced REP MOVSB/STOSB. + bool HasERMSB; + /// True if the short functions should be padded to prevent /// a stall when returning too early. bool PadShortFunctions; - /// True if the Calls with memory reference should be converted - /// to a register-based indirect call. - bool CallRegIndirect; + /// True if two memory operand instructions should use a temporary register + /// instead. + bool SlowTwoMemOps; /// True if the LEA instruction inputs have to be ready at address generation /// (AG) time. @@ -202,6 +283,11 @@ protected: /// True if the LEA instruction with certain arguments is slow bool SlowLEA; + /// True if the LEA instruction has all three source operands: base, index, + /// and offset or if the LEA instruction uses base and index registers where + /// the base is EBP, RBP,or R13 + bool Slow3OpsLEA; + /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; @@ -214,6 +300,9 @@ protected: /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; + /// Processor has AVX-512 population count Instructions + bool HasVPOPCNTDQ; + /// Processor has AVX-512 Doubleword and Quadword instructions bool HasDQI; @@ -226,9 +315,40 @@ protected: /// Processor has PKU extenstions bool HasPKU; - /// Processot supports MPX - Memory Protection Extensions + /// Processor has AVX-512 Vector Neural Network Instructions + bool HasVNNI; + + /// Processor has AVX-512 Bit Algorithms instructions + bool HasBITALG; + + /// Processor supports MPX - Memory Protection Extensions bool HasMPX; + /// Processor supports CET SHSTK - Control-Flow Enforcement Technology + /// using Shadow Stack + bool HasSHSTK; + + /// Processor supports CET IBT - Control-Flow Enforcement Technology + /// using Indirect Branch Tracking + bool HasIBT; + + /// Processor has Software Guard Extensions + bool HasSGX; + + /// Processor supports Flush Cache Line instruction + bool HasCLFLUSHOPT; + + /// Processor supports Cache Line Write Back instruction + bool HasCLWB; + + /// Use a retpoline thunk rather than indirect calls to block speculative + /// execution. + bool UseRetpoline; + + /// When using a retpoline thunk, call an externally provided thunk rather + /// than emitting one inside the compiler. + bool UseRetpolineExternalThunk; + /// Use software floating point for code generation. bool UseSoftFloat; @@ -246,8 +366,13 @@ protected: /// Instruction itineraries for scheduling InstrItineraryData InstrItins; -private: + /// GlobalISel related APIs. + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + std::unique_ptr<InstructionSelector> InstSelector; +private: /// Override the stack alignment. unsigned StackAlignOverride; @@ -260,6 +385,10 @@ private: /// True if compiling for 16-bit, false for 32-bit or 64-bit. bool In16BitMode; + /// Contains the Overhead of gather\scatter instructions + int GatherOverhead; + int ScatterOverhead; + X86SelectionDAGInfo TSInfo; // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which // X86TargetLowering needs. @@ -271,19 +400,23 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, + X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, unsigned StackAlignOverride); const X86TargetLowering *getTargetLowering() const override { return &TLInfo; } + const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const X86FrameLowering *getFrameLowering() const override { return &FrameLowering; } + const X86SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + const X86RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } @@ -301,12 +434,19 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + /// Methods used by Global ISel + const CallLowering *getCallLowering() const override; + const InstructionSelector *getInstructionSelector() const override; + const LegalizerInfo *getLegalizerInfo() const override; + const RegisterBankInfo *getRegBankInfo() const override; + private: /// Initialize the full set of dependencies so we can use an initializer /// list for X86Subtarget. X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); + public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { @@ -336,6 +476,7 @@ public: PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } + bool hasX87() const { return HasX87; } bool hasCMov() const { return HasCMov; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } @@ -354,19 +495,23 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } + bool hasVAES() const { return HasVAES; } bool hasFXSR() const { return HasFXSR; } bool hasXSAVE() const { return HasXSAVE; } bool hasXSAVEOPT() const { return HasXSAVEOPT; } bool hasXSAVEC() const { return HasXSAVEC; } bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } + bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } + bool hasGFNI() const { return HasGFNI; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. - bool hasFMA() const { return HasFMA && !HasFMA4; } + bool hasFMA() const { return HasFMA; } bool hasFMA4() const { return HasFMA4; } - bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); } + bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } + bool hasLWP() const { return HasLWP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } @@ -374,58 +519,108 @@ public: bool hasLZCNT() const { return HasLZCNT; } bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } + bool hasVBMI() const { return HasVBMI; } + bool hasVBMI2() const { return HasVBMI2; } + bool hasIFMA() const { return HasIFMA; } bool hasRTM() const { return HasRTM; } - bool hasHLE() const { return HasHLE; } bool hasADX() const { return HasADX; } bool hasSHA() const { return HasSHA; } - bool hasPRFCHW() const { return HasPRFCHW; } + bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; } + bool hasPREFETCHWT1() const { return HasPREFETCHWT1; } + bool hasSSEPrefetch() const { + // We implicitly enable these when we have a write prefix supporting cache + // level OR if we have prfchw, but don't already have a read prefetch from + // 3dnow. + return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1(); + } bool hasRDSEED() const { return HasRDSEED; } bool hasLAHFSAHF() const { return HasLAHFSAHF; } - bool isBTMemSlow() const { return IsBTMemSlow; } + bool hasMWAITX() const { return HasMWAITX; } + bool hasCLZERO() const { return HasCLZERO; } bool isSHLDSlow() const { return IsSHLDSlow; } + bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } + int getGatherOverhead() const { return GatherOverhead; } + int getScatterOverhead() const { return ScatterOverhead; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } + bool hasFastVariableShuffle() const { + return HasFastVariableShuffle; + } + bool hasFastPartialYMMorZMMWrite() const { + return HasFastPartialYMMorZMMWrite; + } + bool hasFastGather() const { return HasFastGather; } + bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; } + bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } + bool hasFastLZCNT() const { return HasFastLZCNT; } + bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasMacroFusion() const { return HasMacroFusion; } + bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } - bool callRegIndirect() const { return CallRegIndirect; } + bool slowTwoMemOps() const { return SlowTwoMemOps; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } + bool slow3OpsLEA() const { return Slow3OpsLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } + bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } bool hasPKU() const { return HasPKU; } + bool hasVNNI() const { return HasVNNI; } + bool hasBITALG() const { return HasBITALG; } bool hasMPX() const { return HasMPX; } + bool hasSHSTK() const { return HasSHSTK; } + bool hasIBT() const { return HasIBT; } + bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } + bool hasCLWB() const { return HasCLWB; } + bool useRetpoline() const { return UseRetpoline; } + bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } + bool isXRaySupported() const override { return is64Bit(); } + + X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; } + + /// TODO: to be removed later and replaced with suitable properties bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } + /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for + /// no-sse2). There isn't any reason to disable it if the target processor + /// supports it. + bool hasMFence() const { return hasSSE2() || is64Bit(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } + bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); } bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } - bool isTargetPS4() const { return TargetTriple.isPS4(); } + bool isTargetPS4() const { return TargetTriple.isPS4CPU(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } + bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } + bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } bool isTargetWindowsMSVC() const { return TargetTriple.isWindowsMSVCEnvironment(); @@ -455,15 +650,10 @@ public: bool isOSWindows() const { return TargetTriple.isOSWindows(); } - bool isTargetWin64() const { - return In64BitMode && TargetTriple.isOSWindows(); - } + bool isTargetWin64() const { return In64BitMode && isOSWindows(); } - bool isTargetWin32() const { - return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC()); - } + bool isTargetWin32() const { return !In64BitMode && isOSWindows(); } - bool isPICStyleSet() const { return PICStyle != PICStyles::None; } bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } @@ -471,19 +661,14 @@ public: return PICStyle == PICStyles::StubPIC; } - bool isPICStyleStubNoDynamic() const { - return PICStyle == PICStyles::StubDynamicNoPIC; - } - bool isPICStyleStubAny() const { - return PICStyle == PICStyles::StubDynamicNoPIC || - PICStyle == PICStyles::StubPIC; - } + bool isPositionIndependent() const { return TM.isPositionIndependent(); } bool isCallingConvWin64(CallingConv::ID CC) const { switch (CC) { // On Win64, all these conventions just use the default convention. case CallingConv::C: case CallingConv::Fast: + case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: @@ -491,7 +676,7 @@ public: case CallingConv::Intel_OCL_BI: return isTargetWin64(); // This convention allows using the Win64 convention on other targets. - case CallingConv::X86_64_Win64: + case CallingConv::Win64: return true; // This convention allows using the SysV convention on Windows targets. case CallingConv::X86_64_SysV: @@ -502,33 +687,36 @@ public: } } - /// ClassifyGlobalReference - Classify a global variable reference for the - /// current subtarget according to how we should reference it in a non-pcrel - /// context. - unsigned char ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM)const; + /// Classify a global variable reference for the current subtarget according + /// to how we should reference it in a non-pcrel context. + unsigned char classifyLocalReference(const GlobalValue *GV) const; + + unsigned char classifyGlobalReference(const GlobalValue *GV, + const Module &M) const; + unsigned char classifyGlobalReference(const GlobalValue *GV) const; + + /// Classify a global function reference for the current subtarget. + unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, + const Module &M) const; + unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const; /// Classify a blockaddress reference for the current subtarget according to /// how we should reference it in a non-pcrel context. - unsigned char ClassifyBlockAddressReference() const; + unsigned char classifyBlockAddressReference() const; /// Return true if the subtarget allows calls to immediate address. - bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const; - - /// This function returns the name of a function which has an interface - /// like the non-standard bzero function, if such a function exists on - /// the current subtarget and it is considered prefereable over - /// memset with zero passed as the second argument. Otherwise it - /// returns null. - const char *getBZeroEntry() const; + bool isLegalToCallImmediateAddr() const; - /// This function returns true if the target has sincos() routine in its - /// compiler runtime or math libraries. - bool hasSinCos() const; + /// If we are using retpolines, we need to expand indirectbr to avoid it + /// lowering to an actual indirect jump. + bool enableIndirectBrExpand() const override { return useRetpoline(); } /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } + // TODO: Update the regression tests and return true. + bool supportPrintSchedInfo() const override { return false; } + bool enableEarlyIfConversion() const override; /// Return the instruction itineraries based on the subtarget selection. @@ -539,8 +727,10 @@ public: AntiDepBreakMode getAntiDepBreakMode() const override { return TargetSubtargetInfo::ANTIDEP_CRITICAL; } + + bool enableAdvancedRASplitCost() const override { return true; } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H |