diff options
-rw-r--r-- | gnu/llvm/lib/Target/X86/X86Subtarget.cpp | 309 | ||||
-rw-r--r-- | gnu/llvm/lib/Target/X86/X86Subtarget.h | 323 |
2 files changed, 188 insertions, 444 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp index dca98d999e5..8ef08c960f0 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp @@ -11,28 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "X86.h" - -#include "X86CallLowering.h" -#include "X86LegalizerInfo.h" -#include "X86RegisterBankInfo.h" #include "X86Subtarget.h" -#include "MCTargetDesc/X86BaseInfo.h" +#include "X86InstrInfo.h" #include "X86TargetMachine.h" -#include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #if defined(_MSC_VER) #include <intrin.h> @@ -55,127 +46,126 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, /// Classify a blockaddress reference for the current subtarget according to how /// we should reference it in a non-pcrel context. -unsigned char X86Subtarget::classifyBlockAddressReference() const { - return classifyLocalReference(nullptr); +unsigned char X86Subtarget::ClassifyBlockAddressReference() const { + if (isPICStyleGOT()) // 32-bit ELF targets. + return X86II::MO_GOTOFF; + + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. + return X86II::MO_PIC_BASE_OFFSET; + + // Direct static reference to label. + return X86II::MO_NO_FLAG; } /// Classify a global variable reference for the current subtarget according to /// how we should reference it in a non-pcrel context. 
-unsigned char -X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const { - return classifyGlobalReference(GV, *GV->getParent()); -} - -unsigned char -X86Subtarget::classifyLocalReference(const GlobalValue *GV) const { - // 64 bits can use %rip addressing for anything local. - if (is64Bit()) - return X86II::MO_NO_FLAG; +unsigned char X86Subtarget:: +ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { + // DLLImport only exists on windows, it is implemented as a load from a + // DLLIMPORT stub. + if (GV->hasDLLImportStorageClass()) + return X86II::MO_DLLIMPORT; - // If this is for a position dependent executable, the static linker can - // figure it out. - if (!isPositionIndependent()) - return X86II::MO_NO_FLAG; + bool isDef = GV->isStrongDefinitionForLinker(); + + // X86-64 in PIC mode. + if (isPICStyleRIPRel()) { + // Large model never uses stubs. + if (TM.getCodeModel() == CodeModel::Large) + return X86II::MO_NO_FLAG; + + if (isTargetDarwin()) { + // If symbol visibility is hidden, the extra load is not needed if + // target is x86-64 or the symbol is definitely defined in the current + // translation unit. + if (GV->hasDefaultVisibility() && !isDef) + return X86II::MO_GOTPCREL; + } else if (!isTargetWin64()) { + assert(isTargetELF() && "Unknown rip-relative target"); + + // Extra load is needed for all externally visible. + if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility()) + return X86II::MO_GOTPCREL; + } - // The COFF dynamic linker just patches the executable sections. - if (isTargetCOFF()) return X86II::MO_NO_FLAG; + } - if (isTargetDarwin()) { - // 32 bit macho has no relocation for a-b if a is undefined, even if - // b is in the section that is being relocated. - // This means we have to use o load even for GVs that are known to be - // local to the dso. 
- if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) - return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - - return X86II::MO_PIC_BASE_OFFSET; + if (isPICStyleGOT()) { // 32-bit ELF targets. + // Extra load is needed for all externally visible. + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + return X86II::MO_GOTOFF; + return X86II::MO_GOT; } - return X86II::MO_GOTOFF; -} + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. + // Determine whether we have a stub reference and/or whether the reference + // is relative to the PIC base or not. -unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV, - const Module &M) const { - // Large model never uses stubs. - if (TM.getCodeModel() == CodeModel::Large) - return X86II::MO_NO_FLAG; + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (isDef) + return X86II::MO_PIC_BASE_OFFSET; - // Absolute symbols can be referenced directly. - if (GV) { - if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) { - // See if we can use the 8-bit immediate form. Note that some instructions - // will sign extend the immediate operand, so to be conservative we only - // accept the range [0,128). - if (CR->getUnsignedMax().ult(128)) - return X86II::MO_ABS8; - else - return X86II::MO_NO_FLAG; + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_NONLAZY_PIC_BASE; + + // If symbol visibility is hidden, we have a stub for common symbol + // references and external declarations. + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) { + // Hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } - } - if (TM.shouldAssumeDSOLocal(M, GV)) - return classifyLocalReference(GV); + // Otherwise, no stub. 
+ return X86II::MO_PIC_BASE_OFFSET; + } - if (isTargetCOFF()) - return X86II::MO_DLLIMPORT; + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. + // Determine whether we have a stub reference. - if (is64Bit()) - return X86II::MO_GOTPCREL; + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (isDef) + return X86II::MO_NO_FLAG; - if (isTargetDarwin()) { - if (!isPositionIndependent()) + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_NONLAZY; - return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - } - return X86II::MO_GOT; -} + // Otherwise, no stub. + return X86II::MO_NO_FLAG; + } -unsigned char -X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const { - return classifyGlobalFunctionReference(GV, *GV->getParent()); + // Direct static reference to global. + return X86II::MO_NO_FLAG; } -unsigned char -X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV, - const Module &M) const { - if (TM.shouldAssumeDSOLocal(M, GV)) - return X86II::MO_NO_FLAG; - if (isTargetCOFF()) { - assert(GV->hasDLLImportStorageClass() && - "shouldAssumeDSOLocal gave inconsistent answer"); - return X86II::MO_DLLIMPORT; - } - - const Function *F = dyn_cast_or_null<Function>(GV); - - if (isTargetELF()) { - if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv())) - // According to psABI, PLT stub clobbers XMM8-XMM15. - // In Regcall calling convention those registers are used for passing - // parameters. Thus we need to prevent lazy binding in Regcall. 
- return X86II::MO_GOTPCREL; - if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit()) - return X86II::MO_GOTPCREL; - return X86II::MO_PLT; - } +/// This function returns the name of a function which has an interface like +/// the non-standard bzero function, if such a function exists on the +/// current subtarget and it is considered preferable over memset with zero +/// passed as the second argument. Otherwise it returns null. +const char *X86Subtarget::getBZeroEntry() const { + // Darwin 10 has a __bzero entry point for this purpose. + if (getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 6)) + return "__bzero"; - if (is64Bit()) { - if (F && F->hasFnAttribute(Attribute::NonLazyBind)) - // If the function is marked as non-lazy, generate an indirect call - // which loads from the GOT directly. This avoids runtime overhead - // at the cost of eager binding (and one extra byte of encoding). - return X86II::MO_GOTPCREL; - return X86II::MO_NO_FLAG; - } + return nullptr; +} - return X86II::MO_NO_FLAG; +bool X86Subtarget::hasSinCos() const { + return getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 9) && + is64Bit(); } /// Return true if the subtarget allows calls to immediate address. -bool X86Subtarget::isLegalToCallImmediateAddr() const { +bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does, // the following check for Win32 should be removed. @@ -207,6 +197,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+sahf"; } + // Parse features string and set the CPU. 
ParseSubtargetFeatures(CPUName, FullFS); @@ -236,49 +227,33 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both + // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || - isTargetKFreeBSD() || In64BitMode) + In64BitMode) stackAlignment = 16; - - // Some CPUs have more overhead for gather. The specified overhead is relative - // to the Load operation. "2" is the number provided by Intel architects. This - // parameter is used for cost estimation of Gather Op and comparison with - // other alternatives. - // TODO: Remove the explicit hasAVX512()?, That would mean we would only - // enable gather with a -march. 
- if (hasAVX512() || (hasAVX2() && hasFastGather())) - GatherOverhead = 2; - if (hasAVX512()) - ScatterOverhead = 2; } void X86Subtarget::initializeEnvironment() { X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; - HasX87 = false; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; HasSSE4A = false; HasAES = false; - HasVAES = false; HasFXSR = false; HasXSAVE = false; HasXSAVEOPT = false; HasXSAVEC = false; HasXSAVES = false; HasPCLMUL = false; - HasVPCLMULQDQ = false; - HasGFNI = false; HasFMA = false; HasFMA4 = false; HasXOP = false; HasTBM = false; - HasLWP = false; HasMOVBE = false; HasRDRAND = false; HasF16C = false; @@ -286,67 +261,39 @@ void X86Subtarget::initializeEnvironment() { HasLZCNT = false; HasBMI = false; HasBMI2 = false; - HasVBMI = false; - HasVBMI2 = false; - HasIFMA = false; HasRTM = false; + HasHLE = false; HasERI = false; HasCDI = false; HasPFI = false; HasDQI = false; - HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; HasPKU = false; - HasVNNI = false; - HasBITALG = false; HasSHA = false; - HasPREFETCHWT1 = false; HasPRFCHW = false; HasRDSEED = false; HasLAHFSAHF = false; - HasMWAITX = false; - HasCLZERO = false; HasMPX = false; - HasSHSTK = false; - HasIBT = false; - HasSGX = false; - HasCLFLUSHOPT = false; - HasCLWB = false; - UseRetpoline = false; - UseRetpolineExternalThunk = false; - IsPMULLDSlow = false; + IsBTMemSlow = false; IsSHLDSlow = false; IsUAMem16Slow = false; IsUAMem32Slow = false; HasSSEUnalignedMem = false; HasCmpxchg16b = false; UseLeaForSP = false; - HasFastVariableShuffle = false; - HasFastPartialYMMorZMMWrite = false; - HasFastGather = false; - HasFastScalarFSQRT = false; - HasFastVectorFSQRT = false; - HasFastLZCNT = false; - HasFastSHLDRotate = false; - HasMacroFusion = false; - HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; - SlowTwoMemOps = false; + CallRegIndirect = false; LEAUsesAG = false; SlowLEA = false; - Slow3OpsLEA = false; SlowIncDec = 
false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; UseSoftFloat = false; - X86ProcFamily = Others; - GatherOverhead = 1024; - ScatterOverhead = 1024; } X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, @@ -356,55 +303,41 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } -X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const X86TargetMachine &TM, +X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const X86TargetMachine &TM, unsigned StackAlignOverride) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), - PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), + PICStyle(PICStyles::None), TargetTriple(TT), StackAlignOverride(StackAlignOverride), In64BitMode(TargetTriple.getArch() == Triple::x86_64), In32BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this, getStackAlignment()) { + TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. - if (!isPositionIndependent()) + if (TM.getRelocationModel() == Reloc::Static) { + // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. setPICStyle(PICStyles::None); - else if (is64Bit()) + } else if (is64Bit()) { + // PIC in 64 bit mode is always rip-rel. 
setPICStyle(PICStyles::RIPRel); - else if (isTargetCOFF()) + } else if (isTargetCOFF()) { setPICStyle(PICStyles::None); - else if (isTargetDarwin()) - setPICStyle(PICStyles::StubPIC); - else if (isTargetELF()) + } else if (isTargetDarwin()) { + if (TM.getRelocationModel() == Reloc::PIC_) + setPICStyle(PICStyles::StubPIC); + else { + assert(TM.getRelocationModel() == Reloc::DynamicNoPIC); + setPICStyle(PICStyles::StubDynamicNoPIC); + } + } else if (isTargetELF()) { setPICStyle(PICStyles::GOT); - - CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering())); - Legalizer.reset(new X86LegalizerInfo(*this, TM)); - - auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); - RegBankInfo.reset(RBI); - InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); -} - -const CallLowering *X86Subtarget::getCallLowering() const { - return CallLoweringInfo.get(); -} - -const InstructionSelector *X86Subtarget::getInstructionSelector() const { - return InstSelector.get(); -} - -const LegalizerInfo *X86Subtarget::getLegalizerInfo() const { - return Legalizer.get(); -} - -const RegisterBankInfo *X86Subtarget::getRegBankInfo() const { - return RegBankInfo.get(); + } } bool X86Subtarget::enableEarlyIfConversion() const { return hasCMov() && X86EarlyIfConv; } + diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h index 37ffac1faf6..13d1026dcaa 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.h +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h @@ -18,53 +18,32 @@ #include "X86ISelLowering.h" #include "X86InstrInfo.h" #include "X86SelectionDAGInfo.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/CallingConv.h" -#include 
"llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetMachine.h" -#include <memory> +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> #define GET_SUBTARGETINFO_HEADER #include "X86GenSubtargetInfo.inc" namespace llvm { - class GlobalValue; +class StringRef; +class TargetMachine; /// The X86 backend supports a number of different styles of PIC. /// namespace PICStyles { - enum Style { - StubPIC, // Used on i386-darwin in pic mode. - GOT, // Used on 32 bit elf on when in pic mode. - RIPRel, // Used on X86-64 when in pic mode. - None // Set when not in pic mode. + StubPIC, // Used on i386-darwin in -fPIC mode. + StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode. + GOT, // Used on many 32-bit unices in -fPIC mode. + RIPRel, // Used on X86-64 when not in -static mode. + None // Set when in -static mode (not PIC or DynamicNoPIC mode). }; - -} // end namespace PICStyles +} class X86Subtarget final : public X86GenSubtargetInfo { -public: - enum X86ProcFamilyEnum { - Others, - IntelAtom, - IntelSLM, - IntelGLM, - IntelHaswell, - IntelBroadwell, - IntelSkylake, - IntelKNL, - IntelSKX, - IntelCannonlake, - IntelIcelake, - }; protected: enum X86SSEEnum { @@ -75,23 +54,22 @@ protected: NoThreeDNow, MMX, ThreeDNow, ThreeDNowA }; + enum X86ProcFamilyEnum { + Others, IntelAtom, IntelSLM + }; + /// X86 processor family: Intel Atom, and others X86ProcFamilyEnum X86ProcFamily; /// Which PIC style to use PICStyles::Style PICStyle; - const TargetMachine &TM; - /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. X86SSEEnum X86SSELevel; /// MMX, 3DNow, 3DNow Athlon, or none supported. X863DNowEnum X863DNowLevel; - /// True if the processor supports X87 instructions. - bool HasX87; - /// True if this processor has conditional move instructions /// (generally pentium pro+). 
bool HasCMov; @@ -107,29 +85,21 @@ protected: /// Target has AES instructions bool HasAES; - bool HasVAES; /// Target has FXSAVE/FXRESTOR instructions bool HasFXSR; /// Target has XSAVE instructions bool HasXSAVE; - /// Target has XSAVEOPT instructions bool HasXSAVEOPT; - /// Target has XSAVEC instructions bool HasXSAVEC; - /// Target has XSAVES instructions bool HasXSAVES; /// Target has carry-less multiplication bool HasPCLMUL; - bool HasVPCLMULQDQ; - - /// Target has Galois Field Arithmetic instructions - bool HasGFNI; /// Target has 3-operand fused multiply-add bool HasFMA; @@ -143,9 +113,6 @@ protected: /// Target has TBM instructions. bool HasTBM; - /// Target has LWP instructions - bool HasLWP; - /// True if the processor has the MOVBE instruction. bool HasMOVBE; @@ -167,18 +134,12 @@ protected: /// Processor has BMI2 instructions. bool HasBMI2; - /// Processor has VBMI instructions. - bool HasVBMI; - - /// Processor has VBMI2 instructions. - bool HasVBMI2; - - /// Processor has Integer Fused Multiply Add - bool HasIFMA; - /// Processor has RTM instructions. bool HasRTM; + /// Processor has HLE. + bool HasHLE; + /// Processor has ADX instructions. bool HasADX; @@ -194,22 +155,12 @@ protected: /// Processor has LAHF/SAHF instructions. bool HasLAHFSAHF; - /// Processor has MONITORX/MWAITX instructions. - bool HasMWAITX; - - /// Processor has Cache Line Zero instruction - bool HasCLZERO; - - /// Processor has Prefetch with intent to Write instruction - bool HasPREFETCHWT1; + /// True if BT (bit test) of memory instructions are slow. + bool IsBTMemSlow; /// True if SHLD instructions are slow. bool IsSHLDSlow; - /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and - // PMULUDQ. - bool IsPMULLDSlow; - /// True if unaligned memory accesses of 16-bytes are slow. bool IsUAMem16Slow; @@ -228,53 +179,21 @@ protected: /// the stack pointer. This is an optimization for Intel Atom processors. 
bool UseLeaForSP; - /// True if its preferable to combine to a single shuffle using a variable - /// mask over multiple fixed shuffles. - bool HasFastVariableShuffle; - - /// True if there is no performance penalty to writing only the lower parts - /// of a YMM or ZMM register without clearing the upper part. - bool HasFastPartialYMMorZMMWrite; - - /// True if gather is reasonably fast. This is true for Skylake client and - /// all AVX-512 CPUs. - bool HasFastGather; - - /// True if hardware SQRTSS instruction is at least as fast (latency) as - /// RSQRTSS followed by a Newton-Raphson iteration. - bool HasFastScalarFSQRT; - - /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast - /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration. - bool HasFastVectorFSQRT; - /// True if 8-bit divisions are significantly faster than /// 32-bit divisions and should be used when possible. bool HasSlowDivide32; - /// True if 32-bit divides are significantly faster than + /// True if 16-bit divides are significantly faster than /// 64-bit divisions and should be used when possible. bool HasSlowDivide64; - /// True if LZCNT instruction is fast. - bool HasFastLZCNT; - - /// True if SHLD based rotate is fast. - bool HasFastSHLDRotate; - - /// True if the processor supports macrofusion. - bool HasMacroFusion; - - /// True if the processor has enhanced REP MOVSB/STOSB. - bool HasERMSB; - /// True if the short functions should be padded to prevent /// a stall when returning too early. bool PadShortFunctions; - /// True if two memory operand instructions should use a temporary register - /// instead. - bool SlowTwoMemOps; + /// True if the Calls with memory reference should be converted + /// to a register-based indirect call. + bool CallRegIndirect; /// True if the LEA instruction inputs have to be ready at address generation /// (AG) time. 
@@ -283,11 +202,6 @@ protected: /// True if the LEA instruction with certain arguments is slow bool SlowLEA; - /// True if the LEA instruction has all three source operands: base, index, - /// and offset or if the LEA instruction uses base and index registers where - /// the base is EBP, RBP,or R13 - bool Slow3OpsLEA; - /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; @@ -300,9 +214,6 @@ protected: /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; - /// Processor has AVX-512 population count Instructions - bool HasVPOPCNTDQ; - /// Processor has AVX-512 Doubleword and Quadword instructions bool HasDQI; @@ -315,40 +226,9 @@ protected: /// Processor has PKU extenstions bool HasPKU; - /// Processor has AVX-512 Vector Neural Network Instructions - bool HasVNNI; - - /// Processor has AVX-512 Bit Algorithms instructions - bool HasBITALG; - - /// Processor supports MPX - Memory Protection Extensions + /// Processot supports MPX - Memory Protection Extensions bool HasMPX; - /// Processor supports CET SHSTK - Control-Flow Enforcement Technology - /// using Shadow Stack - bool HasSHSTK; - - /// Processor supports CET IBT - Control-Flow Enforcement Technology - /// using Indirect Branch Tracking - bool HasIBT; - - /// Processor has Software Guard Extensions - bool HasSGX; - - /// Processor supports Flush Cache Line instruction - bool HasCLFLUSHOPT; - - /// Processor supports Cache Line Write Back instruction - bool HasCLWB; - - /// Use a retpoline thunk rather than indirect calls to block speculative - /// execution. - bool UseRetpoline; - - /// When using a retpoline thunk, call an externally provided thunk rather - /// than emitting one inside the compiler. - bool UseRetpolineExternalThunk; - /// Use software floating point for code generation. bool UseSoftFloat; @@ -366,13 +246,8 @@ protected: /// Instruction itineraries for scheduling InstrItineraryData InstrItins; - /// GlobalISel related APIs. 
- std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - std::unique_ptr<InstructionSelector> InstSelector; - private: + /// Override the stack alignment. unsigned StackAlignOverride; @@ -385,10 +260,6 @@ private: /// True if compiling for 16-bit, false for 32-bit or 64-bit. bool In16BitMode; - /// Contains the Overhead of gather\scatter instructions - int GatherOverhead; - int ScatterOverhead; - X86SelectionDAGInfo TSInfo; // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which // X86TargetLowering needs. @@ -400,23 +271,19 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const X86TargetMachine &TM, unsigned StackAlignOverride); const X86TargetLowering *getTargetLowering() const override { return &TLInfo; } - const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const X86FrameLowering *getFrameLowering() const override { return &FrameLowering; } - const X86SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } - const X86RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } @@ -434,19 +301,12 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// Methods used by Global ISel - const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; - const LegalizerInfo *getLegalizerInfo() const override; - const RegisterBankInfo *getRegBankInfo() const override; - private: /// Initialize the full set of dependencies so we can use an initializer /// list for X86Subtarget. 
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); - public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { @@ -476,7 +336,6 @@ public: PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } - bool hasX87() const { return HasX87; } bool hasCMov() const { return HasCMov; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } @@ -495,23 +354,19 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } - bool hasVAES() const { return HasVAES; } bool hasFXSR() const { return HasFXSR; } bool hasXSAVE() const { return HasXSAVE; } bool hasXSAVEOPT() const { return HasXSAVEOPT; } bool hasXSAVEC() const { return HasXSAVEC; } bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } - bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } - bool hasGFNI() const { return HasGFNI; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. 
- bool hasFMA() const { return HasFMA; } + bool hasFMA() const { return HasFMA && !HasFMA4; } bool hasFMA4() const { return HasFMA4; } - bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } + bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } - bool hasLWP() const { return HasLWP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } @@ -519,107 +374,58 @@ public: bool hasLZCNT() const { return HasLZCNT; } bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } - bool hasVBMI() const { return HasVBMI; } - bool hasVBMI2() const { return HasVBMI2; } - bool hasIFMA() const { return HasIFMA; } bool hasRTM() const { return HasRTM; } + bool hasHLE() const { return HasHLE; } bool hasADX() const { return HasADX; } bool hasSHA() const { return HasSHA; } - bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; } - bool hasPREFETCHWT1() const { return HasPREFETCHWT1; } - bool hasSSEPrefetch() const { - // We implicitly enable these when we have a write prefix supporting cache - // level OR if we have prfchw, but don't already have a read prefetch from - // 3dnow. 
- return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1(); - } + bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } bool hasLAHFSAHF() const { return HasLAHFSAHF; } - bool hasMWAITX() const { return HasMWAITX; } - bool hasCLZERO() const { return HasCLZERO; } + bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } - bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } - int getGatherOverhead() const { return GatherOverhead; } - int getScatterOverhead() const { return ScatterOverhead; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } - bool hasFastVariableShuffle() const { - return HasFastVariableShuffle; - } - bool hasFastPartialYMMorZMMWrite() const { - return HasFastPartialYMMorZMMWrite; - } - bool hasFastGather() const { return HasFastGather; } - bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; } - bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } - bool hasFastLZCNT() const { return HasFastLZCNT; } - bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } - bool hasMacroFusion() const { return HasMacroFusion; } - bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } - bool slowTwoMemOps() const { return SlowTwoMemOps; } + bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } - bool slow3OpsLEA() const { return Slow3OpsLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } - bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } 
bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } bool hasPKU() const { return HasPKU; } - bool hasVNNI() const { return HasVNNI; } - bool hasBITALG() const { return HasBITALG; } bool hasMPX() const { return HasMPX; } - bool hasSHSTK() const { return HasSHSTK; } - bool hasIBT() const { return HasIBT; } - bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } - bool hasCLWB() const { return HasCLWB; } - bool useRetpoline() const { return UseRetpoline; } - bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } - bool isXRaySupported() const override { return is64Bit(); } - - X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; } - - /// TODO: to be removed later and replaced with suitable properties bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } - /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for - /// no-sse2). There isn't any reason to disable it if the target processor - /// supports it. 
- bool hasMFence() const { return hasSSE2() || is64Bit(); } - const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } - bool isTargetPS4() const { return TargetTriple.isPS4CPU(); } + bool isTargetPS4() const { return TargetTriple.isPS4(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } - bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } - bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } - bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } bool isTargetWindowsMSVC() const { return TargetTriple.isWindowsMSVCEnvironment(); @@ -649,10 +455,15 @@ public: bool isOSWindows() const { return TargetTriple.isOSWindows(); } - bool isTargetWin64() const { return In64BitMode && isOSWindows(); } + bool isTargetWin64() const { + return In64BitMode && TargetTriple.isOSWindows(); + } - bool isTargetWin32() const { return !In64BitMode && isOSWindows(); } + bool isTargetWin32() const { + return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC()); + } + bool isPICStyleSet() const { return PICStyle != PICStyles::None; } bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } bool 
isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } @@ -660,14 +471,19 @@ public: return PICStyle == PICStyles::StubPIC; } - bool isPositionIndependent() const { return TM.isPositionIndependent(); } + bool isPICStyleStubNoDynamic() const { + return PICStyle == PICStyles::StubDynamicNoPIC; + } + bool isPICStyleStubAny() const { + return PICStyle == PICStyles::StubDynamicNoPIC || + PICStyle == PICStyles::StubPIC; + } bool isCallingConvWin64(CallingConv::ID CC) const { switch (CC) { // On Win64, all these conventions just use the default convention. case CallingConv::C: case CallingConv::Fast: - case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: @@ -675,7 +491,7 @@ public: case CallingConv::Intel_OCL_BI: return isTargetWin64(); // This convention allows using the Win64 convention on other targets. - case CallingConv::Win64: + case CallingConv::X86_64_Win64: return true; // This convention allows using the SysV convention on Windows targets. case CallingConv::X86_64_SysV: @@ -686,36 +502,33 @@ public: } } - /// Classify a global variable reference for the current subtarget according - /// to how we should reference it in a non-pcrel context. - unsigned char classifyLocalReference(const GlobalValue *GV) const; - - unsigned char classifyGlobalReference(const GlobalValue *GV, - const Module &M) const; - unsigned char classifyGlobalReference(const GlobalValue *GV) const; - - /// Classify a global function reference for the current subtarget. - unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, - const Module &M) const; - unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const; + /// ClassifyGlobalReference - Classify a global variable reference for the + /// current subtarget according to how we should reference it in a non-pcrel + /// context. 
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM)const; /// Classify a blockaddress reference for the current subtarget according to /// how we should reference it in a non-pcrel context. - unsigned char classifyBlockAddressReference() const; + unsigned char ClassifyBlockAddressReference() const; /// Return true if the subtarget allows calls to immediate address. - bool isLegalToCallImmediateAddr() const; + bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const; + + /// This function returns the name of a function which has an interface + /// like the non-standard bzero function, if such a function exists on + /// the current subtarget and it is considered preferable over + /// memset with zero passed as the second argument. Otherwise it + /// returns null. + const char *getBZeroEntry() const; - /// If we are using retpolines, we need to expand indirectbr to avoid it - /// lowering to an actual indirect jump. - bool enableIndirectBrExpand() const override { return useRetpoline(); } + /// This function returns true if the target has sincos() routine in its + /// compiler runtime or math libraries. + bool hasSinCos() const; /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } - // TODO: Update the regression tests and return true. - bool supportPrintSchedInfo() const override { return false; } - bool enableEarlyIfConversion() const override; /// Return the instruction itineraries based on the subtarget selection. @@ -726,10 +539,8 @@ public: AntiDepBreakMode getAntiDepBreakMode() const override { return TargetSubtargetInfo::ANTIDEP_CRITICAL; } - - bool enableAdvancedRASplitCost() const override { return true; } }; -} // end namespace llvm +} // End llvm namespace -#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H +#endif |