summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gnu/llvm/lib/Target/X86/X86Subtarget.cpp309
-rw-r--r--gnu/llvm/lib/Target/X86/X86Subtarget.h323
2 files changed, 188 insertions, 444 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index dca98d999e5..8ef08c960f0 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -11,28 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
-
-#include "X86CallLowering.h"
-#include "X86LegalizerInfo.h"
-#include "X86RegisterBankInfo.h"
#include "X86Subtarget.h"
-#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86InstrInfo.h"
#include "X86TargetMachine.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#if defined(_MSC_VER)
#include <intrin.h>
@@ -55,127 +46,126 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
/// Classify a blockaddress reference for the current subtarget according to how
/// we should reference it in a non-pcrel context.
-unsigned char X86Subtarget::classifyBlockAddressReference() const {
- return classifyLocalReference(nullptr);
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
+ if (isPICStyleGOT()) // 32-bit ELF targets.
+ return X86II::MO_GOTOFF;
+
+ if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
+ return X86II::MO_PIC_BASE_OFFSET;
+
+ // Direct static reference to label.
+ return X86II::MO_NO_FLAG;
}
/// Classify a global variable reference for the current subtarget according to
/// how we should reference it in a non-pcrel context.
-unsigned char
-X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
- return classifyGlobalReference(GV, *GV->getParent());
-}
-
-unsigned char
-X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
- // 64 bits can use %rip addressing for anything local.
- if (is64Bit())
- return X86II::MO_NO_FLAG;
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+ // DLLImport only exists on windows, it is implemented as a load from a
+ // DLLIMPORT stub.
+ if (GV->hasDLLImportStorageClass())
+ return X86II::MO_DLLIMPORT;
- // If this is for a position dependent executable, the static linker can
- // figure it out.
- if (!isPositionIndependent())
- return X86II::MO_NO_FLAG;
+ bool isDef = GV->isStrongDefinitionForLinker();
+
+ // X86-64 in PIC mode.
+ if (isPICStyleRIPRel()) {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
+
+ if (isTargetDarwin()) {
+ // The extra GOT load is needed only for default-visibility symbols
+ // that are not strong definitions for the linker; hidden symbols and
+ // strong definitions are referenced directly.
+ if (GV->hasDefaultVisibility() && !isDef)
+ return X86II::MO_GOTPCREL;
+ } else if (!isTargetWin64()) {
+ assert(isTargetELF() && "Unknown rip-relative target");
+
+ // Extra load is needed for all externally visible symbols.
+ if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+ return X86II::MO_GOTPCREL;
+ }
- // The COFF dynamic linker just patches the executable sections.
- if (isTargetCOFF())
return X86II::MO_NO_FLAG;
+ }
- if (isTargetDarwin()) {
- // 32 bit macho has no relocation for a-b if a is undefined, even if
- // b is in the section that is being relocated.
- // This means we have to use o load even for GVs that are known to be
- // local to the dso.
- if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
- return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-
- return X86II::MO_PIC_BASE_OFFSET;
+ if (isPICStyleGOT()) { // 32-bit ELF targets.
+ // Extra load is needed for all externally visible symbols.
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return X86II::MO_GOTOFF;
+ return X86II::MO_GOT;
}
- return X86II::MO_GOTOFF;
-}
+ if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
+ // Determine whether we have a stub reference and/or whether the reference
+ // is relative to the PIC base or not.
-unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
- const Module &M) const {
- // Large model never uses stubs.
- if (TM.getCodeModel() == CodeModel::Large)
- return X86II::MO_NO_FLAG;
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (isDef)
+ return X86II::MO_PIC_BASE_OFFSET;
- // Absolute symbols can be referenced directly.
- if (GV) {
- if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
- // See if we can use the 8-bit immediate form. Note that some instructions
- // will sign extend the immediate operand, so to be conservative we only
- // accept the range [0,128).
- if (CR->getUnsignedMax().ult(128))
- return X86II::MO_ABS8;
- else
- return X86II::MO_NO_FLAG;
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+ // If symbol visibility is hidden, we have a stub for common symbol
+ // references and external declarations.
+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
+ // Hidden $non_lazy_ptr reference.
+ return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
}
- }
- if (TM.shouldAssumeDSOLocal(M, GV))
- return classifyLocalReference(GV);
+ // Otherwise, no stub.
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
- if (isTargetCOFF())
- return X86II::MO_DLLIMPORT;
+ if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
+ // Determine whether we have a stub reference.
- if (is64Bit())
- return X86II::MO_GOTPCREL;
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (isDef)
+ return X86II::MO_NO_FLAG;
- if (isTargetDarwin()) {
- if (!isPositionIndependent())
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_NONLAZY;
- return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
- }
- return X86II::MO_GOT;
-}
+ // Otherwise, no stub.
+ return X86II::MO_NO_FLAG;
+ }
-unsigned char
-X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {
- return classifyGlobalFunctionReference(GV, *GV->getParent());
+ // Direct static reference to global.
+ return X86II::MO_NO_FLAG;
}
-unsigned char
-X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
- const Module &M) const {
- if (TM.shouldAssumeDSOLocal(M, GV))
- return X86II::MO_NO_FLAG;
- if (isTargetCOFF()) {
- assert(GV->hasDLLImportStorageClass() &&
- "shouldAssumeDSOLocal gave inconsistent answer");
- return X86II::MO_DLLIMPORT;
- }
-
- const Function *F = dyn_cast_or_null<Function>(GV);
-
- if (isTargetELF()) {
- if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
- // According to psABI, PLT stub clobbers XMM8-XMM15.
- // In Regcall calling convention those registers are used for passing
- // parameters. Thus we need to prevent lazy binding in Regcall.
- return X86II::MO_GOTPCREL;
- if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())
- return X86II::MO_GOTPCREL;
- return X86II::MO_PLT;
- }
+/// This function returns the name of a function which has an interface like
+/// the non-standard bzero function, if such a function exists on the
+/// current subtarget and it is considered preferable over memset with zero
+/// passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
+ return "__bzero";
- if (is64Bit()) {
- if (F && F->hasFnAttribute(Attribute::NonLazyBind))
- // If the function is marked as non-lazy, generate an indirect call
- // which loads from the GOT directly. This avoids runtime overhead
- // at the cost of eager binding (and one extra byte of encoding).
- return X86II::MO_GOTPCREL;
- return X86II::MO_NO_FLAG;
- }
+ return nullptr;
+}
- return X86II::MO_NO_FLAG;
+bool X86Subtarget::hasSinCos() const {
+ return getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 9) &&
+ is64Bit();
}
/// Return true if the subtarget allows calls to immediate address.
-bool X86Subtarget::isLegalToCallImmediateAddr() const {
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
// the following check for Win32 should be removed.
@@ -207,6 +197,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+sahf";
}
+
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
@@ -236,49 +227,33 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
+ // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
stackAlignment = StackAlignOverride;
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
- isTargetKFreeBSD() || In64BitMode)
+ In64BitMode)
stackAlignment = 16;
-
- // Some CPUs have more overhead for gather. The specified overhead is relative
- // to the Load operation. "2" is the number provided by Intel architects. This
- // parameter is used for cost estimation of Gather Op and comparison with
- // other alternatives.
- // TODO: Remove the explicit hasAVX512()?, That would mean we would only
- // enable gather with a -march.
- if (hasAVX512() || (hasAVX2() && hasFastGather()))
- GatherOverhead = 2;
- if (hasAVX512())
- ScatterOverhead = 2;
}
void X86Subtarget::initializeEnvironment() {
X86SSELevel = NoSSE;
X863DNowLevel = NoThreeDNow;
- HasX87 = false;
HasCMov = false;
HasX86_64 = false;
HasPOPCNT = false;
HasSSE4A = false;
HasAES = false;
- HasVAES = false;
HasFXSR = false;
HasXSAVE = false;
HasXSAVEOPT = false;
HasXSAVEC = false;
HasXSAVES = false;
HasPCLMUL = false;
- HasVPCLMULQDQ = false;
- HasGFNI = false;
HasFMA = false;
HasFMA4 = false;
HasXOP = false;
HasTBM = false;
- HasLWP = false;
HasMOVBE = false;
HasRDRAND = false;
HasF16C = false;
@@ -286,67 +261,39 @@ void X86Subtarget::initializeEnvironment() {
HasLZCNT = false;
HasBMI = false;
HasBMI2 = false;
- HasVBMI = false;
- HasVBMI2 = false;
- HasIFMA = false;
HasRTM = false;
+ HasHLE = false;
HasERI = false;
HasCDI = false;
HasPFI = false;
HasDQI = false;
- HasVPOPCNTDQ = false;
HasBWI = false;
HasVLX = false;
HasADX = false;
HasPKU = false;
- HasVNNI = false;
- HasBITALG = false;
HasSHA = false;
- HasPREFETCHWT1 = false;
HasPRFCHW = false;
HasRDSEED = false;
HasLAHFSAHF = false;
- HasMWAITX = false;
- HasCLZERO = false;
HasMPX = false;
- HasSHSTK = false;
- HasIBT = false;
- HasSGX = false;
- HasCLFLUSHOPT = false;
- HasCLWB = false;
- UseRetpoline = false;
- UseRetpolineExternalThunk = false;
- IsPMULLDSlow = false;
+ IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMem16Slow = false;
IsUAMem32Slow = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
- HasFastVariableShuffle = false;
- HasFastPartialYMMorZMMWrite = false;
- HasFastGather = false;
- HasFastScalarFSQRT = false;
- HasFastVectorFSQRT = false;
- HasFastLZCNT = false;
- HasFastSHLDRotate = false;
- HasMacroFusion = false;
- HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
- SlowTwoMemOps = false;
+ CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
- Slow3OpsLEA = false;
SlowIncDec = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
UseSoftFloat = false;
- X86ProcFamily = Others;
- GatherOverhead = 1024;
- ScatterOverhead = 1024;
}
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -356,55 +303,41 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const X86TargetMachine &TM,
+X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
- PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
+ PICStyle(PICStyles::None), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this, getStackAlignment()) {
+ TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
- if (!isPositionIndependent())
+ if (TM.getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
setPICStyle(PICStyles::None);
- else if (is64Bit())
+ } else if (is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
setPICStyle(PICStyles::RIPRel);
- else if (isTargetCOFF())
+ } else if (isTargetCOFF()) {
setPICStyle(PICStyles::None);
- else if (isTargetDarwin())
- setPICStyle(PICStyles::StubPIC);
- else if (isTargetELF())
+ } else if (isTargetDarwin()) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
+ setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (isTargetELF()) {
setPICStyle(PICStyles::GOT);
-
- CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
- Legalizer.reset(new X86LegalizerInfo(*this, TM));
-
- auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
- RegBankInfo.reset(RBI);
- InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
-}
-
-const CallLowering *X86Subtarget::getCallLowering() const {
- return CallLoweringInfo.get();
-}
-
-const InstructionSelector *X86Subtarget::getInstructionSelector() const {
- return InstSelector.get();
-}
-
-const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
- return Legalizer.get();
-}
-
-const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
- return RegBankInfo.get();
+ }
}
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
+
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 37ffac1faf6..13d1026dcaa 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h
@@ -18,53 +18,32 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include <memory>
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"
namespace llvm {
-
class GlobalValue;
+class StringRef;
+class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {
-
enum Style {
- StubPIC, // Used on i386-darwin in pic mode.
- GOT, // Used on 32 bit elf on when in pic mode.
- RIPRel, // Used on X86-64 when in pic mode.
- None // Set when not in pic mode.
+ StubPIC, // Used on i386-darwin in -fPIC mode.
+ StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+ GOT, // Used on many 32-bit unices in -fPIC mode.
+ RIPRel, // Used on X86-64 when not in -static mode.
+ None // Set when in -static mode (not PIC or DynamicNoPIC mode).
};
-
-} // end namespace PICStyles
+}
class X86Subtarget final : public X86GenSubtargetInfo {
-public:
- enum X86ProcFamilyEnum {
- Others,
- IntelAtom,
- IntelSLM,
- IntelGLM,
- IntelHaswell,
- IntelBroadwell,
- IntelSkylake,
- IntelKNL,
- IntelSKX,
- IntelCannonlake,
- IntelIcelake,
- };
protected:
enum X86SSEEnum {
@@ -75,23 +54,22 @@ protected:
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
};
+ enum X86ProcFamilyEnum {
+ Others, IntelAtom, IntelSLM
+ };
+
/// X86 processor family: Intel Atom, and others
X86ProcFamilyEnum X86ProcFamily;
/// Which PIC style to use
PICStyles::Style PICStyle;
- const TargetMachine &TM;
-
/// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
/// MMX, 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
- /// True if the processor supports X87 instructions.
- bool HasX87;
-
/// True if this processor has conditional move instructions
/// (generally pentium pro+).
bool HasCMov;
@@ -107,29 +85,21 @@ protected:
/// Target has AES instructions
bool HasAES;
- bool HasVAES;
/// Target has FXSAVE/FXRESTOR instructions
bool HasFXSR;
/// Target has XSAVE instructions
bool HasXSAVE;
-
/// Target has XSAVEOPT instructions
bool HasXSAVEOPT;
-
/// Target has XSAVEC instructions
bool HasXSAVEC;
-
/// Target has XSAVES instructions
bool HasXSAVES;
/// Target has carry-less multiplication
bool HasPCLMUL;
- bool HasVPCLMULQDQ;
-
- /// Target has Galois Field Arithmetic instructions
- bool HasGFNI;
/// Target has 3-operand fused multiply-add
bool HasFMA;
@@ -143,9 +113,6 @@ protected:
/// Target has TBM instructions.
bool HasTBM;
- /// Target has LWP instructions
- bool HasLWP;
-
/// True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -167,18 +134,12 @@ protected:
/// Processor has BMI2 instructions.
bool HasBMI2;
- /// Processor has VBMI instructions.
- bool HasVBMI;
-
- /// Processor has VBMI2 instructions.
- bool HasVBMI2;
-
- /// Processor has Integer Fused Multiply Add
- bool HasIFMA;
-
/// Processor has RTM instructions.
bool HasRTM;
+ /// Processor has HLE.
+ bool HasHLE;
+
/// Processor has ADX instructions.
bool HasADX;
@@ -194,22 +155,12 @@ protected:
/// Processor has LAHF/SAHF instructions.
bool HasLAHFSAHF;
- /// Processor has MONITORX/MWAITX instructions.
- bool HasMWAITX;
-
- /// Processor has Cache Line Zero instruction
- bool HasCLZERO;
-
- /// Processor has Prefetch with intent to Write instruction
- bool HasPREFETCHWT1;
+ /// True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
- /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
- // PMULUDQ.
- bool IsPMULLDSlow;
-
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow;
@@ -228,53 +179,21 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
- /// True if its preferable to combine to a single shuffle using a variable
- /// mask over multiple fixed shuffles.
- bool HasFastVariableShuffle;
-
- /// True if there is no performance penalty to writing only the lower parts
- /// of a YMM or ZMM register without clearing the upper part.
- bool HasFastPartialYMMorZMMWrite;
-
- /// True if gather is reasonably fast. This is true for Skylake client and
- /// all AVX-512 CPUs.
- bool HasFastGather;
-
- /// True if hardware SQRTSS instruction is at least as fast (latency) as
- /// RSQRTSS followed by a Newton-Raphson iteration.
- bool HasFastScalarFSQRT;
-
- /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
- /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
- bool HasFastVectorFSQRT;
-
/// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32;
- /// True if 32-bit divides are significantly faster than
+ /// True if 16-bit divides are significantly faster than
/// 64-bit divisions and should be used when possible.
bool HasSlowDivide64;
- /// True if LZCNT instruction is fast.
- bool HasFastLZCNT;
-
- /// True if SHLD based rotate is fast.
- bool HasFastSHLDRotate;
-
- /// True if the processor supports macrofusion.
- bool HasMacroFusion;
-
- /// True if the processor has enhanced REP MOVSB/STOSB.
- bool HasERMSB;
-
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
- /// True if two memory operand instructions should use a temporary register
- /// instead.
- bool SlowTwoMemOps;
+ /// True if calls with a memory reference should be converted
+ /// to register-based indirect calls.
+ bool CallRegIndirect;
/// True if the LEA instruction inputs have to be ready at address generation
/// (AG) time.
@@ -283,11 +202,6 @@ protected:
/// True if the LEA instruction with certain arguments is slow
bool SlowLEA;
- /// True if the LEA instruction has all three source operands: base, index,
- /// and offset or if the LEA instruction uses base and index registers where
- /// the base is EBP, RBP,or R13
- bool Slow3OpsLEA;
-
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
@@ -300,9 +214,6 @@ protected:
/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI;
- /// Processor has AVX-512 population count Instructions
- bool HasVPOPCNTDQ;
-
/// Processor has AVX-512 Doubleword and Quadword instructions
bool HasDQI;
@@ -315,40 +226,9 @@ protected:
/// Processor has PKU extenstions
bool HasPKU;
- /// Processor has AVX-512 Vector Neural Network Instructions
- bool HasVNNI;
-
- /// Processor has AVX-512 Bit Algorithms instructions
- bool HasBITALG;
-
- /// Processor supports MPX - Memory Protection Extensions
+ /// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
- /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
- /// using Shadow Stack
- bool HasSHSTK;
-
- /// Processor supports CET IBT - Control-Flow Enforcement Technology
- /// using Indirect Branch Tracking
- bool HasIBT;
-
- /// Processor has Software Guard Extensions
- bool HasSGX;
-
- /// Processor supports Flush Cache Line instruction
- bool HasCLFLUSHOPT;
-
- /// Processor supports Cache Line Write Back instruction
- bool HasCLWB;
-
- /// Use a retpoline thunk rather than indirect calls to block speculative
- /// execution.
- bool UseRetpoline;
-
- /// When using a retpoline thunk, call an externally provided thunk rather
- /// than emitting one inside the compiler.
- bool UseRetpolineExternalThunk;
-
/// Use software floating point for code generation.
bool UseSoftFloat;
@@ -366,13 +246,8 @@ protected:
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
- /// GlobalISel related APIs.
- std::unique_ptr<CallLowering> CallLoweringInfo;
- std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
- std::unique_ptr<InstructionSelector> InstSelector;
-
private:
+
/// Override the stack alignment.
unsigned StackAlignOverride;
@@ -385,10 +260,6 @@ private:
/// True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
- /// Contains the Overhead of gather\scatter instructions
- int GatherOverhead;
- int ScatterOverhead;
-
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
@@ -400,23 +271,19 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
const X86TargetMachine &TM, unsigned StackAlignOverride);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
-
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
-
const X86FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
-
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
-
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
@@ -434,19 +301,12 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// Methods used by Global ISel
- const CallLowering *getCallLowering() const override;
- const InstructionSelector *getInstructionSelector() const override;
- const LegalizerInfo *getLegalizerInfo() const override;
- const RegisterBankInfo *getRegBankInfo() const override;
-
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
-
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
@@ -476,7 +336,6 @@ public:
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
- bool hasX87() const { return HasX87; }
bool hasCMov() const { return HasCMov; }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
@@ -495,23 +354,19 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAES() const { return HasAES; }
- bool hasVAES() const { return HasVAES; }
bool hasFXSR() const { return HasFXSR; }
bool hasXSAVE() const { return HasXSAVE; }
bool hasXSAVEOPT() const { return HasXSAVEOPT; }
bool hasXSAVEC() const { return HasXSAVEC; }
bool hasXSAVES() const { return HasXSAVES; }
bool hasPCLMUL() const { return HasPCLMUL; }
- bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
- bool hasGFNI() const { return HasGFNI; }
// Prefer FMA4 to FMA - its better for commutation/memory folding and
// has equal or better performance on all supported targets.
- bool hasFMA() const { return HasFMA; }
+ bool hasFMA() const { return HasFMA && !HasFMA4; }
bool hasFMA4() const { return HasFMA4; }
- bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
+ bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
- bool hasLWP() const { return HasLWP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
@@ -519,107 +374,58 @@ public:
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
- bool hasVBMI() const { return HasVBMI; }
- bool hasVBMI2() const { return HasVBMI2; }
- bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
+ bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
- bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
- bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
- bool hasSSEPrefetch() const {
- // We implicitly enable these when we have a write prefix supporting cache
- // level OR if we have prfchw, but don't already have a read prefetch from
- // 3dnow.
- return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
- }
+ bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
- bool hasMWAITX() const { return HasMWAITX; }
- bool hasCLZERO() const { return HasCLZERO; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
- bool isPMULLDSlow() const { return IsPMULLDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
- int getGatherOverhead() const { return GatherOverhead; }
- int getScatterOverhead() const { return ScatterOverhead; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
- bool hasFastVariableShuffle() const {
- return HasFastVariableShuffle;
- }
- bool hasFastPartialYMMorZMMWrite() const {
- return HasFastPartialYMMorZMMWrite;
- }
- bool hasFastGather() const { return HasFastGather; }
- bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
- bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
- bool hasFastLZCNT() const { return HasFastLZCNT; }
- bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
- bool hasMacroFusion() const { return HasMacroFusion; }
- bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
- bool slowTwoMemOps() const { return SlowTwoMemOps; }
+ bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
- bool slow3OpsLEA() const { return Slow3OpsLEA; }
bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
- bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
- bool hasVNNI() const { return HasVNNI; }
- bool hasBITALG() const { return HasBITALG; }
bool hasMPX() const { return HasMPX; }
- bool hasSHSTK() const { return HasSHSTK; }
- bool hasIBT() const { return HasIBT; }
- bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
- bool hasCLWB() const { return HasCLWB; }
- bool useRetpoline() const { return UseRetpoline; }
- bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
- bool isXRaySupported() const override { return is64Bit(); }
-
- X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
-
- /// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
- /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
- /// no-sse2). There isn't any reason to disable it if the target processor
- /// supports it.
- bool hasMFence() const { return hasSSE2() || is64Bit(); }
-
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
- bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
+ bool isTargetPS4() const { return TargetTriple.isPS4(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
- bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
- bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
- bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
@@ -649,10 +455,15 @@ public:
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
- bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
+ bool isTargetWin64() const {
+ return In64BitMode && TargetTriple.isOSWindows();
+ }
- bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
+ bool isTargetWin32() const {
+ return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC());
+ }
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
@@ -660,14 +471,19 @@ public:
return PICStyle == PICStyles::StubPIC;
}
- bool isPositionIndependent() const { return TM.isPositionIndependent(); }
+ bool isPICStyleStubNoDynamic() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC;
+ }
+ bool isPICStyleStubAny() const {
+ return PICStyle == PICStyles::StubDynamicNoPIC ||
+ PICStyle == PICStyles::StubPIC;
+ }
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
// On Win64, all these conventions just use the default convention.
case CallingConv::C:
case CallingConv::Fast:
- case CallingConv::Swift:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_ThisCall:
@@ -675,7 +491,7 @@ public:
case CallingConv::Intel_OCL_BI:
return isTargetWin64();
// This convention allows using the Win64 convention on other targets.
- case CallingConv::Win64:
+ case CallingConv::X86_64_Win64:
return true;
// This convention allows using the SysV convention on Windows targets.
case CallingConv::X86_64_SysV:
@@ -686,36 +502,33 @@ public:
}
}
- /// Classify a global variable reference for the current subtarget according
- /// to how we should reference it in a non-pcrel context.
- unsigned char classifyLocalReference(const GlobalValue *GV) const;
-
- unsigned char classifyGlobalReference(const GlobalValue *GV,
- const Module &M) const;
- unsigned char classifyGlobalReference(const GlobalValue *GV) const;
-
- /// Classify a global function reference for the current subtarget.
- unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
- const Module &M) const;
- unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
+ /// ClassifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it in a non-pcrel
+ /// context.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM)const;
/// Classify a blockaddress reference for the current subtarget according to
/// how we should reference it in a non-pcrel context.
- unsigned char classifyBlockAddressReference() const;
+ unsigned char ClassifyBlockAddressReference() const;
/// Return true if the subtarget allows calls to immediate address.
- bool isLegalToCallImmediateAddr() const;
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+ /// This function returns the name of a function which has an interface
+ /// like the non-standard bzero function, if such a function exists on
+ /// the current subtarget and it is considered preferable over
+ /// memset with zero passed as the second argument. Otherwise it
+ /// returns null.
+ const char *getBZeroEntry() const;
- /// If we are using retpolines, we need to expand indirectbr to avoid it
- /// lowering to an actual indirect jump.
- bool enableIndirectBrExpand() const override { return useRetpoline(); }
+ /// This function returns true if the target has sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
- // TODO: Update the regression tests and return true.
- bool supportPrintSchedInfo() const override { return false; }
-
bool enableEarlyIfConversion() const override;
/// Return the instruction itineraries based on the subtarget selection.
@@ -726,10 +539,8 @@ public:
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
-
- bool enableAdvancedRASplitCost() const override { return true; }
};
-} // end namespace llvm
+} // End llvm namespace
-#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
+#endif