summaryrefslogtreecommitdiff
path: root/gnu/llvm/lib
diff options
context:
space:
mode:
author Philip Guenther <guenther@cvs.openbsd.org> 2018-12-30 23:08:06 +0000
committer Philip Guenther <guenther@cvs.openbsd.org> 2018-12-30 23:08:06 +0000
commit 5a7ce81e74827aac04443e86ccb158d1121c7d8a (patch)
tree 6a70196e1900e50492373a957e44c197a05a7e45 /gnu/llvm/lib
parent 3d1ec8445e872d8b6365f39a103b51fdd64c20d8 (diff)
Turn on -mretpoline by default in clang on amd64, but turn it off
explicitly in SMALL_KERNEL kernel builds. tweaks from jsg@ and tb@ ok deraadt@ kettenis@
Diffstat (limited to 'gnu/llvm/lib')
-rw-r--r-- gnu/llvm/lib/Target/X86/X86Subtarget.cpp 315
-rw-r--r-- gnu/llvm/lib/Target/X86/X86Subtarget.h 324
2 files changed, 452 insertions, 187 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index 8ef08c960f0..195576bf546 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -11,19 +11,28 @@
//
//===----------------------------------------------------------------------===//
+#include "X86.h"
+
+#include "X86CallLowering.h"
+#include "X86LegalizerInfo.h"
+#include "X86RegisterBankInfo.h"
#include "X86Subtarget.h"
-#include "X86InstrInfo.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86TargetMachine.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#if defined(_MSC_VER)
#include <intrin.h>
@@ -46,126 +55,127 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
/// Classify a blockaddress reference for the current subtarget according to how
/// we should reference it in a non-pcrel context.
-unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
- if (isPICStyleGOT()) // 32-bit ELF targets.
- return X86II::MO_GOTOFF;
-
- if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
- return X86II::MO_PIC_BASE_OFFSET;
-
- // Direct static reference to label.
- return X86II::MO_NO_FLAG;
+unsigned char X86Subtarget::classifyBlockAddressReference() const {
+ return classifyLocalReference(nullptr);
}
/// Classify a global variable reference for the current subtarget according to
/// how we should reference it in a non-pcrel context.
-unsigned char X86Subtarget::
-ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
- // DLLImport only exists on windows, it is implemented as a load from a
- // DLLIMPORT stub.
- if (GV->hasDLLImportStorageClass())
- return X86II::MO_DLLIMPORT;
+unsigned char
+X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
+ return classifyGlobalReference(GV, *GV->getParent());
+}
- bool isDef = GV->isStrongDefinitionForLinker();
-
- // X86-64 in PIC mode.
- if (isPICStyleRIPRel()) {
- // Large model never uses stubs.
- if (TM.getCodeModel() == CodeModel::Large)
- return X86II::MO_NO_FLAG;
-
- if (isTargetDarwin()) {
- // If symbol visibility is hidden, the extra load is not needed if
- // target is x86-64 or the symbol is definitely defined in the current
- // translation unit.
- if (GV->hasDefaultVisibility() && !isDef)
- return X86II::MO_GOTPCREL;
- } else if (!isTargetWin64()) {
- assert(isTargetELF() && "Unknown rip-relative target");
-
- // Extra load is needed for all externally visible.
- if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
- return X86II::MO_GOTPCREL;
- }
+unsigned char
+X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
+ // 64 bits can use %rip addressing for anything local.
+ if (is64Bit())
+ return X86II::MO_NO_FLAG;
+ // If this is for a position dependent executable, the static linker can
+ // figure it out.
+ if (!isPositionIndependent())
return X86II::MO_NO_FLAG;
- }
- if (isPICStyleGOT()) { // 32-bit ELF targets.
- // Extra load is needed for all externally visible.
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return X86II::MO_GOTOFF;
- return X86II::MO_GOT;
- }
+ // The COFF dynamic linker just patches the executable sections.
+ if (isTargetCOFF())
+ return X86II::MO_NO_FLAG;
- if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
- // Determine whether we have a stub reference and/or whether the reference
- // is relative to the PIC base or not.
+ if (isTargetDarwin()) {
+ // 32 bit macho has no relocation for a-b if a is undefined, even if
+ // b is in the section that is being relocated.
+ // This means we have to use a load even for GVs that are known to be
+ // local to the dso.
+ if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (isDef)
- return X86II::MO_PIC_BASE_OFFSET;
+ return X86II::MO_PIC_BASE_OFFSET;
+ }
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+ return X86II::MO_GOTOFF;
+}
- // If symbol visibility is hidden, we have a stub for common symbol
- // references and external declarations.
- if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
- // Hidden $non_lazy_ptr reference.
- return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
- }
+unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
+ const Module &M) const {
+ // Large model never uses stubs.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return X86II::MO_NO_FLAG;
- // Otherwise, no stub.
- return X86II::MO_PIC_BASE_OFFSET;
+ // Absolute symbols can be referenced directly.
+ if (GV) {
+ if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
+ // See if we can use the 8-bit immediate form. Note that some instructions
+ // will sign extend the immediate operand, so to be conservative we only
+ // accept the range [0,128).
+ if (CR->getUnsignedMax().ult(128))
+ return X86II::MO_ABS8;
+ else
+ return X86II::MO_NO_FLAG;
+ }
}
- if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
- // Determine whether we have a stub reference.
+ if (TM.shouldAssumeDSOLocal(M, GV))
+ return classifyLocalReference(GV);
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (isDef)
- return X86II::MO_NO_FLAG;
+ if (isTargetCOFF())
+ return X86II::MO_DLLIMPORT;
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return X86II::MO_DARWIN_NONLAZY;
+ if (is64Bit())
+ return X86II::MO_GOTPCREL;
- // Otherwise, no stub.
- return X86II::MO_NO_FLAG;
+ if (isTargetDarwin()) {
+ if (!isPositionIndependent())
+ return X86II::MO_DARWIN_NONLAZY;
+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
}
- // Direct static reference to global.
- return X86II::MO_NO_FLAG;
+ return X86II::MO_GOT;
+}
+
+unsigned char
+X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {
+ return classifyGlobalFunctionReference(GV, *GV->getParent());
}
+unsigned char
+X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
+ const Module &M) const {
+ if (TM.shouldAssumeDSOLocal(M, GV))
+ return X86II::MO_NO_FLAG;
-/// This function returns the name of a function which has an interface like
-/// the non-standard bzero function, if such a function exists on the
-/// current subtarget and it is considered preferable over memset with zero
-/// passed as the second argument. Otherwise it returns null.
-const char *X86Subtarget::getBZeroEntry() const {
- // Darwin 10 has a __bzero entry point for this purpose.
- if (getTargetTriple().isMacOSX() &&
- !getTargetTriple().isMacOSXVersionLT(10, 6))
- return "__bzero";
+ if (isTargetCOFF()) {
+ assert(GV->hasDLLImportStorageClass() &&
+ "shouldAssumeDSOLocal gave inconsistent answer");
+ return X86II::MO_DLLIMPORT;
+ }
- return nullptr;
-}
+ const Function *F = dyn_cast_or_null<Function>(GV);
+
+ if (isTargetELF()) {
+ if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
+ // According to psABI, PLT stub clobbers XMM8-XMM15.
+ // In Regcall calling convention those registers are used for passing
+ // parameters. Thus we need to prevent lazy binding in Regcall.
+ return X86II::MO_GOTPCREL;
+ if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())
+ return X86II::MO_GOTPCREL;
+ return X86II::MO_PLT;
+ }
+
+ if (is64Bit()) {
+ if (F && F->hasFnAttribute(Attribute::NonLazyBind))
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ return X86II::MO_GOTPCREL;
+ return X86II::MO_NO_FLAG;
+ }
-bool X86Subtarget::hasSinCos() const {
- return getTargetTriple().isMacOSX() &&
- !getTargetTriple().isMacOSXVersionLT(10, 9) &&
- is64Bit();
+ return X86II::MO_NO_FLAG;
}
/// Return true if the subtarget allows calls to immediate address.
-bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+bool X86Subtarget::isLegalToCallImmediateAddr() const {
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
// the following check for Win32 should be removed.
@@ -197,6 +207,13 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+sahf";
}
+ // OpenBSD/amd64 defaults to -mretpoline
+ if (isTargetOpenBSD() && In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+retpoline," + FullFS;
+ else
+ FullFS = "+retpoline";
+ }
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
@@ -227,33 +244,49 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
+ // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
stackAlignment = StackAlignOverride;
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
- In64BitMode)
+ isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
+
+ // Some CPUs have more overhead for gather. The specified overhead is relative
+ // to the Load operation. "2" is the number provided by Intel architects. This
+ // parameter is used for cost estimation of Gather Op and comparison with
+ // other alternatives.
+ // TODO: Remove the explicit hasAVX512()? That would mean we would only
+ // enable gather with a -march.
+ if (hasAVX512() || (hasAVX2() && hasFastGather()))
+ GatherOverhead = 2;
+ if (hasAVX512())
+ ScatterOverhead = 2;
}
void X86Subtarget::initializeEnvironment() {
X86SSELevel = NoSSE;
X863DNowLevel = NoThreeDNow;
+ HasX87 = false;
HasCMov = false;
HasX86_64 = false;
HasPOPCNT = false;
HasSSE4A = false;
HasAES = false;
+ HasVAES = false;
HasFXSR = false;
HasXSAVE = false;
HasXSAVEOPT = false;
HasXSAVEC = false;
HasXSAVES = false;
HasPCLMUL = false;
+ HasVPCLMULQDQ = false;
+ HasGFNI = false;
HasFMA = false;
HasFMA4 = false;
HasXOP = false;
HasTBM = false;
+ HasLWP = false;
HasMOVBE = false;
HasRDRAND = false;
HasF16C = false;
@@ -261,39 +294,67 @@ void X86Subtarget::initializeEnvironment() {
HasLZCNT = false;
HasBMI = false;
HasBMI2 = false;
+ HasVBMI = false;
+ HasVBMI2 = false;
+ HasIFMA = false;
HasRTM = false;
- HasHLE = false;
HasERI = false;
HasCDI = false;
HasPFI = false;
HasDQI = false;
+ HasVPOPCNTDQ = false;
HasBWI = false;
HasVLX = false;
HasADX = false;
HasPKU = false;
+ HasVNNI = false;
+ HasBITALG = false;
HasSHA = false;
+ HasPREFETCHWT1 = false;
HasPRFCHW = false;
HasRDSEED = false;
HasLAHFSAHF = false;
+ HasMWAITX = false;
+ HasCLZERO = false;
HasMPX = false;
- IsBTMemSlow = false;
+ HasSHSTK = false;
+ HasIBT = false;
+ HasSGX = false;
+ HasCLFLUSHOPT = false;
+ HasCLWB = false;
+ UseRetpoline = false;
+ UseRetpolineExternalThunk = false;
+ IsPMULLDSlow = false;
IsSHLDSlow = false;
IsUAMem16Slow = false;
IsUAMem32Slow = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
+ HasFastVariableShuffle = false;
+ HasFastPartialYMMorZMMWrite = false;
+ HasFastGather = false;
+ HasFastScalarFSQRT = false;
+ HasFastVectorFSQRT = false;
+ HasFastLZCNT = false;
+ HasFastSHLDRotate = false;
+ HasMacroFusion = false;
+ HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
- CallRegIndirect = false;
+ SlowTwoMemOps = false;
LEAUsesAG = false;
SlowLEA = false;
+ Slow3OpsLEA = false;
SlowIncDec = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
UseSoftFloat = false;
+ X86ProcFamily = Others;
+ GatherOverhead = 1024;
+ ScatterOverhead = 1024;
}
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -303,41 +364,55 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const X86TargetMachine &TM,
+X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const X86TargetMachine &TM,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
- PICStyle(PICStyles::None), TargetTriple(TT),
+ PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
- if (TM.getRelocationModel() == Reloc::Static) {
- // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ if (!isPositionIndependent())
setPICStyle(PICStyles::None);
- } else if (is64Bit()) {
- // PIC in 64 bit mode is always rip-rel.
+ else if (is64Bit())
setPICStyle(PICStyles::RIPRel);
- } else if (isTargetCOFF()) {
+ else if (isTargetCOFF())
setPICStyle(PICStyles::None);
- } else if (isTargetDarwin()) {
- if (TM.getRelocationModel() == Reloc::PIC_)
- setPICStyle(PICStyles::StubPIC);
- else {
- assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
- setPICStyle(PICStyles::StubDynamicNoPIC);
- }
- } else if (isTargetELF()) {
+ else if (isTargetDarwin())
+ setPICStyle(PICStyles::StubPIC);
+ else if (isTargetELF())
setPICStyle(PICStyles::GOT);
- }
+
+ CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
+ Legalizer.reset(new X86LegalizerInfo(*this, TM));
+
+ auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
+ RegBankInfo.reset(RBI);
+ InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
+}
+
+const CallLowering *X86Subtarget::getCallLowering() const {
+ return CallLoweringInfo.get();
+}
+
+const InstructionSelector *X86Subtarget::getInstructionSelector() const {
+ return InstSelector.get();
+}
+
+const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
+ return Legalizer.get();
+}
+
+const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
+ return RegBankInfo.get();
}
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
-
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 13d1026dcaa..e6b486cdbeb 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h
@@ -18,32 +18,53 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include <memory>
#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"
namespace llvm {
+
class GlobalValue;
-class StringRef;
-class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {
+
enum Style {
- StubPIC, // Used on i386-darwin in -fPIC mode.
- StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
- GOT, // Used on many 32-bit unices in -fPIC mode.
- RIPRel, // Used on X86-64 when not in -static mode.
- None // Set when in -static mode (not PIC or DynamicNoPIC mode).
+ StubPIC, // Used on i386-darwin in pic mode.
+ GOT, // Used on 32-bit ELF when in pic mode.
+ RIPRel, // Used on X86-64 when in pic mode.
+ None // Set when not in pic mode.
};
-}
+
+} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
+public:
+ enum X86ProcFamilyEnum {
+ Others,
+ IntelAtom,
+ IntelSLM,
+ IntelGLM,
+ IntelHaswell,
+ IntelBroadwell,
+ IntelSkylake,
+ IntelKNL,
+ IntelSKX,
+ IntelCannonlake,
+ IntelIcelake,
+ };
protected:
enum X86SSEEnum {
@@ -54,22 +75,23 @@ protected:
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
};
- enum X86ProcFamilyEnum {
- Others, IntelAtom, IntelSLM
- };
-
/// X86 processor family: Intel Atom, and others
X86ProcFamilyEnum X86ProcFamily;
/// Which PIC style to use
PICStyles::Style PICStyle;
+ const TargetMachine &TM;
+
/// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
/// MMX, 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
+ /// True if the processor supports X87 instructions.
+ bool HasX87;
+
/// True if this processor has conditional move instructions
/// (generally pentium pro+).
bool HasCMov;
@@ -85,21 +107,29 @@ protected:
/// Target has AES instructions
bool HasAES;
+ bool HasVAES;
/// Target has FXSAVE/FXRESTOR instructions
bool HasFXSR;
/// Target has XSAVE instructions
bool HasXSAVE;
+
/// Target has XSAVEOPT instructions
bool HasXSAVEOPT;
+
/// Target has XSAVEC instructions
bool HasXSAVEC;
+
/// Target has XSAVES instructions
bool HasXSAVES;
/// Target has carry-less multiplication
bool HasPCLMUL;
+ bool HasVPCLMULQDQ;
+
+ /// Target has Galois Field Arithmetic instructions
+ bool HasGFNI;
/// Target has 3-operand fused multiply-add
bool HasFMA;
@@ -113,6 +143,9 @@ protected:
/// Target has TBM instructions.
bool HasTBM;
+ /// Target has LWP instructions
+ bool HasLWP;
+
/// True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -134,12 +167,18 @@ protected:
/// Processor has BMI2 instructions.
bool HasBMI2;
+ /// Processor has VBMI instructions.
+ bool HasVBMI;
+
+ /// Processor has VBMI2 instructions.
+ bool HasVBMI2;
+
+ /// Processor has Integer Fused Multiply Add
+ bool HasIFMA;
+
/// Processor has RTM instructions.
bool HasRTM;
- /// Processor has HLE.
- bool HasHLE;
-
/// Processor has ADX instructions.
bool HasADX;
@@ -155,12 +194,22 @@ protected:
/// Processor has LAHF/SAHF instructions.
bool HasLAHFSAHF;
- /// True if BT (bit test) of memory instructions are slow.
- bool IsBTMemSlow;
+ /// Processor has MONITORX/MWAITX instructions.
+ bool HasMWAITX;
+
+ /// Processor has Cache Line Zero instruction
+ bool HasCLZERO;
+
+ /// Processor has Prefetch with intent to Write instruction
+ bool HasPREFETCHWT1;
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
+ /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
+ /// PMULUDQ.
+ bool IsPMULLDSlow;
+
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow;
@@ -179,21 +228,53 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
+ /// True if it's preferable to combine to a single shuffle using a variable
+ /// mask over multiple fixed shuffles.
+ bool HasFastVariableShuffle;
+
+ /// True if there is no performance penalty to writing only the lower parts
+ /// of a YMM or ZMM register without clearing the upper part.
+ bool HasFastPartialYMMorZMMWrite;
+
+ /// True if gather is reasonably fast. This is true for Skylake client and
+ /// all AVX-512 CPUs.
+ bool HasFastGather;
+
+ /// True if hardware SQRTSS instruction is at least as fast (latency) as
+ /// RSQRTSS followed by a Newton-Raphson iteration.
+ bool HasFastScalarFSQRT;
+
+ /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
+ /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
+ bool HasFastVectorFSQRT;
+
/// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32;
- /// True if 16-bit divides are significantly faster than
+ /// True if 32-bit divides are significantly faster than
/// 64-bit divisions and should be used when possible.
bool HasSlowDivide64;
+ /// True if LZCNT instruction is fast.
+ bool HasFastLZCNT;
+
+ /// True if SHLD based rotate is fast.
+ bool HasFastSHLDRotate;
+
+ /// True if the processor supports macrofusion.
+ bool HasMacroFusion;
+
+ /// True if the processor has enhanced REP MOVSB/STOSB.
+ bool HasERMSB;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
- /// True if the Calls with memory reference should be converted
- /// to a register-based indirect call.
- bool CallRegIndirect;
+ /// True if two memory operand instructions should use a temporary register
+ /// instead.
+ bool SlowTwoMemOps;
/// True if the LEA instruction inputs have to be ready at address generation
/// (AG) time.
@@ -202,6 +283,11 @@ protected:
/// True if the LEA instruction with certain arguments is slow
bool SlowLEA;
+ /// True if the LEA instruction has all three source operands: base, index,
+ /// and offset or if the LEA instruction uses base and index registers where
+ /// the base is EBP, RBP, or R13.
+ bool Slow3OpsLEA;
+
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
@@ -214,6 +300,9 @@ protected:
/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI;
+ /// Processor has AVX-512 population count Instructions
+ bool HasVPOPCNTDQ;
+
/// Processor has AVX-512 Doubleword and Quadword instructions
bool HasDQI;
@@ -226,9 +315,40 @@ protected:
/// Processor has PKU extensions
bool HasPKU;
- /// Processot supports MPX - Memory Protection Extensions
+ /// Processor has AVX-512 Vector Neural Network Instructions
+ bool HasVNNI;
+
+ /// Processor has AVX-512 Bit Algorithms instructions
+ bool HasBITALG;
+
+ /// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
+ /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
+ /// using Shadow Stack
+ bool HasSHSTK;
+
+ /// Processor supports CET IBT - Control-Flow Enforcement Technology
+ /// using Indirect Branch Tracking
+ bool HasIBT;
+
+ /// Processor has Software Guard Extensions
+ bool HasSGX;
+
+ /// Processor supports Flush Cache Line instruction
+ bool HasCLFLUSHOPT;
+
+ /// Processor supports Cache Line Write Back instruction
+ bool HasCLWB;
+
+ /// Use a retpoline thunk rather than indirect calls to block speculative
+ /// execution.
+ bool UseRetpoline;
+
+ /// When using a retpoline thunk, call an externally provided thunk rather
+ /// than emitting one inside the compiler.
+ bool UseRetpolineExternalThunk;
+
/// Use software floating point for code generation.
bool UseSoftFloat;
@@ -246,8 +366,13 @@ protected:
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
-private:
+ /// GlobalISel related APIs.
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+private:
/// Override the stack alignment.
unsigned StackAlignOverride;
@@ -260,6 +385,10 @@ private:
/// True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
+ /// Contains the overhead of gather/scatter instructions, relative to a load.
+ int GatherOverhead;
+ int ScatterOverhead;
+
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
@@ -271,19 +400,23 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM, unsigned StackAlignOverride);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
const X86FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
+
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
@@ -301,12 +434,19 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ /// Methods used by Global ISel
+ const CallLowering *getCallLowering() const override;
+ const InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
+
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
+
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
@@ -336,6 +476,7 @@ public:
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+ bool hasX87() const { return HasX87; }
bool hasCMov() const { return HasCMov; }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
@@ -354,19 +495,23 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAES() const { return HasAES; }
+ bool hasVAES() const { return HasVAES; }
bool hasFXSR() const { return HasFXSR; }
bool hasXSAVE() const { return HasXSAVE; }
bool hasXSAVEOPT() const { return HasXSAVEOPT; }
bool hasXSAVEC() const { return HasXSAVEC; }
bool hasXSAVES() const { return HasXSAVES; }
bool hasPCLMUL() const { return HasPCLMUL; }
+ bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
+ bool hasGFNI() const { return HasGFNI; }
// Prefer FMA4 to FMA - it's better for commutation/memory folding and
// has equal or better performance on all supported targets.
// NOTE(review): the added hasFMA() below no longer excludes FMA4, so this
// preference is not enforced by these accessors - confirm where it is applied.
- bool hasFMA() const { return HasFMA && !HasFMA4; }
+ bool hasFMA() const { return HasFMA; }
bool hasFMA4() const { return HasFMA4; }
- bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
+ bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
+ bool hasLWP() const { return HasLWP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
@@ -374,58 +519,108 @@ public:
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
+ bool hasVBMI() const { return HasVBMI; }
+ bool hasVBMI2() const { return HasVBMI2; }
+ bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
- bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
- bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
+ bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
+ bool hasSSEPrefetch() const {
+ // We implicitly enable these when we have a write prefix supporting cache
+ // level OR if we have prfchw, but don't already have a read prefetch from
+ // 3dnow.
+ return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
+ }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
- bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool hasMWAITX() const { return HasMWAITX; }
+ bool hasCLZERO() const { return HasCLZERO; }
bool isSHLDSlow() const { return IsSHLDSlow; }
+ bool isPMULLDSlow() const { return IsPMULLDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
+ int getGatherOverhead() const { return GatherOverhead; }
+ int getScatterOverhead() const { return ScatterOverhead; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasFastVariableShuffle() const {
+ return HasFastVariableShuffle;
+ }
+ bool hasFastPartialYMMorZMMWrite() const {
+ return HasFastPartialYMMorZMMWrite;
+ }
+ bool hasFastGather() const { return HasFastGather; }
+ bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
+ bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
+ bool hasFastLZCNT() const { return HasFastLZCNT; }
+ bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasMacroFusion() const { return HasMacroFusion; }
+ bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
- bool callRegIndirect() const { return CallRegIndirect; }
+ bool slowTwoMemOps() const { return SlowTwoMemOps; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
+ bool slow3OpsLEA() const { return Slow3OpsLEA; }
bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
+ bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
+ bool hasVNNI() const { return HasVNNI; }
+ bool hasBITALG() const { return HasBITALG; }
bool hasMPX() const { return HasMPX; }
+ bool hasSHSTK() const { return HasSHSTK; }
+ bool hasIBT() const { return HasIBT; }
+ bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
+ bool hasCLWB() const { return HasCLWB; }
+ bool useRetpoline() const { return UseRetpoline; }
+ bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+ bool isXRaySupported() const override { return is64Bit(); }
+
+ X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
+
+ /// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
+ /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ /// no-sse2). There isn't any reason to disable it if the target processor
+ /// supports it.
+ bool hasMFence() const { return hasSSE2() || is64Bit(); }
+
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
+ bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); }
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
- bool isTargetPS4() const { return TargetTriple.isPS4(); }
+ bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
+ bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
@@ -455,15 +650,10 @@ public:
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
- bool isTargetWin64() const {
- return In64BitMode && TargetTriple.isOSWindows();
- }
+ bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
- bool isTargetWin32() const {
- return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC());
- }
+ bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
- bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
@@ -471,19 +661,14 @@ public:
return PICStyle == PICStyles::StubPIC;
}
- bool isPICStyleStubNoDynamic() const {
- return PICStyle == PICStyles::StubDynamicNoPIC;
- }
- bool isPICStyleStubAny() const {
- return PICStyle == PICStyles::StubDynamicNoPIC ||
- PICStyle == PICStyles::StubPIC;
- }
+ bool isPositionIndependent() const { return TM.isPositionIndependent(); }
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
// On Win64, all these conventions just use the default convention.
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::Swift:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_ThisCall:
@@ -491,7 +676,7 @@ public:
case CallingConv::Intel_OCL_BI:
return isTargetWin64();
// This convention allows using the Win64 convention on other targets.
- case CallingConv::X86_64_Win64:
+ case CallingConv::Win64:
return true;
// This convention allows using the SysV convention on Windows targets.
case CallingConv::X86_64_SysV:
@@ -502,33 +687,36 @@ public:
}
}
- /// ClassifyGlobalReference - Classify a global variable reference for the
- /// current subtarget according to how we should reference it in a non-pcrel
- /// context.
- unsigned char ClassifyGlobalReference(const GlobalValue *GV,
- const TargetMachine &TM)const;
+ /// Classify a global variable reference for the current subtarget according
+ /// to how we should reference it in a non-pcrel context.
+ unsigned char classifyLocalReference(const GlobalValue *GV) const;
+
+ unsigned char classifyGlobalReference(const GlobalValue *GV,
+ const Module &M) const;
+ unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+
+ /// Classify a global function reference for the current subtarget.
+ unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
+ const Module &M) const;
+ unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
/// Classify a blockaddress reference for the current subtarget according to
/// how we should reference it in a non-pcrel context.
- unsigned char ClassifyBlockAddressReference() const;
+ unsigned char classifyBlockAddressReference() const;
/// Return true if the subtarget allows calls to immediate address.
- bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
-
- /// This function returns the name of a function which has an interface
- /// like the non-standard bzero function, if such a function exists on
- /// the current subtarget and it is considered prefereable over
- /// memset with zero passed as the second argument. Otherwise it
- /// returns null.
- const char *getBZeroEntry() const;
+ bool isLegalToCallImmediateAddr() const;
- /// This function returns true if the target has sincos() routine in its
- /// compiler runtime or math libraries.
- bool hasSinCos() const;
+ /// If we are using retpolines, we need to expand indirectbr to avoid it
+ /// lowering to an actual indirect jump.
+ bool enableIndirectBrExpand() const override { return useRetpoline(); }
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
+ // TODO: Update the regression tests and return true.
+ bool supportPrintSchedInfo() const override { return false; }
+
bool enableEarlyIfConversion() const override;
/// Return the instruction itineraries based on the subtarget selection.
@@ -539,8 +727,10 @@ public:
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
+
+ bool enableAdvancedRASplitCost() const override { return true; }
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H