src - OpenBSD base system

diff options


context:
space:
mode:

author	Philip Guenther <guenther@cvs.openbsd.org>	2018-12-30 23:08:06 +0000
committer	Philip Guenther <guenther@cvs.openbsd.org>	2018-12-30 23:08:06 +0000
commit	5a7ce81e74827aac04443e86ccb158d1121c7d8a (patch)
tree	6a70196e1900e50492373a957e44c197a05a7e45 /gnu/llvm/lib
parent	3d1ec8445e872d8b6365f39a103b51fdd64c20d8 (diff)

Turn on -mretpoline by default in clang on amd64, but turn it off
explicitly in SMALL_KERNEL kernel builds.

tweaks from jsg@ and tb@
ok deraadt@ kettenis@

Diffstat (limited to 'gnu/llvm/lib')

-rw-r--r--

gnu/llvm/lib/Target/X86/X86Subtarget.cpp

315

-rw-r--r--

gnu/llvm/lib/Target/X86/X86Subtarget.h

324

2 files changed, 452 insertions, 187 deletions

diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index 8ef08c960f0..195576bf546 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp

@@ -11,19 +11,28 @@

//===----------------------------------------------------------------------===//

+#include "X86.h"

+#include "X86CallLowering.h"

+#include "X86LegalizerInfo.h"

+#include "X86RegisterBankInfo.h"

#include "X86Subtarget.h"

-#include "X86InstrInfo.h"

+#include "MCTargetDesc/X86BaseInfo.h"

#include "X86TargetMachine.h"

+#include "llvm/ADT/Triple.h"

+#include "llvm/CodeGen/GlobalISel/CallLowering.h"

+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"

#include "llvm/IR/Attributes.h"

+#include "llvm/IR/ConstantRange.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GlobalValue.h"

+#include "llvm/Support/Casting.h"

+#include "llvm/Support/CodeGen.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

-#include "llvm/Support/Host.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Target/TargetMachine.h"

-#include "llvm/Target/TargetOptions.h"

#if defined(_MSC_VER)

#include <intrin.h>

@@ -46,126 +55,127 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,

/// Classify a blockaddress reference for the current subtarget according to how

/// we should reference it in a non-pcrel context.

-unsigned char X86Subtarget::ClassifyBlockAddressReference() const {

- if (isPICStyleGOT()) // 32-bit ELF targets.

- return X86II::MO_GOTOFF;

- if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.

- return X86II::MO_PIC_BASE_OFFSET;

- // Direct static reference to label.

- return X86II::MO_NO_FLAG;

+unsigned char X86Subtarget::classifyBlockAddressReference() const {

+ return classifyLocalReference(nullptr);

}

/// Classify a global variable reference for the current subtarget according to

/// how we should reference it in a non-pcrel context.

-unsigned char X86Subtarget::

-ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {

- // DLLImport only exists on windows, it is implemented as a load from a

- // DLLIMPORT stub.

- if (GV->hasDLLImportStorageClass())

- return X86II::MO_DLLIMPORT;

+unsigned char

+X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {

+ return classifyGlobalReference(GV, *GV->getParent());

- bool isDef = GV->isStrongDefinitionForLinker();

- // X86-64 in PIC mode.

- if (isPICStyleRIPRel()) {

- // Large model never uses stubs.

- if (TM.getCodeModel() == CodeModel::Large)

- return X86II::MO_NO_FLAG;

- if (isTargetDarwin()) {

- // If symbol visibility is hidden, the extra load is not needed if

- // target is x86-64 or the symbol is definitely defined in the current

- // translation unit.

- if (GV->hasDefaultVisibility() && !isDef)

- return X86II::MO_GOTPCREL;

- } else if (!isTargetWin64()) {

- assert(isTargetELF() && "Unknown rip-relative target");

- // Extra load is needed for all externally visible.

- if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())

- return X86II::MO_GOTPCREL;

- }

+unsigned char

+X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {

+ // 64 bits can use %rip addressing for anything local.

+ if (is64Bit())

+ return X86II::MO_NO_FLAG;

+ // If this is for a position dependent executable, the static linker can

+ // figure it out.

+ if (!isPositionIndependent())

return X86II::MO_NO_FLAG;

- }

- if (isPICStyleGOT()) { // 32-bit ELF targets.

- // Extra load is needed for all externally visible.

- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())

- return X86II::MO_GOTOFF;

- return X86II::MO_GOT;

- }

+ // The COFF dynamic linker just patches the executable sections.

+ if (isTargetCOFF())

+ return X86II::MO_NO_FLAG;

- if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.

- // Determine whether we have a stub reference and/or whether the reference

- // is relative to the PIC base or not.

+ if (isTargetDarwin()) {

+ // 32 bit macho has no relocation for a-b if a is undefined, even if

+ // b is in the section that is being relocated.

+ // This means we have to use a load even for GVs that are known to be

+ // local to the dso.

+ if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))

+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;

- // If this is a strong reference to a definition, it is definitely not

- // through a stub.

- if (isDef)

- return X86II::MO_PIC_BASE_OFFSET;

+ return X86II::MO_PIC_BASE_OFFSET;

+ }

- // Unless we have a symbol with hidden visibility, we have to go through a

- // normal $non_lazy_ptr stub because this symbol might be resolved late.

- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.

- return X86II::MO_DARWIN_NONLAZY_PIC_BASE;

+ return X86II::MO_GOTOFF;

- // If symbol visibility is hidden, we have a stub for common symbol

- // references and external declarations.

- if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {

- // Hidden $non_lazy_ptr reference.

- return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;

- }

+unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,

+ const Module &M) const {

+ // Large model never uses stubs.

+ if (TM.getCodeModel() == CodeModel::Large)

+ return X86II::MO_NO_FLAG;

- // Otherwise, no stub.

- return X86II::MO_PIC_BASE_OFFSET;

+ // Absolute symbols can be referenced directly.

+ if (GV) {

+ if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {

+ // See if we can use the 8-bit immediate form. Note that some instructions

+ // will sign extend the immediate operand, so to be conservative we only

+ // accept the range [0,128).

+ if (CR->getUnsignedMax().ult(128))

+ return X86II::MO_ABS8;

+ else

+ return X86II::MO_NO_FLAG;

+ }

}

- if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.

- // Determine whether we have a stub reference.

+ if (TM.shouldAssumeDSOLocal(M, GV))

+ return classifyLocalReference(GV);

- // If this is a strong reference to a definition, it is definitely not

- // through a stub.

- if (isDef)

- return X86II::MO_NO_FLAG;

+ if (isTargetCOFF())

+ return X86II::MO_DLLIMPORT;

- // Unless we have a symbol with hidden visibility, we have to go through a

- // normal $non_lazy_ptr stub because this symbol might be resolved late.

- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.

- return X86II::MO_DARWIN_NONLAZY;

+ if (is64Bit())

+ return X86II::MO_GOTPCREL;

- // Otherwise, no stub.

- return X86II::MO_NO_FLAG;

+ if (isTargetDarwin()) {

+ if (!isPositionIndependent())

+ return X86II::MO_DARWIN_NONLAZY;

+ return X86II::MO_DARWIN_NONLAZY_PIC_BASE;

}

- // Direct static reference to global.

- return X86II::MO_NO_FLAG;

+ return X86II::MO_GOT;

+unsigned char

+X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {

+ return classifyGlobalFunctionReference(GV, *GV->getParent());

}

+unsigned char

+X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,

+ const Module &M) const {

+ if (TM.shouldAssumeDSOLocal(M, GV))

+ return X86II::MO_NO_FLAG;

-/// This function returns the name of a function which has an interface like

-/// the non-standard bzero function, if such a function exists on the

-/// current subtarget and it is considered preferable over memset with zero

-/// passed as the second argument. Otherwise it returns null.

-const char *X86Subtarget::getBZeroEntry() const {

- // Darwin 10 has a __bzero entry point for this purpose.

- if (getTargetTriple().isMacOSX() &&

- !getTargetTriple().isMacOSXVersionLT(10, 6))

- return "__bzero";

+ if (isTargetCOFF()) {

+ assert(GV->hasDLLImportStorageClass() &&

+ "shouldAssumeDSOLocal gave inconsistent answer");

+ return X86II::MO_DLLIMPORT;

+ }

- return nullptr;

+ const Function *F = dyn_cast_or_null<Function>(GV);

+ if (isTargetELF()) {

+ if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))

+ // According to psABI, PLT stub clobbers XMM8-XMM15.

+ // In Regcall calling convention those registers are used for passing

+ // parameters. Thus we need to prevent lazy binding in Regcall.

+ return X86II::MO_GOTPCREL;

+ if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())

+ return X86II::MO_GOTPCREL;

+ return X86II::MO_PLT;

+ }

+ if (is64Bit()) {

+ if (F && F->hasFnAttribute(Attribute::NonLazyBind))

+ // If the function is marked as non-lazy, generate an indirect call

+ // which loads from the GOT directly. This avoids runtime overhead

+ // at the cost of eager binding (and one extra byte of encoding).

+ return X86II::MO_GOTPCREL;

+ return X86II::MO_NO_FLAG;

+ }

-bool X86Subtarget::hasSinCos() const {

- return getTargetTriple().isMacOSX() &&

- !getTargetTriple().isMacOSXVersionLT(10, 9) &&

- is64Bit();

+ return X86II::MO_NO_FLAG;

}

/// Return true if the subtarget allows calls to immediate address.

-bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {

+bool X86Subtarget::isLegalToCallImmediateAddr() const {

// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32

// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,

// the following check for Win32 should be removed.

@@ -197,6 +207,13 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {

FullFS = "+sahf";

}

+ // OpenBSD/amd64 defaults to -mretpoline

+ if (isTargetOpenBSD() && In64BitMode) {

+ if (!FullFS.empty())

+ FullFS = "+retpoline," + FullFS;

+ else

+ FullFS = "+retpoline";

+ }

// Parse features string and set the CPU.

ParseSubtargetFeatures(CPUName, FullFS);

@@ -227,33 +244,49 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {

assert((!In64BitMode || HasX86_64) &&

"64-bit code requested on a subtarget that doesn't support it!");

- // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both

+ // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both

// 32 and 64 bit) and for all 64-bit targets.

if (StackAlignOverride)

stackAlignment = StackAlignOverride;

else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||

- In64BitMode)

+ isTargetKFreeBSD() || In64BitMode)

stackAlignment = 16;

+ // Some CPUs have more overhead for gather. The specified overhead is relative

+ // to the Load operation. "2" is the number provided by Intel architects. This

+ // parameter is used for cost estimation of Gather Op and comparison with

+ // other alternatives.

+ // TODO: Remove the explicit hasAVX512()? That would mean we would only

+ // enable gather with a -march.

+ if (hasAVX512() || (hasAVX2() && hasFastGather()))

+ GatherOverhead = 2;

+ if (hasAVX512())

+ ScatterOverhead = 2;

}

void X86Subtarget::initializeEnvironment() {

X86SSELevel = NoSSE;

X863DNowLevel = NoThreeDNow;

+ HasX87 = false;

HasCMov = false;

HasX86_64 = false;

HasPOPCNT = false;

HasSSE4A = false;

HasAES = false;

+ HasVAES = false;

HasFXSR = false;

HasXSAVE = false;

HasXSAVEOPT = false;

HasXSAVEC = false;

HasXSAVES = false;

HasPCLMUL = false;

+ HasVPCLMULQDQ = false;

+ HasGFNI = false;

HasFMA = false;

HasFMA4 = false;

HasXOP = false;

HasTBM = false;

+ HasLWP = false;

HasMOVBE = false;

HasRDRAND = false;

HasF16C = false;

@@ -261,39 +294,67 @@ void X86Subtarget::initializeEnvironment() {

HasLZCNT = false;

HasBMI = false;

HasBMI2 = false;

+ HasVBMI = false;

+ HasVBMI2 = false;

+ HasIFMA = false;

HasRTM = false;

- HasHLE = false;

HasERI = false;

HasCDI = false;

HasPFI = false;

HasDQI = false;

+ HasVPOPCNTDQ = false;

HasBWI = false;

HasVLX = false;

HasADX = false;

HasPKU = false;

+ HasVNNI = false;

+ HasBITALG = false;

HasSHA = false;

+ HasPREFETCHWT1 = false;

HasPRFCHW = false;

HasRDSEED = false;

HasLAHFSAHF = false;

+ HasMWAITX = false;

+ HasCLZERO = false;

HasMPX = false;

- IsBTMemSlow = false;

+ HasSHSTK = false;

+ HasIBT = false;

+ HasSGX = false;

+ HasCLFLUSHOPT = false;

+ HasCLWB = false;

+ UseRetpoline = false;

+ UseRetpolineExternalThunk = false;

+ IsPMULLDSlow = false;

IsSHLDSlow = false;

IsUAMem16Slow = false;

IsUAMem32Slow = false;

HasSSEUnalignedMem = false;

HasCmpxchg16b = false;

UseLeaForSP = false;

+ HasFastVariableShuffle = false;

+ HasFastPartialYMMorZMMWrite = false;

+ HasFastGather = false;

+ HasFastScalarFSQRT = false;

+ HasFastVectorFSQRT = false;

+ HasFastLZCNT = false;

+ HasFastSHLDRotate = false;

+ HasMacroFusion = false;

+ HasERMSB = false;

HasSlowDivide32 = false;

HasSlowDivide64 = false;

PadShortFunctions = false;

- CallRegIndirect = false;

+ SlowTwoMemOps = false;

LEAUsesAG = false;

SlowLEA = false;

+ Slow3OpsLEA = false;

SlowIncDec = false;

stackAlignment = 4;

// FIXME: this is a known good value for Yonah. How about others?

MaxInlineSizeThreshold = 128;

UseSoftFloat = false;

+ X86ProcFamily = Others;

+ GatherOverhead = 1024;

+ ScatterOverhead = 1024;

}

X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,

@@ -303,41 +364,55 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,

return *this;

}

-X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,

- const std::string &FS, const X86TargetMachine &TM,

+X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,

+ const X86TargetMachine &TM,

unsigned StackAlignOverride)

: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),

- PICStyle(PICStyles::None), TargetTriple(TT),

+ PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),

StackAlignOverride(StackAlignOverride),

In64BitMode(TargetTriple.getArch() == Triple::x86_64),

In32BitMode(TargetTriple.getArch() == Triple::x86 &&

TargetTriple.getEnvironment() != Triple::CODE16),

In16BitMode(TargetTriple.getArch() == Triple::x86 &&

TargetTriple.getEnvironment() == Triple::CODE16),

- TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),

- TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {

+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),

+ FrameLowering(*this, getStackAlignment()) {

// Determine the PICStyle based on the target selected.

- if (TM.getRelocationModel() == Reloc::Static) {

- // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.

+ if (!isPositionIndependent())

setPICStyle(PICStyles::None);

- } else if (is64Bit()) {

- // PIC in 64 bit mode is always rip-rel.

+ else if (is64Bit())

setPICStyle(PICStyles::RIPRel);

- } else if (isTargetCOFF()) {

+ else if (isTargetCOFF())

setPICStyle(PICStyles::None);

- } else if (isTargetDarwin()) {

- if (TM.getRelocationModel() == Reloc::PIC_)

- setPICStyle(PICStyles::StubPIC);

- else {

- assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);

- setPICStyle(PICStyles::StubDynamicNoPIC);

- }

- } else if (isTargetELF()) {

+ else if (isTargetDarwin())

+ setPICStyle(PICStyles::StubPIC);

+ else if (isTargetELF())

setPICStyle(PICStyles::GOT);

- }

+ CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));

+ Legalizer.reset(new X86LegalizerInfo(*this, TM));

+ auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());

+ RegBankInfo.reset(RBI);

+ InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));

+const CallLowering *X86Subtarget::getCallLowering() const {

+ return CallLoweringInfo.get();

+const InstructionSelector *X86Subtarget::getInstructionSelector() const {

+ return InstSelector.get();

+const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {

+ return Legalizer.get();

+const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {

+ return RegBankInfo.get();

}

bool X86Subtarget::enableEarlyIfConversion() const {

return hasCMov() && X86EarlyIfConv;

}

diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 13d1026dcaa..e6b486cdbeb 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h

@@ -18,32 +18,53 @@

#include "X86ISelLowering.h"

#include "X86InstrInfo.h"

#include "X86SelectionDAGInfo.h"

+#include "llvm/ADT/StringRef.h"

#include "llvm/ADT/Triple.h"

+#include "llvm/CodeGen/GlobalISel/CallLowering.h"

+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"

+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"

+#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/IR/CallingConv.h"

-#include "llvm/Target/TargetSubtargetInfo.h"

-#include <string>

+#include "llvm/MC/MCInstrItineraries.h"

+#include "llvm/Target/TargetMachine.h"

+#include <memory>

#define GET_SUBTARGETINFO_HEADER

#include "X86GenSubtargetInfo.inc"

namespace llvm {

class GlobalValue;

-class StringRef;

-class TargetMachine;

/// The X86 backend supports a number of different styles of PIC.

///

namespace PICStyles {

enum Style {

- StubPIC, // Used on i386-darwin in -fPIC mode.

- StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.

- GOT, // Used on many 32-bit unices in -fPIC mode.

- RIPRel, // Used on X86-64 when not in -static mode.

- None // Set when in -static mode (not PIC or DynamicNoPIC mode).

+ StubPIC, // Used on i386-darwin in pic mode.

+ GOT, // Used on 32-bit ELF when in pic mode.

+ RIPRel, // Used on X86-64 when in pic mode.

+ None // Set when not in pic mode.

};

+} // end namespace PICStyles

class X86Subtarget final : public X86GenSubtargetInfo {

+public:

+ enum X86ProcFamilyEnum {

+ Others,

+ IntelAtom,

+ IntelSLM,

+ IntelGLM,

+ IntelHaswell,

+ IntelBroadwell,

+ IntelSkylake,

+ IntelKNL,

+ IntelSKX,

+ IntelCannonlake,

+ IntelIcelake,

+ };

protected:

enum X86SSEEnum {

@@ -54,22 +75,23 @@ protected:

NoThreeDNow, MMX, ThreeDNow, ThreeDNowA

};

- enum X86ProcFamilyEnum {

- Others, IntelAtom, IntelSLM

- };

/// X86 processor family: Intel Atom, and others

X86ProcFamilyEnum X86ProcFamily;

/// Which PIC style to use

PICStyles::Style PICStyle;

+ const TargetMachine &TM;

/// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.

X86SSEEnum X86SSELevel;

/// MMX, 3DNow, 3DNow Athlon, or none supported.

X863DNowEnum X863DNowLevel;

+ /// True if the processor supports X87 instructions.

+ bool HasX87;

/// True if this processor has conditional move instructions

/// (generally pentium pro+).

bool HasCMov;

@@ -85,21 +107,29 @@ protected:

/// Target has AES instructions

bool HasAES;

+ bool HasVAES;

/// Target has FXSAVE/FXRESTOR instructions

bool HasFXSR;

/// Target has XSAVE instructions

bool HasXSAVE;

/// Target has XSAVEOPT instructions

bool HasXSAVEOPT;

/// Target has XSAVEC instructions

bool HasXSAVEC;

/// Target has XSAVES instructions

bool HasXSAVES;

/// Target has carry-less multiplication

bool HasPCLMUL;

+ bool HasVPCLMULQDQ;

+ /// Target has Galois Field Arithmetic instructions

+ bool HasGFNI;

/// Target has 3-operand fused multiply-add

bool HasFMA;

@@ -113,6 +143,9 @@ protected:

/// Target has TBM instructions.

bool HasTBM;

+ /// Target has LWP instructions

+ bool HasLWP;

/// True if the processor has the MOVBE instruction.

bool HasMOVBE;

@@ -134,12 +167,18 @@ protected:

/// Processor has BMI2 instructions.

bool HasBMI2;

+ /// Processor has VBMI instructions.

+ bool HasVBMI;

+ /// Processor has VBMI2 instructions.

+ bool HasVBMI2;

+ /// Processor has Integer Fused Multiply Add

+ bool HasIFMA;

/// Processor has RTM instructions.

bool HasRTM;

- /// Processor has HLE.

- bool HasHLE;

/// Processor has ADX instructions.

bool HasADX;

@@ -155,12 +194,22 @@ protected:

/// Processor has LAHF/SAHF instructions.

bool HasLAHFSAHF;

- /// True if BT (bit test) of memory instructions are slow.

- bool IsBTMemSlow;

+ /// Processor has MONITORX/MWAITX instructions.

+ bool HasMWAITX;

+ /// Processor has Cache Line Zero instruction

+ bool HasCLZERO;

+ /// Processor has Prefetch with intent to Write instruction

+ bool HasPREFETCHWT1;

/// True if SHLD instructions are slow.

bool IsSHLDSlow;

+ /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and

+ /// PMULUDQ.

+ bool IsPMULLDSlow;

/// True if unaligned memory accesses of 16-bytes are slow.

bool IsUAMem16Slow;

@@ -179,21 +228,53 @@ protected:

/// the stack pointer. This is an optimization for Intel Atom processors.

bool UseLeaForSP;

+ /// True if it's preferable to combine to a single shuffle using a variable

+ /// mask over multiple fixed shuffles.

+ bool HasFastVariableShuffle;

+ /// True if there is no performance penalty to writing only the lower parts

+ /// of a YMM or ZMM register without clearing the upper part.

+ bool HasFastPartialYMMorZMMWrite;

+ /// True if gather is reasonably fast. This is true for Skylake client and

+ /// all AVX-512 CPUs.

+ bool HasFastGather;

+ /// True if hardware SQRTSS instruction is at least as fast (latency) as

+ /// RSQRTSS followed by a Newton-Raphson iteration.

+ bool HasFastScalarFSQRT;

+ /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast

+ /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.

+ bool HasFastVectorFSQRT;

/// True if 8-bit divisions are significantly faster than

/// 32-bit divisions and should be used when possible.

bool HasSlowDivide32;

- /// True if 16-bit divides are significantly faster than

+ /// True if 32-bit divides are significantly faster than

/// 64-bit divisions and should be used when possible.

bool HasSlowDivide64;

+ /// True if LZCNT instruction is fast.

+ bool HasFastLZCNT;

+ /// True if SHLD based rotate is fast.

+ bool HasFastSHLDRotate;

+ /// True if the processor supports macrofusion.

+ bool HasMacroFusion;

+ /// True if the processor has enhanced REP MOVSB/STOSB.

+ bool HasERMSB;

/// True if the short functions should be padded to prevent

/// a stall when returning too early.

bool PadShortFunctions;

- /// True if the Calls with memory reference should be converted

- /// to a register-based indirect call.

- bool CallRegIndirect;

+ /// True if two memory operand instructions should use a temporary register

+ /// instead.

+ bool SlowTwoMemOps;

/// True if the LEA instruction inputs have to be ready at address generation

/// (AG) time.

@@ -202,6 +283,11 @@ protected:

/// True if the LEA instruction with certain arguments is slow

bool SlowLEA;

+ /// True if the LEA instruction has all three source operands: base, index,

+ /// and offset or if the LEA instruction uses base and index registers where

+ /// the base is EBP, RBP, or R13.

+ bool Slow3OpsLEA;

/// True if INC and DEC instructions are slow when writing to flags

bool SlowIncDec;

@@ -214,6 +300,9 @@ protected:

/// Processor has AVX-512 Conflict Detection Instructions

bool HasCDI;

+ /// Processor has AVX-512 population count Instructions

+ bool HasVPOPCNTDQ;

/// Processor has AVX-512 Doubleword and Quadword instructions

bool HasDQI;

@@ -226,9 +315,40 @@ protected:

/// Processor has PKU extensions

bool HasPKU;

- /// Processot supports MPX - Memory Protection Extensions

+ /// Processor has AVX-512 Vector Neural Network Instructions

+ bool HasVNNI;

+ /// Processor has AVX-512 Bit Algorithms instructions

+ bool HasBITALG;

+ /// Processor supports MPX - Memory Protection Extensions

bool HasMPX;

+ /// Processor supports CET SHSTK - Control-Flow Enforcement Technology

+ /// using Shadow Stack

+ bool HasSHSTK;

+ /// Processor supports CET IBT - Control-Flow Enforcement Technology

+ /// using Indirect Branch Tracking

+ bool HasIBT;

+ /// Processor has Software Guard Extensions

+ bool HasSGX;

+ /// Processor supports Flush Cache Line instruction

+ bool HasCLFLUSHOPT;

+ /// Processor supports Cache Line Write Back instruction

+ bool HasCLWB;

+ /// Use a retpoline thunk rather than indirect calls to block speculative

+ /// execution.

+ bool UseRetpoline;

+ /// When using a retpoline thunk, call an externally provided thunk rather

+ /// than emitting one inside the compiler.

+ bool UseRetpolineExternalThunk;

/// Use software floating point for code generation.

bool UseSoftFloat;

@@ -246,8 +366,13 @@ protected:

/// Instruction itineraries for scheduling

InstrItineraryData InstrItins;

-private:

+ /// GlobalISel related APIs.

+ std::unique_ptr<CallLowering> CallLoweringInfo;

+ std::unique_ptr<LegalizerInfo> Legalizer;

+ std::unique_ptr<RegisterBankInfo> RegBankInfo;

+ std::unique_ptr<InstructionSelector> InstSelector;

+private:

/// Override the stack alignment.

unsigned StackAlignOverride;

@@ -260,6 +385,10 @@ private:

/// True if compiling for 16-bit, false for 32-bit or 64-bit.

bool In16BitMode;

+ /// Contains the overhead of gather/scatter instructions

+ int GatherOverhead;

+ int ScatterOverhead;

X86SelectionDAGInfo TSInfo;

// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which

// X86TargetLowering needs.

@@ -271,19 +400,23 @@ public:

/// This constructor initializes the data members to match that

/// of the specified triple.

///

- X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,

+ X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,

const X86TargetMachine &TM, unsigned StackAlignOverride);

const X86TargetLowering *getTargetLowering() const override {

return &TLInfo;

}

const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }

const X86FrameLowering *getFrameLowering() const override {

return &FrameLowering;

}

const X86SelectionDAGInfo *getSelectionDAGInfo() const override {

return &TSInfo;

}

const X86RegisterInfo *getRegisterInfo() const override {

return &getInstrInfo()->getRegisterInfo();

}

@@ -301,12 +434,19 @@ public:

/// subtarget options. Definition of function is auto generated by tblgen.

void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

+ /// Methods used by Global ISel

+ const CallLowering *getCallLowering() const override;

+ const InstructionSelector *getInstructionSelector() const override;

+ const LegalizerInfo *getLegalizerInfo() const override;

+ const RegisterBankInfo *getRegBankInfo() const override;

private:

/// Initialize the full set of dependencies so we can use an initializer

/// list for X86Subtarget.

X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);

void initializeEnvironment();

void initSubtargetFeatures(StringRef CPU, StringRef FS);

public:

/// Is this x86_64? (disregarding specific ABI / programming model)

bool is64Bit() const {

@@ -336,6 +476,7 @@ public:

PICStyles::Style getPICStyle() const { return PICStyle; }

void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }

+ bool hasX87() const { return HasX87; }

bool hasCMov() const { return HasCMov; }

bool hasSSE1() const { return X86SSELevel >= SSE1; }

bool hasSSE2() const { return X86SSELevel >= SSE2; }

@@ -354,19 +495,23 @@ public:

bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }

bool hasPOPCNT() const { return HasPOPCNT; }

bool hasAES() const { return HasAES; }

+ bool hasVAES() const { return HasVAES; }

bool hasFXSR() const { return HasFXSR; }

bool hasXSAVE() const { return HasXSAVE; }

bool hasXSAVEOPT() const { return HasXSAVEOPT; }

bool hasXSAVEC() const { return HasXSAVEC; }

bool hasXSAVES() const { return HasXSAVES; }

bool hasPCLMUL() const { return HasPCLMUL; }

+ bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }

+ bool hasGFNI() const { return HasGFNI; }

// Prefer FMA4 to FMA - it's better for commutation/memory folding and

// has equal or better performance on all supported targets.

- bool hasFMA() const { return HasFMA && !HasFMA4; }

+ bool hasFMA() const { return HasFMA; }

bool hasFMA4() const { return HasFMA4; }

- bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }

+ bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }

bool hasXOP() const { return HasXOP; }

bool hasTBM() const { return HasTBM; }

+ bool hasLWP() const { return HasLWP; }

bool hasMOVBE() const { return HasMOVBE; }

bool hasRDRAND() const { return HasRDRAND; }

bool hasF16C() const { return HasF16C; }

@@ -374,58 +519,108 @@ public:

bool hasLZCNT() const { return HasLZCNT; }

bool hasBMI() const { return HasBMI; }

bool hasBMI2() const { return HasBMI2; }

+ bool hasVBMI() const { return HasVBMI; }

+ bool hasVBMI2() const { return HasVBMI2; }

+ bool hasIFMA() const { return HasIFMA; }

bool hasRTM() const { return HasRTM; }

- bool hasHLE() const { return HasHLE; }

bool hasADX() const { return HasADX; }

bool hasSHA() const { return HasSHA; }

- bool hasPRFCHW() const { return HasPRFCHW; }

+ bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }

+ bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }

+ bool hasSSEPrefetch() const {

+ // We implicitly enable these when we have a write prefix supporting cache

+ // level OR if we have prfchw, but don't already have a read prefetch from

+ // 3dnow.

+ return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();

+ }

bool hasRDSEED() const { return HasRDSEED; }

bool hasLAHFSAHF() const { return HasLAHFSAHF; }

- bool isBTMemSlow() const { return IsBTMemSlow; }

+ bool hasMWAITX() const { return HasMWAITX; }

+ bool hasCLZERO() const { return HasCLZERO; }

bool isSHLDSlow() const { return IsSHLDSlow; }

+ bool isPMULLDSlow() const { return IsPMULLDSlow; }

bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }

bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }

+ int getGatherOverhead() const { return GatherOverhead; }

+ int getScatterOverhead() const { return ScatterOverhead; }

bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }

bool hasCmpxchg16b() const { return HasCmpxchg16b; }

bool useLeaForSP() const { return UseLeaForSP; }

+ bool hasFastVariableShuffle() const {

+ return HasFastVariableShuffle;

+ }

+ bool hasFastPartialYMMorZMMWrite() const {

+ return HasFastPartialYMMorZMMWrite;

+ }

+ bool hasFastGather() const { return HasFastGather; }

+ bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }

+ bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }

+ bool hasFastLZCNT() const { return HasFastLZCNT; }

+ bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }

+ bool hasMacroFusion() const { return HasMacroFusion; }

+ bool hasERMSB() const { return HasERMSB; }

bool hasSlowDivide32() const { return HasSlowDivide32; }

bool hasSlowDivide64() const { return HasSlowDivide64; }

bool padShortFunctions() const { return PadShortFunctions; }

- bool callRegIndirect() const { return CallRegIndirect; }

+ bool slowTwoMemOps() const { return SlowTwoMemOps; }

bool LEAusesAG() const { return LEAUsesAG; }

bool slowLEA() const { return SlowLEA; }

+ bool slow3OpsLEA() const { return Slow3OpsLEA; }

bool slowIncDec() const { return SlowIncDec; }

bool hasCDI() const { return HasCDI; }

+ bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }

bool hasPFI() const { return HasPFI; }

bool hasERI() const { return HasERI; }

bool hasDQI() const { return HasDQI; }

bool hasBWI() const { return HasBWI; }

bool hasVLX() const { return HasVLX; }

bool hasPKU() const { return HasPKU; }

+ bool hasVNNI() const { return HasVNNI; }

+ bool hasBITALG() const { return HasBITALG; }

bool hasMPX() const { return HasMPX; }

+ bool hasSHSTK() const { return HasSHSTK; }

+ bool hasIBT() const { return HasIBT; }

+ bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }

+ bool hasCLWB() const { return HasCLWB; }

+ bool useRetpoline() const { return UseRetpoline; }

+ bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }

+ bool isXRaySupported() const override { return is64Bit(); }

+ X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }

+ /// TODO: to be removed later and replaced with suitable properties

bool isAtom() const { return X86ProcFamily == IntelAtom; }

bool isSLM() const { return X86ProcFamily == IntelSLM; }

bool useSoftFloat() const { return UseSoftFloat; }

+ /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for

+ /// no-sse2). There isn't any reason to disable it if the target processor

+ /// supports it.

+ bool hasMFence() const { return hasSSE2() || is64Bit(); }

const Triple &getTargetTriple() const { return TargetTriple; }

bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }

bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }

+ bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); }

bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }

bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }

- bool isTargetPS4() const { return TargetTriple.isPS4(); }

+ bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }

bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }

bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }

bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

bool isTargetLinux() const { return TargetTriple.isOSLinux(); }

+ bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }

+ bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }

bool isTargetAndroid() const { return TargetTriple.isAndroid(); }

bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }

bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }

bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }

bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }

+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }

bool isTargetWindowsMSVC() const {

return TargetTriple.isWindowsMSVCEnvironment();

@@ -455,15 +650,10 @@ public:

bool isOSWindows() const { return TargetTriple.isOSWindows(); }

- bool isTargetWin64() const {

- return In64BitMode && TargetTriple.isOSWindows();

- }

+ bool isTargetWin64() const { return In64BitMode && isOSWindows(); }

- bool isTargetWin32() const {

- return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC());

- }

+ bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }

- bool isPICStyleSet() const { return PICStyle != PICStyles::None; }

bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }

bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }

@@ -471,19 +661,14 @@ public:

return PICStyle == PICStyles::StubPIC;

}

- bool isPICStyleStubNoDynamic() const {

- return PICStyle == PICStyles::StubDynamicNoPIC;

- }

- bool isPICStyleStubAny() const {

- return PICStyle == PICStyles::StubDynamicNoPIC ||

- PICStyle == PICStyles::StubPIC;

- }

+ bool isPositionIndependent() const { return TM.isPositionIndependent(); }

bool isCallingConvWin64(CallingConv::ID CC) const {

switch (CC) {

// On Win64, all these conventions just use the default convention.

case CallingConv::C:

case CallingConv::Fast:

+ case CallingConv::Swift:

case CallingConv::X86_FastCall:

case CallingConv::X86_StdCall:

case CallingConv::X86_ThisCall:

@@ -491,7 +676,7 @@ public:

case CallingConv::Intel_OCL_BI:

return isTargetWin64();

// This convention allows using the Win64 convention on other targets.

- case CallingConv::X86_64_Win64:

+ case CallingConv::Win64:

return true;

// This convention allows using the SysV convention on Windows targets.

case CallingConv::X86_64_SysV:

@@ -502,33 +687,36 @@ public:

}

- /// ClassifyGlobalReference - Classify a global variable reference for the

- /// current subtarget according to how we should reference it in a non-pcrel

- /// context.

- unsigned char ClassifyGlobalReference(const GlobalValue *GV,

- const TargetMachine &TM)const;

+ /// Classify a global variable reference for the current subtarget according

+ /// to how we should reference it in a non-pcrel context.

+ unsigned char classifyLocalReference(const GlobalValue *GV) const;

+ unsigned char classifyGlobalReference(const GlobalValue *GV,

+ const Module &M) const;

+ unsigned char classifyGlobalReference(const GlobalValue *GV) const;

+ /// Classify a global function reference for the current subtarget.

+ unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,

+ const Module &M) const;

+ unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;

/// Classify a blockaddress reference for the current subtarget according to

/// how we should reference it in a non-pcrel context.

- unsigned char ClassifyBlockAddressReference() const;

+ unsigned char classifyBlockAddressReference() const;

/// Return true if the subtarget allows calls to immediate address.

- bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;

- /// This function returns the name of a function which has an interface

- /// like the non-standard bzero function, if such a function exists on

- /// the current subtarget and it is considered prefereable over

- /// memset with zero passed as the second argument. Otherwise it

- /// returns null.

- const char *getBZeroEntry() const;

+ bool isLegalToCallImmediateAddr() const;

- /// This function returns true if the target has sincos() routine in its

- /// compiler runtime or math libraries.

- bool hasSinCos() const;

+ /// If we are using retpolines, we need to expand indirectbr to avoid it

+ /// lowering to an actual indirect jump.

+ bool enableIndirectBrExpand() const override { return useRetpoline(); }

/// Enable the MachineScheduler pass for all X86 subtargets.

bool enableMachineScheduler() const override { return true; }

+ // TODO: Update the regression tests and return true.

+ bool supportPrintSchedInfo() const override { return false; }

bool enableEarlyIfConversion() const override;

/// Return the instruction itineraries based on the subtarget selection.

@@ -539,8 +727,10 @@ public:

AntiDepBreakMode getAntiDepBreakMode() const override {

return TargetSubtargetInfo::ANTIDEP_CRITICAL;

}

+ bool enableAdvancedRASplitCost() const override { return true; }

};

-} // End llvm namespace

+} // end namespace llvm

-#endif

+#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H