2 files changed, 452 insertions, 187 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index 8ef08c960f0..195576bf546 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -11,19 +11,28 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "X86.h"
+
+#include "X86CallLowering.h"
+#include "X86LegalizerInfo.h"
+#include "X86RegisterBankInfo.h"
 #include "X86Subtarget.h"
-#include "X86InstrInfo.h"
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86TargetMachine.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -46,126 +55,127 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
 
 /// Classify a blockaddress reference for the current subtarget according to how
 /// we should reference it in a non-pcrel context.
-unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
-  if (isPICStyleGOT())    // 32-bit ELF targets.
-    return X86II::MO_GOTOFF;
-
-  if (isPICStyleStubPIC())   // Darwin/32 in PIC mode.
-    return X86II::MO_PIC_BASE_OFFSET;
-
-  // Direct static reference to label.
-  return X86II::MO_NO_FLAG;
+unsigned char X86Subtarget::classifyBlockAddressReference() const {
+  return classifyLocalReference(nullptr);
 }
 
 /// Classify a global variable reference for the current subtarget according to
 /// how we should reference it in a non-pcrel context.
-unsigned char X86Subtarget::
-ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
-  // DLLImport only exists on windows, it is implemented as a load from a
-  // DLLIMPORT stub.
-  if (GV->hasDLLImportStorageClass())
-    return X86II::MO_DLLIMPORT;
+unsigned char
+X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
+  return classifyGlobalReference(GV, *GV->getParent());
+}
 
-  bool isDef = GV->isStrongDefinitionForLinker();
-
-  // X86-64 in PIC mode.
-  if (isPICStyleRIPRel()) {
-    // Large model never uses stubs.
-    if (TM.getCodeModel() == CodeModel::Large)
-      return X86II::MO_NO_FLAG;
-
-    if (isTargetDarwin()) {
-      // If symbol visibility is hidden, the extra load is not needed if
-      // target is x86-64 or the symbol is definitely defined in the current
-      // translation unit.
-      if (GV->hasDefaultVisibility() && !isDef)
-        return X86II::MO_GOTPCREL;
-    } else if (!isTargetWin64()) {
-      assert(isTargetELF() && "Unknown rip-relative target");
-
-      // Extra load is needed for all externally visible.
-      if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
-        return X86II::MO_GOTPCREL;
-    }
+unsigned char
+X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
+  // 64 bits can use %rip addressing for anything local.
+  if (is64Bit())
+    return X86II::MO_NO_FLAG;
 
+  // If this is for a position dependent executable, the static linker can
+  // figure it out.
+  if (!isPositionIndependent())
     return X86II::MO_NO_FLAG;
-  }
 
-  if (isPICStyleGOT()) {   // 32-bit ELF targets.
-    // Extra load is needed for all externally visible.
-    if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
-      return X86II::MO_GOTOFF;
-    return X86II::MO_GOT;
-  }
+  // The COFF dynamic linker just patches the executable sections.
+  if (isTargetCOFF())
+    return X86II::MO_NO_FLAG;
 
-  if (isPICStyleStubPIC()) {  // Darwin/32 in PIC mode.
-    // Determine whether we have a stub reference and/or whether the reference
-    // is relative to the PIC base or not.
+  if (isTargetDarwin()) {
+    // 32 bit macho has no relocation for a-b if a is undefined, even if
+    // b is in the section that is being relocated.
+    // This means we have to use o load even for GVs that are known to be
+    // local to the dso.
+    if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
+      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
 
-    // If this is a strong reference to a definition, it is definitely not
-    // through a stub.
-    if (isDef)
-      return X86II::MO_PIC_BASE_OFFSET;
+    return X86II::MO_PIC_BASE_OFFSET;
+  }
 
-    // Unless we have a symbol with hidden visibility, we have to go through a
-    // normal $non_lazy_ptr stub because this symbol might be resolved late.
-    if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
-      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+  return X86II::MO_GOTOFF;
+}
 
-    // If symbol visibility is hidden, we have a stub for common symbol
-    // references and external declarations.
-    if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
-      // Hidden $non_lazy_ptr reference.
-      return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
-    }
+unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
+                                                    const Module &M) const {
+  // Large model never uses stubs.
+  if (TM.getCodeModel() == CodeModel::Large)
+    return X86II::MO_NO_FLAG;
 
-    // Otherwise, no stub.
-    return X86II::MO_PIC_BASE_OFFSET;
+  // Absolute symbols can be referenced directly.
+  if (GV) {
+    if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
+      // See if we can use the 8-bit immediate form. Note that some instructions
+      // will sign extend the immediate operand, so to be conservative we only
+      // accept the range [0,128).
+      if (CR->getUnsignedMax().ult(128))
+        return X86II::MO_ABS8;
+      else
+        return X86II::MO_NO_FLAG;
+    }
   }
 
-  if (isPICStyleStubNoDynamic()) {  // Darwin/32 in -mdynamic-no-pic mode.
-    // Determine whether we have a stub reference.
+  if (TM.shouldAssumeDSOLocal(M, GV))
+    return classifyLocalReference(GV);
 
-    // If this is a strong reference to a definition, it is definitely not
-    // through a stub.
-    if (isDef)
-      return X86II::MO_NO_FLAG;
+  if (isTargetCOFF())
+    return X86II::MO_DLLIMPORT;
 
-    // Unless we have a symbol with hidden visibility, we have to go through a
-    // normal $non_lazy_ptr stub because this symbol might be resolved late.
-    if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
-      return X86II::MO_DARWIN_NONLAZY;
+  if (is64Bit())
+    return X86II::MO_GOTPCREL;
 
-    // Otherwise, no stub.
-    return X86II::MO_NO_FLAG;
+  if (isTargetDarwin()) {
+    if (!isPositionIndependent())
+      return X86II::MO_DARWIN_NONLAZY;
+    return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
   }
 
-  // Direct static reference to global.
-  return X86II::MO_NO_FLAG;
+  return X86II::MO_GOT;
+}
+
+unsigned char
+X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {
+  return classifyGlobalFunctionReference(GV, *GV->getParent());
 }
 
+unsigned char
+X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
+                                              const Module &M) const {
+  if (TM.shouldAssumeDSOLocal(M, GV))
+    return X86II::MO_NO_FLAG;
 
-/// This function returns the name of a function which has an interface like
-/// the non-standard bzero function, if such a function exists on the
-/// current subtarget and it is considered preferable over memset with zero
-/// passed as the second argument. Otherwise it returns null.
-const char *X86Subtarget::getBZeroEntry() const {
-  // Darwin 10 has a __bzero entry point for this purpose.
-  if (getTargetTriple().isMacOSX() &&
-      !getTargetTriple().isMacOSXVersionLT(10, 6))
-    return "__bzero";
+  if (isTargetCOFF()) {
+    assert(GV->hasDLLImportStorageClass() &&
+           "shouldAssumeDSOLocal gave inconsistent answer");
+    return X86II::MO_DLLIMPORT;
+  }
 
-  return nullptr;
-}
+  const Function *F = dyn_cast_or_null<Function>(GV);
+
+  if (isTargetELF()) {
+    if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
+      // According to psABI, PLT stub clobbers XMM8-XMM15.
+      // In Regcall calling convention those registers are used for passing
+      // parameters. Thus we need to prevent lazy binding in Regcall.
+      return X86II::MO_GOTPCREL;
+    if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())
+      return X86II::MO_GOTPCREL;
+    return X86II::MO_PLT;
+  }
+
+  if (is64Bit()) {
+    if (F && F->hasFnAttribute(Attribute::NonLazyBind))
+      // If the function is marked as non-lazy, generate an indirect call
+      // which loads from the GOT directly. This avoids runtime overhead
+      // at the cost of eager binding (and one extra byte of encoding).
+      return X86II::MO_GOTPCREL;
+    return X86II::MO_NO_FLAG;
+  }
 
-bool X86Subtarget::hasSinCos() const {
-  return getTargetTriple().isMacOSX() &&
-    !getTargetTriple().isMacOSXVersionLT(10, 9) &&
-    is64Bit();
+  return X86II::MO_NO_FLAG;
 }
 
 /// Return true if the subtarget allows calls to immediate address.
-bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+bool X86Subtarget::isLegalToCallImmediateAddr() const {
   // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
   // but WinCOFFObjectWriter::RecordRelocation cannot emit them.  Once it does,
   // the following check for Win32 should be removed.
@@ -197,6 +207,13 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
       FullFS = "+sahf";
   }
 
+  // OpenBSD/amd64 defaults to -mretpoline
+  if (isTargetOpenBSD() && In64BitMode) {
+    if (!FullFS.empty())
+      FullFS = "+retpoline," + FullFS;
+    else
+      FullFS = "+retpoline";
+  }
 
   // Parse features string and set the CPU.
   ParseSubtargetFeatures(CPUName, FullFS);
@@ -227,33 +244,49 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   assert((!In64BitMode || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
-  // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
+  // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
   // 32 and 64 bit) and for all 64-bit targets.
   if (StackAlignOverride)
     stackAlignment = StackAlignOverride;
   else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
-           In64BitMode)
+           isTargetKFreeBSD() || In64BitMode)
     stackAlignment = 16;
+
+  // Some CPUs have more overhead for gather. The specified overhead is relative
+  // to the Load operation. "2" is the number provided by Intel architects. This
+  // parameter is used for cost estimation of Gather Op and comparison with
+  // other alternatives.
+  // TODO: Remove the explicit hasAVX512()?, That would mean we would only
+  // enable gather with a -march.
+  if (hasAVX512() || (hasAVX2() && hasFastGather()))
+    GatherOverhead = 2;
+  if (hasAVX512())
+    ScatterOverhead = 2;
 }
 
 void X86Subtarget::initializeEnvironment() {
   X86SSELevel = NoSSE;
   X863DNowLevel = NoThreeDNow;
+  HasX87 = false;
   HasCMov = false;
   HasX86_64 = false;
   HasPOPCNT = false;
   HasSSE4A = false;
   HasAES = false;
+  HasVAES = false;
   HasFXSR = false;
   HasXSAVE = false;
   HasXSAVEOPT = false;
   HasXSAVEC = false;
   HasXSAVES = false;
   HasPCLMUL = false;
+  HasVPCLMULQDQ = false;
+  HasGFNI = false;
   HasFMA = false;
   HasFMA4 = false;
   HasXOP = false;
   HasTBM = false;
+  HasLWP = false;
   HasMOVBE = false;
   HasRDRAND = false;
   HasF16C = false;
@@ -261,39 +294,67 @@ void X86Subtarget::initializeEnvironment() {
   HasLZCNT = false;
   HasBMI = false;
   HasBMI2 = false;
+  HasVBMI = false;
+  HasVBMI2 = false;
+  HasIFMA = false;
   HasRTM = false;
-  HasHLE = false;
   HasERI = false;
   HasCDI = false;
   HasPFI = false;
   HasDQI = false;
+  HasVPOPCNTDQ = false;
   HasBWI = false;
   HasVLX = false;
   HasADX = false;
   HasPKU = false;
+  HasVNNI = false;
+  HasBITALG = false;
   HasSHA = false;
+  HasPREFETCHWT1 = false;
   HasPRFCHW = false;
   HasRDSEED = false;
   HasLAHFSAHF = false;
+  HasMWAITX = false;
+  HasCLZERO = false;
   HasMPX = false;
-  IsBTMemSlow = false;
+  HasSHSTK = false;
+  HasIBT = false;
+  HasSGX = false;
+  HasCLFLUSHOPT = false;
+  HasCLWB = false;
+  UseRetpoline = false;
+  UseRetpolineExternalThunk = false;
+  IsPMULLDSlow = false;
   IsSHLDSlow = false;
   IsUAMem16Slow = false;
   IsUAMem32Slow = false;
   HasSSEUnalignedMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
+  HasFastVariableShuffle = false;
+  HasFastPartialYMMorZMMWrite = false;
+  HasFastGather = false;
+  HasFastScalarFSQRT = false;
+  HasFastVectorFSQRT = false;
+  HasFastLZCNT = false;
+  HasFastSHLDRotate = false;
+  HasMacroFusion = false;
+  HasERMSB = false;
   HasSlowDivide32 = false;
   HasSlowDivide64 = false;
   PadShortFunctions = false;
-  CallRegIndirect = false;
+  SlowTwoMemOps = false;
   LEAUsesAG = false;
   SlowLEA = false;
+  Slow3OpsLEA = false;
   SlowIncDec = false;
   stackAlignment = 4;
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
   UseSoftFloat = false;
+  X86ProcFamily = Others;
+  GatherOverhead = 1024;
+  ScatterOverhead = 1024;
 }
 
 X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -303,41 +364,55 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
   return *this;
 }
 
-X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
-                           const std::string &FS, const X86TargetMachine &TM,
+X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+                           const X86TargetMachine &TM,
                            unsigned StackAlignOverride)
     : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
-      PICStyle(PICStyles::None), TargetTriple(TT),
+      PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
       StackAlignOverride(StackAlignOverride),
       In64BitMode(TargetTriple.getArch() == Triple::x86_64),
       In32BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() != Triple::CODE16),
       In16BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() == Triple::CODE16),
-      TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
-      TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
+      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+      FrameLowering(*this, getStackAlignment()) {
   // Determine the PICStyle based on the target selected.
-  if (TM.getRelocationModel() == Reloc::Static) {
-    // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+  if (!isPositionIndependent())
     setPICStyle(PICStyles::None);
-  } else if (is64Bit()) {
-    // PIC in 64 bit mode is always rip-rel.
+  else if (is64Bit())
     setPICStyle(PICStyles::RIPRel);
-  } else if (isTargetCOFF()) {
+  else if (isTargetCOFF())
     setPICStyle(PICStyles::None);
-  } else if (isTargetDarwin()) {
-    if (TM.getRelocationModel() == Reloc::PIC_)
-      setPICStyle(PICStyles::StubPIC);
-    else {
-      assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
-      setPICStyle(PICStyles::StubDynamicNoPIC);
-    }
-  } else if (isTargetELF()) {
+  else if (isTargetDarwin())
+    setPICStyle(PICStyles::StubPIC);
+  else if (isTargetELF())
     setPICStyle(PICStyles::GOT);
-  }
+
+  CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
+  Legalizer.reset(new X86LegalizerInfo(*this, TM));
+
+  auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
+  RegBankInfo.reset(RBI);
+  InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
+}
+
+const CallLowering *X86Subtarget::getCallLowering() const {
+  return CallLoweringInfo.get();
+}
+
+const InstructionSelector *X86Subtarget::getInstructionSelector() const {
+  return InstSelector.get();
+}
+
+const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
+  return Legalizer.get();
+}
+
+const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
+  return RegBankInfo.get();
 }
 
 bool X86Subtarget::enableEarlyIfConversion() const {
   return hasCMov() && X86EarlyIfConv;
 }
-
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 13d1026dcaa..e6b486cdbeb 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h
@@ -18,32 +18,53 @@
 #include "X86ISelLowering.h"
 #include "X86InstrInfo.h"
 #include "X86SelectionDAGInfo.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include <memory>
 
 #define GET_SUBTARGETINFO_HEADER
 #include "X86GenSubtargetInfo.inc"
 
 namespace llvm {
+
 class GlobalValue;
-class StringRef;
-class TargetMachine;
 
 /// The X86 backend supports a number of different styles of PIC.
 ///
 namespace PICStyles {
+
 enum Style {
-  StubPIC,          // Used on i386-darwin in -fPIC mode.
-  StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
-  GOT,              // Used on many 32-bit unices in -fPIC mode.
-  RIPRel,           // Used on X86-64 when not in -static mode.
-  None              // Set when in -static mode (not PIC or DynamicNoPIC mode).
+  StubPIC,          // Used on i386-darwin in pic mode.
+  GOT,              // Used on 32 bit elf on when in pic mode.
+  RIPRel,           // Used on X86-64 when in pic mode.
+  None              // Set when not in pic mode.
 };
-}
+
+} // end namespace PICStyles
 
 class X86Subtarget final : public X86GenSubtargetInfo {
+public:  
+  enum X86ProcFamilyEnum {
+    Others,
+    IntelAtom,
+    IntelSLM,
+    IntelGLM,
+    IntelHaswell,
+    IntelBroadwell,
+    IntelSkylake,
+    IntelKNL,
+    IntelSKX,
+    IntelCannonlake,
+    IntelIcelake,
+  };
 
 protected:
   enum X86SSEEnum {
@@ -54,22 +75,23 @@ protected:
     NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
   };
 
-  enum X86ProcFamilyEnum {
-    Others, IntelAtom, IntelSLM
-  };
-
   /// X86 processor family: Intel Atom, and others
   X86ProcFamilyEnum X86ProcFamily;
 
   /// Which PIC style to use
   PICStyles::Style PICStyle;
 
+  const TargetMachine &TM;
+
   /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
   X86SSEEnum X86SSELevel;
 
   /// MMX, 3DNow, 3DNow Athlon, or none supported.
   X863DNowEnum X863DNowLevel;
 
+  /// True if the processor supports X87 instructions.
+  bool HasX87;
+
   /// True if this processor has conditional move instructions
   /// (generally pentium pro+).
   bool HasCMov;
@@ -85,21 +107,29 @@ protected:
 
   /// Target has AES instructions
   bool HasAES;
+  bool HasVAES;
 
   /// Target has FXSAVE/FXRESTOR instructions
   bool HasFXSR;
 
   /// Target has XSAVE instructions
   bool HasXSAVE;
+
   /// Target has XSAVEOPT instructions
   bool HasXSAVEOPT;
+
   /// Target has XSAVEC instructions
   bool HasXSAVEC;
+
   /// Target has XSAVES instructions
   bool HasXSAVES;
 
   /// Target has carry-less multiplication
   bool HasPCLMUL;
+  bool HasVPCLMULQDQ;
+
+  /// Target has Galois Field Arithmetic instructions
+  bool HasGFNI;
 
   /// Target has 3-operand fused multiply-add
   bool HasFMA;
@@ -113,6 +143,9 @@ protected:
   /// Target has TBM instructions.
   bool HasTBM;
 
+  /// Target has LWP instructions
+  bool HasLWP;
+
   /// True if the processor has the MOVBE instruction.
   bool HasMOVBE;
 
@@ -134,12 +167,18 @@ protected:
   /// Processor has BMI2 instructions.
   bool HasBMI2;
 
+  /// Processor has VBMI instructions.
+  bool HasVBMI;
+
+  /// Processor has VBMI2 instructions.
+  bool HasVBMI2;
+
+  /// Processor has Integer Fused Multiply Add
+  bool HasIFMA;
+
   /// Processor has RTM instructions.
   bool HasRTM;
 
-  /// Processor has HLE.
-  bool HasHLE;
-
   /// Processor has ADX instructions.
   bool HasADX;
 
@@ -155,12 +194,22 @@ protected:
   /// Processor has LAHF/SAHF instructions.
   bool HasLAHFSAHF;
 
-  /// True if BT (bit test) of memory instructions are slow.
-  bool IsBTMemSlow;
+  /// Processor has MONITORX/MWAITX instructions.
+  bool HasMWAITX;
+
+  /// Processor has Cache Line Zero instruction
+  bool HasCLZERO;
+
+  /// Processor has Prefetch with intent to Write instruction
+  bool HasPREFETCHWT1;
 
   /// True if SHLD instructions are slow.
   bool IsSHLDSlow;
 
+  /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
+  //  PMULUDQ.
+  bool IsPMULLDSlow;
+
   /// True if unaligned memory accesses of 16-bytes are slow.
   bool IsUAMem16Slow;
 
@@ -179,21 +228,53 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
+  /// True if its preferable to combine to a single shuffle using a variable
+  /// mask over multiple fixed shuffles.
+  bool HasFastVariableShuffle;
+
+  /// True if there is no performance penalty to writing only the lower parts
+  /// of a YMM or ZMM register without clearing the upper part.
+  bool HasFastPartialYMMorZMMWrite;
+
+  /// True if gather is reasonably fast. This is true for Skylake client and
+  /// all AVX-512 CPUs.
+  bool HasFastGather;
+
+  /// True if hardware SQRTSS instruction is at least as fast (latency) as
+  /// RSQRTSS followed by a Newton-Raphson iteration.
+  bool HasFastScalarFSQRT;
+
+  /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
+  /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
+  bool HasFastVectorFSQRT;
+
   /// True if 8-bit divisions are significantly faster than
   /// 32-bit divisions and should be used when possible.
   bool HasSlowDivide32;
 
-  /// True if 16-bit divides are significantly faster than
+  /// True if 32-bit divides are significantly faster than
   /// 64-bit divisions and should be used when possible.
   bool HasSlowDivide64;
 
+  /// True if LZCNT instruction is fast.
+  bool HasFastLZCNT;
+
+  /// True if SHLD based rotate is fast.
+  bool HasFastSHLDRotate;
+
+  /// True if the processor supports macrofusion.
+  bool HasMacroFusion;
+
+  /// True if the processor has enhanced REP MOVSB/STOSB.
+  bool HasERMSB;
+
   /// True if the short functions should be padded to prevent
   /// a stall when returning too early.
   bool PadShortFunctions;
 
-  /// True if the Calls with memory reference should be converted
-  /// to a register-based indirect call.
-  bool CallRegIndirect;
+  /// True if two memory operand instructions should use a temporary register
+  /// instead.
+  bool SlowTwoMemOps;
 
   /// True if the LEA instruction inputs have to be ready at address generation
   /// (AG) time.
@@ -202,6 +283,11 @@ protected:
   /// True if the LEA instruction with certain arguments is slow
   bool SlowLEA;
 
+  /// True if the LEA instruction has all three source operands: base, index,
+  /// and offset or if the LEA instruction uses base and index registers where
+  /// the base is EBP, RBP,or R13
+  bool Slow3OpsLEA;
+
   /// True if INC and DEC instructions are slow when writing to flags
   bool SlowIncDec;
 
@@ -214,6 +300,9 @@ protected:
   /// Processor has AVX-512 Conflict Detection Instructions
   bool HasCDI;
 
+  /// Processor has AVX-512 population count Instructions
+  bool HasVPOPCNTDQ;
+
   /// Processor has AVX-512 Doubleword and Quadword instructions
   bool HasDQI;
 
@@ -226,9 +315,40 @@ protected:
   /// Processor has PKU extenstions
   bool HasPKU;
 
-  /// Processot supports MPX - Memory Protection Extensions
+  /// Processor has AVX-512 Vector Neural Network Instructions
+  bool HasVNNI;
+
+  /// Processor has AVX-512 Bit Algorithms instructions
+  bool HasBITALG;
+
+  /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX;
 
+  /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
+  /// using Shadow Stack
+  bool HasSHSTK;
+
+  /// Processor supports CET IBT - Control-Flow Enforcement Technology
+  /// using Indirect Branch Tracking
+  bool HasIBT;
+
+  /// Processor has Software Guard Extensions
+  bool HasSGX;
+
+  /// Processor supports Flush Cache Line instruction
+  bool HasCLFLUSHOPT;
+
+  /// Processor supports Cache Line Write Back instruction
+  bool HasCLWB;
+
+  /// Use a retpoline thunk rather than indirect calls to block speculative
+  /// execution.
+  bool UseRetpoline;
+
+  /// When using a retpoline thunk, call an externally provided thunk rather
+  /// than emitting one inside the compiler.
+  bool UseRetpolineExternalThunk;
+
   /// Use software floating point for code generation.
   bool UseSoftFloat;
 
@@ -246,8 +366,13 @@ protected:
   /// Instruction itineraries for scheduling
   InstrItineraryData InstrItins;
 
-private:
+  /// GlobalISel related APIs.
+  std::unique_ptr<CallLowering> CallLoweringInfo;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+  std::unique_ptr<InstructionSelector> InstSelector;
 
+private:
   /// Override the stack alignment.
   unsigned StackAlignOverride;
 
@@ -260,6 +385,10 @@ private:
   /// True if compiling for 16-bit, false for 32-bit or 64-bit.
   bool In16BitMode;
 
+  /// Contains the Overhead of gather\scatter instructions
+  int GatherOverhead;
+  int ScatterOverhead;
+
   X86SelectionDAGInfo TSInfo;
   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
   // X86TargetLowering needs.
@@ -271,19 +400,23 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+  X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                const X86TargetMachine &TM, unsigned StackAlignOverride);
 
   const X86TargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
+
   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
   const X86FrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
+
   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
     return &TSInfo;
   }
+
   const X86RegisterInfo *getRegisterInfo() const override {
     return &getInstrInfo()->getRegisterInfo();
   }
@@ -301,12 +434,19 @@ public:
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
+  /// Methods used by Global ISel
+  const CallLowering *getCallLowering() const override;
+  const InstructionSelector *getInstructionSelector() const override;
+  const LegalizerInfo *getLegalizerInfo() const override;
+  const RegisterBankInfo *getRegBankInfo() const override;
+
 private:
   /// Initialize the full set of dependencies so we can use an initializer
   /// list for X86Subtarget.
   X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
   void initializeEnvironment();
   void initSubtargetFeatures(StringRef CPU, StringRef FS);
+
 public:
   /// Is this x86_64? (disregarding specific ABI / programming model)
   bool is64Bit() const {
@@ -336,6 +476,7 @@ public:
   PICStyles::Style getPICStyle() const { return PICStyle; }
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
+  bool hasX87() const { return HasX87; }
   bool hasCMov() const { return HasCMov; }
   bool hasSSE1() const { return X86SSELevel >= SSE1; }
   bool hasSSE2() const { return X86SSELevel >= SSE2; }
@@ -354,19 +495,23 @@ public:
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
   bool hasPOPCNT() const { return HasPOPCNT; }
   bool hasAES() const { return HasAES; }
+  bool hasVAES() const { return HasVAES; }
   bool hasFXSR() const { return HasFXSR; }
   bool hasXSAVE() const { return HasXSAVE; }
   bool hasXSAVEOPT() const { return HasXSAVEOPT; }
   bool hasXSAVEC() const { return HasXSAVEC; }
   bool hasXSAVES() const { return HasXSAVES; }
   bool hasPCLMUL() const { return HasPCLMUL; }
+  bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
+  bool hasGFNI() const { return HasGFNI; }
   // Prefer FMA4 to FMA - its better for commutation/memory folding and
   // has equal or better performance on all supported targets.
-  bool hasFMA() const { return HasFMA && !HasFMA4; }
+  bool hasFMA() const { return HasFMA; }
   bool hasFMA4() const { return HasFMA4; }
-  bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
+  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
   bool hasXOP() const { return HasXOP; }
   bool hasTBM() const { return HasTBM; }
+  bool hasLWP() const { return HasLWP; }
   bool hasMOVBE() const { return HasMOVBE; }
   bool hasRDRAND() const { return HasRDRAND; }
   bool hasF16C() const { return HasF16C; }
@@ -374,58 +519,108 @@ public:
   bool hasLZCNT() const { return HasLZCNT; }
   bool hasBMI() const { return HasBMI; }
   bool hasBMI2() const { return HasBMI2; }
+  bool hasVBMI() const { return HasVBMI; }
+  bool hasVBMI2() const { return HasVBMI2; }
+  bool hasIFMA() const { return HasIFMA; }
   bool hasRTM() const { return HasRTM; }
-  bool hasHLE() const { return HasHLE; }
   bool hasADX() const { return HasADX; }
   bool hasSHA() const { return HasSHA; }
-  bool hasPRFCHW() const { return HasPRFCHW; }
+  bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
+  bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
+  bool hasSSEPrefetch() const {
+    // We implicitly enable these when we have a write prefix supporting cache
+    // level OR if we have prfchw, but don't already have a read prefetch from
+    // 3dnow.
+    return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
+  }
   bool hasRDSEED() const { return HasRDSEED; }
   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
-  bool isBTMemSlow() const { return IsBTMemSlow; }
+  bool hasMWAITX() const { return HasMWAITX; }
+  bool hasCLZERO() const { return HasCLZERO; }
   bool isSHLDSlow() const { return IsSHLDSlow; }
+  bool isPMULLDSlow() const { return IsPMULLDSlow; }
   bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
+  int getGatherOverhead() const { return GatherOverhead; }
+  int getScatterOverhead() const { return ScatterOverhead; }
   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
+  bool hasFastVariableShuffle() const {
+    return HasFastVariableShuffle;
+  }
+  bool hasFastPartialYMMorZMMWrite() const {
+    return HasFastPartialYMMorZMMWrite;
+  }
+  bool hasFastGather() const { return HasFastGather; }
+  bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
+  bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
+  bool hasFastLZCNT() const { return HasFastLZCNT; }
+  bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+  bool hasMacroFusion() const { return HasMacroFusion; }
+  bool hasERMSB() const { return HasERMSB; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
-  bool callRegIndirect() const { return CallRegIndirect; }
+  bool slowTwoMemOps() const { return SlowTwoMemOps; }
   bool LEAusesAG() const { return LEAUsesAG; }
   bool slowLEA() const { return SlowLEA; }
+  bool slow3OpsLEA() const { return Slow3OpsLEA; }
   bool slowIncDec() const { return SlowIncDec; }
   bool hasCDI() const { return HasCDI; }
+  bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
   bool hasPFI() const { return HasPFI; }
   bool hasERI() const { return HasERI; }
   bool hasDQI() const { return HasDQI; }
   bool hasBWI() const { return HasBWI; }
   bool hasVLX() const { return HasVLX; }
   bool hasPKU() const { return HasPKU; }
+  bool hasVNNI() const { return HasVNNI; }
+  bool hasBITALG() const { return HasBITALG; }
   bool hasMPX() const { return HasMPX; }
+  bool hasSHSTK() const { return HasSHSTK; }
+  bool hasIBT() const { return HasIBT; }
+  bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
+  bool hasCLWB() const { return HasCLWB; }
+  bool useRetpoline() const { return UseRetpoline; }
+  bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
 
+  bool isXRaySupported() const override { return is64Bit(); }
+
+  X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
+
+  /// TODO: to be removed later and replaced with suitable properties
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }
 
+  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+  /// no-sse2). There isn't any reason to disable it if the target processor
+  /// supports it.
+  bool hasMFence() const { return hasSSE2() || is64Bit(); }
+
   const Triple &getTargetTriple() const { return TargetTriple; }
 
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
+  bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); }
   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
-  bool isTargetPS4() const { return TargetTriple.isPS4(); }
+  bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
 
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
 
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
+  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
+  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
 
   bool isTargetWindowsMSVC() const {
     return TargetTriple.isWindowsMSVCEnvironment();
@@ -455,15 +650,10 @@ public:
 
   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
 
-  bool isTargetWin64() const {
-    return In64BitMode && TargetTriple.isOSWindows();
-  }
+  bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
 
-  bool isTargetWin32() const {
-    return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC());
-  }
+  bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
 
-  bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
   bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
 
@@ -471,19 +661,14 @@ public:
     return PICStyle == PICStyles::StubPIC;
   }
 
-  bool isPICStyleStubNoDynamic() const {
-    return PICStyle == PICStyles::StubDynamicNoPIC;
-  }
-  bool isPICStyleStubAny() const {
-    return PICStyle == PICStyles::StubDynamicNoPIC ||
-           PICStyle == PICStyles::StubPIC;
-  }
+  bool isPositionIndependent() const { return TM.isPositionIndependent(); }
 
   bool isCallingConvWin64(CallingConv::ID CC) const {
     switch (CC) {
     // On Win64, all these conventions just use the default convention.
     case CallingConv::C:
     case CallingConv::Fast:
+    case CallingConv::Swift:
     case CallingConv::X86_FastCall:
     case CallingConv::X86_StdCall:
     case CallingConv::X86_ThisCall:
@@ -491,7 +676,7 @@ public:
     case CallingConv::Intel_OCL_BI:
       return isTargetWin64();
     // This convention allows using the Win64 convention on other targets.
-    case CallingConv::X86_64_Win64:
+    case CallingConv::Win64:
       return true;
     // This convention allows using the SysV convention on Windows targets.
     case CallingConv::X86_64_SysV:
@@ -502,33 +687,36 @@ public:
     }
   }
 
-  /// ClassifyGlobalReference - Classify a global variable reference for the
-  /// current subtarget according to how we should reference it in a non-pcrel
-  /// context.
-  unsigned char ClassifyGlobalReference(const GlobalValue *GV,
-                                        const TargetMachine &TM)const;
+  /// Classify a global variable reference for the current subtarget according
+  /// to how we should reference it in a non-pcrel context.
+  unsigned char classifyLocalReference(const GlobalValue *GV) const;
+
+  unsigned char classifyGlobalReference(const GlobalValue *GV,
+                                        const Module &M) const;
+  unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+
+  /// Classify a global function reference for the current subtarget.
+  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
+                                                const Module &M) const;
+  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
 
   /// Classify a blockaddress reference for the current subtarget according to
   /// how we should reference it in a non-pcrel context.
-  unsigned char ClassifyBlockAddressReference() const;
+  unsigned char classifyBlockAddressReference() const;
 
   /// Return true if the subtarget allows calls to immediate address.
-  bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
-
-  /// This function returns the name of a function which has an interface
-  /// like the non-standard bzero function, if such a function exists on
-  /// the current subtarget and it is considered prefereable over
-  /// memset with zero passed as the second argument. Otherwise it
-  /// returns null.
-  const char *getBZeroEntry() const;
+  bool isLegalToCallImmediateAddr() const;
 
-  /// This function returns true if the target has sincos() routine in its
-  /// compiler runtime or math libraries.
-  bool hasSinCos() const;
+  /// If we are using retpolines, we need to expand indirectbr to avoid it
+  /// lowering to an actual indirect jump.
+  bool enableIndirectBrExpand() const override { return useRetpoline(); }
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
+  // TODO: Update the regression tests and return true.
+  bool supportPrintSchedInfo() const override { return false; }
+
   bool enableEarlyIfConversion() const override;
 
   /// Return the instruction itineraries based on the subtarget selection.
@@ -539,8 +727,10 @@ public:
   AntiDepBreakMode getAntiDepBreakMode() const override {
     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
   }
+
+  bool enableAdvancedRASplitCost() const override { return true; }
 };
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H