2 files changed, 188 insertions, 444 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index dca98d999e5..8ef08c960f0 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -11,28 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86.h"
-
-#include "X86CallLowering.h"
-#include "X86LegalizerInfo.h"
-#include "X86RegisterBankInfo.h"
 #include "X86Subtarget.h"
-#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86InstrInfo.h"
 #include "X86TargetMachine.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/IR/Attributes.h"
-#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -55,127 +46,126 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
 
 /// Classify a blockaddress reference for the current subtarget according to how
 /// we should reference it in a non-pcrel context.
-unsigned char X86Subtarget::classifyBlockAddressReference() const {
-  return classifyLocalReference(nullptr);
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
+  if (isPICStyleGOT())    // 32-bit ELF targets.
+    return X86II::MO_GOTOFF;
+
+  if (isPICStyleStubPIC())   // Darwin/32 in PIC mode.
+    return X86II::MO_PIC_BASE_OFFSET;
+
+  // Direct static reference to label.
+  return X86II::MO_NO_FLAG;
 }
 
 /// Classify a global variable reference for the current subtarget according to
 /// how we should reference it in a non-pcrel context.
-unsigned char
-X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
-  return classifyGlobalReference(GV, *GV->getParent());
-}
-
-unsigned char
-X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
-  // 64 bits can use %rip addressing for anything local.
-  if (is64Bit())
-    return X86II::MO_NO_FLAG;
+unsigned char X86Subtarget::
+ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
+  // DLLImport only exists on windows, it is implemented as a load from a
+  // DLLIMPORT stub.
+  if (GV->hasDLLImportStorageClass())
+    return X86II::MO_DLLIMPORT;
 
-  // If this is for a position dependent executable, the static linker can
-  // figure it out.
-  if (!isPositionIndependent())
-    return X86II::MO_NO_FLAG;
+  bool isDef = GV->isStrongDefinitionForLinker();
+
+  // X86-64 in PIC mode.
+  if (isPICStyleRIPRel()) {
+    // Large model never uses stubs.
+    if (TM.getCodeModel() == CodeModel::Large)
+      return X86II::MO_NO_FLAG;
+
+    if (isTargetDarwin()) {
+      // If symbol visibility is hidden, the extra load is not needed if
+      // target is x86-64 or the symbol is definitely defined in the current
+      // translation unit.
+      if (GV->hasDefaultVisibility() && !isDef)
+        return X86II::MO_GOTPCREL;
+    } else if (!isTargetWin64()) {
+      assert(isTargetELF() && "Unknown rip-relative target");
+
+      // An extra load is needed for all externally visible symbols.
+      if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
+        return X86II::MO_GOTPCREL;
+    }
 
-  // The COFF dynamic linker just patches the executable sections.
-  if (isTargetCOFF())
     return X86II::MO_NO_FLAG;
+  }
 
-  if (isTargetDarwin()) {
-    // 32 bit macho has no relocation for a-b if a is undefined, even if
-    // b is in the section that is being relocated.
-    // This means we have to use o load even for GVs that are known to be
-    // local to the dso.
-    if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
-      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-
-    return X86II::MO_PIC_BASE_OFFSET;
+  if (isPICStyleGOT()) {   // 32-bit ELF targets.
+    // An extra load is needed for all externally visible symbols.
+    if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+      return X86II::MO_GOTOFF;
+    return X86II::MO_GOT;
   }
 
-  return X86II::MO_GOTOFF;
-}
+  if (isPICStyleStubPIC()) {  // Darwin/32 in PIC mode.
+    // Determine whether we have a stub reference and/or whether the reference
+    // is relative to the PIC base or not.
 
-unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
-                                                    const Module &M) const {
-  // Large model never uses stubs.
-  if (TM.getCodeModel() == CodeModel::Large)
-    return X86II::MO_NO_FLAG;
+    // If this is a strong reference to a definition, it is definitely not
+    // through a stub.
+    if (isDef)
+      return X86II::MO_PIC_BASE_OFFSET;
 
-  // Absolute symbols can be referenced directly.
-  if (GV) {
-    if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
-      // See if we can use the 8-bit immediate form. Note that some instructions
-      // will sign extend the immediate operand, so to be conservative we only
-      // accept the range [0,128).
-      if (CR->getUnsignedMax().ult(128))
-        return X86II::MO_ABS8;
-      else
-        return X86II::MO_NO_FLAG;
+    // Unless we have a symbol with hidden visibility, we have to go through a
+    // normal $non_lazy_ptr stub because this symbol might be resolved late.
+    if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
+      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
+
+    // If symbol visibility is hidden, we have a stub for common symbol
+    // references and external declarations.
+    if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
+      // Hidden $non_lazy_ptr reference.
+      return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
     }
-  }
 
-  if (TM.shouldAssumeDSOLocal(M, GV))
-    return classifyLocalReference(GV);
+    // Otherwise, no stub.
+    return X86II::MO_PIC_BASE_OFFSET;
+  }
 
-  if (isTargetCOFF())
-    return X86II::MO_DLLIMPORT;
+  if (isPICStyleStubNoDynamic()) {  // Darwin/32 in -mdynamic-no-pic mode.
+    // Determine whether we have a stub reference.
 
-  if (is64Bit())
-    return X86II::MO_GOTPCREL;
+    // If this is a strong reference to a definition, it is definitely not
+    // through a stub.
+    if (isDef)
+      return X86II::MO_NO_FLAG;
 
-  if (isTargetDarwin()) {
-    if (!isPositionIndependent())
+    // Unless we have a symbol with hidden visibility, we have to go through a
+    // normal $non_lazy_ptr stub because this symbol might be resolved late.
+    if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
       return X86II::MO_DARWIN_NONLAZY;
-    return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-  }
 
-  return X86II::MO_GOT;
-}
+    // Otherwise, no stub.
+    return X86II::MO_NO_FLAG;
+  }
 
-unsigned char
-X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {
-  return classifyGlobalFunctionReference(GV, *GV->getParent());
+  // Direct static reference to global.
+  return X86II::MO_NO_FLAG;
 }
 
-unsigned char
-X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
-                                              const Module &M) const {
-  if (TM.shouldAssumeDSOLocal(M, GV))
-    return X86II::MO_NO_FLAG;
 
-  if (isTargetCOFF()) {
-    assert(GV->hasDLLImportStorageClass() &&
-           "shouldAssumeDSOLocal gave inconsistent answer");
-    return X86II::MO_DLLIMPORT;
-  }
-
-  const Function *F = dyn_cast_or_null<Function>(GV);
-
-  if (isTargetELF()) {
-    if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
-      // According to psABI, PLT stub clobbers XMM8-XMM15.
-      // In Regcall calling convention those registers are used for passing
-      // parameters. Thus we need to prevent lazy binding in Regcall.
-      return X86II::MO_GOTPCREL;
-    if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())
-      return X86II::MO_GOTPCREL;
-    return X86II::MO_PLT;
-  }
+/// This function returns the name of a function which has an interface like
+/// the non-standard bzero function, if such a function exists on the
+/// current subtarget and it is considered preferable over memset with zero
+/// passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+  // Darwin 10 has a __bzero entry point for this purpose.
+  if (getTargetTriple().isMacOSX() &&
+      !getTargetTriple().isMacOSXVersionLT(10, 6))
+    return "__bzero";
 
-  if (is64Bit()) {
-    if (F && F->hasFnAttribute(Attribute::NonLazyBind))
-      // If the function is marked as non-lazy, generate an indirect call
-      // which loads from the GOT directly. This avoids runtime overhead
-      // at the cost of eager binding (and one extra byte of encoding).
-      return X86II::MO_GOTPCREL;
-    return X86II::MO_NO_FLAG;
-  }
+  return nullptr;
+}
 
-  return X86II::MO_NO_FLAG;
+bool X86Subtarget::hasSinCos() const {
+  return getTargetTriple().isMacOSX() &&
+    !getTargetTriple().isMacOSXVersionLT(10, 9) &&
+    is64Bit();
 }
 
 /// Return true if the subtarget allows calls to immediate address.
-bool X86Subtarget::isLegalToCallImmediateAddr() const {
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
   // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
   // but WinCOFFObjectWriter::RecordRelocation cannot emit them.  Once it does,
   // the following check for Win32 should be removed.
@@ -207,6 +197,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
       FullFS = "+sahf";
   }
 
+
   // Parse features string and set the CPU.
   ParseSubtargetFeatures(CPUName, FullFS);
 
@@ -236,49 +227,33 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   assert((!In64BitMode || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
-  // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
+  // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
   // 32 and 64 bit) and for all 64-bit targets.
   if (StackAlignOverride)
     stackAlignment = StackAlignOverride;
   else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
-           isTargetKFreeBSD() || In64BitMode)
+           In64BitMode)
     stackAlignment = 16;
-
-  // Some CPUs have more overhead for gather. The specified overhead is relative
-  // to the Load operation. "2" is the number provided by Intel architects. This
-  // parameter is used for cost estimation of Gather Op and comparison with
-  // other alternatives.
-  // TODO: Remove the explicit hasAVX512()?, That would mean we would only
-  // enable gather with a -march.
-  if (hasAVX512() || (hasAVX2() && hasFastGather()))
-    GatherOverhead = 2;
-  if (hasAVX512())
-    ScatterOverhead = 2;
 }
 
 void X86Subtarget::initializeEnvironment() {
   X86SSELevel = NoSSE;
   X863DNowLevel = NoThreeDNow;
-  HasX87 = false;
   HasCMov = false;
   HasX86_64 = false;
   HasPOPCNT = false;
   HasSSE4A = false;
   HasAES = false;
-  HasVAES = false;
   HasFXSR = false;
   HasXSAVE = false;
   HasXSAVEOPT = false;
   HasXSAVEC = false;
   HasXSAVES = false;
   HasPCLMUL = false;
-  HasVPCLMULQDQ = false;
-  HasGFNI = false;
   HasFMA = false;
   HasFMA4 = false;
   HasXOP = false;
   HasTBM = false;
-  HasLWP = false;
   HasMOVBE = false;
   HasRDRAND = false;
   HasF16C = false;
@@ -286,67 +261,39 @@ void X86Subtarget::initializeEnvironment() {
   HasLZCNT = false;
   HasBMI = false;
   HasBMI2 = false;
-  HasVBMI = false;
-  HasVBMI2 = false;
-  HasIFMA = false;
   HasRTM = false;
+  HasHLE = false;
   HasERI = false;
   HasCDI = false;
   HasPFI = false;
   HasDQI = false;
-  HasVPOPCNTDQ = false;
   HasBWI = false;
   HasVLX = false;
   HasADX = false;
   HasPKU = false;
-  HasVNNI = false;
-  HasBITALG = false;
   HasSHA = false;
-  HasPREFETCHWT1 = false;
   HasPRFCHW = false;
   HasRDSEED = false;
   HasLAHFSAHF = false;
-  HasMWAITX = false;
-  HasCLZERO = false;
   HasMPX = false;
-  HasSHSTK = false;
-  HasIBT = false;
-  HasSGX = false;
-  HasCLFLUSHOPT = false;
-  HasCLWB = false;
-  UseRetpoline = false;
-  UseRetpolineExternalThunk = false;
-  IsPMULLDSlow = false;
+  IsBTMemSlow = false;
   IsSHLDSlow = false;
   IsUAMem16Slow = false;
   IsUAMem32Slow = false;
   HasSSEUnalignedMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasFastVariableShuffle = false;
-  HasFastPartialYMMorZMMWrite = false;
-  HasFastGather = false;
-  HasFastScalarFSQRT = false;
-  HasFastVectorFSQRT = false;
-  HasFastLZCNT = false;
-  HasFastSHLDRotate = false;
-  HasMacroFusion = false;
-  HasERMSB = false;
   HasSlowDivide32 = false;
   HasSlowDivide64 = false;
   PadShortFunctions = false;
-  SlowTwoMemOps = false;
+  CallRegIndirect = false;
   LEAUsesAG = false;
   SlowLEA = false;
-  Slow3OpsLEA = false;
   SlowIncDec = false;
   stackAlignment = 4;
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
   UseSoftFloat = false;
-  X86ProcFamily = Others;
-  GatherOverhead = 1024;
-  ScatterOverhead = 1024;
 }
 
 X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -356,55 +303,41 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
   return *this;
 }
 
-X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
-                           const X86TargetMachine &TM,
+X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
+                           const std::string &FS, const X86TargetMachine &TM,
                            unsigned StackAlignOverride)
     : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
-      PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
+      PICStyle(PICStyles::None), TargetTriple(TT),
       StackAlignOverride(StackAlignOverride),
       In64BitMode(TargetTriple.getArch() == Triple::x86_64),
       In32BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() != Triple::CODE16),
       In16BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() == Triple::CODE16),
-      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
-      FrameLowering(*this, getStackAlignment()) {
+      TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+      TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
   // Determine the PICStyle based on the target selected.
-  if (!isPositionIndependent())
+  if (TM.getRelocationModel() == Reloc::Static) {
+    // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
     setPICStyle(PICStyles::None);
-  else if (is64Bit())
+  } else if (is64Bit()) {
+    // PIC in 64 bit mode is always rip-rel.
     setPICStyle(PICStyles::RIPRel);
-  else if (isTargetCOFF())
+  } else if (isTargetCOFF()) {
     setPICStyle(PICStyles::None);
-  else if (isTargetDarwin())
-    setPICStyle(PICStyles::StubPIC);
-  else if (isTargetELF())
+  } else if (isTargetDarwin()) {
+    if (TM.getRelocationModel() == Reloc::PIC_)
+      setPICStyle(PICStyles::StubPIC);
+    else {
+      assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
+      setPICStyle(PICStyles::StubDynamicNoPIC);
+    }
+  } else if (isTargetELF()) {
     setPICStyle(PICStyles::GOT);
-
-  CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
-  Legalizer.reset(new X86LegalizerInfo(*this, TM));
-
-  auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
-  RegBankInfo.reset(RBI);
-  InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
-}
-
-const CallLowering *X86Subtarget::getCallLowering() const {
-  return CallLoweringInfo.get();
-}
-
-const InstructionSelector *X86Subtarget::getInstructionSelector() const {
-  return InstSelector.get();
-}
-
-const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
-  return Legalizer.get();
-}
-
-const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
-  return RegBankInfo.get();
+  }
 }
 
 bool X86Subtarget::enableEarlyIfConversion() const {
   return hasCMov() && X86EarlyIfConv;
 }
+
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 37ffac1faf6..13d1026dcaa 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h
@@ -18,53 +18,32 @@
 #include "X86ISelLowering.h"
 #include "X86InstrInfo.h"
 #include "X86SelectionDAGInfo.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/CallingConv.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include <memory>
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
 
 #define GET_SUBTARGETINFO_HEADER
 #include "X86GenSubtargetInfo.inc"
 
 namespace llvm {
-
 class GlobalValue;
+class StringRef;
+class TargetMachine;
 
 /// The X86 backend supports a number of different styles of PIC.
 ///
 namespace PICStyles {
-
 enum Style {
-  StubPIC,          // Used on i386-darwin in pic mode.
-  GOT,              // Used on 32 bit elf on when in pic mode.
-  RIPRel,           // Used on X86-64 when in pic mode.
-  None              // Set when not in pic mode.
+  StubPIC,          // Used on i386-darwin in -fPIC mode.
+  StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode.
+  GOT,              // Used on many 32-bit unices in -fPIC mode.
+  RIPRel,           // Used on X86-64 when not in -static mode.
+  None              // Set when in -static mode (not PIC or DynamicNoPIC mode).
 };
-
-} // end namespace PICStyles
+}
 
 class X86Subtarget final : public X86GenSubtargetInfo {
-public:  
-  enum X86ProcFamilyEnum {
-    Others,
-    IntelAtom,
-    IntelSLM,
-    IntelGLM,
-    IntelHaswell,
-    IntelBroadwell,
-    IntelSkylake,
-    IntelKNL,
-    IntelSKX,
-    IntelCannonlake,
-    IntelIcelake,
-  };
 
 protected:
   enum X86SSEEnum {
@@ -75,23 +54,22 @@ protected:
     NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
   };
 
+  enum X86ProcFamilyEnum {
+    Others, IntelAtom, IntelSLM
+  };
+
   /// X86 processor family: Intel Atom, and others
   X86ProcFamilyEnum X86ProcFamily;
 
   /// Which PIC style to use
   PICStyles::Style PICStyle;
 
-  const TargetMachine &TM;
-
   /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
   X86SSEEnum X86SSELevel;
 
   /// MMX, 3DNow, 3DNow Athlon, or none supported.
   X863DNowEnum X863DNowLevel;
 
-  /// True if the processor supports X87 instructions.
-  bool HasX87;
-
   /// True if this processor has conditional move instructions
   /// (generally pentium pro+).
   bool HasCMov;
@@ -107,29 +85,21 @@ protected:
 
   /// Target has AES instructions
   bool HasAES;
-  bool HasVAES;
 
   /// Target has FXSAVE/FXRESTOR instructions
   bool HasFXSR;
 
   /// Target has XSAVE instructions
   bool HasXSAVE;
-
   /// Target has XSAVEOPT instructions
   bool HasXSAVEOPT;
-
   /// Target has XSAVEC instructions
   bool HasXSAVEC;
-
   /// Target has XSAVES instructions
   bool HasXSAVES;
 
   /// Target has carry-less multiplication
   bool HasPCLMUL;
-  bool HasVPCLMULQDQ;
-
-  /// Target has Galois Field Arithmetic instructions
-  bool HasGFNI;
 
   /// Target has 3-operand fused multiply-add
   bool HasFMA;
@@ -143,9 +113,6 @@ protected:
   /// Target has TBM instructions.
   bool HasTBM;
 
-  /// Target has LWP instructions
-  bool HasLWP;
-
   /// True if the processor has the MOVBE instruction.
   bool HasMOVBE;
 
@@ -167,18 +134,12 @@ protected:
   /// Processor has BMI2 instructions.
   bool HasBMI2;
 
-  /// Processor has VBMI instructions.
-  bool HasVBMI;
-
-  /// Processor has VBMI2 instructions.
-  bool HasVBMI2;
-
-  /// Processor has Integer Fused Multiply Add
-  bool HasIFMA;
-
   /// Processor has RTM instructions.
   bool HasRTM;
 
+  /// Processor has HLE.
+  bool HasHLE;
+
   /// Processor has ADX instructions.
   bool HasADX;
 
@@ -194,22 +155,12 @@ protected:
   /// Processor has LAHF/SAHF instructions.
   bool HasLAHFSAHF;
 
-  /// Processor has MONITORX/MWAITX instructions.
-  bool HasMWAITX;
-
-  /// Processor has Cache Line Zero instruction
-  bool HasCLZERO;
-
-  /// Processor has Prefetch with intent to Write instruction
-  bool HasPREFETCHWT1;
+  /// True if BT (bit test) of memory instructions are slow.
+  bool IsBTMemSlow;
 
   /// True if SHLD instructions are slow.
   bool IsSHLDSlow;
 
-  /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
-  //  PMULUDQ.
-  bool IsPMULLDSlow;
-
   /// True if unaligned memory accesses of 16-bytes are slow.
   bool IsUAMem16Slow;
 
@@ -228,53 +179,21 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
-  /// True if its preferable to combine to a single shuffle using a variable
-  /// mask over multiple fixed shuffles.
-  bool HasFastVariableShuffle;
-
-  /// True if there is no performance penalty to writing only the lower parts
-  /// of a YMM or ZMM register without clearing the upper part.
-  bool HasFastPartialYMMorZMMWrite;
-
-  /// True if gather is reasonably fast. This is true for Skylake client and
-  /// all AVX-512 CPUs.
-  bool HasFastGather;
-
-  /// True if hardware SQRTSS instruction is at least as fast (latency) as
-  /// RSQRTSS followed by a Newton-Raphson iteration.
-  bool HasFastScalarFSQRT;
-
-  /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
-  /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
-  bool HasFastVectorFSQRT;
-
   /// True if 8-bit divisions are significantly faster than
   /// 32-bit divisions and should be used when possible.
   bool HasSlowDivide32;
 
-  /// True if 32-bit divides are significantly faster than
+  /// True if 16-bit divides are significantly faster than
   /// 64-bit divisions and should be used when possible.
   bool HasSlowDivide64;
 
-  /// True if LZCNT instruction is fast.
-  bool HasFastLZCNT;
-
-  /// True if SHLD based rotate is fast.
-  bool HasFastSHLDRotate;
-
-  /// True if the processor supports macrofusion.
-  bool HasMacroFusion;
-
-  /// True if the processor has enhanced REP MOVSB/STOSB.
-  bool HasERMSB;
-
   /// True if the short functions should be padded to prevent
   /// a stall when returning too early.
   bool PadShortFunctions;
 
-  /// True if two memory operand instructions should use a temporary register
-  /// instead.
-  bool SlowTwoMemOps;
+  /// True if calls with a memory reference should be converted
+  /// to a register-based indirect call.
+  bool CallRegIndirect;
 
   /// True if the LEA instruction inputs have to be ready at address generation
   /// (AG) time.
@@ -283,11 +202,6 @@ protected:
   /// True if the LEA instruction with certain arguments is slow
   bool SlowLEA;
 
-  /// True if the LEA instruction has all three source operands: base, index,
-  /// and offset or if the LEA instruction uses base and index registers where
-  /// the base is EBP, RBP,or R13
-  bool Slow3OpsLEA;
-
   /// True if INC and DEC instructions are slow when writing to flags
   bool SlowIncDec;
 
@@ -300,9 +214,6 @@ protected:
   /// Processor has AVX-512 Conflict Detection Instructions
   bool HasCDI;
 
-  /// Processor has AVX-512 population count Instructions
-  bool HasVPOPCNTDQ;
-
   /// Processor has AVX-512 Doubleword and Quadword instructions
   bool HasDQI;
 
@@ -315,40 +226,9 @@ protected:
   /// Processor has PKU extenstions
   bool HasPKU;
 
-  /// Processor has AVX-512 Vector Neural Network Instructions
-  bool HasVNNI;
-
-  /// Processor has AVX-512 Bit Algorithms instructions
-  bool HasBITALG;
-
-  /// Processor supports MPX - Memory Protection Extensions
+  /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX;
 
-  /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
-  /// using Shadow Stack
-  bool HasSHSTK;
-
-  /// Processor supports CET IBT - Control-Flow Enforcement Technology
-  /// using Indirect Branch Tracking
-  bool HasIBT;
-
-  /// Processor has Software Guard Extensions
-  bool HasSGX;
-
-  /// Processor supports Flush Cache Line instruction
-  bool HasCLFLUSHOPT;
-
-  /// Processor supports Cache Line Write Back instruction
-  bool HasCLWB;
-
-  /// Use a retpoline thunk rather than indirect calls to block speculative
-  /// execution.
-  bool UseRetpoline;
-
-  /// When using a retpoline thunk, call an externally provided thunk rather
-  /// than emitting one inside the compiler.
-  bool UseRetpolineExternalThunk;
-
   /// Use software floating point for code generation.
   bool UseSoftFloat;
 
@@ -366,13 +246,8 @@ protected:
   /// Instruction itineraries for scheduling
   InstrItineraryData InstrItins;
 
-  /// GlobalISel related APIs.
-  std::unique_ptr<CallLowering> CallLoweringInfo;
-  std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
-  std::unique_ptr<InstructionSelector> InstSelector;
-
 private:
+
   /// Override the stack alignment.
   unsigned StackAlignOverride;
 
@@ -385,10 +260,6 @@ private:
   /// True if compiling for 16-bit, false for 32-bit or 64-bit.
   bool In16BitMode;
 
-  /// Contains the Overhead of gather\scatter instructions
-  int GatherOverhead;
-  int ScatterOverhead;
-
   X86SelectionDAGInfo TSInfo;
   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
   // X86TargetLowering needs.
@@ -400,23 +271,19 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+  X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
                const X86TargetMachine &TM, unsigned StackAlignOverride);
 
   const X86TargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
-
   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
-
   const X86FrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
-
   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
     return &TSInfo;
   }
-
   const X86RegisterInfo *getRegisterInfo() const override {
     return &getInstrInfo()->getRegisterInfo();
   }
@@ -434,19 +301,12 @@ public:
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
-  /// Methods used by Global ISel
-  const CallLowering *getCallLowering() const override;
-  const InstructionSelector *getInstructionSelector() const override;
-  const LegalizerInfo *getLegalizerInfo() const override;
-  const RegisterBankInfo *getRegBankInfo() const override;
-
 private:
   /// Initialize the full set of dependencies so we can use an initializer
   /// list for X86Subtarget.
   X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
   void initializeEnvironment();
   void initSubtargetFeatures(StringRef CPU, StringRef FS);
-
 public:
   /// Is this x86_64? (disregarding specific ABI / programming model)
   bool is64Bit() const {
@@ -476,7 +336,6 @@ public:
   PICStyles::Style getPICStyle() const { return PICStyle; }
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
-  bool hasX87() const { return HasX87; }
   bool hasCMov() const { return HasCMov; }
   bool hasSSE1() const { return X86SSELevel >= SSE1; }
   bool hasSSE2() const { return X86SSELevel >= SSE2; }
@@ -495,23 +354,19 @@ public:
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
   bool hasPOPCNT() const { return HasPOPCNT; }
   bool hasAES() const { return HasAES; }
-  bool hasVAES() const { return HasVAES; }
   bool hasFXSR() const { return HasFXSR; }
   bool hasXSAVE() const { return HasXSAVE; }
   bool hasXSAVEOPT() const { return HasXSAVEOPT; }
   bool hasXSAVEC() const { return HasXSAVEC; }
   bool hasXSAVES() const { return HasXSAVES; }
   bool hasPCLMUL() const { return HasPCLMUL; }
-  bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
-  bool hasGFNI() const { return HasGFNI; }
   // Prefer FMA4 to FMA - its better for commutation/memory folding and
   // has equal or better performance on all supported targets.
-  bool hasFMA() const { return HasFMA; }
+  bool hasFMA() const { return HasFMA && !HasFMA4; }
   bool hasFMA4() const { return HasFMA4; }
-  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
+  bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
   bool hasXOP() const { return HasXOP; }
   bool hasTBM() const { return HasTBM; }
-  bool hasLWP() const { return HasLWP; }
   bool hasMOVBE() const { return HasMOVBE; }
   bool hasRDRAND() const { return HasRDRAND; }
   bool hasF16C() const { return HasF16C; }
@@ -519,107 +374,58 @@ public:
   bool hasLZCNT() const { return HasLZCNT; }
   bool hasBMI() const { return HasBMI; }
   bool hasBMI2() const { return HasBMI2; }
-  bool hasVBMI() const { return HasVBMI; }
-  bool hasVBMI2() const { return HasVBMI2; }
-  bool hasIFMA() const { return HasIFMA; }
   bool hasRTM() const { return HasRTM; }
+  bool hasHLE() const { return HasHLE; }
   bool hasADX() const { return HasADX; }
   bool hasSHA() const { return HasSHA; }
-  bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
-  bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
-  bool hasSSEPrefetch() const {
-    // We implicitly enable these when we have a write prefix supporting cache
-    // level OR if we have prfchw, but don't already have a read prefetch from
-    // 3dnow.
-    return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
-  }
+  bool hasPRFCHW() const { return HasPRFCHW; }
   bool hasRDSEED() const { return HasRDSEED; }
   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
-  bool hasMWAITX() const { return HasMWAITX; }
-  bool hasCLZERO() const { return HasCLZERO; }
+  bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isSHLDSlow() const { return IsSHLDSlow; }
-  bool isPMULLDSlow() const { return IsPMULLDSlow; }
   bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
-  int getGatherOverhead() const { return GatherOverhead; }
-  int getScatterOverhead() const { return ScatterOverhead; }
   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasFastVariableShuffle() const {
-    return HasFastVariableShuffle;
-  }
-  bool hasFastPartialYMMorZMMWrite() const {
-    return HasFastPartialYMMorZMMWrite;
-  }
-  bool hasFastGather() const { return HasFastGather; }
-  bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
-  bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
-  bool hasFastLZCNT() const { return HasFastLZCNT; }
-  bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
-  bool hasMacroFusion() const { return HasMacroFusion; }
-  bool hasERMSB() const { return HasERMSB; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
-  bool slowTwoMemOps() const { return SlowTwoMemOps; }
+  bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
   bool slowLEA() const { return SlowLEA; }
-  bool slow3OpsLEA() const { return Slow3OpsLEA; }
   bool slowIncDec() const { return SlowIncDec; }
   bool hasCDI() const { return HasCDI; }
-  bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
   bool hasPFI() const { return HasPFI; }
   bool hasERI() const { return HasERI; }
   bool hasDQI() const { return HasDQI; }
   bool hasBWI() const { return HasBWI; }
   bool hasVLX() const { return HasVLX; }
   bool hasPKU() const { return HasPKU; }
-  bool hasVNNI() const { return HasVNNI; }
-  bool hasBITALG() const { return HasBITALG; }
   bool hasMPX() const { return HasMPX; }
-  bool hasSHSTK() const { return HasSHSTK; }
-  bool hasIBT() const { return HasIBT; }
-  bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
-  bool hasCLWB() const { return HasCLWB; }
-  bool useRetpoline() const { return UseRetpoline; }
-  bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
 
-  bool isXRaySupported() const override { return is64Bit(); }
-
-  X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
-
-  /// TODO: to be removed later and replaced with suitable properties
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }
 
-  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
-  /// no-sse2). There isn't any reason to disable it if the target processor
-  /// supports it.
-  bool hasMFence() const { return hasSSE2() || is64Bit(); }
-
   const Triple &getTargetTriple() const { return TargetTriple; }
 
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
-  bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
+  bool isTargetPS4() const { return TargetTriple.isPS4(); }
 
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
 
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
-  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
-  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
-  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
 
   bool isTargetWindowsMSVC() const {
     return TargetTriple.isWindowsMSVCEnvironment();
@@ -649,10 +455,15 @@ public:
 
   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
 
-  bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
+  bool isTargetWin64() const {
+    return In64BitMode && TargetTriple.isOSWindows();
+  }
 
-  bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
+  bool isTargetWin32() const {
+    return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC());
+  }
 
+  bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
   bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
 
@@ -660,14 +471,19 @@ public:
     return PICStyle == PICStyles::StubPIC;
   }
 
-  bool isPositionIndependent() const { return TM.isPositionIndependent(); }
+  bool isPICStyleStubNoDynamic() const {
+    return PICStyle == PICStyles::StubDynamicNoPIC;
+  }
+  bool isPICStyleStubAny() const {
+    return PICStyle == PICStyles::StubDynamicNoPIC ||
+           PICStyle == PICStyles::StubPIC;
+  }
 
   bool isCallingConvWin64(CallingConv::ID CC) const {
     switch (CC) {
     // On Win64, all these conventions just use the default convention.
     case CallingConv::C:
     case CallingConv::Fast:
-    case CallingConv::Swift:
     case CallingConv::X86_FastCall:
     case CallingConv::X86_StdCall:
     case CallingConv::X86_ThisCall:
@@ -675,7 +491,7 @@ public:
     case CallingConv::Intel_OCL_BI:
       return isTargetWin64();
     // This convention allows using the Win64 convention on other targets.
-    case CallingConv::Win64:
+    case CallingConv::X86_64_Win64:
       return true;
     // This convention allows using the SysV convention on Windows targets.
     case CallingConv::X86_64_SysV:
@@ -686,36 +502,33 @@ public:
     }
   }
 
-  /// Classify a global variable reference for the current subtarget according
-  /// to how we should reference it in a non-pcrel context.
-  unsigned char classifyLocalReference(const GlobalValue *GV) const;
-
-  unsigned char classifyGlobalReference(const GlobalValue *GV,
-                                        const Module &M) const;
-  unsigned char classifyGlobalReference(const GlobalValue *GV) const;
-
-  /// Classify a global function reference for the current subtarget.
-  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
-                                                const Module &M) const;
-  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
+  /// ClassifyGlobalReference - Classify a global variable reference for the
+  /// current subtarget according to how we should reference it in a non-pcrel
+  /// context.
+  unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+                                        const TargetMachine &TM)const;
 
   /// Classify a blockaddress reference for the current subtarget according to
   /// how we should reference it in a non-pcrel context.
-  unsigned char classifyBlockAddressReference() const;
+  unsigned char ClassifyBlockAddressReference() const;
 
   /// Return true if the subtarget allows calls to immediate address.
-  bool isLegalToCallImmediateAddr() const;
+  bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable over
+  /// memset with zero passed as the second argument. Otherwise it
+  /// returns null.
+  const char *getBZeroEntry() const;
 
-  /// If we are using retpolines, we need to expand indirectbr to avoid it
-  /// lowering to an actual indirect jump.
-  bool enableIndirectBrExpand() const override { return useRetpoline(); }
+  /// This function returns true if the target has a sincos() routine in its
+  /// compiler runtime or math libraries.
+  bool hasSinCos() const;
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
-  // TODO: Update the regression tests and return true.
-  bool supportPrintSchedInfo() const override { return false; }
-
   bool enableEarlyIfConversion() const override;
 
   /// Return the instruction itineraries based on the subtarget selection.
@@ -726,10 +539,8 @@ public:
   AntiDepBreakMode getAntiDepBreakMode() const override {
     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
   }
-
-  bool enableAdvancedRASplitCost() const override { return true; }
 };
 
-} // end namespace llvm
+} // End llvm namespace
 
-#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
+#endif