summaryrefslogtreecommitdiff
path: root/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lib/Target/X86/X86MCInstLower.cpp')
-rw-r--r--gnu/llvm/lib/Target/X86/X86MCInstLower.cpp749
1 files changed, 517 insertions, 232 deletions
diff --git a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
index e1ca558f0f2..b1db061482b 100644
--- a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -16,10 +16,12 @@
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -35,9 +37,16 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
using namespace llvm;
namespace {
@@ -61,9 +70,6 @@ public:
private:
MachineModuleInfoMachO &getMachOMMI() const;
- Mangler *getMang() const {
- return AsmPrinter.Mang;
- }
};
} // end anonymous namespace
@@ -72,47 +78,33 @@ private:
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
const MCSubtargetInfo &STI);
-namespace llvm {
- X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker(TargetMachine &TM)
- : TM(TM), InShadow(false), RequiredShadowSize(0), CurrentShadowSize(0) {}
-
- X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {}
-
- void
- X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) {
- MF = &F;
- CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
- *MF->getSubtarget().getInstrInfo(),
- *MF->getSubtarget().getRegisterInfo(), MF->getContext()));
- }
-
- void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
- const MCSubtargetInfo &STI) {
- if (InShadow) {
- SmallString<256> Code;
- SmallVector<MCFixup, 4> Fixups;
- raw_svector_ostream VecOS(Code);
- CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
- CurrentShadowSize += Code.size();
- if (CurrentShadowSize >= RequiredShadowSize)
- InShadow = false; // The shadow is big enough. Stop counting.
- }
+void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
+ const MCSubtargetInfo &STI,
+ MCCodeEmitter *CodeEmitter) {
+ if (InShadow) {
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
+ CurrentShadowSize += Code.size();
+ if (CurrentShadowSize >= RequiredShadowSize)
+ InShadow = false; // The shadow is big enough. Stop counting.
}
+}
- void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
+void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
- if (InShadow && CurrentShadowSize < RequiredShadowSize) {
- InShadow = false;
- EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
- MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
- }
+ if (InShadow && CurrentShadowSize < RequiredShadowSize) {
+ InShadow = false;
+ EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+ MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
}
+}
- void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
- SMShadowTracker.count(Inst, getSubtargetInfo());
- }
-} // end llvm namespace
+void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+ SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
+}
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
@@ -140,12 +132,8 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
// Handle dllimport linkage.
Name += "__imp_";
break;
- case X86II::MO_DARWIN_STUB:
- Suffix = "$stub";
- break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
- case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
Suffix = "$non_lazy_ptr";
break;
}
@@ -153,8 +141,6 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (!Suffix.empty())
Name += DL.getPrivateGlobalPrefix();
- unsigned PrefixLen = Name.size();
-
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
AsmPrinter.getNameWithPrefix(Name, GV);
@@ -164,14 +150,11 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(Suffix.empty());
Sym = MO.getMBB()->getSymbol();
}
- unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
if (!Sym)
Sym = Ctx.getOrCreateSymbol(Name);
- StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
-
// If the target flags on the operand changes the name of the symbol, do that
// before we return the symbol.
switch (MO.getTargetFlags()) {
@@ -189,36 +172,6 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
break;
}
- case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
- MachineModuleInfoImpl::StubValueTy &StubSym =
- getMachOMMI().getHiddenGVStubEntry(Sym);
- if (!StubSym.getPointer()) {
- assert(MO.isGlobal() && "Extern symbol not handled yet");
- StubSym =
- MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
- !MO.getGlobal()->hasInternalLinkage());
- }
- break;
- }
- case X86II::MO_DARWIN_STUB: {
- MachineModuleInfoImpl::StubValueTy &StubSym =
- getMachOMMI().getFnStubEntry(Sym);
- if (StubSym.getPointer())
- return Sym;
-
- if (MO.isGlobal()) {
- StubSym =
- MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
- !MO.getGlobal()->hasInternalLinkage());
- } else {
- StubSym =
- MachineModuleInfoImpl::
- StubValueTy(Ctx.getOrCreateSymbol(OrigName), false);
- }
- break;
- }
}
return Sym;
@@ -237,7 +190,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
- case X86II::MO_DARWIN_STUB:
break;
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
@@ -265,14 +217,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
- case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
Expr = MCSymbolRefExpr::create(Sym, Ctx);
// Subtract the pic base.
Expr = MCBinaryExpr::createSub(Expr,
MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI()) {
- assert(MAI.doesSetDirectiveSuppressesReloc());
+ assert(MAI.doesSetDirectiveSuppressReloc());
// If .set directive is supported, use it to reduce the number of
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
@@ -547,18 +498,13 @@ ReSimplify:
break;
}
- // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
- case X86::TAILJMPr:
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
+ { unsigned Opcode;
+ case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
- case X86::TAILJMPd64: {
- unsigned Opcode;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::TAILJMPr: Opcode = X86::JMP32r; break;
- case X86::TAILJMPd:
- case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
- }
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
+ SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
OutMI.setOpcode(Opcode);
@@ -653,50 +599,81 @@ ReSimplify:
// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar
case X86::MOV8mr_NOREX:
- case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o32a); break;
+ case X86::MOV8mr:
case X86::MOV8rm_NOREX:
- case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao32); break;
- case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o32a); break;
- case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao32); break;
- case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
- case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
-
- case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
- case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
- case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
- case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
- case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
- case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
- case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
- case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
- case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
- case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
- case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
- case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
- case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
- case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
- case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
- case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
- case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
- case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
- case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
- case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
- case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
- case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
- case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
- case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
- case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
- case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
- case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
- case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
- case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
- case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
- case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
- case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
- case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
- case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
- case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
- case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+ case X86::MOV8rm:
+ case X86::MOV16mr:
+ case X86::MOV16rm:
+ case X86::MOV32mr:
+ case X86::MOV32rm: {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: NewOpc = X86::MOV8o32a; break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: NewOpc = X86::MOV8ao32; break;
+ case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
+ case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
+ case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
+ case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
+ }
+ SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
+ break;
+ }
+
+ case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
+ case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
+ case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
+ case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
+ case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32:
+ case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
+ case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
+ case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
+ case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::ADC8ri: NewOpc = X86::ADC8i8; break;
+ case X86::ADC16ri: NewOpc = X86::ADC16i16; break;
+ case X86::ADC32ri: NewOpc = X86::ADC32i32; break;
+ case X86::ADC64ri32: NewOpc = X86::ADC64i32; break;
+ case X86::ADD8ri: NewOpc = X86::ADD8i8; break;
+ case X86::ADD16ri: NewOpc = X86::ADD16i16; break;
+ case X86::ADD32ri: NewOpc = X86::ADD32i32; break;
+ case X86::ADD64ri32: NewOpc = X86::ADD64i32; break;
+ case X86::AND8ri: NewOpc = X86::AND8i8; break;
+ case X86::AND16ri: NewOpc = X86::AND16i16; break;
+ case X86::AND32ri: NewOpc = X86::AND32i32; break;
+ case X86::AND64ri32: NewOpc = X86::AND64i32; break;
+ case X86::CMP8ri: NewOpc = X86::CMP8i8; break;
+ case X86::CMP16ri: NewOpc = X86::CMP16i16; break;
+ case X86::CMP32ri: NewOpc = X86::CMP32i32; break;
+ case X86::CMP64ri32: NewOpc = X86::CMP64i32; break;
+ case X86::OR8ri: NewOpc = X86::OR8i8; break;
+ case X86::OR16ri: NewOpc = X86::OR16i16; break;
+ case X86::OR32ri: NewOpc = X86::OR32i32; break;
+ case X86::OR64ri32: NewOpc = X86::OR64i32; break;
+ case X86::SBB8ri: NewOpc = X86::SBB8i8; break;
+ case X86::SBB16ri: NewOpc = X86::SBB16i16; break;
+ case X86::SBB32ri: NewOpc = X86::SBB32i32; break;
+ case X86::SBB64ri32: NewOpc = X86::SBB64i32; break;
+ case X86::SUB8ri: NewOpc = X86::SUB8i8; break;
+ case X86::SUB16ri: NewOpc = X86::SUB16i16; break;
+ case X86::SUB32ri: NewOpc = X86::SUB32i32; break;
+ case X86::SUB64ri32: NewOpc = X86::SUB64i32; break;
+ case X86::TEST8ri: NewOpc = X86::TEST8i8; break;
+ case X86::TEST16ri: NewOpc = X86::TEST16i16; break;
+ case X86::TEST32ri: NewOpc = X86::TEST32i32; break;
+ case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
+ case X86::XOR8ri: NewOpc = X86::XOR8i8; break;
+ case X86::XOR16ri: NewOpc = X86::XOR16i16; break;
+ case X86::XOR32ri: NewOpc = X86::XOR32i32; break;
+ case X86::XOR64ri32: NewOpc = X86::XOR64i32; break;
+ }
+ SimplifyShortImmForm(OutMI, NewOpc);
+ break;
+ }
// Try to shrink some forms of movsx.
case X86::MOVSX16rr8:
@@ -785,55 +762,77 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
.addExpr(tlsRef));
}
-/// \brief Emit the optimal amount of multi-byte nops on X86.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSubtargetInfo &STI) {
+/// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes
+/// bytes. Return the size of nop emitted.
+static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
+ const MCSubtargetInfo &STI) {
// This works only for 64bit. For 32bit we have to do additional checking if
// the CPU supports multi-byte nops.
assert(Is64Bit && "EmitNops only supports X86-64");
- while (NumBytes) {
- unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
- Opc = IndexReg = Displacement = SegmentReg = 0;
- BaseReg = X86::RAX; ScaleVal = 1;
- switch (NumBytes) {
- case 0: llvm_unreachable("Zero nops?"); break;
- case 1: NumBytes -= 1; Opc = X86::NOOP; break;
- case 2: NumBytes -= 2; Opc = X86::XCHG16ar; break;
- case 3: NumBytes -= 3; Opc = X86::NOOPL; break;
- case 4: NumBytes -= 4; Opc = X86::NOOPL; Displacement = 8; break;
- case 5: NumBytes -= 5; Opc = X86::NOOPL; Displacement = 8;
- IndexReg = X86::RAX; break;
- case 6: NumBytes -= 6; Opc = X86::NOOPW; Displacement = 8;
- IndexReg = X86::RAX; break;
- case 7: NumBytes -= 7; Opc = X86::NOOPL; Displacement = 512; break;
- case 8: NumBytes -= 8; Opc = X86::NOOPL; Displacement = 512;
- IndexReg = X86::RAX; break;
- case 9: NumBytes -= 9; Opc = X86::NOOPW; Displacement = 512;
- IndexReg = X86::RAX; break;
- default: NumBytes -= 10; Opc = X86::NOOPW; Displacement = 512;
- IndexReg = X86::RAX; SegmentReg = X86::CS; break;
- }
- unsigned NumPrefixes = std::min(NumBytes, 5U);
- NumBytes -= NumPrefixes;
- for (unsigned i = 0; i != NumPrefixes; ++i)
- OS.EmitBytes("\x66");
+ unsigned NopSize;
+ unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
+ Opc = IndexReg = Displacement = SegmentReg = 0;
+ BaseReg = X86::RAX;
+ ScaleVal = 1;
+ switch (NumBytes) {
+ case 0: llvm_unreachable("Zero nops?"); break;
+ case 1: NopSize = 1; Opc = X86::NOOP; break;
+ case 2: NopSize = 2; Opc = X86::XCHG16ar; break;
+ case 3: NopSize = 3; Opc = X86::NOOPL; break;
+ case 4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break;
+ case 5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8;
+ IndexReg = X86::RAX; break;
+ case 6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8;
+ IndexReg = X86::RAX; break;
+ case 7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break;
+ case 8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512;
+ IndexReg = X86::RAX; break;
+ case 9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512;
+ IndexReg = X86::RAX; break;
+ default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512;
+ IndexReg = X86::RAX; SegmentReg = X86::CS; break;
+ }
- switch (Opc) {
- default: llvm_unreachable("Unexpected opcode"); break;
- case X86::NOOP:
- OS.EmitInstruction(MCInstBuilder(Opc), STI);
- break;
- case X86::XCHG16ar:
- OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI);
- break;
- case X86::NOOPL:
- case X86::NOOPW:
- OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg)
- .addImm(ScaleVal).addReg(IndexReg)
- .addImm(Displacement).addReg(SegmentReg), STI);
- break;
- }
- } // while (NumBytes)
+ unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
+ NopSize += NumPrefixes;
+ for (unsigned i = 0; i != NumPrefixes; ++i)
+ OS.EmitBytes("\x66");
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ break;
+ case X86::NOOP:
+ OS.EmitInstruction(MCInstBuilder(Opc), STI);
+ break;
+ case X86::XCHG16ar:
+ OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI);
+ break;
+ case X86::NOOPL:
+ case X86::NOOPW:
+ OS.EmitInstruction(MCInstBuilder(Opc)
+ .addReg(BaseReg)
+ .addImm(ScaleVal)
+ .addReg(IndexReg)
+ .addImm(Displacement)
+ .addReg(SegmentReg),
+ STI);
+ break;
+ }
+ assert(NopSize <= NumBytes && "We overemitted?");
+ return NopSize;
+}
+
+/// \brief Emit the optimal amount of multi-byte nops on X86.
+static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
+ const MCSubtargetInfo &STI) {
+ unsigned NopsToEmit = NumBytes;
+ (void)NopsToEmit;
+ while (NumBytes) {
+ NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
+ assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
+ }
}
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
@@ -891,10 +890,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
X86MCInstLower &MCIL) {
- // FAULTING_LOAD_OP <def>, <handler label>, <load opcode>, <load operands>
+ // FAULTING_LOAD_OP <def>, <MBB handler>, <load opcode>, <load operands>
unsigned LoadDefRegister = MI.getOperand(0).getReg();
- MCSymbol *HandlerLabel = MI.getOperand(1).getMCSymbol();
+ MCSymbol *HandlerLabel = MI.getOperand(1).getMBB()->getSymbol();
unsigned LoadOpcode = MI.getOperand(2).getImm();
unsigned LoadOperandsBeginIdx = 3;
@@ -915,6 +914,43 @@ void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
}
+void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ // PATCHABLE_OP minsize, opcode, operands
+
+ unsigned MinSize = MI.getOperand(0).getImm();
+ unsigned Opcode = MI.getOperand(1).getImm();
+
+ MCInst MCI;
+ MCI.setOpcode(Opcode);
+ for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
+ MCI.addOperand(MaybeOperand.getValue());
+
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
+
+ if (Code.size() < MinSize) {
+ if (MinSize == 2 && Opcode == X86::PUSH64r) {
+ // This is an optimization that lets us get away without emitting a nop in
+ // many cases.
+ //
+ // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two
+ // bytes too, so the check on MinSize is important.
+ MCI.setOpcode(X86::PUSH64rmr);
+ } else {
+ unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
+ getSubtargetInfo());
+ assert(NopSize == MinSize && "Could not implement MinSize!");
+ (void) NopSize;
+ }
+ }
+
+ OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
+}
+
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
@@ -937,8 +973,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
PatchPointOpers opers(&MI);
unsigned ScratchIdx = opers.getNextScratchIdx();
unsigned EncodedBytes = 0;
- const MachineOperand &CalleeMO =
- opers.getMetaOper(PatchPointOpers::TargetPos);
+ const MachineOperand &CalleeMO = opers.getCallTarget();
// Check for null target. If target is non-null (i.e. is non-zero or is
// symbolic) then emit a call.
@@ -974,7 +1009,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
}
// Emit padding.
- unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ unsigned NumBytes = opers.getNumPatchBytes();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
@@ -982,14 +1017,107 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
+void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ // We want to emit the following pattern:
+ //
+ // .p2align 1, ...
+ // .Lxray_sled_N:
+ // jmp .tmpN
+ // # 9 bytes worth of noops
+ // .tmpN
+ //
+ // We need the 9 bytes because at runtime, we'd be patching over the full 11
+ // bytes with the following pattern:
+ //
+ // mov %r10, <function id, 32-bit> // 6 bytes
+ // call <relative offset, 32-bits> // 5 bytes
+ //
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitCodeAlignment(2);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+ // an operand (computed as an offset from the jmp instruction).
+ // FIXME: Find another less hacky way do force the relative jump.
+ OutStreamer->EmitBytes("\xeb\x09");
+ EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
+ OutStreamer->EmitLabel(Target);
+ recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
+}
+
+void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ // Since PATCHABLE_RET takes the opcode of the return statement as an
+ // argument, we use that to emit the correct form of the RET that we want.
+ // i.e. when we see this:
+ //
+ // PATCHABLE_RET X86::RET ...
+ //
+ // We should emit the RET followed by sleds.
+ //
+ // .p2align 1, ...
+ // .Lxray_sled_N:
+ // ret # or equivalent instruction
+ // # 10 bytes worth of noops
+ //
+ // This just makes sure that the alignment for the next instruction is 2.
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitCodeAlignment(2);
+ OutStreamer->EmitLabel(CurSled);
+ unsigned OpCode = MI.getOperand(0).getImm();
+ MCInst Ret;
+ Ret.setOpcode(OpCode);
+ for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
+ Ret.addOperand(MaybeOperand.getValue());
+ OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
+ EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
+ recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
+}
+
+void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) {
+ // Like PATCHABLE_RET, we have the actual instruction in the operands to this
+ // instruction so we lower that particular instruction and its operands.
+ // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
+ // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
+ // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
+ // tail call much like how we have it in PATCHABLE_RET.
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitCodeAlignment(2);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+ // an operand (computed as an offset from the jmp instruction).
+ // FIXME: Find another less hacky way do force the relative jump.
+ OutStreamer->EmitBytes("\xeb\x09");
+ EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
+ OutStreamer->EmitLabel(Target);
+ recordSled(CurSled, MI, SledKind::TAIL_CALL);
+
+ unsigned OpCode = MI.getOperand(0).getImm();
+ MCInst TC;
+ TC.setOpcode(OpCode);
+
+ // Before emitting the instruction, add a comment to indicate that this is
+ // indeed a tail call.
+ OutStreamer->AddComment("TAILCALL");
+ for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
+ TC.addOperand(MaybeOperand.getValue());
+ OutStreamer->EmitInstruction(TC, getSubtargetInfo());
+}
+
// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
const MachineBasicBlock *MBB = MBBI->getParent();
while (MBBI == MBB->begin()) {
- if (MBB == MBB->getParent()->begin())
- return nullptr;
+ if (MBB == &MBB->getParent()->front())
+ return MachineBasicBlock::const_iterator();
MBB = MBB->getPrevNode();
MBBI = MBB->end();
}
@@ -1017,8 +1145,9 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
return C;
}
-static std::string getShuffleComment(const MachineOperand &DstOp,
- const MachineOperand &SrcOp,
+static std::string getShuffleComment(const MachineInstr *MI,
+ unsigned SrcOp1Idx,
+ unsigned SrcOp2Idx,
ArrayRef<int> Mask) {
std::string Comment;
@@ -1031,40 +1160,73 @@ static std::string getShuffleComment(const MachineOperand &DstOp,
return X86ATTInstPrinter::getRegisterName(RegNum);
};
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
+ const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
+
StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
- StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
+ StringRef Src1Name =
+ SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
+ StringRef Src2Name =
+ SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
+
+ // One source operand, fix the mask to print all elements in one span.
+ SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
+ if (Src1Name == Src2Name)
+ for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
+ if (ShuffleMask[i] >= e)
+ ShuffleMask[i] -= e;
raw_string_ostream CS(Comment);
- CS << DstName << " = ";
- bool NeedComma = false;
- bool InSrc = false;
- for (int M : Mask) {
- // Wrap up any prior entry...
- if (M == SM_SentinelZero && InSrc) {
- InSrc = false;
- CS << "]";
+ CS << DstName;
+
+ // Handle AVX512 MASK/MASXZ write mask comments.
+ // MASK: zmmX {%kY}
+ // MASKZ: zmmX {%kY} {z}
+ if (SrcOp1Idx > 1) {
+ assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
+
+ const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
+ if (WriteMaskOp.isReg()) {
+ CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
+
+ if (SrcOp1Idx == 2) {
+ CS << " {z}";
+ }
}
- if (NeedComma)
- CS << ",";
- else
- NeedComma = true;
+ }
- // Print this shuffle...
- if (M == SM_SentinelZero) {
+ CS << " = ";
+
+ for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ CS << ",";
+ if (ShuffleMask[i] == SM_SentinelZero) {
CS << "zero";
- } else {
- if (!InSrc) {
- InSrc = true;
- CS << SrcName << "[";
- }
- if (M == SM_SentinelUndef)
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < (int)e;
+ CS << (isSrc1 ? Src1Name : Src2Name) << '[';
+
+ bool IsFirst = true;
+ while (i != e && ShuffleMask[i] != SM_SentinelZero &&
+ (ShuffleMask[i] < (int)e) == isSrc1) {
+ if (!IsFirst)
+ CS << ',';
+ else
+ IsFirst = false;
+ if (ShuffleMask[i] == SM_SentinelUndef)
CS << "u";
else
- CS << M;
+ CS << ShuffleMask[i] % (int)e;
+ ++i;
}
+ CS << ']';
+ --i; // For loop increments element #.
}
- if (InSrc)
- CS << "]";
CS.flush();
return Comment;
@@ -1074,6 +1236,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
+ // are compressed from EVEX encoding to VEX encoding.
+ if (TM.Options.MCOptions.ShowMCEncoding) {
+ if (MI->getAsmPrinterFlags() & AC_EVEX_2_VEX)
+ OutStreamer->AddComment("EVEX TO VEX Compression ", false);
+ }
+
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -1112,7 +1281,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TAILJMPd64:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
- case X86::TAILJMPd64_REX:
// Lower these as normal, but add some comments.
OutStreamer->AddComment("TAILCALL");
break;
@@ -1202,12 +1370,24 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::FAULTING_LOAD_OP:
return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);
+ case TargetOpcode::PATCHABLE_OP:
+ return LowerPATCHABLE_OP(*MI, MCInstLowering);
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*MI);
case TargetOpcode::PATCHPOINT:
return LowerPATCHPOINT(*MI, MCInstLowering);
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_RET:
+ return LowerPATCHABLE_RET(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
case X86::MORESTACK_RET:
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
return;
@@ -1221,40 +1401,50 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
case X86::SEH_PushReg:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
return;
case X86::SEH_SaveReg:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;
case X86::SEH_SaveXMM:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;
case X86::SEH_StackAlloc:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
return;
case X86::SEH_SetFrame:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;
case X86::SEH_PushFrame:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
return;
case X86::SEH_EndPrologue:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIEndProlog();
return;
case X86::SEH_Epilogue: {
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
MachineBasicBlock::const_iterator MBBI(MI);
// Check if preceded by a call and emit nop if so.
- for (MBBI = PrevCrossBBInst(MBBI); MBBI; MBBI = PrevCrossBBInst(MBBI)) {
+ for (MBBI = PrevCrossBBInst(MBBI);
+ MBBI != MachineBasicBlock::const_iterator();
+ MBBI = PrevCrossBBInst(MBBI)) {
// Conservatively assume that pseudo instructions don't emit code and keep
// looking for a call. We may emit an unnecessary nop in some cases.
if (!MBBI->isPseudo()) {
@@ -1305,42 +1495,130 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(MI->getNumOperands() >= 6 &&
"We should always have at least 6 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
- const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
- SmallVector<int, 16> Mask;
+ SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
+
case X86::VPERMILPSrm:
- case X86::VPERMILPDrm:
case X86::VPERMILPSYrm:
- case X86::VPERMILPDYrm: {
+ case X86::VPERMILPSZ128rm:
+ case X86::VPERMILPSZ128rmk:
+ case X86::VPERMILPSZ128rmkz:
+ case X86::VPERMILPSZ256rm:
+ case X86::VPERMILPSZ256rmk:
+ case X86::VPERMILPSZ256rmkz:
+ case X86::VPERMILPSZrm:
+ case X86::VPERMILPSZrmk:
+ case X86::VPERMILPSZrmkz:
+ case X86::VPERMILPDrm:
+ case X86::VPERMILPDYrm:
+ case X86::VPERMILPDZ128rm:
+ case X86::VPERMILPDZ128rmk:
+ case X86::VPERMILPDZ128rmkz:
+ case X86::VPERMILPDZ256rm:
+ case X86::VPERMILPDZ256rmk:
+ case X86::VPERMILPDZ256rmkz:
+ case X86::VPERMILPDZrm:
+ case X86::VPERMILPDZrmk:
+ case X86::VPERMILPDZrmkz: {
if (!OutStreamer->isVerboseAsm())
break;
- assert(MI->getNumOperands() > 5 &&
- "We should always have at least 5 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(1);
- const MachineOperand &MaskOp = MI->getOperand(5);
-
+ unsigned SrcIdx, MaskIdx;
unsigned ElSize;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break;
- case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break;
+ case X86::VPERMILPSrm:
+ case X86::VPERMILPSYrm:
+ case X86::VPERMILPSZ128rm:
+ case X86::VPERMILPSZ256rm:
+ case X86::VPERMILPSZrm:
+ SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
+ case X86::VPERMILPSZ128rmkz:
+ case X86::VPERMILPSZ256rmkz:
+ case X86::VPERMILPSZrmkz:
+ SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
+ case X86::VPERMILPSZ128rmk:
+ case X86::VPERMILPSZ256rmk:
+ case X86::VPERMILPSZrmk:
+ SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
+ case X86::VPERMILPDrm:
+ case X86::VPERMILPDYrm:
+ case X86::VPERMILPDZ128rm:
+ case X86::VPERMILPDZ256rm:
+ case X86::VPERMILPDZrm:
+ SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
+ case X86::VPERMILPDZ128rmkz:
+ case X86::VPERMILPDZ256rmkz:
+ case X86::VPERMILPDZrmkz:
+ SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
+ case X86::VPERMILPDZ128rmk:
+ case X86::VPERMILPDZ256rmk:
+ case X86::VPERMILPDZrmk:
+ SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
}
+ assert(MI->getNumOperands() >= 6 &&
+ "We should always have at least 6 operands!");
+
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ }
+ break;
+ }
+
+ case X86::VPERMIL2PDrm:
+ case X86::VPERMIL2PSrm:
+ case X86::VPERMIL2PDrmY:
+ case X86::VPERMIL2PSrmY: {
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() >= 8 &&
+ "We should always have at least 8 operands!");
+
+ const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
+ if (!CtrlOp.isImm())
+ break;
+
+ unsigned ElSize;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
+ case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
+ }
+
+ const MachineOperand &MaskOp = MI->getOperand(6);
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
+ if (!Mask.empty())
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ }
+ break;
+ }
+
+ case X86::VPPERMrrm: {
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() >= 7 &&
+ "We should always have at least 7 operands!");
+
+ const MachineOperand &MaskOp = MI->getOperand(6);
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodeVPPERMMask(C, Mask);
+ if (!Mask.empty())
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -1378,7 +1656,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CASE_ALL_MOV_RM()
if (!OutStreamer->isVerboseAsm())
break;
- if (MI->getNumOperands() > 4)
+ if (MI->getNumOperands() <= 4)
+ break;
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
std::string Comment;
raw_string_ostream CS(Comment);
@@ -1413,7 +1692,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << CI->getZExtValue();
} else {
// print multi-word constant as (w0,w1)
- auto Val = CI->getValue();
+ const auto &Val = CI->getValue();
CS << "(";
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
if (i > 0)
@@ -1446,7 +1725,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// is at the end of the shadow.
if (MI->isCall()) {
// Count then size of the call towards the shadow
- SMShadowTracker.count(TmpInst, getSubtargetInfo());
+ SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
// Then flush the shadow so that we fill with nops before the call, not
// after it.
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
@@ -1457,3 +1736,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitAndCountInstruction(TmpInst);
}
+
+/// Emit Trap bytes to the specified power of two alignment
+void X86AsmPrinter::EmitTrapToAlignment(unsigned NumBits) const {
+ if (NumBits == 0) return;
+ OutStreamer->EmitValueToAlignment(1u << NumBits, 0xCC, 1);
+}