diff options
author | Pascal Stumpf <pascal@cvs.openbsd.org> | 2016-09-03 22:47:02 +0000 |
---|---|---|
committer | Pascal Stumpf <pascal@cvs.openbsd.org> | 2016-09-03 22:47:02 +0000 |
commit | 807edb5007d648972cac964f4d9a30bae985f682 (patch) | |
tree | ca7778a2715f931a12e3e3e15d1414ed0cfde644 /gnu | |
parent | 0321dc16ecb1a7a77af14225a29d9a1b9bcc4563 (diff) |
Use the space freed up by sparc and zaurus to import LLVM.
ok hackroom@
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 813 | ||||
-rw-r--r-- | gnu/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 3128 | ||||
-rw-r--r-- | gnu/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 151 |
3 files changed, 971 insertions, 3121 deletions
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 04aa3c9b1e2..ec354c209ca 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,33 +16,30 @@ // //===----------------------------------------------------------------------===// +#include "PPC.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" -#include "PPC.h" -#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/BinaryFormat/MachO.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -52,28 +49,22 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/SectionKind.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <memory> -#include <new> - +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "asmprinter" namespace { - class PPCAsmPrinter : public AsmPrinter { protected: MapVector<MCSymbol *, MCSymbol *> TOC; @@ -85,16 +76,12 @@ public: std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} - StringRef getPassName() const override { return "PowerPC Assembly Printer"; } - - MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); - - bool doInitialization(Module &M) override { - if (!TOC.empty()) - TOC.clear(); - return AsmPrinter::doInitialization(M); + const char *getPassName() const override { + return "PowerPC Assembly Printer"; } + MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); + void EmitInstruction(const MachineInstr *MI) override; void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); @@ -113,9 +100,7 @@ public: void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget<PPCSubtarget>(); - bool Changed = AsmPrinter::runOnMachineFunction(MF); - emitXRayTable(); - return Changed; + return AsmPrinter::runOnMachineFunction(MF); } }; @@ -126,7 +111,7 @@ public: std::unique_ptr<MCStreamer> Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) {} - StringRef getPassName() const override { + const char *getPassName() const override { return "Linux PPC Assembly Printer"; } @@ -137,7 +122,6 @@ public: void EmitFunctionBodyStart() override; void EmitFunctionBodyEnd() override; - void EmitInstruction(const MachineInstr *MI) override; }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -148,15 +132,33 @@ public: std::unique_ptr<MCStreamer> Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) {} - StringRef getPassName() const override { + const char *getPassName() const override { return "Darwin PPC Assembly Printer"; } bool doFinalization(Module &M) override; void EmitStartOfAsmFile(Module &M) override; + + void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs); }; +} // end of anonymous namespace + +/// stripRegisterPrefix - This method strips the character prefix from a +/// register name so that only the number is left. Used by for linux asm. +static const char *stripRegisterPrefix(const char *RegName) { + switch (RegName[0]) { + case 'r': + case 'f': + case 'q': // for QPX + case 'v': + if (RegName[1] == 's') + return RegName + 2; + return RegName + 1; + case 'c': if (RegName[1] == 'r') return RegName + 2; + } -} // end anonymous namespace + return RegName; +} void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { @@ -165,15 +167,11 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, switch (MO.getType()) { case MachineOperand::MO_Register: { - unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(), - MO.getReg(), OpNo); - - const char *RegName = PPCInstPrinter::getRegisterName(Reg); - + const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg()); // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? if (!Subtarget->isDarwin()) - RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); + RegName = stripRegisterPrefix(RegName); O << RegName; return; } @@ -197,14 +195,29 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, MCSymbol *SymToPrint; // External or weakly linked global variables need non-lazily-resolved stubs - if (Subtarget->hasLazyResolverStub(GV)) { - SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry( - SymToPrint); - if (!StubSym.getPointer()) - StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), - !GV->hasInternalLinkage()); + if (TM.getRelocationModel() != Reloc::Static && + !GV->isStrongDefinitionForLinker()) { + if (!GV->hasHiddenVisibility()) { + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry( + SymToPrint); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl:: + StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); + } else if (GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) { + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry( + SymToPrint); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl:: + StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); + } else { + SymToPrint = getSymbol(GV); + } } else { SymToPrint = getSymbol(GV); } @@ -250,21 +263,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (MI->getOperand(OpNo).isImm()) O << "i"; return false; - case 'x': - if(!MI->getOperand(OpNo).isReg()) - return true; - // This operand uses VSX numbering. - // If the operand is a VMX register, convert it to a VSX register. - unsigned Reg = MI->getOperand(OpNo).getReg(); - if (PPCInstrInfo::isVRRegister(Reg)) - Reg = PPC::VSX32 + (Reg - PPC::V0); - else if (PPCInstrInfo::isVFRegister(Reg)) - Reg = PPC::VSX32 + (Reg - PPC::VF0); - const char *RegName; - RegName = PPCInstPrinter::getRegisterName(Reg); - RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); - O << RegName; - return false; } } @@ -289,7 +287,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, { const char *RegName = "r0"; if (!Subtarget->isDarwin()) - RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); + RegName = stripRegisterPrefix(RegName); O << RegName << ", "; printOperand(MI, OpNo, O); return false; @@ -327,7 +325,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { } void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { - emitStackMaps(SM); + SM.serializeToStackMapSection(); } void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) { @@ -362,10 +360,11 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { PatchPointOpers Opers(&MI); unsigned EncodedBytes = 0; - const MachineOperand &CalleeMO = Opers.getCallTarget(); + const MachineOperand &CalleeMO = + Opers.getMetaOper(PatchPointOpers::TargetPos); if (CalleeMO.isImm()) { - int64_t CallTarget = CalleeMO.getImm(); + int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm(); if (CallTarget) { assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); @@ -392,7 +391,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { .addImm(CallTarget & 0xFFFF)); // Save the current TOC pointer before the remote call. - int TOCSaveOffset = Subtarget->getFrameLowering()->getTOCSaveOffset(); + int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD) .addReg(PPC::X2) .addImm(TOCSaveOffset) @@ -444,7 +443,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { EncodedBytes *= 4; // Emit padding. - unsigned NumBytes = Opers.getNumPatchBytes(); + unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); assert(NumBytes >= EncodedBytes && "Patchpoint can't request size less than the length of a call."); assert((NumBytes - EncodedBytes) % 4 == 0 && @@ -471,7 +470,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, "GETtls[ld]ADDR[32] must read GPR3"); if (!Subtarget->isPPC64() && !Subtarget->isDarwin() && - isPositionIndependent()) + TM.getRelocationModel() == Reloc::PIC_) Kind = MCSymbolRefExpr::VK_PLT; const MCSymbolRefExpr *TlsRef = MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext); @@ -493,35 +492,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; bool isPPC64 = Subtarget->isPPC64(); bool isDarwin = TM.getTargetTriple().isOSDarwin(); - const Module *M = MF->getFunction().getParent(); + const Module *M = MF->getFunction()->getParent(); PICLevel::Level PL = M->getPICLevel(); -#ifndef NDEBUG - // Validate that SPE and FPU are mutually exclusive in codegen - if (!MI->isInlineAsm()) { - for (const MachineOperand &MO: MI->operands()) { - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (Subtarget->hasSPE()) { - if (PPC::F4RCRegClass.contains(Reg) || - PPC::F8RCRegClass.contains(Reg) || - PPC::QBRCRegClass.contains(Reg) || - PPC::QFRCRegClass.contains(Reg) || - PPC::QSRCRegClass.contains(Reg) || - PPC::VFRCRegClass.contains(Reg) || - PPC::VRRCRegClass.contains(Reg) || - PPC::VSFRCRegClass.contains(Reg) || - PPC::VSSRCRegClass.contains(Reg) - ) - llvm_unreachable("SPE targets cannot have FPRegs!"); - } else { - if (PPC::SPERCRegClass.contains(Reg)) - llvm_unreachable("SPE register found in FPU-targeted code!"); - } - } - } - } -#endif // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { default: break; @@ -533,7 +506,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return LowerPATCHPOINT(SM, *MI); case PPC::MoveGOTtoLR: { - // Transform %lr = MoveGOTtoLR + // Transform %LR = MoveGOTtoLR // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4 // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding // _GLOBAL_OFFSET_TABLE_) has exactly one instruction: @@ -554,7 +527,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::MovePCtoLR: case PPC::MovePCtoLR8: { - // Transform %lr = MovePCtoLR + // Transform %LR = MovePCtoLR // Into this, where the label is the PIC base: // bl L1$pb // L1$pb: @@ -572,69 +545,39 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::UpdateGBR: { - // Transform %rd = UpdateGBR(%rt, %ri) - // Into: lwz %rt, .L0$poff - .L0$pb(%ri) - // add %rd, %rt, %ri - // or into (if secure plt mode is on): - // addis r30, r30, .LTOC - .L0$pb@ha - // addi r30, r30, .LTOC - .L0$pb@l + // Transform %Rd = UpdateGBR(%Rt, %Ri) + // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri) + // add %Rd, %Rt, %Ri // Get the offset from the GOT Base Register to the GOT LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); - if (Subtarget->isSecurePlt() && isPositionIndependent() ) { - unsigned PICR = TmpInst.getOperand(0).getReg(); - MCSymbol *LTOCSymbol = OutContext.getOrCreateSymbol(StringRef(".LTOC")); - const MCExpr *PB = - MCSymbolRefExpr::create(MF->getPICBaseSymbol(), - OutContext); - - const MCExpr *LTOCDeltaExpr = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(LTOCSymbol, OutContext), - PB, OutContext); - - const MCExpr *LTOCDeltaHi = - PPCMCExpr::createHa(LTOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) - .addReg(PICR) - .addReg(PICR) - .addExpr(LTOCDeltaHi)); + MCSymbol *PICOffset = + MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(); + TmpInst.setOpcode(PPC::LWZ); + const MCExpr *Exp = + MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); + const MCExpr *PB = + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), + MCSymbolRefExpr::VK_None, + OutContext); + const MCOperand TR = TmpInst.getOperand(1); + const MCOperand PICR = TmpInst.getOperand(0); + + // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri) + TmpInst.getOperand(1) = + MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext)); + TmpInst.getOperand(0) = TR; + TmpInst.getOperand(2) = PICR; + EmitToStreamer(*OutStreamer, TmpInst); - const MCExpr *LTOCDeltaLo = - PPCMCExpr::createLo(LTOCDeltaExpr, false, OutContext); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) - .addReg(PICR) - .addReg(PICR) - .addExpr(LTOCDeltaLo)); - return; - } else { - MCSymbol *PICOffset = - MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(); - TmpInst.setOpcode(PPC::LWZ); - const MCExpr *Exp = - MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); - const MCExpr *PB = - MCSymbolRefExpr::create(MF->getPICBaseSymbol(), - MCSymbolRefExpr::VK_None, - OutContext); - const MCOperand TR = TmpInst.getOperand(1); - const MCOperand PICR = TmpInst.getOperand(0); - - // Step 1: lwz %rt, .L$poff - .L$pb(%ri) - TmpInst.getOperand(1) = - MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext)); - TmpInst.getOperand(0) = TR; - TmpInst.getOperand(2) = PICR; - EmitToStreamer(*OutStreamer, TmpInst); - - TmpInst.setOpcode(PPC::ADD4); - TmpInst.getOperand(0) = PICR; - TmpInst.getOperand(1) = TR; - TmpInst.getOperand(2) = PICR; - EmitToStreamer(*OutStreamer, TmpInst); - return; - } + TmpInst.setOpcode(PPC::ADD4); + TmpInst.getOperand(0) = PICR; + TmpInst.getOperand(1) = TR; + TmpInst.getOperand(2) = PICR; + EmitToStreamer(*OutStreamer, TmpInst); + return; } case PPC::LWZtoc: { - // Transform %r3 = LWZtoc @min1, %r2 + // Transform %R3 = LWZtoc <ga:@min1>, %R2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LWZ, and the global address operand to be a @@ -654,7 +597,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isBlockAddress()) MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - if (PL == PICLevel::SmallPIC) { + if (PL == PICLevel::Small) { const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT, OutContext); @@ -678,7 +621,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocCPT: case PPC::LDtocBA: case PPC::LDtoc: { - // Transform %x3 = LDtoc @min1, %x2 + // Transform %X3 = LDtoc <ga:@min1>, %X2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD, and the global address operand to be a @@ -709,7 +652,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDIStocHA: { - // Transform %xd = ADDIStocHA %x2, @sym + // Transform %Xd = ADDIStocHA %X2, <ga:@sym> LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to ADDIS8. If the global address is external, has @@ -744,19 +687,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, OutContext); - - if (!MO.isJTI() && MO.getOffset()) - Exp = MCBinaryExpr::createAdd(Exp, - MCConstantExpr::create(MO.getOffset(), - OutContext), - OutContext); - TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } case PPC::LDtocL: { - // Transform %xd = LDtocL @sym, %xs + // Transform %Xd = LDtocL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD. If the global address is external, has @@ -783,11 +719,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); MOSymbol = getSymbol(GV); - LLVM_DEBUG( - unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - assert((GVFlags & PPCII::MO_NLP_FLAG) && - "LDtocL used on symbol that could be accessed directly is " - "invalid. Must match ADDIStocHA.")); + DEBUG( + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + assert((GVFlags & PPCII::MO_NLP_FLAG) && + "LDtocL used on symbol that could be accessed directly is " + "invalid. Must match ADDIStocHA.")); MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -799,7 +735,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDItocL: { - // Transform %xd = ADDItocL %xs, @sym + // Transform %Xd = ADDItocL %Xs, <ga:@sym> LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to ADDI8. If the global address is external, then @@ -812,9 +748,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); - LLVM_DEBUG(unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - assert(!(GVFlags & PPCII::MO_NLP_FLAG) && - "Interposable definitions must use indirect access.")); + DEBUG( + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + assert ( + !(GVFlags & PPCII::MO_NLP_FLAG) && + "Interposable definitions must use indirect access.")); MOSymbol = getSymbol(GV); } else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); @@ -828,8 +766,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDISgotTprelHA: { - // Transform: %xd = ADDISgotTprelHA %x2, @sym - // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha + // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -845,7 +783,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { - // Transform %xd = LDgotTprelL @sym, %xs + // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD. @@ -906,8 +844,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDIStlsgdHA: { - // Transform: %xd = ADDIStlsgdHA %x2, @sym - // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha + // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -922,11 +860,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDItlsgdL: - // Transform: %xd = ADDItlsgdL %xs, @sym - // Into: %xd = ADDI8 %xs, sym@got@tlsgd@l + // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> + // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l case PPC::ADDItlsgdL32: { - // Transform: %rd = ADDItlsgdL32 %rs, @sym - // Into: %rd = ADDI %rs, sym@got@tlsgd + // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym> + // Into: %Rd = ADDI %Rs, sym@got@tlsgd const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -942,17 +880,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::GETtlsADDR: - // Transform: %x3 = GETtlsADDR %x3, @sym + // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd) case PPC::GETtlsADDR32: { - // Transform: %r3 = GETtlsADDR32 %r3, @sym + // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym> // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD); return; } case PPC::ADDIStlsldHA: { - // Transform: %xd = ADDIStlsldHA %x2, @sym - // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha + // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -967,11 +905,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDItlsldL: - // Transform: %xd = ADDItlsldL %xs, @sym - // Into: %xd = ADDI8 %xs, sym@got@tlsld@l + // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> + // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l case PPC::ADDItlsldL32: { - // Transform: %rd = ADDItlsldL32 %rs, @sym - // Into: %rd = ADDI %rs, sym@got@tlsld + // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym> + // Into: %Rd = ADDI %Rs, sym@got@tlsld const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -987,20 +925,20 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::GETtlsldADDR: - // Transform: %x3 = GETtlsldADDR %x3, @sym + // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld) case PPC::GETtlsldADDR32: { - // Transform: %r3 = GETtlsldADDR32 %r3, @sym + // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym> // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD); return; } case PPC::ADDISdtprelHA: - // Transform: %xd = ADDISdtprelHA %xs, @sym - // Into: %xd = ADDIS8 %xs, sym@dtprel@ha + // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym> + // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha case PPC::ADDISdtprelHA32: { - // Transform: %rd = ADDISdtprelHA32 %rs, @sym - // Into: %rd = ADDIS %rs, sym@dtprel@ha + // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym> + // Into: %Rd = ADDIS %Rs, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -1016,11 +954,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDIdtprelL: - // Transform: %xd = ADDIdtprelL %xs, @sym - // Into: %xd = ADDI8 %xs, sym@dtprel@l + // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> + // Into: %Xd = ADDI8 %Xs, sym@dtprel@l case PPC::ADDIdtprelL32: { - // Transform: %rd = ADDIdtprelL32 %rs, @sym - // Into: %rd = ADDI %rs, sym@dtprel@l + // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym> + // Into: %Rd = ADDI %Rs, sym@dtprel@l const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -1037,8 +975,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::MFOCRF: case PPC::MFOCRF8: if (!Subtarget->hasMFOCRF()) { - // Transform: %r3 = MFOCRF %cr7 - // Into: %r3 = MFCR ;; cr7 + // Transform: %R3 = MFOCRF %CR7 + // Into: %R3 = MFCR ;; cr7 unsigned NewOpcode = MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8; OutStreamer->AddComment(PPCInstPrinter:: @@ -1051,8 +989,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::MTOCRF: case PPC::MTOCRF8: if (!Subtarget->hasMFOCRF()) { - // Transform: %cr7 = MTOCRF %r3 - // Into: MTCRF mask, %r3 ;; cr7 + // Transform: %CR7 = MTOCRF %R3 + // Into: MTCRF mask, %R3 ;; cr7 unsigned NewOpcode = MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8; unsigned Mask = 0x80 >> OutContext.getRegisterInfo() @@ -1090,144 +1028,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); } -void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) { - if (!Subtarget->isPPC64()) - return PPCAsmPrinter::EmitInstruction(MI); - - switch (MI->getOpcode()) { - default: - return PPCAsmPrinter::EmitInstruction(MI); - case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { - // .begin: - // b .end # lis 0, FuncId[16..32] - // nop # li 0, FuncId[0..15] - // std 0, -8(1) - // mflr 0 - // bl __xray_FunctionEntry - // mtlr 0 - // .end: - // - // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number - // of instructions change. - MCSymbol *BeginOfSled = OutContext.createTempSymbol(); - MCSymbol *EndOfSled = OutContext.createTempSymbol(); - OutStreamer->EmitLabel(BeginOfSled); - EmitToStreamer(*OutStreamer, - MCInstBuilder(PPC::B).addExpr( - MCSymbolRefExpr::create(EndOfSled, OutContext))); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); - EmitToStreamer( - *OutStreamer, - MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1)); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0)); - EmitToStreamer(*OutStreamer, - MCInstBuilder(PPC::BL8_NOP) - .addExpr(MCSymbolRefExpr::create( - OutContext.getOrCreateSymbol("__xray_FunctionEntry"), - OutContext))); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0)); - OutStreamer->EmitLabel(EndOfSled); - recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER); - break; - } - case TargetOpcode::PATCHABLE_RET: { - unsigned RetOpcode = MI->getOperand(0).getImm(); - MCInst RetInst; - RetInst.setOpcode(RetOpcode); - for (const auto &MO : - make_range(std::next(MI->operands_begin()), MI->operands_end())) { - MCOperand MCOp; - if (LowerPPCMachineOperandToMCOperand(MO, MCOp, *this, false)) - RetInst.addOperand(MCOp); - } - - bool IsConditional; - if (RetOpcode == PPC::BCCLR) { - IsConditional = true; - } else if (RetOpcode == PPC::TCRETURNdi8 || RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNai8) { - break; - } else if (RetOpcode == PPC::BLR8 || RetOpcode == PPC::TAILB8) { - IsConditional = false; - } else { - EmitToStreamer(*OutStreamer, RetInst); - break; - } - - MCSymbol *FallthroughLabel; - if (IsConditional) { - // Before: - // bgtlr cr0 - // - // After: - // ble cr0, .end - // .p2align 3 - // .begin: - // blr # lis 0, FuncId[16..32] - // nop # li 0, FuncId[0..15] - // std 0, -8(1) - // mflr 0 - // bl __xray_FunctionExit - // mtlr 0 - // blr - // .end: - // - // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number - // of instructions change. - FallthroughLabel = OutContext.createTempSymbol(); - EmitToStreamer( - *OutStreamer, - MCInstBuilder(PPC::BCC) - .addImm(PPC::InvertPredicate( - static_cast<PPC::Predicate>(MI->getOperand(1).getImm()))) - .addReg(MI->getOperand(2).getReg()) - .addExpr(MCSymbolRefExpr::create(FallthroughLabel, OutContext))); - RetInst = MCInst(); - RetInst.setOpcode(PPC::BLR8); - } - // .p2align 3 - // .begin: - // b(lr)? # lis 0, FuncId[16..32] - // nop # li 0, FuncId[0..15] - // std 0, -8(1) - // mflr 0 - // bl __xray_FunctionExit - // mtlr 0 - // b(lr)? - // - // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number - // of instructions change. - OutStreamer->EmitCodeAlignment(8); - MCSymbol *BeginOfSled = OutContext.createTempSymbol(); - OutStreamer->EmitLabel(BeginOfSled); - EmitToStreamer(*OutStreamer, RetInst); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); - EmitToStreamer( - *OutStreamer, - MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1)); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0)); - EmitToStreamer(*OutStreamer, - MCInstBuilder(PPC::BL8_NOP) - .addExpr(MCSymbolRefExpr::create( - OutContext.getOrCreateSymbol("__xray_FunctionExit"), - OutContext))); - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0)); - EmitToStreamer(*OutStreamer, RetInst); - if (IsConditional) - OutStreamer->EmitLabel(FallthroughLabel); - recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT); - break; - } - case TargetOpcode::PATCHABLE_FUNCTION_EXIT: - llvm_unreachable("PATCHABLE_FUNCTION_EXIT should never be emitted"); - case TargetOpcode::PATCHABLE_TAIL_CALL: - // TODO: Define a trampoline `__xray_FunctionTailExit` and differentiate a - // normal function exit from a tail exit. - llvm_unreachable("Tail call is handled in the normal case. See comments " - "around this assert."); - } -} - void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) { PPCTargetStreamer *TS = @@ -1238,10 +1038,10 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { } if (static_cast<const PPCTargetMachine &>(TM).isPPC64() || - !isPositionIndependent()) + TM.getRelocationModel() != Reloc::PIC_) return AsmPrinter::EmitStartOfAsmFile(M); - if (M.getPICLevel() == PICLevel::SmallPIC) + if (M.getPICLevel() == PICLevel::Small) return AsmPrinter::EmitStartOfAsmFile(M); OutStreamer->SwitchSection(OutContext.getELFSection( @@ -1267,13 +1067,13 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // linux/ppc32 - Normal entry label. if (!Subtarget->isPPC64() && - (!isPositionIndependent() || - MF->getFunction().getParent()->getPICLevel() == PICLevel::SmallPIC)) + (TM.getRelocationModel() != Reloc::PIC_ || + MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small)) return AsmPrinter::EmitFunctionEntryLabel(); if (!Subtarget->isPPC64()) { const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); - if (PPCFI->usesPICBase() && !Subtarget->isSecurePlt()) { + if (PPCFI->usesPICBase()) { MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(); MCSymbol *PICBase = MF->getPICBaseSymbol(); OutStreamer->EmitLabel(RelocSymbol); @@ -1295,7 +1095,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { if (Subtarget->isELFv2ABI()) { // In the Large code model, we allow arbitrary displacements between // the text section and its associated TOC section. We place the - // full 8-byte offset to the TOC in memory immediately preceding + // full 8-byte offset to the TOC in memory immediatedly preceding // the function global entry point. if (TM.getCodeModel() == CodeModel::Large && !MF->getRegInfo().use_empty(PPC::X2)) { @@ -1360,12 +1160,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { E = TOC.end(); I != E; ++I) { OutStreamer->EmitLabel(I->second); MCSymbol *S = I->first; - if (isPPC64) { + if (isPPC64) TS.emitTCEntry(*S); - } else { - OutStreamer->EmitValueToAlignment(4); + else OutStreamer->EmitSymbolValue(S, 4); - } } } @@ -1408,9 +1206,6 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { if (Subtarget->isELFv2ABI() // Only do all that if the function uses r2 in the first place. && !MF->getRegInfo().use_empty(PPC::X2)) { - // Note: The logic here must be synchronized with the code in the - // branch-selection pass which sets the offset of the first block in the - // function. This matters because it affects the alignment. const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(); @@ -1498,7 +1293,6 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc750", "ppc970", "ppcA2", - "ppce500", "ppce500mc", "ppce5500", "power3", @@ -1508,10 +1302,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "power6", "power6x", "power7", - // FIXME: why is power8 missing here? "ppc64", - "ppc64le", - "power9" + "ppc64le" }; // Get the numerically largest directive. @@ -1558,67 +1350,241 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); } +static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) { + // Remove $stub suffix, add $lazy_ptr. + StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5); + return Ctx.getOrCreateSymbol(NoStub + "$lazy_ptr"); +} + +static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { + // Add $tmp suffix to $stub, yielding $stub$tmp. + return Ctx.getOrCreateSymbol(Sym->getName() + "$tmp"); +} + +void PPCDarwinAsmPrinter:: +EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { + bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; + + // Construct a local MCSubtargetInfo and shadow EmitToStreamer here. + // This is because the MachineFunction won't exist (but have not yet been + // freed) and since we're at the global level we can use the default + // constructed subtarget. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple().str(), TM.getTargetCPU(), + TM.getTargetFeatureString())); + auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) { + S.EmitInstruction(Inst, *STI); + }; + + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); + + // .lazy_symbol_pointer + MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); + + // Output stubs for dynamically-linked functions + if (TM.getRelocationModel() == Reloc::PIC_) { + MCSection *StubSection = OutContext.getMachOSection( + "__TEXT", "__picsymbolstub1", + MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 32, + SectionKind::getText()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer->SwitchSection(StubSection); + EmitAlignment(4); + + MCSymbol *Stub = Stubs[i].first; + MCSymbol *RawSym = Stubs[i].second.getPointer(); + MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); + MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext); + + OutStreamer->EmitLabel(Stub); + OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + const MCExpr *Anon = MCSymbolRefExpr::create(AnonSymbol, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); + const MCExpr *Sub = + MCBinaryExpr::createSub(LazyPtrExpr, Anon, OutContext); + + // mflr r0 + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); + // bcl 20, 31, AnonSymbol + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon)); + OutStreamer->EmitLabel(AnonSymbol); + // mflr r11 + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); + // addis r11, r11, ha16(LazyPtr - AnonSymbol) + const MCExpr *SubHa16 = PPCMCExpr::createHa(Sub, true, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(PPC::R11) + .addReg(PPC::R11) + .addExpr(SubHa16)); + // mtlr r0 + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); + + // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) + // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) + const MCExpr *SubLo16 = PPCMCExpr::createLo(Sub, true, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + .addReg(PPC::R12) + .addExpr(SubLo16).addExpr(SubLo16) + .addReg(PPC::R11)); + // mtctr r12 + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + // bctr + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR)); + + OutStreamer->SwitchSection(LSPSection); + OutStreamer->EmitLabel(LazyPtr); + OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + MCSymbol *DyldStubBindingHelper = + OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper")); + if (isPPC64) { + // .quad dyld_stub_binding_helper + OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8); + } else { + // .long dyld_stub_binding_helper + OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4); + } + } + OutStreamer->AddBlankLine(); + return; + } + + MCSection *StubSection = OutContext.getMachOSection( + "__TEXT", "__symbol_stub1", + MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 16, + SectionKind::getText()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + MCSymbol *Stub = Stubs[i].first; + MCSymbol *RawSym = Stubs[i].second.getPointer(); + MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); + + OutStreamer->SwitchSection(StubSection); + EmitAlignment(4); + OutStreamer->EmitLabel(Stub); + OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + // lis r11, ha16(LazyPtr) + const MCExpr *LazyPtrHa16 = + PPCMCExpr::createHa(LazyPtrExpr, true, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS) + .addReg(PPC::R11) + .addExpr(LazyPtrHa16)); + + // ldu r12, lo16(LazyPtr)(r11) + // lwzu r12, lo16(LazyPtr)(r11) + const MCExpr *LazyPtrLo16 = + PPCMCExpr::createLo(LazyPtrExpr, true, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + .addReg(PPC::R12) + .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) + .addReg(PPC::R11)); + + // mtctr r12 + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + // bctr + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR)); + + OutStreamer->SwitchSection(LSPSection); + OutStreamer->EmitLabel(LazyPtr); + OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + MCSymbol *DyldStubBindingHelper = + OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper")); + if (isPPC64) { + // .quad dyld_stub_binding_helper + OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8); + } else { + // .long dyld_stub_binding_helper + OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4); + } + } + + OutStreamer->AddBlankLine(); +} + bool PPCDarwinAsmPrinter::doFinalization(Module &M) { bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); - if (MMI) { - MachineModuleInfoMachO &MMIMacho = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - if (MAI->doesSupportExceptionHandling()) { - // Add the (possibly multiple) personalities to the set of global values. - // Only referenced functions get into the Personalities list. - for (const Function *Personality : MMI->getPersonalities()) { - if (Personality) { - MCSymbol *NLPSym = - getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMIMacho.getGVStubEntry(NLPSym); - StubSym = - MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true); - } + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); + if (!Stubs.empty()) + EmitFunctionStubs(Stubs); + + if (MAI->doesSupportExceptionHandling() && MMI) { + // Add the (possibly multiple) personalities to the set of global values. + // Only referenced functions get into the Personalities list. + for (const Function *Personality : MMI->getPersonalities()) { + if (Personality) { + MCSymbol *NLPSym = + getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMIMacho.getGVStubEntry(NLPSym); + StubSym = + MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true); } } + } - // Output stubs for dynamically-linked functions. - MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList(); - - // Output macho stubs for external and common global variables. - if (!Stubs.empty()) { - // Switch with ".non_lazy_symbol_pointer" directive. - OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(isPPC64 ? 3 : 2); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer->EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), - MCSA_IndirectSymbol); - - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4 /*size*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info - // pointers - // need to be indirect and pc-rel. We accomplish this by using NLPs. - // However, sometimes the types are local to the file. So we need to - // fill in the value for the NLP in those cases. - OutStreamer->EmitValue( - MCSymbolRefExpr::create(MCSym.getPointer(), OutContext), - isPPC64 ? 8 : 4 /*size*/); - } + // Output stubs for dynamically-linked functions. + Stubs = MMIMacho.GetGVStubList(); + + // Output macho stubs for external and common global variables. + if (!Stubs.empty()) { + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + EmitAlignment(isPPC64 ? 3 : 2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer->EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; + OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4/*size*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info pointers + // need to be indirect and pc-rel. We accomplish this by using NLPs. + // However, sometimes the types are local to the file. So we need to + // fill in the value for the NLP in those cases. + OutStreamer->EmitValue(MCSymbolRefExpr::create(MCSym.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/); + } - Stubs.clear(); - OutStreamer->AddBlankLine(); + Stubs.clear(); + OutStreamer->AddBlankLine(); + } + + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { + OutStreamer->SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(isPPC64 ? 3 : 2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer->EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer->EmitValue(MCSymbolRefExpr:: + create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/); } + + Stubs.clear(); + OutStreamer->AddBlankLine(); } // Funny Darwin hack: This flag tells the linker that no global symbols @@ -1645,10 +1611,7 @@ createPPCAsmPrinterPass(TargetMachine &tm, // Force static initialization. extern "C" void LLVMInitializePowerPCAsmPrinter() { - TargetRegistry::RegisterAsmPrinter(getThePPC32Target(), - createPPCAsmPrinterPass); - TargetRegistry::RegisterAsmPrinter(getThePPC64Target(), - createPPCAsmPrinterPass); - TargetRegistry::RegisterAsmPrinter(getThePPC64LETarget(), - createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass); } diff --git a/gnu/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/gnu/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 70e9049a2ab..1eaa8118ba0 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/gnu/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -12,78 +12,34 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" -#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" -#include "PPCISelLowering.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCMachineFunctionInfo.h" -#include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/KnownBits.h" -#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <iterator> -#include <limits> -#include <memory> -#include <new> -#include <tuple> -#include <utility> - +#include "llvm/Target/TargetOptions.h" using namespace llvm; #define DEBUG_TYPE "ppc-codegen" -STATISTIC(NumSextSetcc, - "Number of (sext(setcc)) nodes expanded into GPR sequence."); -STATISTIC(NumZextSetcc, - "Number of (zext(setcc)) nodes expanded into GPR sequence."); -STATISTIC(SignExtensionsAdded, - "Number of sign extensions for compare inputs added."); -STATISTIC(ZeroExtensionsAdded, - "Number of zero extensions for compare inputs added."); -STATISTIC(NumLogicOpsOnComparison, - "Number of logical ops on i1 values calculated in GPR."); -STATISTIC(OmittedForNonExtendUses, - "Number of compares not eliminated as they have non-extending uses."); -STATISTIC(NumP9Setb, - "Number of compares lowered to setb."); - // FIXME: Remove this once the bug has been fixed! cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -103,36 +59,11 @@ static cl::opt<bool> EnableBranchHint( cl::desc("Enable static hinting of branches on ppc"), cl::Hidden); -static cl::opt<bool> EnableTLSOpt( - "ppc-tls-opt", cl::init(true), - cl::desc("Enable tls optimization peephole"), - cl::Hidden); +namespace llvm { + void initializePPCDAGToDAGISelPass(PassRegistry&); +} -enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, - ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, - ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; - -static cl::opt<ICmpInGPRType> CmpInGPR( - "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), - cl::desc("Specify the types of comparisons to emit GPR-only code for."), - cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), - clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), - clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), - clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), - clEnumValN(ICGPR_NonExtIn, "nonextin", - "Only comparisons where inputs don't need [sz]ext."), - clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), - clEnumValN(ICGPR_ZextI32, "zexti32", - "Only i32 comparisons with zext result."), - clEnumValN(ICGPR_ZextI64, "zexti64", - "Only i64 comparisons with zext result."), - clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), - clEnumValN(ICGPR_SextI32, "sexti32", - "Only i32 comparisons with sext result."), - clEnumValN(ICGPR_SextI64, "sexti64", - "Only i64 comparisons with sext result."))); namespace { - //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine /// instructions for SelectionDAG operations. @@ -142,10 +73,11 @@ namespace { const PPCSubtarget *PPCSubTarget; const PPCTargetLowering *PPCLowering; unsigned GlobalBaseReg; - public: - explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm) {} + explicit PPCDAGToDAGISel(PPCTargetMachine &tm) + : SelectionDAGISel(tm), TM(tm) { + initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary @@ -163,26 +95,20 @@ namespace { void PreprocessISelDAG() override; void PostprocessISelDAG() override; - /// getI16Imm - Return a target constant with the specified value, of type - /// i16. - inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { - return CurDAG->getTargetConstant(Imm, dl, MVT::i16); - } - /// getI32Imm - Return a target constant with the specified value, of type /// i32. - inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { + inline SDValue getI32Imm(unsigned Imm, SDLoc dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } /// getI64Imm - Return a target constant with the specified value, of type /// i64. - inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { + inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i64); } /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { + inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } @@ -196,34 +122,24 @@ namespace { /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); - void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - void Select(SDNode *N) override; - - bool tryBitfieldInsert(SDNode *N); - bool tryBitPermutation(SDNode *N); - bool tryIntCompareInGPR(SDNode *N); - - // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into - // an X-Form load instruction with the offset being a relocation coming from - // the PPCISD::ADD_TLS. - bool tryTLSXFormLoad(LoadSDNode *N); - // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into - // an X-Form store instruction with the offset being a relocation coming from - // the PPCISD::ADD_TLS. - bool tryTLSXFormStore(StoreSDNode *N); + SDNode *Select(SDNode *N) override; + + SDNode *SelectBitfieldInsert(SDNode *N); + SDNode *SelectBitPermutation(SDNode *N); + /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. - SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl); + SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl); /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -256,11 +172,7 @@ namespace { /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); - } - - bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); } // Select an address into a single register. @@ -277,6 +189,7 @@ namespace { bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override { + switch(ConstraintID) { default: errs() << "ConstraintID: " << ConstraintID << "\n"; @@ -307,7 +220,7 @@ namespace { void InsertVRSaveCode(MachineFunction &MF); - StringRef getPassName() const override { + const char *getPassName() const override { return "PowerPC DAG->DAG Pattern Instruction Selection"; } @@ -315,7 +228,7 @@ namespace { #include "PPCGenDAGISel.inc" private: - bool trySETCC(SDNode *N); + SDNode *SelectSETCC(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -327,11 +240,9 @@ private: bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); - bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; - void transferMemOperands(SDNode *N, SDNode *Result); + SDNode *transferMemOperands(SDNode *N, SDNode *Result); }; - -} // end anonymous namespace +} /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, @@ -397,6 +308,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { } } + /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. /// @@ -406,13 +318,13 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - const Module *M = MF->getFunction().getParent(); + const Module *M = MF->getFunction()->getParent(); DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (PPCSubTarget->isTargetELF()) { GlobalBaseReg = PPC::R30; - if (M->getPICLevel() == PICLevel::SmallPIC) { + if (M->getPICLevel() == PICLevel::Small) { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); @@ -427,22 +339,12 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } } else { GlobalBaseReg = - RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); + RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); } } else { - // We must ensure that this sequence is dominated by the prologue. - // FIXME: This is a bit of a big hammer since we don't get the benefits - // of shrink-wrapping whenever we emit this instruction. Considering - // this is used in any function where we emit a jump table, this may be - // a significant limitation. We should consider inserting this in the - // block where it is used and then commoning this sequence up if it - // appears in multiple places. - // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of - // MovePCtoLR8. - MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true); - GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); } @@ -452,6 +354,26 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { .getNode(); } +/// isIntS16Immediate - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 16-bit value. If so, this returns true and the +/// immediate. +static bool isIntS16Immediate(SDNode *N, short &Imm) { + if (N->getOpcode() != ISD::Constant) + return false; + + Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + else + return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); +} + +static bool isIntS16Immediate(SDValue Op, short &Imm) { + return isIntS16Immediate(Op.getNode(), Imm); +} + + /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { @@ -478,12 +400,6 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) { return isInt32Immediate(N.getNode(), Imm); } -/// isInt64Immediate - This method tests to see if the value is a 64-bit -/// constant operand. If so Imm will receive the 64-bit value. -static bool isInt64Immediate(SDValue N, uint64_t &Imm) { - return isInt64Immediate(N.getNode(), Imm); -} - static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const SDValue &DestMBB) { assert(isa<BasicBlockSDNode>(DestMBB)); @@ -491,7 +407,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, if (!FuncInfo->BPI) return PPC::BR_NO_HINT; const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); - const Instruction *BBTerm = BB->getTerminator(); + const TerminatorInst *BBTerm = BB->getTerminator(); if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; @@ -519,10 +435,10 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) return PPC::BR_NO_HINT; - LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() - << "::" << BB->getName() << "'\n" - << " -> " << TBB->getName() << ": " << TProb << "\n" - << " -> " << FBB->getName() << ": " << FProb << "\n"); + DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::" + << BB->getName() << "'\n" + << " -> " << TBB->getName() << ": " << TProb << "\n" + << " -> " << FBB->getName() << ": " << FProb << "\n"); const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); @@ -542,17 +458,16 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { && isInt32Immediate(N->getOperand(1).getNode(), Imm); } -void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { +SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { SDLoc dl(SN); int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; if (SN->hasOneUse()) - CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, - getSmallIPtrImm(Offset, dl)); - else - ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, - getSmallIPtrImm(Offset, dl))); + return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, + getSmallIPtrImm(Offset, dl)); + return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, + getSmallIPtrImm(Offset, dl)); } bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, @@ -597,102 +512,19 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, return false; } -bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { - SDValue Base = ST->getBasePtr(); - if (Base.getOpcode() != PPCISD::ADD_TLS) - return false; - SDValue Offset = ST->getOffset(); - if (!Offset.isUndef()) - return false; - - SDLoc dl(ST); - EVT MemVT = ST->getMemoryVT(); - EVT RegVT = ST->getValue().getValueType(); - - unsigned Opcode; - switch (MemVT.getSimpleVT().SimpleTy) { - default: - return false; - case MVT::i8: { - Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; - break; - } - case MVT::i16: { - Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; - break; - } - case MVT::i32: { - Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS; - break; - } - case MVT::i64: { - Opcode = PPC::STDXTLS; - break; - } - } - SDValue Chain = ST->getChain(); - SDVTList VTs = ST->getVTList(); - SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), - Chain}; - SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); - transferMemOperands(ST, MN); - ReplaceNode(ST, MN); - return true; -} - -bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { - SDValue Base = LD->getBasePtr(); - if (Base.getOpcode() != PPCISD::ADD_TLS) - return false; - SDValue Offset = LD->getOffset(); - if (!Offset.isUndef()) - return false; - - SDLoc dl(LD); - EVT MemVT = LD->getMemoryVT(); - EVT RegVT = LD->getValueType(0); - unsigned Opcode; - switch (MemVT.getSimpleVT().SimpleTy) { - default: - return false; - case MVT::i8: { - Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; - break; - } - case MVT::i16: { - Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; - break; - } - case MVT::i32: { - Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; - break; - } - case MVT::i64: { - Opcode = PPC::LDXTLS; - break; - } - } - SDValue Chain = LD->getChain(); - SDVTList VTs = LD->getVTList(); - SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; - SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); - transferMemOperands(LD, MN); - ReplaceNode(LD, MN); - return true; -} - -/// Turn an or of two masked values into the rotate left word immediate then -/// mask insert (rlwimi) instruction. -bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { +/// SelectBitfieldInsert - turn an or of two masked values into +/// the rotate left word immediate then mask insert (rlwimi) instruction. +SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDLoc dl(N); - KnownBits LKnown = CurDAG->computeKnownBits(Op0); - KnownBits RKnown = CurDAG->computeKnownBits(Op1); + APInt LKZ, LKO, RKZ, RKO; + CurDAG->computeKnownBits(Op0, LKZ, LKO); + CurDAG->computeKnownBits(Op1, RKZ, RKO); - unsigned TargetMask = LKnown.Zero.getZExtValue(); - unsigned InsertMask = RKnown.Zero.getZExtValue(); + unsigned TargetMask = LKZ.getZExtValue(); + unsigned InsertMask = RKZ.getZExtValue(); if ((TargetMask | InsertMask) == 0xFFFFFFFF) { unsigned Op0Opc = Op0.getOpcode(); @@ -724,6 +556,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { unsigned MB, ME; if (isRunOfOnes(InsertMask, MB, ME)) { + SDValue Tmp1, Tmp2; + if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { Op1 = Op1.getOperand(0); @@ -733,8 +567,9 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { // The AND mask might not be a constant, and we need to make sure that // if we're going to fold the masking with the insert, all bits not // know to be zero in the mask are known to be one. - KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); - bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); + APInt MKZ, MKO; + CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO); + bool CanFoldMask = InsertMask == MKO.getZExtValue(); unsigned SHOpc = Op1.getOperand(0).getOpcode(); if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && @@ -749,16 +584,15 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SH &= 31; SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); - return true; + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } - return false; + return nullptr; } // Predict the number of instructions that would be generated by calling -// selectI64Imm(N). -static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { +// SelectInt64(N). +static unsigned SelectInt64CountDirect(int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. @@ -802,13 +636,6 @@ static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { // If no shift, we're done. if (!Shift) return Result; - // If Hi word == Lo word, - // we can use rldimi to insert the Lo word into Hi word. - if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { - ++Result; - return Result; - } - // Shift for next step if the upper 32-bits were not zero. if (Imm) ++Result; @@ -826,20 +653,17 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) { return (Imm << R) | (Imm >> (64 - R)); } -static unsigned selectI64ImmInstrCount(int64_t Imm) { - unsigned Count = selectI64ImmInstrCountDirect(Imm); - - // If the instruction count is 1 or 2, we do not need further analysis - // since rotate + load constant requires at least 2 instructions. - if (Count <= 2) +static unsigned SelectInt64Count(int64_t Imm) { + unsigned Count = SelectInt64CountDirect(Imm); + if (Count == 1) return Count; for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); - unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; + unsigned RCount = SelectInt64CountDirect(RImm) + 1; Count = std::min(Count, RCount); - // See comments in selectI64Imm for an explanation of the logic below. + // See comments in SelectInt64 for an explanation of the logic below. unsigned LS = findLastSet(RImm); if (LS != r-1) continue; @@ -847,17 +671,16 @@ static unsigned selectI64ImmInstrCount(int64_t Imm) { uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; - RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; + RCount = SelectInt64CountDirect(RImmWithOnes) + 1; Count = std::min(Count, RCount); } return Count; } -// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount +// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count // (above) needs to be kept in sync with this function. -static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, - int64_t Imm) { +static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. @@ -893,10 +716,8 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, // Simple value. if (isInt<16>(Imm)) { - uint64_t SextImm = SignExtend64(Lo, 16); - SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); // Just the Lo bits. - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); } else if (Lo) { // Handle the Hi bits. unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; @@ -912,14 +733,6 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, // If no shift, we're done. if (!Shift) return Result; - // If Hi word == Lo word, - // we can use rldimi to insert the Lo word into Hi word. - if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { - SDValue Ops[] = - { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; - return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); - } - // Shift for next step if the upper 32-bits were not zero. if (Imm) { Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, @@ -941,14 +754,10 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, return Result; } -static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, - int64_t Imm) { - unsigned Count = selectI64ImmInstrCountDirect(Imm); - - // If the instruction count is 1 or 2, we do not need further analysis - // since rotate + load constant requires at least 2 instructions. - if (Count <= 2) - return selectI64ImmDirect(CurDAG, dl, Imm); +static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { + unsigned Count = SelectInt64CountDirect(Imm); + if (Count == 1) + return SelectInt64Direct(CurDAG, dl, Imm); unsigned RMin = 0; @@ -957,7 +766,7 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); - unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; + unsigned RCount = SelectInt64CountDirect(RImm) + 1; if (RCount < Count) { Count = RCount; RMin = r; @@ -980,7 +789,7 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; - RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; + RCount = SelectInt64CountDirect(RImmWithOnes) + 1; if (RCount < Count) { Count = RCount; RMin = r; @@ -990,90 +799,27 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, } if (!RMin) - return selectI64ImmDirect(CurDAG, dl, Imm); + return SelectInt64Direct(CurDAG, dl, Imm); auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; - SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); + SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0); return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, getI32Imm(64 - RMin), getI32Imm(MaskEnd)); } -static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { - unsigned MaxTruncation = 0; - // Cannot use range-based for loop here as we need the actual use (i.e. we - // need the operand number corresponding to the use). A range-based for - // will unbox the use and provide an SDNode*. - for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); - Use != UseEnd; ++Use) { - unsigned Opc = - Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode(); - switch (Opc) { - default: return 0; - case ISD::TRUNCATE: - if (Use->isMachineOpcode()) - return 0; - MaxTruncation = - std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); - continue; - case ISD::STORE: { - if (Use->isMachineOpcode()) - return 0; - StoreSDNode *STN = cast<StoreSDNode>(*Use); - unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); - if (MemVTSize == 64 || Use.getOperandNo() != 0) - return 0; - MaxTruncation = std::max(MaxTruncation, MemVTSize); - continue; - } - case PPC::STW8: - case PPC::STWX8: - case PPC::STWU8: - case PPC::STWUX8: - if (Use.getOperandNo() != 0) - return 0; - MaxTruncation = std::max(MaxTruncation, 32u); - continue; - case PPC::STH8: - case PPC::STHX8: - case PPC::STHU8: - case PPC::STHUX8: - if (Use.getOperandNo() != 0) - return 0; - MaxTruncation = std::max(MaxTruncation, 16u); - continue; - case PPC::STB8: - case PPC::STBX8: - case PPC::STBU8: - case PPC::STBUX8: - if (Use.getOperandNo() != 0) - return 0; - MaxTruncation = std::max(MaxTruncation, 8u); - continue; - } - } - return MaxTruncation; -} - // Select a 64-bit constant. -static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { +static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); - if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { - uint64_t SextImm = SignExtend64(Imm, MinSize); - SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); - if (isInt<16>(SextImm)) - return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); - } - return selectI64Imm(CurDAG, dl, Imm); + return SelectInt64(CurDAG, dl, Imm); } namespace { - class BitPermutationSelector { struct ValueBit { SDValue V; @@ -1082,14 +828,9 @@ class BitPermutationSelector { // lowest-order bit. unsigned Idx; - // ConstZero means a bit we need to mask off. - // Variable is a bit comes from an input variable. - // VariableKnownToBeZero is also a bit comes from an input variable, - // but it is known to be already zero. So we do not need to mask them. enum Kind { ConstZero, - Variable, - VariableKnownToBeZero + Variable } K; ValueBit(SDValue V, unsigned I, Kind K = Variable) @@ -1098,11 +839,11 @@ class BitPermutationSelector { : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} bool isZero() const { - return K == ConstZero || K == VariableKnownToBeZero; + return K == ConstZero; } bool hasValue() const { - return K == Variable || K == VariableKnownToBeZero; + return K == Variable; } SDValue getValue() const { @@ -1135,8 +876,8 @@ class BitPermutationSelector { BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), Repl32Coalesced(false) { - LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R - << " [" << S << ", " << E << "]\n"); + DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << + " [" << S << ", " << E << "]\n"); } }; @@ -1144,12 +885,14 @@ class BitPermutationSelector { // associated with each) used to choose the lowering method. struct ValueRotInfo { SDValue V; - unsigned RLAmt = std::numeric_limits<unsigned>::max(); - unsigned NumGroups = 0; - unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); - bool Repl32 = false; + unsigned RLAmt; + unsigned NumGroups; + unsigned FirstGroupStartIdx; + bool Repl32; - ValueRotInfo() = default; + ValueRotInfo() + : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX), + Repl32(false) {} // For sorting (in reverse order) by NumGroups, and then by // FirstGroupStartIdx. @@ -1165,130 +908,91 @@ class BitPermutationSelector { return true; else if (NumGroups < Other.NumGroups) return false; - else if (RLAmt == 0 && Other.RLAmt != 0) - return true; - else if (RLAmt != 0 && Other.RLAmt == 0) - return false; else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) return true; return false; } }; - using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; - using ValueBitsMemoizer = - DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; - ValueBitsMemoizer Memoizer; - - // Return a pair of bool and a SmallVector pointer to a memoization entry. - // The bool is true if something interesting was deduced, otherwise if we're + // Return true if something interesting was deduced, return false if we're // providing only a generic representation of V (or something else likewise - // uninteresting for instruction selection) through the SmallVector. - std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, - unsigned NumBits) { - auto &ValueEntry = Memoizer[V]; - if (ValueEntry) - return std::make_pair(ValueEntry->first, &ValueEntry->second); - ValueEntry.reset(new ValueBitsMemoizedValue()); - bool &Interesting = ValueEntry->first; - SmallVector<ValueBit, 64> &Bits = ValueEntry->second; - Bits.resize(NumBits); - + // uninteresting for instruction selection). + bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) { switch (V.getOpcode()) { default: break; case ISD::ROTL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned RotAmt = V.getConstantOperandVal(1); - const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; + SmallVector<ValueBit, 64> LHSBits(Bits.size()); + getValueBits(V.getOperand(0), LHSBits); - for (unsigned i = 0; i < NumBits; ++i) - Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt]; + for (unsigned i = 0; i < Bits.size(); ++i) + Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt]; - return std::make_pair(Interesting = true, &Bits); + return true; } break; case ISD::SHL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); - const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; + SmallVector<ValueBit, 64> LHSBits(Bits.size()); + getValueBits(V.getOperand(0), LHSBits); - for (unsigned i = ShiftAmt; i < NumBits; ++i) + for (unsigned i = ShiftAmt; i < Bits.size(); ++i) Bits[i] = LHSBits[i - ShiftAmt]; for (unsigned i = 0; i < ShiftAmt; ++i) Bits[i] = ValueBit(ValueBit::ConstZero); - return std::make_pair(Interesting = true, &Bits); + return true; } break; case ISD::SRL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); - const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; + SmallVector<ValueBit, 64> LHSBits(Bits.size()); + getValueBits(V.getOperand(0), LHSBits); - for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) + for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i) Bits[i] = LHSBits[i + ShiftAmt]; - for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) + for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i) Bits[i] = ValueBit(ValueBit::ConstZero); - return std::make_pair(Interesting = true, &Bits); + return true; } break; case ISD::AND: if (isa<ConstantSDNode>(V.getOperand(1))) { uint64_t Mask = V.getConstantOperandVal(1); - const SmallVector<ValueBit, 64> *LHSBits; + SmallVector<ValueBit, 64> LHSBits(Bits.size()); + bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits); + + for (unsigned i = 0; i < Bits.size(); ++i) + if (((Mask >> i) & 1) == 1) + Bits[i] = LHSBits[i]; + else + Bits[i] = ValueBit(ValueBit::ConstZero); + // Mark this as interesting, only if the LHS was also interesting. This // prevents the overall procedure from matching a single immediate 'and' // (which is non-optimal because such an and might be folded with other // things if we don't select it here). - std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); - - for (unsigned i = 0; i < NumBits; ++i) - if (((Mask >> i) & 1) == 1) - Bits[i] = (*LHSBits)[i]; - else { - // AND instruction masks this bit. If the input is already zero, - // we have nothing to do here. Otherwise, make the bit ConstZero. - if ((*LHSBits)[i].isZero()) - Bits[i] = (*LHSBits)[i]; - else - Bits[i] = ValueBit(ValueBit::ConstZero); - } - - return std::make_pair(Interesting, &Bits); + return LHSTrivial; } break; case ISD::OR: { - const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; + SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size()); + getValueBits(V.getOperand(0), LHSBits); + getValueBits(V.getOperand(1), RHSBits); bool AllDisjoint = true; - SDValue LastVal = SDValue(); - unsigned LastIdx = 0; - for (unsigned i = 0; i < NumBits; ++i) { - if (LHSBits[i].isZero() && RHSBits[i].isZero()) { - // If both inputs are known to be zero and one is ConstZero and - // another is VariableKnownToBeZero, we can select whichever - // we like. To minimize the number of bit groups, we select - // VariableKnownToBeZero if this bit is the next bit of the same - // input variable from the previous bit. Otherwise, we select - // ConstZero. - if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && - LHSBits[i].getValueBitIndex() == LastIdx + 1) - Bits[i] = LHSBits[i]; - else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && - RHSBits[i].getValueBitIndex() == LastIdx + 1) - Bits[i] = RHSBits[i]; - else - Bits[i] = ValueBit(ValueBit::ConstZero); - } - else if (LHSBits[i].isZero()) + for (unsigned i = 0; i < Bits.size(); ++i) + if (LHSBits[i].isZero()) Bits[i] = RHSBits[i]; else if (RHSBits[i].isZero()) Bits[i] = LHSBits[i]; @@ -1296,119 +1000,24 @@ class BitPermutationSelector { AllDisjoint = false; break; } - // We remember the value and bit index of this bit. - if (Bits[i].hasValue()) { - LastVal = Bits[i].getValue(); - LastIdx = Bits[i].getValueBitIndex(); - } - else { - if (LastVal) LastVal = SDValue(); - LastIdx = 0; - } - } if (!AllDisjoint) break; - return std::make_pair(Interesting = true, &Bits); - } - case ISD::ZERO_EXTEND: { - // We support only the case with zero extension from i32 to i64 so far. - if (V.getValueType() != MVT::i64 || - V.getOperand(0).getValueType() != MVT::i32) - break; - - const SmallVector<ValueBit, 64> *LHSBits; - const unsigned NumOperandBits = 32; - std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), - NumOperandBits); - - for (unsigned i = 0; i < NumOperandBits; ++i) - Bits[i] = (*LHSBits)[i]; - - for (unsigned i = NumOperandBits; i < NumBits; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); - - return std::make_pair(Interesting, &Bits); - } - case ISD::TRUNCATE: { - EVT FromType = V.getOperand(0).getValueType(); - EVT ToType = V.getValueType(); - // We support only the case with truncate from i64 to i32. - if (FromType != MVT::i64 || ToType != MVT::i32) - break; - const unsigned NumAllBits = FromType.getSizeInBits(); - SmallVector<ValueBit, 64> *InBits; - std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), - NumAllBits); - const unsigned NumValidBits = ToType.getSizeInBits(); - - // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. - // So, we cannot include this truncate. - bool UseUpper32bit = false; - for (unsigned i = 0; i < NumValidBits; ++i) - if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { - UseUpper32bit = true; - break; - } - if (UseUpper32bit) - break; - - for (unsigned i = 0; i < NumValidBits; ++i) - Bits[i] = (*InBits)[i]; - - return std::make_pair(Interesting, &Bits); - } - case ISD::AssertZext: { - // For AssertZext, we look through the operand and - // mark the bits known to be zero. - const SmallVector<ValueBit, 64> *LHSBits; - std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), - NumBits); - - EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); - const unsigned NumValidBits = FromType.getSizeInBits(); - for (unsigned i = 0; i < NumValidBits; ++i) - Bits[i] = (*LHSBits)[i]; - - // These bits are known to be zero. - for (unsigned i = NumValidBits; i < NumBits; ++i) - Bits[i] = ValueBit((*LHSBits)[i].getValue(), - (*LHSBits)[i].getValueBitIndex(), - ValueBit::VariableKnownToBeZero); - - return std::make_pair(Interesting, &Bits); + return true; } - case ISD::LOAD: - LoadSDNode *LD = cast<LoadSDNode>(V); - if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { - EVT VT = LD->getMemoryVT(); - const unsigned NumValidBits = VT.getSizeInBits(); - - for (unsigned i = 0; i < NumValidBits; ++i) - Bits[i] = ValueBit(V, i); - - // These bits are known to be zero. - for (unsigned i = NumValidBits; i < NumBits; ++i) - Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); - - // Zero-extending load itself cannot be optimized. So, it is not - // interesting by itself though it gives useful information. - return std::make_pair(Interesting = false, &Bits); - } - break; } - for (unsigned i = 0; i < NumBits; ++i) + for (unsigned i = 0; i < Bits.size(); ++i) Bits[i] = ValueBit(V, i); - return std::make_pair(Interesting = false, &Bits); + return false; } // For each value (except the constant ones), compute the left-rotate amount // to get it from its original to final position. void computeRotationAmounts() { - NeedMask = false; + HasZeros = false; RLAmt.resize(Bits.size()); for (unsigned i = 0; i < Bits.size(); ++i) if (Bits[i].hasValue()) { @@ -1418,7 +1027,7 @@ class BitPermutationSelector { else RLAmt[i] = Bits.size() - (VBI - i); } else if (Bits[i].isZero()) { - NeedMask = true; + HasZeros = true; RLAmt[i] = UINT32_MAX; } else { llvm_unreachable("Unknown value bit type"); @@ -1434,7 +1043,6 @@ class BitPermutationSelector { unsigned LastRLAmt = RLAmt[0]; SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); unsigned LastGroupStartIdx = 0; - bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); for (unsigned i = 1; i < Bits.size(); ++i) { unsigned ThisRLAmt = RLAmt[i]; SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); @@ -1447,20 +1055,10 @@ class BitPermutationSelector { LastGroupStartIdx = 0; } - // If this bit is known to be zero and the current group is a bit group - // of zeros, we do not need to terminate the current bit group even the - // Value or RLAmt does not match here. Instead, we terminate this group - // when the first non-zero bit appears later. - if (IsGroupOfZeros && Bits[i].isZero()) - continue; - // If this bit has the same underlying value and the same rotate factor as // the last one, then they're part of the same group. if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) - // We cannot continue the current group if this bits is not known to - // be zero in a bit group of zeros. - if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) - continue; + continue; if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, @@ -1468,7 +1066,6 @@ class BitPermutationSelector { LastRLAmt = ThisRLAmt; LastValue = ThisValue; LastGroupStartIdx = i; - IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); } if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, @@ -1486,7 +1083,7 @@ class BitPermutationSelector { BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && BitGroups[0].V == BitGroups[BitGroups.size()-1].V && BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { - LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); + DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; BitGroups.erase(BitGroups.begin()); } @@ -1494,9 +1091,7 @@ class BitPermutationSelector { } // Take all (SDValue, RLAmt) pairs and sort them by the number of groups - // associated with each. If the number of groups are same, we prefer a group - // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate - // instruction. If there is a degeneracy, pick the one that occurs + // associated with each. If there is a degeneracy, pick the one that occurs // first (in the final value). void collectValueRotInfo() { ValueRots.clear(); @@ -1517,7 +1112,7 @@ class BitPermutationSelector { for (auto &I : ValueRots) { ValueRotsVec.push_back(I.second); } - llvm::sort(ValueRotsVec); + std::sort(ValueRotsVec.begin(), ValueRotsVec.end()); } // In 64-bit mode, rlwinm and friends have a rotation operator that @@ -1566,20 +1161,6 @@ class BitPermutationSelector { }; for (auto &BG : BitGroups) { - // If this bit group has RLAmt of 0 and will not be merged with - // another bit group, we don't benefit from Repl32. We don't mark - // such group to give more freedom for later instruction selection. - if (BG.RLAmt == 0) { - auto PotentiallyMerged = [this](BitGroup & BG) { - for (auto &BG2 : BitGroups) - if (&BG != &BG2 && BG.V == BG2.V && - (BG2.RLAmt == 0 || BG2.RLAmt == 32)) - return true; - return false; - }; - if (!PotentiallyMerged(BG)) - continue; - } if (BG.StartIdx < 32 && BG.EndIdx < 32) { if (IsAllLow32(BG)) { if (BG.RLAmt >= 32) { @@ -1589,9 +1170,9 @@ class BitPermutationSelector { BG.Repl32 = true; - LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for " - << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" - << BG.StartIdx << ", " << BG.EndIdx << "]\n"); + DEBUG(dbgs() << "\t32-bit replicated bit group for " << + BG.V.getNode() << " RLAmt = " << BG.RLAmt << + " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n"); } } } @@ -1605,11 +1186,11 @@ class BitPermutationSelector { if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { - LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " - << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" - << I->StartIdx << ", " << I->EndIdx - << "] with group with range [" << IP->StartIdx << ", " - << IP->EndIdx << "]\n"); + DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " << + I->V.getNode() << " RLAmt = " << I->RLAmt << + " [" << I->StartIdx << ", " << I->EndIdx << + "] with group with range [" << + IP->StartIdx << ", " << IP->EndIdx << "]\n"); IP->EndIdx = I->EndIdx; IP->Repl32CR = IP->Repl32CR || I->Repl32CR; @@ -1628,17 +1209,17 @@ class BitPermutationSelector { "bit group ends at index 63 but there is another?"); auto IN = BitGroups.begin(); - if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && + if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && IsAllLow32(*I)) { - LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode() - << " RLAmt = " << I->RLAmt << " [" << I->StartIdx - << ", " << I->EndIdx - << "] with 32-bit replicated groups with ranges [" - << IP->StartIdx << ", " << IP->EndIdx << "] and [" - << IN->StartIdx << ", " << IN->EndIdx << "]\n"); + DEBUG(dbgs() << "\tcombining bit group for " << + I->V.getNode() << " RLAmt = " << I->RLAmt << + " [" << I->StartIdx << ", " << I->EndIdx << + "] with 32-bit replicated groups with ranges [" << + IP->StartIdx << ", " << IP->EndIdx << "] and [" << + IN->StartIdx << ", " << IN->EndIdx << "]\n"); if (IP == IN) { // There is only one other group; change it to cover the whole @@ -1671,7 +1252,7 @@ class BitPermutationSelector { } } - SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { + SDValue getI32Imm(unsigned Imm, SDLoc dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } @@ -1686,39 +1267,10 @@ class BitPermutationSelector { return ~Mask; } - // This method extends an input value to 64 bit if input is 32-bit integer. - // While selecting instructions in BitPermutationSelector in 64-bit mode, - // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. - // In such case, we extend it to 64 bit to be consistent with other values. - SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { - if (V.getValueSizeInBits() == 64) - return V; - - assert(V.getValueSizeInBits() == 32); - SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); - SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, - MVT::i64), 0); - SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, - MVT::i64, ImDef, V, - SubRegIdx), 0); - return ExtVal; - } - - SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { - if (V.getValueSizeInBits() == 32) - return V; - - assert(V.getValueSizeInBits() == 64); - SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); - SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, - MVT::i32, V, SubRegIdx), 0); - return SubVal; - } - // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. - void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { + void SelectAndParts32(SDLoc dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; @@ -1758,27 +1310,27 @@ class BitPermutationSelector { (unsigned) (ANDIMask != 0 && ANDISMask != 0) + (unsigned) (bool) Res; - LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() - << " RL: " << VRI.RLAmt << ":" - << "\n\t\t\tisel using masking: " << NumAndInsts - << " using rotates: " << VRI.NumGroups << "\n"); + DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << + " RL: " << VRI.RLAmt << ":" << + "\n\t\t\tisel using masking: " << NumAndInsts << + " using rotates: " << VRI.NumGroups << "\n"); if (NumAndInsts >= VRI.NumGroups) continue; - LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); + DEBUG(dbgs() << "\t\t\t\tusing masking\n"); if (InstCnt) *InstCnt += NumAndInsts; SDValue VRot; if (VRI.RLAmt) { SDValue Ops[] = - { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), - getI32Imm(0, dl), getI32Imm(31, dl) }; + { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), + getI32Imm(31, dl) }; VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { - VRot = TruncateToInt32(VRI.V, dl); + VRot = VRI.V; } SDValue ANDIVal, ANDISVal; @@ -1825,17 +1377,17 @@ class BitPermutationSelector { // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. - if ((!NeedMask || LateMask) && !Res) { + if ((!HasZeros || LateMask) && !Res) { ValueRotInfo &VRI = ValueRotsVec[0]; if (VRI.RLAmt) { if (InstCnt) *InstCnt += 1; SDValue Ops[] = - { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), - getI32Imm(0, dl), getI32Imm(31, dl) }; + { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), + getI32Imm(31, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { - Res = TruncateToInt32(VRI.V, dl); + Res = VRI.V; } // Now, remove all groups with this underlying value and rotation factor. @@ -1850,13 +1402,13 @@ class BitPermutationSelector { for (auto &BG : BitGroups) { if (!Res) { SDValue Ops[] = - { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), + { BG.V, getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { SDValue Ops[] = - { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), + { Res, BG.V, getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); @@ -1914,8 +1466,8 @@ class BitPermutationSelector { // For 64-bit values, not all combinations of rotates and masks are // available. Produce one if it is available. - SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, - bool Repl32, unsigned MaskStart, unsigned MaskEnd, + SDValue SelectRotMask64(SDValue V, SDLoc dl, unsigned RLAmt, bool Repl32, + unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. @@ -1931,30 +1483,27 @@ class BitPermutationSelector { assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = - { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), - getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; + { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), + getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63) { SDValue Ops[] = - { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), - getI32Imm(InstMaskStart, dl) }; + { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); } if (InstMaskStart == 0) { SDValue Ops[] = - { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), - getI32Imm(InstMaskEnd, dl) }; + { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = - { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), - getI32Imm(InstMaskStart, dl) }; + { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); } @@ -1978,8 +1527,8 @@ class BitPermutationSelector { // For 64-bit values, not all combinations of rotates and masks are // available. Produce a rotate-mask-and-insert if one is available. - SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, - unsigned RLAmt, bool Repl32, unsigned MaskStart, + SDValue SelectRotMaskIns64(SDValue Base, SDValue V, SDLoc dl, unsigned RLAmt, + bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. @@ -1995,16 +1544,15 @@ class BitPermutationSelector { assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = - { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), - getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; + { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), + getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = - { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), - getI32Imm(InstMaskStart, dl) }; + { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); } @@ -2026,7 +1574,7 @@ class BitPermutationSelector { return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); } - void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { + void SelectAndParts64(SDLoc dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; @@ -2098,26 +1646,23 @@ class BitPermutationSelector { NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else - NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; + NumAndInsts += SelectInt64Count(Mask) + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; - bool MoreBG = false; for (auto &BG : BitGroups) { - if (!MatchingBG(BG)) { - MoreBG = true; + if (!MatchingBG(BG)) continue; - } NumRLInsts += SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, !FirstBG); FirstBG = false; } - LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() - << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") - << "\n\t\t\tisel using masking: " << NumAndInsts - << " using rotates: " << NumRLInsts << "\n"); + DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << + " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << + "\n\t\t\tisel using masking: " << NumAndInsts << + " using rotates: " << NumRLInsts << "\n"); // When we'd use andi/andis, we bias toward using the rotates (andi only // has a record form, and is cracked on POWER cores). However, when using @@ -2125,13 +1670,10 @@ class BitPermutationSelector { // because that exposes more opportunities for CSE. if (NumAndInsts > NumRLInsts) continue; - // When merging multiple bit groups, instruction or is used. - // But when rotate is used, rldimi can inert the rotated value into any - // register, so instruction or can be avoided. - if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) + if (Use32BitInsts && NumAndInsts == NumRLInsts) continue; - LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); + DEBUG(dbgs() << "\t\t\t\tusing masking\n"); if (InstCnt) *InstCnt += NumAndInsts; @@ -2154,14 +1696,10 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - ExtendToInt64(VRot, dl), - getI32Imm(ANDIMask, dl)), - 0); + VRot, getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - ExtendToInt64(VRot, dl), - getI32Imm(ANDISMask, dl)), - 0); + VRot, getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) TotalVal = ANDISVal; @@ -2169,21 +1707,19 @@ class BitPermutationSelector { TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ExtendToInt64(ANDIVal, dl), ANDISVal), 0); + ANDIVal, ANDISVal), 0); } else { - TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); + TotalVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0); TotalVal = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - ExtendToInt64(VRot, dl), TotalVal), - 0); + VRot, TotalVal), 0); } if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ExtendToInt64(Res, dl), TotalVal), - 0); + Res, TotalVal), 0); // Now, remove all groups with this underlying value and rotation // factor. @@ -2204,7 +1740,7 @@ class BitPermutationSelector { // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. - if ((!NeedMask || LateMask) && !Res) { + if ((!HasZeros || LateMask) && !Res) { // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 // groups will come first, and so the VRI representing the largest number // of groups might not be first (it might be the first Repl32 groups). @@ -2303,10 +1839,10 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); + Res, getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); + Res, getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; @@ -2314,14 +1850,14 @@ class BitPermutationSelector { Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ExtendToInt64(ANDIVal, dl), ANDISVal), 0); + ANDIVal, ANDISVal), 0); } else { - if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; + if (InstCnt) *InstCnt += SelectInt64Count(Mask) + /* and */ 1; - SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); + SDValue MaskVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0); Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - ExtendToInt64(Res, dl), MaskVal), 0); + Res, MaskVal), 0); } } @@ -2352,12 +1888,13 @@ class BitPermutationSelector { } void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { - BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); + BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F), + BitGroups.end()); } SmallVector<ValueBit, 64> Bits; - bool NeedMask; + bool HasZeros; SmallVector<unsigned, 64> RLAmt; SmallVector<BitGroup, 16> BitGroups; @@ -2375,21 +1912,18 @@ public: // rotate-and-shift/shift/and/or instructions, using a set of heuristics // known to produce optimial code for common cases (like i32 byte swapping). SDNode *Select(SDNode *N) { - Memoizer.clear(); - auto Result = - getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); - if (!Result.first) + Bits.resize(N->getValueType(0).getSizeInBits()); + if (!getValueBits(SDValue(N, 0), Bits)) return nullptr; - Bits = std::move(*Result.second); - LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction" - " selection for: "); - LLVM_DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "Considering bit-permutation-based instruction" + " selection for: "); + DEBUG(N->dump(CurDAG)); - // Fill it RLAmt and set NeedMask. + // Fill it RLAmt and set HasZeros. computeRotationAmounts(); - if (!NeedMask) + if (!HasZeros) return Select(N, false); // We currently have two techniques for handling results with zeros: early @@ -2400,1231 +1934,34 @@ public: // set of bit groups, and then mask in the zeros at the end. With early // masking, we only insert the non-zero parts of the result at every step. - unsigned InstCnt = 0, InstCntLateMask = 0; - LLVM_DEBUG(dbgs() << "\tEarly masking:\n"); + unsigned InstCnt, InstCntLateMask; + DEBUG(dbgs() << "\tEarly masking:\n"); SDNode *RN = Select(N, false, &InstCnt); - LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); + DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); - LLVM_DEBUG(dbgs() << "\tLate masking:\n"); + DEBUG(dbgs() << "\tLate masking:\n"); SDNode *RNLM = Select(N, true, &InstCntLateMask); - LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask - << " instructions\n"); + DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask << + " instructions\n"); if (InstCnt <= InstCntLateMask) { - LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n"); + DEBUG(dbgs() << "\tUsing early-masking for isel\n"); return RN; } - LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n"); + DEBUG(dbgs() << "\tUsing late-masking for isel\n"); return RNLM; } }; +} // anonymous namespace -class IntegerCompareEliminator { - SelectionDAG *CurDAG; - PPCDAGToDAGISel *S; - // Conversion type for interpreting results of a 32-bit instruction as - // a 64-bit value or vice versa. - enum ExtOrTruncConversion { Ext, Trunc }; - - // Modifiers to guide how an ISD::SETCC node's result is to be computed - // in a GPR. - // ZExtOrig - use the original condition code, zero-extend value - // ZExtInvert - invert the condition code, zero-extend value - // SExtOrig - use the original condition code, sign-extend value - // SExtInvert - invert the condition code, sign-extend value - enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; - - // Comparisons against zero to emit GPR code sequences for. Each of these - // sequences may need to be emitted for two or more equivalent patterns. - // For example (a >= 0) == (a > -1). The direction of the comparison (</>) - // matters as well as the extension type: sext (-1/0), zext (1/0). - // GEZExt - (zext (LHS >= 0)) - // GESExt - (sext (LHS >= 0)) - // LEZExt - (zext (LHS <= 0)) - // LESExt - (sext (LHS <= 0)) - enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; - - SDNode *tryEXTEND(SDNode *N); - SDNode *tryLogicOpOfCompares(SDNode *N); - SDValue computeLogicOpInGPR(SDValue LogicOp); - SDValue signExtendInputIfNeeded(SDValue Input); - SDValue zeroExtendInputIfNeeded(SDValue Input); - SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); - SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, - ZeroCompare CmpTy); - SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); - -public: - IntegerCompareEliminator(SelectionDAG *DAG, - PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { - assert(CurDAG->getTargetLoweringInfo() - .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && - "Only expecting to use this on 64 bit targets."); - } - SDNode *Select(SDNode *N) { - if (CmpInGPR == ICGPR_None) - return nullptr; - switch (N->getOpcode()) { - default: break; - case ISD::ZERO_EXTEND: - if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || - CmpInGPR == ICGPR_SextI64) - return nullptr; - LLVM_FALLTHROUGH; - case ISD::SIGN_EXTEND: - if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || - CmpInGPR == ICGPR_ZextI64) - return nullptr; - return tryEXTEND(N); - case ISD::AND: - case ISD::OR: - case ISD::XOR: - return tryLogicOpOfCompares(N); - } - return nullptr; - } -}; - -static bool isLogicOp(unsigned Opc) { - return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; -} -// The obvious case for wanting to keep the value in a GPR. Namely, the -// result of the comparison is actually needed in a GPR. -SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { - assert((N->getOpcode() == ISD::ZERO_EXTEND || - N->getOpcode() == ISD::SIGN_EXTEND) && - "Expecting a zero/sign extend node!"); - SDValue WideRes; - // If we are zero-extending the result of a logical operation on i1 - // values, we can keep the values in GPRs. - if (isLogicOp(N->getOperand(0).getOpcode()) && - N->getOperand(0).getValueType() == MVT::i1 && - N->getOpcode() == ISD::ZERO_EXTEND) - WideRes = computeLogicOpInGPR(N->getOperand(0)); - else if (N->getOperand(0).getOpcode() != ISD::SETCC) - return nullptr; - else - WideRes = - getSETCCInGPR(N->getOperand(0), - N->getOpcode() == ISD::SIGN_EXTEND ? - SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); - - if (!WideRes) - return nullptr; - - SDLoc dl(N); - bool Input32Bit = WideRes.getValueType() == MVT::i32; - bool Output32Bit = N->getValueType(0) == MVT::i32; - - NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; - NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; - - SDValue ConvOp = WideRes; - if (Input32Bit != Output32Bit) - ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : - ExtOrTruncConversion::Trunc); - return ConvOp.getNode(); -} - -// Attempt to perform logical operations on the results of comparisons while -// keeping the values in GPRs. Without doing so, these would end up being -// lowered to CR-logical operations which suffer from significant latency and -// low ILP. -SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { - if (N->getValueType(0) != MVT::i1) - return nullptr; - assert(isLogicOp(N->getOpcode()) && - "Expected a logic operation on setcc results."); - SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); - if (!LoweredLogical) - return nullptr; - - SDLoc dl(N); - bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; - unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; - SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); - SDValue LHS = LoweredLogical.getOperand(0); - SDValue RHS = LoweredLogical.getOperand(1); - SDValue WideOp; - SDValue OpToConvToRecForm; - - // Look through any 32-bit to 64-bit implicit extend nodes to find the - // opcode that is input to the XORI. - if (IsBitwiseNegate && - LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) - OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); - else if (IsBitwiseNegate) - // If the input to the XORI isn't an extension, that's what we're after. - OpToConvToRecForm = LoweredLogical.getOperand(0); - else - // If this is not an XORI, it is a reg-reg logical op and we can convert - // it to record-form. - OpToConvToRecForm = LoweredLogical; - - // Get the record-form version of the node we're looking to use to get the - // CR result from. - uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); - int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); - - // Convert the right node to record-form. This is either the logical we're - // looking at or it is the input node to the negation (if we're looking at - // a bitwise negation). - if (NewOpc != -1 && IsBitwiseNegate) { - // The input to the XORI has a record-form. Use it. - assert(LoweredLogical.getConstantOperandVal(1) == 1 && - "Expected a PPC::XORI8 only for bitwise negation."); - // Emit the record-form instruction. - std::vector<SDValue> Ops; - for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) - Ops.push_back(OpToConvToRecForm.getOperand(i)); - - WideOp = - SDValue(CurDAG->getMachineNode(NewOpc, dl, - OpToConvToRecForm.getValueType(), - MVT::Glue, Ops), 0); - } else { - assert((NewOpc != -1 || !IsBitwiseNegate) && - "No record form available for AND8/OR8/XOR8?"); - WideOp = - SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, - MVT::i64, MVT::Glue, LHS, RHS), 0); - } - - // Select this node to a single bit from CR0 set by the record-form node - // just created. For bitwise negation, use the EQ bit which is the equivalent - // of negating the result (i.e. it is a bit set when the result of the - // operation is zero). - SDValue SRIdxVal = - CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); - SDValue CRBit = - SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, - MVT::i1, CR0Reg, SRIdxVal, - WideOp.getValue(1)), 0); - return CRBit.getNode(); -} - -// Lower a logical operation on i1 values into a GPR sequence if possible. -// The result can be kept in a GPR if requested. -// Three types of inputs can be handled: -// - SETCC -// - TRUNCATE -// - Logical operation (AND/OR/XOR) -// There is also a special case that is handled (namely a complement operation -// achieved with xor %a, -1). -SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { - assert(isLogicOp(LogicOp.getOpcode()) && - "Can only handle logic operations here."); - assert(LogicOp.getValueType() == MVT::i1 && - "Can only handle logic operations on i1 values here."); - SDLoc dl(LogicOp); - SDValue LHS, RHS; - - // Special case: xor %a, -1 - bool IsBitwiseNegation = isBitwiseNot(LogicOp); - - // Produces a GPR sequence for each operand of the binary logic operation. - // For SETCC, it produces the respective comparison, for TRUNCATE it truncates - // the value in a GPR and for logic operations, it will recursively produce - // a GPR sequence for the operation. - auto getLogicOperand = [&] (SDValue Operand) -> SDValue { - unsigned OperandOpcode = Operand.getOpcode(); - if (OperandOpcode == ISD::SETCC) - return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); - else if (OperandOpcode == ISD::TRUNCATE) { - SDValue InputOp = Operand.getOperand(0); - EVT InVT = InputOp.getValueType(); - return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : - PPC::RLDICL, dl, InVT, InputOp, - S->getI64Imm(0, dl), - S->getI64Imm(63, dl)), 0); - } else if (isLogicOp(OperandOpcode)) - return computeLogicOpInGPR(Operand); - return SDValue(); - }; - LHS = getLogicOperand(LogicOp.getOperand(0)); - RHS = getLogicOperand(LogicOp.getOperand(1)); - - // If a GPR sequence can't be produced for the LHS we can't proceed. - // Not producing a GPR sequence for the RHS is only a problem if this isn't - // a bitwise negation operation. - if (!LHS || (!RHS && !IsBitwiseNegation)) - return SDValue(); - - NumLogicOpsOnComparison++; - - // We will use the inputs as 64-bit values. - if (LHS.getValueType() == MVT::i32) - LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); - if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) - RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); - - unsigned NewOpc; - switch (LogicOp.getOpcode()) { - default: llvm_unreachable("Unknown logic operation."); - case ISD::AND: NewOpc = PPC::AND8; break; - case ISD::OR: NewOpc = PPC::OR8; break; - case ISD::XOR: NewOpc = PPC::XOR8; break; - } - - if (IsBitwiseNegation) { - RHS = S->getI64Imm(1, dl); - NewOpc = PPC::XORI8; - } - - return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); - -} - -/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. -/// Otherwise just reinterpret it as a 64-bit value. -/// Useful when emitting comparison code for 32-bit values without using -/// the compare instruction (which only considers the lower 32-bits). -SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { - assert(Input.getValueType() == MVT::i32 && - "Can only sign-extend 32-bit values here."); - unsigned Opc = Input.getOpcode(); - - // The value was sign extended and then truncated to 32-bits. No need to - // sign extend it again. - if (Opc == ISD::TRUNCATE && - (Input.getOperand(0).getOpcode() == ISD::AssertSext || - Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) - return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); - - LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); - // The input is a sign-extending load. All ppc sign-extending loads - // sign-extend to the full 64-bits. - if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) - return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); - - ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); - // We don't sign-extend constants. - if (InputConst) - return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); - - SDLoc dl(Input); - SignExtensionsAdded++; - return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, - MVT::i64, Input), 0); -} - -/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. -/// Otherwise just reinterpret it as a 64-bit value. -/// Useful when emitting comparison code for 32-bit values without using -/// the compare instruction (which only considers the lower 32-bits). -SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { - assert(Input.getValueType() == MVT::i32 && - "Can only zero-extend 32-bit values here."); - unsigned Opc = Input.getOpcode(); - - // The only condition under which we can omit the actual extend instruction: - // - The value is a positive constant - // - The value comes from a load that isn't a sign-extending load - // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. - bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && - (Input.getOperand(0).getOpcode() == ISD::AssertZext || - Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); - if (IsTruncateOfZExt) - return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); - - ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); - if (InputConst && InputConst->getSExtValue() >= 0) - return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); - - LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); - // The input is a load that doesn't sign-extend (it will be zero-extended). - if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) - return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); - - // None of the above, need to zero-extend. - SDLoc dl(Input); - ZeroExtensionsAdded++; - return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, - S->getI64Imm(0, dl), - S->getI64Imm(32, dl)), 0); -} - -// Handle a 32-bit value in a 64-bit register and vice-versa. These are of -// course not actual zero/sign extensions that will generate machine code, -// they're just a way to reinterpret a 32 bit value in a register as a -// 64 bit value and vice-versa. -SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, - ExtOrTruncConversion Conv) { - SDLoc dl(NatWidthRes); - - // For reinterpreting 32-bit values as 64 bit values, we generate - // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> - if (Conv == ExtOrTruncConversion::Ext) { - SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); - SDValue SubRegIdx = - CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); - return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, - ImDef, NatWidthRes, SubRegIdx), 0); - } - - assert(Conv == ExtOrTruncConversion::Trunc && - "Unknown convertion between 32 and 64 bit values."); - // For reinterpreting 64-bit values as 32-bit values, we just need to - // EXTRACT_SUBREG (i.e. extract the low word). - SDValue SubRegIdx = - CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); - return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, - NatWidthRes, SubRegIdx), 0); -} - -// Produce a GPR sequence for compound comparisons (<=, >=) against zero. -// Handle both zero-extensions and sign-extensions. -SDValue -IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, - ZeroCompare CmpTy) { - EVT InVT = LHS.getValueType(); - bool Is32Bit = InVT == MVT::i32; - SDValue ToExtend; - - // Produce the value that needs to be either zero or sign extended. - switch (CmpTy) { - case ZeroCompare::GEZExt: - case ZeroCompare::GESExt: - ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, - dl, InVT, LHS, LHS), 0); - break; - case ZeroCompare::LEZExt: - case ZeroCompare::LESExt: { - if (Is32Bit) { - // Upper 32 bits cannot be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - SDValue Neg = - SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); - ToExtend = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - Neg, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - } else { - SDValue Addi = - SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, - S->getI64Imm(~0ULL, dl)), 0); - ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - Addi, LHS), 0); - } - break; - } - } - - // For 64-bit sequences, the extensions are the same for the GE/LE cases. - if (!Is32Bit && - (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - ToExtend, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - if (!Is32Bit && - (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) - return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, - S->getI64Imm(63, dl)), 0); - - assert(Is32Bit && "Should have handled the 32-bit sequences above."); - // For 32-bit sequences, the extensions differ between GE/LE cases. - switch (CmpTy) { - case ZeroCompare::GEZExt: { - SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), - S->getI32Imm(31, dl) }; - return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, - ShiftOps), 0); - } - case ZeroCompare::GESExt: - return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, - S->getI32Imm(31, dl)), 0); - case ZeroCompare::LEZExt: - return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, - S->getI32Imm(1, dl)), 0); - case ZeroCompare::LESExt: - return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, - S->getI32Imm(-1, dl)), 0); - } - - // The above case covers all the enumerators so it can't have a default clause - // to avoid compiler warnings. - llvm_unreachable("Unknown zero-comparison type."); -} - -/// Produces a zero-extended result of comparing two 32-bit values according to -/// the passed condition code. -SDValue -IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || - CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) - return SDValue(); - bool IsRHSZero = RHSValue == 0; - bool IsRHSOne = RHSValue == 1; - bool IsRHSNegOne = RHSValue == -1LL; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) - // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); - SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), - S->getI32Imm(31, dl) }; - return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, - ShiftOps), 0); - } - case ISD::SETNE: { - // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) - // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); - SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), - S->getI32Imm(31, dl) }; - SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); - return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, - S->getI32Imm(1, dl)), 0); - } - case ISD::SETGE: { - // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) - // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) - if(IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); - - // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) - // by swapping inputs and falling through. - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; - } - case ISD::SETLE: { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) - // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) - if(IsRHSZero) { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); - } - - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - SDValue Sub = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); - SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, - S->getI64Imm(1, dl), S->getI64Imm(63, dl)), - 0); - return - SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, - MVT::i64, Shift, S->getI32Imm(1, dl)), 0); - } - case ISD::SETGT: { - // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) - // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) - // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) - // Handle SETLT -1 (which is equivalent to SETGE 0). - if (IsRHSNegOne) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); - - if (IsRHSZero) { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - SDValue Neg = - SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); - } - // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as - // (%b < %a) by swapping inputs and falling through. - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; - } - case ISD::SETLT: { - // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) - // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) - // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) - // Handle SETLT 1 (which is equivalent to SETLE 0). - if (IsRHSOne) { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); - } - - if (IsRHSZero) { - SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), - S->getI32Imm(31, dl) }; - return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, - ShiftOps), 0); - } - - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - SDValue SUBFNode = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - SUBFNode, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - } - case ISD::SETUGE: - // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) - // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULE: { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = zeroExtendInputIfNeeded(LHS); - RHS = zeroExtendInputIfNeeded(RHS); - SDValue Subtract = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); - SDValue SrdiNode = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - Subtract, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, - S->getI32Imm(1, dl)), 0); - } - case ISD::SETUGT: - // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) - // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULT: { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = zeroExtendInputIfNeeded(LHS); - RHS = zeroExtendInputIfNeeded(RHS); - SDValue Subtract = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - Subtract, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - } - } -} - -/// Produces a sign-extended result of comparing two 32-bit values according to -/// the passed condition code. -SDValue -IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || - CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) - return SDValue(); - bool IsRHSZero = RHSValue == 0; - bool IsRHSOne = RHSValue == 1; - bool IsRHSNegOne = RHSValue == -1LL; - - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // (sext (setcc %a, %b, seteq)) -> - // (ashr (shl (ctlz (xor %a, %b)), 58), 63) - // (sext (setcc %a, 0, seteq)) -> - // (ashr (shl (ctlz %a), 58), 63) - SDValue CountInput = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Cntlzw = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); - SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), - S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; - SDValue Slwi = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); - return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); - } - case ISD::SETNE: { - // Bitwise xor the operands, count leading zeros, shift right by 5 bits and - // flip the bit, finally take 2's complement. - // (sext (setcc %a, %b, setne)) -> - // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) - // Same as above, but the first xor is not needed. - // (sext (setcc %a, 0, setne)) -> - // (neg (xor (lshr (ctlz %a), 5), 1)) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); - SDValue ShiftOps[] = - { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; - SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); - SDValue Xori = - SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, - S->getI32Imm(1, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); - } - case ISD::SETGE: { - // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) - // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) - if (IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); - - // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) - // by swapping inputs and falling through. - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; - } - case ISD::SETLE: { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) - // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) - if (IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); - - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - SDValue SUBFNode = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, - LHS, RHS), 0); - SDValue Srdi = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - SUBFNode, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, - S->getI32Imm(-1, dl)), 0); - } - case ISD::SETGT: { - // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) - // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) - // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) - if (IsRHSNegOne) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); - if (IsRHSZero) { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - SDValue Neg = - SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, - S->getI64Imm(63, dl)), 0); - } - // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as - // (%b < %a) by swapping inputs and falling through. - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; - } - case ISD::SETLT: { - // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) - // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) - // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) - if (IsRHSOne) { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); - } - if (IsRHSZero) - return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, - S->getI32Imm(31, dl)), 0); - - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - SDValue SUBFNode = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, - SUBFNode, S->getI64Imm(63, dl)), 0); - } - case ISD::SETUGE: - // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) - // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULE: { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = zeroExtendInputIfNeeded(LHS); - RHS = zeroExtendInputIfNeeded(RHS); - SDValue Subtract = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); - SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, - S->getI32Imm(1, dl), S->getI32Imm(63,dl)), - 0); - return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, - S->getI32Imm(-1, dl)), 0); - } - case ISD::SETUGT: - // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) - // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULT: { - if (CmpInGPR == ICGPR_NonExtIn) - return SDValue(); - // The upper 32-bits of the register can't be undefined for this sequence. - LHS = zeroExtendInputIfNeeded(LHS); - RHS = zeroExtendInputIfNeeded(RHS); - SDValue Subtract = - SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, - Subtract, S->getI64Imm(63, dl)), 0); - } - } -} - -/// Produces a zero-extended result of comparing two 64-bit values according to -/// the passed condition code. -SDValue -IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || - CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) - return SDValue(); - bool IsRHSZero = RHSValue == 0; - bool IsRHSOne = RHSValue == 1; - bool IsRHSNegOne = RHSValue == -1LL; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) - // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, - S->getI64Imm(58, dl), - S->getI64Imm(63, dl)), 0); - } - case ISD::SETNE: { - // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) - // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) - // {addcz.reg, addcz.CA} = (addcarry %a, -1) - // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue AC = - SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, - Xor, S->getI32Imm(~0U, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, - Xor, AC.getValue(1)), 0); - } - case ISD::SETGE: { - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (zext (setcc %a, %b, setge)) -> - // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) - // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) - if (IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; - } - case ISD::SETLE: { - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (zext (setcc %a, %b, setge)) -> - // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) - // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) - if (IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); - SDValue ShiftL = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, - S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - SDValue ShiftR = - SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, - S->getI64Imm(63, dl)), 0); - SDValue SubtractCarry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - LHS, RHS), 1); - return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, - ShiftR, ShiftL, SubtractCarry), 0); - } - case ISD::SETGT: { - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (zext (setcc %a, %b, setgt)) -> - // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) - // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) - if (IsRHSNegOne) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); - if (IsRHSZero) { - SDValue Addi = - SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, - S->getI64Imm(~0ULL, dl)), 0); - SDValue Nor = - SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, - S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - } - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; - } - case ISD::SETLT: { - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (zext (setcc %a, %b, setlt)) -> - // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) - // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) - if (IsRHSOne) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); - if (IsRHSZero) - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, - S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - SDValue SRADINode = - SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, - LHS, S->getI64Imm(63, dl)), 0); - SDValue SRDINode = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - RHS, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - SDValue SUBFC8Carry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - RHS, LHS), 1); - SDValue ADDE8Node = - SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, - SRDINode, SRADINode, SUBFC8Carry), 0); - return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, - ADDE8Node, S->getI64Imm(1, dl)), 0); - } - case ISD::SETUGE: - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULE: { - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) - SDValue SUBFC8Carry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - LHS, RHS), 1); - SDValue SUBFE8Node = - SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, - LHS, LHS, SUBFC8Carry), 0); - return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, - SUBFE8Node, S->getI64Imm(1, dl)), 0); - } - case ISD::SETUGT: - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULT: { - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) - SDValue SubtractCarry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - RHS, LHS), 1); - SDValue ExtSub = - SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, - LHS, LHS, SubtractCarry), 0); - return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, - ExtSub), 0); - } - } -} - -/// Produces a sign-extended result of comparing two 64-bit values according to -/// the passed condition code. -SDValue -IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || - CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) - return SDValue(); - bool IsRHSZero = RHSValue == 0; - bool IsRHSOne = RHSValue == 1; - bool IsRHSNegOne = RHSValue == -1LL; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) - // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) - // {addcz.reg, addcz.CA} = (addcarry %a, -1) - // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) - SDValue AddInput = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue Addic = - SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, - AddInput, S->getI32Imm(~0U, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, - Addic, Addic.getValue(1)), 0); - } - case ISD::SETNE: { - // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) - // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) - // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) - // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue SC = - SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, - Xor, S->getI32Imm(0, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, - SC, SC.getValue(1)), 0); - } - case ISD::SETGE: { - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (zext (setcc %a, %b, setge)) -> - // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) - // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) - if (IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; - } - case ISD::SETLE: { - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (zext (setcc %a, %b, setge)) -> - // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) - // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) - if (IsRHSZero) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); - SDValue ShiftR = - SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, - S->getI64Imm(63, dl)), 0); - SDValue ShiftL = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, - S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - SDValue SubtractCarry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - LHS, RHS), 1); - SDValue Adde = - SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, - ShiftR, ShiftL, SubtractCarry), 0); - return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); - } - case ISD::SETGT: { - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (zext (setcc %a, %b, setgt)) -> - // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) - // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63) - if (IsRHSNegOne) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); - if (IsRHSZero) { - SDValue Add = - SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, - S->getI64Imm(-1, dl)), 0); - SDValue Nor = - SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); - return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, - S->getI64Imm(63, dl)), 0); - } - std::swap(LHS, RHS); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; - } - case ISD::SETLT: { - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (zext (setcc %a, %b, setlt)) -> - // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) - // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) - if (IsRHSOne) - return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); - if (IsRHSZero) { - return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, - S->getI64Imm(63, dl)), 0); - } - SDValue SRADINode = - SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, - LHS, S->getI64Imm(63, dl)), 0); - SDValue SRDINode = - SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, - RHS, S->getI64Imm(1, dl), - S->getI64Imm(63, dl)), 0); - SDValue SUBFC8Carry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - RHS, LHS), 1); - SDValue ADDE8Node = - SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, - SRDINode, SRADINode, SUBFC8Carry), 0); - SDValue XORI8Node = - SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, - ADDE8Node, S->getI64Imm(1, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, - XORI8Node), 0); - } - case ISD::SETUGE: - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULE: { - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) - SDValue SubtractCarry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - LHS, RHS), 1); - SDValue ExtSub = - SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, - LHS, SubtractCarry), 0); - return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, - ExtSub, ExtSub), 0); - } - case ISD::SETUGT: - // {subc.reg, subc.CA} = (subcarry %b, %a) - // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) - std::swap(LHS, RHS); - LLVM_FALLTHROUGH; - case ISD::SETULT: { - // {subc.reg, subc.CA} = (subcarry %a, %b) - // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) - SDValue SubCarry = - SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, - RHS, LHS), 1); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, - LHS, LHS, SubCarry), 0); - } - } -} - -/// Do all uses of this SDValue need the result in a GPR? -/// This is meant to be used on values that have type i1 since -/// it is somewhat meaningless to ask if values of other types -/// should be kept in GPR's. -static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { - assert(Compare.getOpcode() == ISD::SETCC && - "An ISD::SETCC node required here."); - - // For values that have a single use, the caller should obviously already have - // checked if that use is an extending use. We check the other uses here. - if (Compare.hasOneUse()) - return true; - // We want the value in a GPR if it is being extended, used for a select, or - // used in logical operations. - for (auto CompareUse : Compare.getNode()->uses()) - if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && - CompareUse->getOpcode() != ISD::ZERO_EXTEND && - CompareUse->getOpcode() != ISD::SELECT && - !isLogicOp(CompareUse->getOpcode())) { - OmittedForNonExtendUses++; - return false; - } - return true; -} - -/// Returns an equivalent of a SETCC node but with the result the same width as -/// the inputs. This can also be used for SELECT_CC if either the true or false -/// values is a power of two while the other is zero. -SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, - SetccInGPROpts ConvOpts) { - assert((Compare.getOpcode() == ISD::SETCC || - Compare.getOpcode() == ISD::SELECT_CC) && - "An ISD::SETCC node required here."); - - // Don't convert this comparison to a GPR sequence because there are uses - // of the i1 result (i.e. uses that require the result in the CR). - if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) - return SDValue(); - - SDValue LHS = Compare.getOperand(0); - SDValue RHS = Compare.getOperand(1); - - // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. - int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; - ISD::CondCode CC = - cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); - EVT InputVT = LHS.getValueType(); - if (InputVT != MVT::i32 && InputVT != MVT::i64) - return SDValue(); - - if (ConvOpts == SetccInGPROpts::ZExtInvert || - ConvOpts == SetccInGPROpts::SExtInvert) - CC = ISD::getSetCCInverse(CC, true); - - bool Inputs32Bit = InputVT == MVT::i32; - - SDLoc dl(Compare); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; - bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || - ConvOpts == SetccInGPROpts::SExtInvert; - - if (IsSext && Inputs32Bit) - return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); - else if (Inputs32Bit) - return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); - else if (IsSext) - return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); - return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); -} - -} // end anonymous namespace - -bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { - if (N->getValueType(0) != MVT::i32 && - N->getValueType(0) != MVT::i64) - return false; - - // This optimization will emit code that assumes 64-bit registers - // so we don't want to run it in 32-bit mode. Also don't run it - // on functions that are not to be optimized. - if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) - return false; - - switch (N->getOpcode()) { - default: break; - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::AND: - case ISD::OR: - case ISD::XOR: { - IntegerCompareEliminator ICmpElim(CurDAG, this); - if (SDNode *New = ICmpElim.Select(N)) { - ReplaceNode(N, New); - return true; - } - } - } - return false; -} - -bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { +SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) - return false; + return nullptr; if (!UseBitPermRewriter) - return false; + return nullptr; switch (N->getOpcode()) { default: break; @@ -3634,21 +1971,17 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { case ISD::AND: case ISD::OR: { BitPermutationSelector BPS(CurDAG); - if (SDNode *New = BPS.Select(N)) { - ReplaceNode(N, New); - return true; - } - return false; + return BPS.Select(N); } } - return false; + return nullptr; } /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. -SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl) { +SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDLoc dl) { // Always select the LHS. unsigned Opc; @@ -3688,7 +2021,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI32Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLW; } else { - int16_t SImm; + short SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm((int)SImm & 0xFFFF, @@ -3735,7 +2068,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI64Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLD; } else { - int16_t SImm; + short SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI64Imm(SImm & 0xFFFF, dl)), @@ -3743,63 +2076,10 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, Opc = PPC::CMPD; } } else if (LHS.getValueType() == MVT::f32) { - if (PPCSubTarget->hasSPE()) { - switch (CC) { - default: - case ISD::SETEQ: - case ISD::SETNE: - Opc = PPC::EFSCMPEQ; - break; - case ISD::SETLT: - case ISD::SETGE: - case ISD::SETOLT: - case ISD::SETOGE: - case ISD::SETULT: - case ISD::SETUGE: - Opc = PPC::EFSCMPLT; - break; - case ISD::SETGT: - case ISD::SETLE: - case ISD::SETOGT: - case ISD::SETOLE: - case ISD::SETUGT: - case ISD::SETULE: - Opc = PPC::EFSCMPGT; - break; - } - } else - Opc = PPC::FCMPUS; - } else if (LHS.getValueType() == MVT::f64) { - if (PPCSubTarget->hasSPE()) { - switch (CC) { - default: - case ISD::SETEQ: - case ISD::SETNE: - Opc = PPC::EFDCMPEQ; - break; - case ISD::SETLT: - case ISD::SETGE: - case ISD::SETOLT: - case ISD::SETOGE: - case ISD::SETULT: - case ISD::SETUGE: - Opc = PPC::EFDCMPLT; - break; - case ISD::SETGT: - case ISD::SETLE: - case ISD::SETOGT: - case ISD::SETOLE: - case ISD::SETUGT: - case ISD::SETULE: - Opc = PPC::EFDCMPGT; - break; - } - } else - Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; + Opc = PPC::FCMPUS; } else { - assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); - assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); - Opc = PPC::XSCMPUQP; + assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); + Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } @@ -3975,7 +2255,7 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, } } -bool PPCDAGToDAGISel::trySETCC(SDNode *N) { +SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); @@ -3996,22 +2276,20 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETNE: { if (isPPC64) break; SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)), 0); - CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); - return true; + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, + AD.getValue(1)); } case ISD::SETLT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETGT: { SDValue T = @@ -4019,8 +2297,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } } } else if (Imm == ~0U) { // setcc op, -1 @@ -4031,20 +2308,18 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(1, dl)), 0); - CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - SDValue(CurDAG->getMachineNode(PPC::LI, dl, - MVT::i32, - getI32Imm(0, dl)), - 0), Op.getValue(1)); - return true; + return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + SDValue(CurDAG->getMachineNode(PPC::LI, dl, + MVT::i32, + getI32Imm(0, dl)), + 0), Op.getValue(1)); case ISD::SETNE: { if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)); - CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, - SDValue(AD, 1)); - return true; + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), + Op, SDValue(AD, 1)); } case ISD::SETLT: { SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, @@ -4053,15 +2328,14 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { Op), 0); SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); - return true; + return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, + getI32Imm(1, dl)); } } } @@ -4073,8 +2347,8 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { - if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) - return false; + if (PPCSubTarget->hasQPX()) + return nullptr; EVT VecVT = LHS.getValueType(); bool Swap, Negate; @@ -4086,29 +2360,22 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { EVT ResVT = VecVT.changeVectorElementTypeToInteger(); if (Negate) { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); - CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, - ResVT, VCmp, VCmp); - return true; + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : + PPC::VNOR, + ResVT, VCmp, VCmp); } - CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); - return true; + return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); } if (PPCSubTarget->useCRBits()) - return false; + return nullptr; bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl); SDValue IntCR; - // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that - // The correct compare instruction is already set by SelectCC() - if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { - Idx = 1; - } - // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); @@ -4121,208 +2388,31 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; - if (!Inv) { - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; - } + if (!Inv) + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); // Get the specified bit. SDValue Tmp = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); - return true; + return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); } -/// Does this node represent a load/store node whose address can be represented -/// with a register plus an immediate that's a multiple of \p Val: -bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { - LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); - StoreSDNode *STN = dyn_cast<StoreSDNode>(N); - SDValue AddrOp; - if (LDN) - AddrOp = LDN->getOperand(1); - else if (STN) - AddrOp = STN->getOperand(2); - - // If the address points a frame object or a frame object with an offset, - // we need to check the object alignment. - short Imm = 0; - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( - AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : - AddrOp)) { - // If op0 is a frame index that is under aligned, we can't do it either, - // because it is translated to r31 or r1 + slot + offset. We won't know the - // slot number until the stack frame is finalized. - const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); - unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); - if ((SlotAlign % Val) != 0) - return false; - - // If we have an offset, we need further check on the offset. - if (AddrOp.getOpcode() != ISD::ADD) - return true; - } - - if (AddrOp.getOpcode() == ISD::ADD) - return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); - - // If the address comes from the outside, the offset will be zero. - return AddrOp.getOpcode() == ISD::CopyFromReg; -} - -void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { +SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. - MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); - CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + return Result; } -static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, - bool &NeedSwapOps, bool &IsUnCmp) { - - assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue TrueRes = N->getOperand(2); - SDValue FalseRes = N->getOperand(3); - ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); - if (!TrueConst) - return false; - - assert((N->getSimpleValueType(0) == MVT::i64 || - N->getSimpleValueType(0) == MVT::i32) && - "Expecting either i64 or i32 here."); - - // We are looking for any of: - // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) - // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) - // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) - // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) - int64_t TrueResVal = TrueConst->getSExtValue(); - if ((TrueResVal < -1 || TrueResVal > 1) || - (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || - (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || - (TrueResVal == 0 && - (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) - return false; - - bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; - SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); - if (SetOrSelCC.getOpcode() != ISD::SETCC && - SetOrSelCC.getOpcode() != ISD::SELECT_CC) - return false; - - // Without this setb optimization, the outer SELECT_CC will be manually - // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass - // transforms pseduo instruction to isel instruction. When there are more than - // one use for result like zext/sext, with current optimization we only see - // isel is replaced by setb but can't see any significant gain. Since - // setb has longer latency than original isel, we should avoid this. Another - // point is that setb requires comparison always kept, it can break the - // oppotunity to get the comparison away if we have in future. - if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) - return false; - - SDValue InnerLHS = SetOrSelCC.getOperand(0); - SDValue InnerRHS = SetOrSelCC.getOperand(1); - ISD::CondCode InnerCC = - cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); - // If the inner comparison is a select_cc, make sure the true/false values are - // 1/-1 and canonicalize it if needed. - if (InnerIsSel) { - ConstantSDNode *SelCCTrueConst = - dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); - ConstantSDNode *SelCCFalseConst = - dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); - if (!SelCCTrueConst || !SelCCFalseConst) - return false; - int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); - int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); - // The values must be -1/1 (requiring a swap) or 1/-1. - if (SelCCTVal == -1 && SelCCFVal == 1) { - std::swap(InnerLHS, InnerRHS); - } else if (SelCCTVal != 1 || SelCCFVal != -1) - return false; - } - - // Canonicalize unsigned case - if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { - IsUnCmp = true; - InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT; - } - - bool InnerSwapped = false; - if (LHS == InnerRHS && RHS == InnerLHS) - InnerSwapped = true; - else if (LHS != InnerLHS || RHS != InnerRHS) - return false; - - switch (CC) { - // (select_cc lhs, rhs, 0, \ - // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) - case ISD::SETEQ: - if (!InnerIsSel) - return false; - if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) - return false; - NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; - break; - - // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) - // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) - // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) - // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) - // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) - // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) - case ISD::SETULT: - if (!IsUnCmp && InnerCC != ISD::SETNE) - return false; - IsUnCmp = true; - LLVM_FALLTHROUGH; - case ISD::SETLT: - if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || - (InnerCC == ISD::SETLT && InnerSwapped)) - NeedSwapOps = (TrueResVal == 1); - else - return false; - break; - - // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) - // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) - // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) - // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) - // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) - // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) - case ISD::SETUGT: - if (!IsUnCmp && InnerCC != ISD::SETNE) - return false; - IsUnCmp = true; - LLVM_FALLTHROUGH; - case ISD::SETGT: - if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || - (InnerCC == ISD::SETGT && InnerSwapped)) - NeedSwapOps = (TrueResVal == -1); - else - return false; - break; - - default: - return false; - } - - LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); - LLVM_DEBUG(N->dump()); - - return true; -} // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. -void PPCDAGToDAGISel::Select(SDNode *N) { +SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return; // Already selected. + return nullptr; // Already selected. } // In case any misguided DAG-level optimizations form an ADD with a @@ -4333,68 +2423,40 @@ void PPCDAGToDAGISel::Select(SDNode *N) { llvm_unreachable("Invalid ADD with TargetConstant operand"); // Try matching complex bit permutations before doing anything else. - if (tryBitPermutation(N)) - return; - - // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). - if (tryIntCompareInGPR(N)) - return; + if (SDNode *NN = SelectBitPermutation(N)) + return NN; switch (N->getOpcode()) { default: break; - case ISD::Constant: - if (N->getValueType(0) == MVT::i64) { - ReplaceNode(N, selectI64Imm(CurDAG, N)); - return; - } + case ISD::Constant: { + if (N->getValueType(0) == MVT::i64) + return SelectInt64(CurDAG, N); break; + } - case ISD::SETCC: - if (trySETCC(N)) - return; + case ISD::SETCC: { + SDNode *SN = SelectSETCC(N); + if (SN) + return SN; break; - - case PPCISD::CALL: { - const Module *M = MF->getFunction().getParent(); - - if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) || - !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC) - break; - - SDValue Op = N->getOperand(1); - - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { - if (GA->getTargetFlags() == PPCII::MO_PLT) - getGlobalBaseReg(); - } - else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { - if (ES->getTargetFlags() == PPCII::MO_PLT) - getGlobalBaseReg(); - } } - break; - case PPCISD::GlobalBaseReg: - ReplaceNode(N, getGlobalBaseReg()); - return; + return getGlobalBaseReg(); case ISD::FrameIndex: - selectFrameIndex(N, N); - return; + return getFrameIndex(N, N); case PPCISD::MFOCRF: { SDValue InFlag = N->getOperand(1); - ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, - N->getOperand(0), InFlag)); - return; + return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, + N->getOperand(0), InFlag); } - case PPCISD::READ_TIME_BASE: - ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, - MVT::Other, N->getOperand(0))); - return; + case PPCISD::READ_TIME_BASE: { + return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, + MVT::Other, N->getOperand(0)); + } case PPCISD::SRA_ADDZE: { SDValue N0 = N->getOperand(0); @@ -4406,43 +2468,27 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Op = CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, N0, ShiftAmt); - CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), - SDValue(Op, 1)); - return; + return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, + SDValue(Op, 0), SDValue(Op, 1)); } else { assert(N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); SDNode *Op = CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, N0, ShiftAmt); - CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), - SDValue(Op, 1)); - return; + return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + SDValue(Op, 0), SDValue(Op, 1)); } } - case ISD::STORE: { - // Change TLS initial-exec D-form stores to X-form stores. - StoreSDNode *ST = cast<StoreSDNode>(N); - if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() && - ST->getAddressingMode() != ISD::PRE_INC) - if (tryTLSXFormStore(ST)) - return; - break; - } case ISD::LOAD: { // Handle preincrement loads. LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); // Normal loads are handled by code generated from the .td file. - if (LD->getAddressingMode() != ISD::PRE_INC) { - // Change TLS initial-exec D-form loads to X-form loads. - if (EnableTLSOpt && PPCSubTarget->isELFv2ABI()) - if (tryTLSXFormLoad(LD)) - return; + if (LD->getAddressingMode() != ISD::PRE_INC) break; - } SDValue Offset = LD->getOffset(); if (Offset.getOpcode() == ISD::TargetConstant || @@ -4478,12 +2524,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - SDNode *MN = CurDAG->getMachineNode( - Opcode, dl, LD->getValueType(0), - PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -4518,12 +2563,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *MN = CurDAG->getMachineNode( - Opcode, dl, LD->getValueType(0), - PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } } @@ -4538,8 +2582,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Val = N->getOperand(0).getOperand(0); SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // If this is just a masked value where the input is not handled above, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm @@ -4549,8 +2592,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Val = N->getOperand(0); SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // If this is a 64-bit zero-extension mask, emit rldicl. if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && @@ -4559,23 +2601,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { MB = 64 - countTrailingOnes(Imm64); SH = 0; - if (Val.getOpcode() == ISD::ANY_EXTEND) { - auto Op0 = Val.getOperand(0); - if ( Op0.getOpcode() == ISD::SRL && - isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { - - auto ResultType = Val.getNode()->getValueType(0); - auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, - ResultType); - SDValue IDVal (ImDef, 0); - - Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, - ResultType, IDVal, Op0.getOperand(0), - getI32Imm(1, dl)), 0); - SH = 64 - Imm; - } - } - // If the operand is a logical right shift, we can fold it into this // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) // for n <= mb. The right shift is really a left rotate followed by a @@ -4589,26 +2614,12 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); } - // If this is a negated 64-bit zero-extension mask, - // i.e. the immediate is a sequence of ones from most significant side - // and all zero for reminder, we should use rldicr. - if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && - isMask_64(~Imm64)) { - SDValue Val = N->getOperand(0); - MB = 63 - countTrailingOnes(~Imm64); - SH = 0; - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); - return; - } - // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); - return; + return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a @@ -4634,8 +2645,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); - return; + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } @@ -4644,73 +2654,29 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::OR: { if (N->getValueType(0) == MVT::i32) - if (tryBitfieldInsert(N)) - return; + if (SDNode *I = SelectBitfieldInsert(N)) + return I; - int16_t Imm; + short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); + APInt LHSKnownZero, LHSKnownOne; + CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne); // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. - if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); - return; - } - } - - // OR with a 32-bit immediate can be handled by ori + oris - // without creating an immediate in a GPR. - uint64_t Imm64 = 0; - bool IsPPC64 = PPCSubTarget->isPPC64(); - if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && - (Imm64 & ~0xFFFFFFFFuLL) == 0) { - // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. - uint64_t ImmHi = Imm64 >> 16; - uint64_t ImmLo = Imm64 & 0xFFFF; - if (ImmHi != 0 && ImmLo != 0) { - SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, - N->getOperand(0), - getI16Imm(ImmLo, dl)); - SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; - CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); - return; - } + if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) + return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); } // Other cases are autogenerated. break; } - case ISD::XOR: { - // XOR with a 32-bit immediate can be handled by xori + xoris - // without creating an immediate in a GPR. - uint64_t Imm64 = 0; - bool IsPPC64 = PPCSubTarget->isPPC64(); - if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && - (Imm64 & ~0xFFFFFFFFuLL) == 0) { - // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. - uint64_t ImmHi = Imm64 >> 16; - uint64_t ImmLo = Imm64 & 0xFFFF; - if (ImmHi != 0 && ImmLo != 0) { - SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, - N->getOperand(0), - getI16Imm(ImmLo, dl)); - SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; - CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); - return; - } - } - - break; - } case ISD::ADD: { - int16_t Imm; + short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && - isIntS16Immediate(N->getOperand(1), Imm)) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); - return; - } + isIntS16Immediate(N->getOperand(1), Imm)) + return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); break; } @@ -4721,8 +2687,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // Other cases are autogenerated. @@ -4735,8 +2700,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // Other cases are autogenerated. @@ -4762,9 +2726,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); - CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, - SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); - return; + return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, + CR0Reg, SRIdxVal, + SDValue(AndI.getNode(), 1) /* glue */); } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -4777,31 +2741,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getValueType() == MVT::i1) break; - if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { - bool NeedSwapOps = false; - bool IsUnCmp = false; - if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (NeedSwapOps) - std::swap(LHS, RHS); - - // Make use of SelectCC to generate the comparison to set CR bits, for - // equality comparisons having one literal operand, SelectCC probably - // doesn't need to materialize the whole literal and just use xoris to - // check it first, it leads the following comparison result can't - // exactly represent GT/LT relationship. So to avoid this we specify - // SETGT/SETUGT here instead of SETEQ. - SDValue GenCC = - SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); - CurDAG->SelectNodeTo( - N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, - N->getValueType(0), GenCC); - NumP9Setb++; - return; - } - } - // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) @@ -4814,9 +2753,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, N->getOperand(0), getI32Imm(~0U, dl)); - CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), - N->getOperand(0), SDValue(Tmp, 1)); - return; + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, + SDValue(Tmp, 0), N->getOperand(0), + SDValue(Tmp, 1)); } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); @@ -4847,8 +2786,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, NotC, N->getOperand(3)), 0); - CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); - return; + return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); } unsigned BROpc = getPredicateForSetCC(CC); @@ -4858,24 +2796,16 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SelectCCOp = PPC::SELECT_CC_I4; else if (N->getValueType(0) == MVT::i64) SelectCCOp = PPC::SELECT_CC_I8; - else if (N->getValueType(0) == MVT::f32) { + else if (N->getValueType(0) == MVT::f32) if (PPCSubTarget->hasP8Vector()) SelectCCOp = PPC::SELECT_CC_VSSRC; - else if (PPCSubTarget->hasSPE()) - SelectCCOp = PPC::SELECT_CC_SPE4; else SelectCCOp = PPC::SELECT_CC_F4; - } else if (N->getValueType(0) == MVT::f64) { + else if (N->getValueType(0) == MVT::f64) if (PPCSubTarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; - else if (PPCSubTarget->hasSPE()) - SelectCCOp = PPC::SELECT_CC_SPE; else SelectCCOp = PPC::SELECT_CC_F8; - } else if (N->getValueType(0) == MVT::f128) - SelectCCOp = PPC::SELECT_CC_F16; - else if (PPCSubTarget->hasSPE()) - SelectCCOp = PPC::SELECT_CC_SPE; else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) SelectCCOp = PPC::SELECT_CC_QFRC; else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) @@ -4890,9 +2820,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), getI32Imm(BROpc, dl) }; - CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); - return; + return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); } + case ISD::VSELECT: + if (PPCSubTarget->hasVSX()) { + SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; + return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); + } + + break; case ISD::VECTOR_SHUFFLE: if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { @@ -4920,11 +2856,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - MachineMemOperand *MemOp = LD->getMemOperand(); - SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops); - CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp}); - return; + return CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); } } @@ -4940,8 +2873,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, MVT::i32); SDValue Ops[] = { Op1, Op2, DMV }; - CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); } break; @@ -4949,11 +2881,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::BDZ: { bool IsPPC64 = PPCSubTarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; - CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ - ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) - : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), - MVT::Other, Ops); - return; + return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? + (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), + MVT::Other, Ops); } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. @@ -4969,8 +2900,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; - CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); } case ISD::BR_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); @@ -4989,21 +2919,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; } - // A signed comparison of i1 values produces the opposite result to an - // unsigned one if the condition code includes less-than or greater-than. - // This is because 1 is the most negative signed i1 number and the most - // positive unsigned i1 number. The CR-logical operations used for such - // comparisons are non-commutative so for signed comparisons vs. unsigned - // ones, the input operands just need to be swapped. - if (ISD::isSignedIntSetCC(CC)) - Swap = !Swap; - SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, N->getOperand(Swap ? 3 : 2), N->getOperand(Swap ? 2 : 3)), 0); - CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), - N->getOperand(0)); - return; + return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, + BitComp, N->getOperand(4), N->getOperand(0)); } if (EnableBranchHint) @@ -5012,8 +2932,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; - CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); - return; + return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); } case ISD::BRIND: { // FIXME: Should custom lower this. @@ -5023,19 +2942,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) { unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); - CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); - return; + return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } case PPCISD::TOC_ENTRY: { assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && "Only supported for 64-bit ABI and 32-bit SVR4"); if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { SDValue GA = N->getOperand(0); - SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - N->getOperand(1)); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; + return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl, + MVT::i32, GA, N->getOperand(1))); } // For medium and large code model, we generate two instructions as @@ -5047,37 +2962,39 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // The first source operand is a TargetGlobalAddress or a TargetJumpTable. // If it must be toc-referenced according to PPCSubTarget, we generate: - // LDtocL(@sym, ADDIStocHA(%x2, @sym)) + // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%x2, @sym), @sym) + // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); - if (PPCLowering->isAccessedAsGotIndirect(GA)) { - // If it is access as got-indirect, we need an extra LD to load - // the address. - SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; + + if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || + CModel == CodeModel::Large) + return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, + MVT::i64, GA, SDValue(Tmp, 0))); + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { + const GlobalValue *GV = G->getGlobal(); + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { + return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, + MVT::i64, GA, SDValue(Tmp, 0))); + } } - // Build the address relative to the TOC-pointer.. - ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, - SDValue(Tmp, 0), GA)); - return; + return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, + SDValue(Tmp, 0), GA); } - case PPCISD::PPC32_PICGOT: + case PPCISD::PPC32_PICGOT: { // Generate a PIC-safe GOT reference. assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); - CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, - PPCLowering->getPointerTy(CurDAG->getDataLayout()), - MVT::i32); - return; - + return CurDAG->SelectNodeTo( + N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), + MVT::i32); + } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether // the first operand is odd or even, positive or negative. @@ -5118,8 +3035,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDValue EltVal = getI32Imm(Elt >> 1, dl); SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); SDValue TmpVal = SDValue(Tmp, 0); - ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); - return; + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + } else if (Elt > 0) { // Elt is odd and positive, in the range [17,31]. // @@ -5131,9 +3048,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); - ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), - SDValue(Tmp2, 0))); - return; + return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + } else { // Elt is odd and negative, in the range [-31,-17]. // @@ -5145,14 +3062,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); - ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), - SDValue(Tmp2, 0))); - return; + return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); } } } - SelectCode(N); + return SelectCode(N); } // If the target supports the cmpb instruction, do the idiom recognition here. @@ -5178,7 +3094,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { EVT VT = N->getValueType(0); SDValue RHS, LHS; - bool BytesFound[8] = {false, false, false, false, false, false, false, false}; + bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; uint64_t Mask = 0, Alt = 0; auto IsByteSelectCC = [this](SDValue O, unsigned &b, @@ -5217,7 +3133,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && isa<ConstantSDNode>(Op0.getOperand(1))) { - unsigned Bits = Op0.getValueSizeInBits(); + unsigned Bits = Op0.getValueType().getSizeInBits(); if (b != Bits/8-1) return false; if (Op0.getConstantOperandVal(1) != Bits-8) @@ -5245,9 +3161,9 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { // Now we need to make sure that the upper bytes are known to be // zero. - unsigned Bits = Op0.getValueSizeInBits(); - if (!CurDAG->MaskedValueIsZero( - Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) + unsigned Bits = Op0.getValueType().getSizeInBits(); + if (!CurDAG->MaskedValueIsZero(Op0, + APInt::getHighBitsSet(Bits, Bits - (b+1)*8))) return false; LHS = Op0.getOperand(0); @@ -5280,7 +3196,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { } else if (Op.getOpcode() == ISD::SRL) { if (!isa<ConstantSDNode>(Op.getOperand(1))) return false; - unsigned Bits = Op.getValueSizeInBits(); + unsigned Bits = Op.getValueType().getSizeInBits(); if (b != Bits/8-1) return false; if (Op.getConstantOperandVal(1) != Bits-8) @@ -5415,13 +3331,11 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { O0.getNode(), O1.getNode()); }; - // FIXME: When the semantics of the interaction between select and undef - // are clearly defined, it may turn out to be unnecessary to break here. SDValue TrueRes = TryFold(ConstTrue); - if (!TrueRes || TrueRes.isUndef()) + if (!TrueRes) break; SDValue FalseRes = TryFold(ConstFalse); - if (!FalseRes || FalseRes.isUndef()) + if (!FalseRes) break; // For us to materialize these using one instruction, we must be able to @@ -5441,7 +3355,8 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { } void PPCDAGToDAGISel::PreprocessISelDAG() { - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { @@ -5461,11 +3376,11 @@ void PPCDAGToDAGISel::PreprocessISelDAG() { foldBoolExts(Res, N); if (Res) { - LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); - LLVM_DEBUG(N->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nNew: "); - LLVM_DEBUG(Res.getNode()->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(Res.getNode()->dump(CurDAG)); + DEBUG(dbgs() << "\n"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); MadeChange = true; @@ -5479,6 +3394,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() { /// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { + // Skip peepholes at -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; @@ -5494,6 +3410,10 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // be folded with the isel so that we don't need to materialize a register // containing zero. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { + // If we're not using isel, then this does not matter. + if (!PPCSubTarget->hasISEL()) + return false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { SDNode *User = *UI; @@ -5542,13 +3462,13 @@ void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { User->getOperand(2), User->getOperand(1)); - LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); - LLVM_DEBUG(User->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nNew: "); - LLVM_DEBUG(ResNode->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(User->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); - ReplaceUses(User, ResNode); + ReplaceUses(User, ResNode); } } @@ -5588,8 +3508,7 @@ void PPCDAGToDAGISel::PeepholeCROps() { Op.getOperand(0) == Op.getOperand(1)) Op2Not = true; } - LLVM_FALLTHROUGH; - } + } // fallthrough case PPC::BC: case PPC::BCn: case PPC::SELECT_I4: @@ -5599,8 +3518,6 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_QFRC: case PPC::SELECT_QSRC: case PPC::SELECT_QBRC: - case PPC::SELECT_SPE: - case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSSRC: @@ -5648,12 +3565,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)); + MachineNode->getOperand(1)), SelectSwap = true; - } break; case PPC::CRNAND: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -5687,12 +3603,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)); + MachineNode->getOperand(1)), SelectSwap = true; - } break; case PPC::CROR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -5720,12 +3635,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)); + MachineNode->getOperand(1)), SelectSwap = true; - } break; case PPC::CRXOR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -5760,12 +3674,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)); + MachineNode->getOperand(1)), SelectSwap = true; - } break; case PPC::CRNOR: if (Op1Set || Op2Set) @@ -5794,12 +3707,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)); + MachineNode->getOperand(1)), SelectSwap = true; - } break; case PPC::CREQV: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -5834,12 +3746,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), - MachineNode->getOperand(1)); + MachineNode->getOperand(1)), SelectSwap = true; - } break; case PPC::CRANDC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -5870,12 +3781,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), - MachineNode->getOperand(0)); + MachineNode->getOperand(0)), SelectSwap = true; - } break; case PPC::CRORC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) @@ -5906,12 +3816,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); - else if (AllUsersSelectZero(MachineNode)) { + else if (AllUsersSelectZero(MachineNode)) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), - MachineNode->getOperand(0)); + MachineNode->getOperand(0)), SelectSwap = true; - } break; case PPC::SELECT_I4: case PPC::SELECT_I8: @@ -5920,8 +3829,6 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_QFRC: case PPC::SELECT_QSRC: case PPC::SELECT_QBRC: - case PPC::SELECT_SPE: - case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSSRC: @@ -5960,11 +3867,11 @@ void PPCDAGToDAGISel::PeepholeCROps() { SwapAllSelectUsers(MachineNode); if (ResNode != MachineNode) { - LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); - LLVM_DEBUG(MachineNode->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nNew: "); - LLVM_DEBUG(ResNode->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(MachineNode->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); ReplaceUses(MachineNode, ResNode); IsModified = true; @@ -6020,9 +3927,8 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, return true; } - // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. - if (Op32.getMachineOpcode() == PPC::CNTLZW || - Op32.getMachineOpcode() == PPC::CNTTZW) { + // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended. + if (Op32.getMachineOpcode() == PPC::CNTLZW) { ToPromote.insert(Op32.getNode()); return true; } @@ -6133,7 +4039,8 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { // unnecessary. When that happens, we remove it here, and redefine the // relevant 32-bit operation to be a 64-bit operation. - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { @@ -6216,7 +4123,6 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; - case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; case PPC::OR: NewOpcode = PPC::OR8; break; case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; @@ -6258,25 +4164,25 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { else NewVTs.push_back(VTs.VTs[i]); - LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); - LLVM_DEBUG(PN->dump(CurDAG)); + DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); + DEBUG(PN->dump(CurDAG)); CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); - LLVM_DEBUG(dbgs() << "\nNew: "); - LLVM_DEBUG(PN->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "\nNew: "); + DEBUG(PN->dump(CurDAG)); + DEBUG(dbgs() << "\n"); } // Now we replace the original zero extend and its associated INSERT_SUBREG // with the value feeding the INSERT_SUBREG (which has now been promoted to // return an i64). - LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); - LLVM_DEBUG(N->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nNew: "); - LLVM_DEBUG(Op32.getNode()->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(Op32.getNode()->dump(CurDAG)); + DEBUG(dbgs() << "\n"); ReplaceUses(N, Op32.getNode()); } @@ -6290,7 +4196,8 @@ void PPCDAGToDAGISel::PeepholePPC64() { if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) return; - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; @@ -6300,37 +4207,28 @@ void PPCDAGToDAGISel::PeepholePPC64() { unsigned FirstOp; unsigned StorageOpcode = N->getMachineOpcode(); - bool RequiresMod4Offset = false; switch (StorageOpcode) { default: continue; - case PPC::LWA: - case PPC::LD: - case PPC::DFLOADf64: - case PPC::DFLOADf32: - RequiresMod4Offset = true; - LLVM_FALLTHROUGH; case PPC::LBZ: case PPC::LBZ8: + case PPC::LD: case PPC::LFD: case PPC::LFS: case PPC::LHA: case PPC::LHA8: case PPC::LHZ: case PPC::LHZ8: + case PPC::LWA: case PPC::LWZ: case PPC::LWZ8: FirstOp = 0; break; - case PPC::STD: - case PPC::DFSTOREf64: - case PPC::DFSTOREf32: - RequiresMod4Offset = true; - LLVM_FALLTHROUGH; case PPC::STB: case PPC::STB8: + case PPC::STD: case PPC::STFD: case PPC::STFS: case PPC::STH: @@ -6352,6 +4250,13 @@ void PPCDAGToDAGISel::PeepholePPC64() { if (!Base.isMachineOpcode()) continue; + // On targets with fusion, we don't want this to fire and remove a fusion + // opportunity, unless a) it results in another fusion opportunity or + // b) optimizing for size. + if (PPCSubTarget->hasFusion() && + (!MF->getFunction()->optForSize() && !Base.hasOneUse())) + continue; + unsigned Flags = 0; bool ReplaceFlags = true; @@ -6377,7 +4282,9 @@ void PPCDAGToDAGISel::PeepholePPC64() { // For these cases, the immediate may not be divisible by 4, in // which case the fold is illegal for DS-form instructions. (The // other cases provide aligned addresses and are always safe.) - if (RequiresMod4Offset && + if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && (!isa<ConstantSDNode>(Base.getOperand(1)) || Base.getConstantOperandVal(1) % 4 != 0)) continue; @@ -6394,73 +4301,25 @@ void PPCDAGToDAGISel::PeepholePPC64() { } SDValue ImmOpnd = Base.getOperand(1); - - // On PPC64, the TOC base pointer is guaranteed by the ABI only to have - // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, - // we might have needed different @ha relocation values for the offset - // pointers). - int MaxDisplacement = 7; + int MaxDisplacement = 0; if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { const GlobalValue *GV = GA->getGlobal(); - MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); + MaxDisplacement = GV->getAlignment() - 1; } - bool UpdateHBase = false; - SDValue HBase = Base.getOperand(0); - int Offset = N->getConstantOperandVal(FirstOp); - if (ReplaceFlags) { - if (Offset < 0 || Offset > MaxDisplacement) { - // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only - // one use, then we can do this for any offset, we just need to also - // update the offset (i.e. the symbol addend) on the addis also. - if (Base.getMachineOpcode() != PPC::ADDItocL) - continue; - - if (!HBase.isMachineOpcode() || - HBase.getMachineOpcode() != PPC::ADDIStocHA) - continue; - - if (!Base.hasOneUse() || !HBase.hasOneUse()) - continue; - - SDValue HImmOpnd = HBase.getOperand(1); - if (HImmOpnd != ImmOpnd) - continue; - - UpdateHBase = true; - } - } else { - // If we're directly folding the addend from an addi instruction, then: - // 1. In general, the offset on the memory access must be zero. - // 2. If the addend is a constant, then it can be combined with a - // non-zero offset, but only if the result meets the encoding - // requirements. - if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { - Offset += C->getSExtValue(); - - if (RequiresMod4Offset && (Offset % 4) != 0) - continue; - - if (!isInt<16>(Offset)) - continue; - - ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), - ImmOpnd.getValueType()); - } else if (Offset != 0) { - continue; - } - } + if (Offset < 0 || Offset > MaxDisplacement) + continue; // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to // reflect the necessary relocation information. - LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); - LLVM_DEBUG(Base->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nN: "); - LLVM_DEBUG(N->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); // If the relocation information isn't already present on the // immediate operand, add it now. @@ -6471,8 +4330,9 @@ void PPCDAGToDAGISel::PeepholePPC64() { // We can't perform this optimization for data whose alignment // is insufficient for the instruction encoding. if (GV->getAlignment() < 4 && - (RequiresMod4Offset || (Offset % 4) != 0)) { - LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); + (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || + StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { + DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); @@ -6492,20 +4352,28 @@ void PPCDAGToDAGISel::PeepholePPC64() { (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), N->getOperand(2)); - if (UpdateHBase) - (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), - ImmOpnd); - // The add-immediate may now be dead, in which case remove it. if (Base.getNode()->use_empty()) CurDAG->RemoveDeadNode(Base.getNode()); } } + /// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new PPCDAGToDAGISel(TM, OptLevel); +FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { + return new PPCDAGToDAGISel(TM); +} + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, + nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce); } + diff --git a/gnu/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/gnu/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index e731c0bc0c2..44a692d4bb4 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/gnu/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -12,21 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCExpr.h" #include "PPC.h" +#include "MCTargetDesc/PPCMCExpr.h" #include "PPCSubtarget.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -34,39 +34,71 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>(); } -static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, - AsmPrinter &AP) { + +static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ const TargetMachine &TM = AP.TM; - Mangler &Mang = TM.getObjFileLowering()->getMangler(); + Mangler *Mang = AP.Mang; const DataLayout &DL = AP.getDataLayout(); MCContext &Ctx = AP.OutContext; + bool isDarwin = TM.getTargetTriple().isOSDarwin(); SmallString<128> Name; StringRef Suffix; - if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) { + if (isDarwin) + Suffix = "$stub"; + } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) Suffix = "$non_lazy_ptr"; if (!Suffix.empty()) Name += DL.getPrivateGlobalPrefix(); + unsigned PrefixLen = Name.size(); + if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); } else { const GlobalValue *GV = MO.getGlobal(); - TM.getNameWithPrefix(Name, GV, Mang); + TM.getNameWithPrefix(Name, GV, *Mang); } + unsigned OrigLen = Name.size() - PrefixLen; + Name += Suffix; MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); + StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); + + // If the target flags on the operand changes the name of the symbol, do that + // before we return the symbol. + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) { + MachineModuleInfoImpl::StubValueTy &StubSym = + getMachOMMI(AP).getFnStubEntry(Sym); + if (StubSym.getPointer()) + return Sym; + + if (MO.isGlobal()) { + StubSym = + MachineModuleInfoImpl:: + StubValueTy(AP.getSymbol(MO.getGlobal()), + !MO.getGlobal()->hasInternalLinkage()); + } else { + StubSym = + MachineModuleInfoImpl:: + StubValueTy(Ctx.getOrCreateSymbol(OrigName), false); + } + return Sym; + } // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, // then add the suffix. if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { MachineModuleInfoMachO &MachO = getMachOMMI(AP); - - MachineModuleInfoImpl::StubValueTy &StubSym = MachO.getGVStubEntry(Sym); - + + MachineModuleInfoImpl::StubValueTy &StubSym = + (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? + MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym); + if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: @@ -75,7 +107,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, } return Sym; } - + return Sym; } @@ -107,20 +139,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; } - if (MO.getTargetFlags() == PPCII::MO_PLT) + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) RefKind = MCSymbolRefExpr::VK_PLT; - const MachineFunction *MF = MO.getParent()->getParent()->getParent(); - const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>()); - const TargetMachine &TM = Printer.TM; const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); - // -msecure-plt option works only in PIC mode. If secure plt mode - // is on add 32768 to symbol. - if (Subtarget->isSecurePlt() && TM.isPositionIndependent() && - MO.getTargetFlags() == PPCII::MO_PLT) - Expr = MCBinaryExpr::createAdd(Expr, - MCConstantExpr::create(32768, Ctx), - Ctx); if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::createAdd(Expr, @@ -130,7 +152,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Subtract off the PIC base if required. if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) { const MachineFunction *MF = MO.getParent()->getParent()->getParent(); - + const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); Expr = MCBinaryExpr::createSub(Expr, PB, Ctx); } @@ -151,50 +173,47 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin) { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp; - if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, - isDarwin)) - OutMI.addOperand(MCOp); - } -} - -bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, - MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin) { - switch (MO.getType()) { - default: - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - assert(MO.getReg() > PPC::NoRegister && - MO.getReg() < PPC::NUM_TARGET_REGS && - "Invalid register for this target!"); - OutMO = MCOperand::createReg(MO.getReg()); - return true; - case MachineOperand::MO_Immediate: - OutMO = MCOperand::createImm(MO.getImm()); - return true; - case MachineOperand::MO_MachineBasicBlock: - OutMO = MCOperand::createExpr( - MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), AP.OutContext)); - return true; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: - OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); - return true; - case MachineOperand::MO_JumpTableIndex: - OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); - return true; - case MachineOperand::MO_ConstantPoolIndex: - OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); - return true; - case MachineOperand::MO_BlockAddress: - OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP, - isDarwin); - return true; - case MachineOperand::MO_RegisterMask: - return false; + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + assert(MO.getReg() > PPC::NoRegister && + MO.getReg() < PPC::NUM_TARGET_REGS && + "Invalid register for this target!"); + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( + MO.getMBB()->getSymbol(), AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + break; + case MachineOperand::MO_BlockAddress: + MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP, + isDarwin); + break; + case MachineOperand::MO_RegisterMask: + continue; + } + + OutMI.addOperand(MCOp); } } |