diff options
author | George Koehler <gkoehler@cvs.openbsd.org> | 2023-11-19 01:14:08 +0000 |
---|---|---|
committer | George Koehler <gkoehler@cvs.openbsd.org> | 2023-11-19 01:14:08 +0000 |
commit | d0062ab41c069e8d9b5e5948aba8c4d1b05991ec (patch) | |
tree | 31e5ff3606f6f691cf8ae4d71e44fff92bebb2fe /gnu | |
parent | 7278bf57249ef7a02bd53c90b22377b3ab2a6ec9 (diff) |
Fix cc -ftrapping-math on macppc
Handle CALL_RM like CALL for 32-bit ELF. If a function call has the
strictfp attribute, its opcode changes from CALL to CALL_RM. If a
call uses the secure PLT, then it must call getGlobalBaseReg() to set r30.
After I rebuilt xenocara/lib/pixman with this change, Xorg stopped
crashing on my macppc. pixman uses cc -ftrapping-math which puts
strictfp on each function call.
https://github.com/llvm/llvm-project/pull/72758
ok jca@ tobhe@ deraadt@
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/llvm/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2206 |
1 files changed, 1584 insertions, 622 deletions
diff --git a/gnu/llvm/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/gnu/llvm/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 776ec52e260..7704d1efc03 100644 --- a/gnu/llvm/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/gnu/llvm/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -43,6 +44,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -66,7 +68,8 @@ using namespace llvm; -#define DEBUG_TYPE "ppc-codegen" +#define DEBUG_TYPE "ppc-isel" +#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection" STATISTIC(NumSextSetcc, "Number of (sext(setcc)) nodes expanded into GPR sequence."); @@ -138,24 +141,34 @@ namespace { /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; - const PPCSubtarget *PPCSubTarget = nullptr; + const PPCSubtarget *Subtarget = nullptr; const PPCTargetLowering *PPCLowering = nullptr; unsigned GlobalBaseReg = 0; public: + static char ID; + + PPCDAGToDAGISel() = delete; + explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm) {} + : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {} bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; - PPCSubTarget = &MF.getSubtarget<PPCSubtarget>(); - PPCLowering = PPCSubTarget->getTargetLowering(); + Subtarget = &MF.getSubtarget<PPCSubtarget>(); + PPCLowering = Subtarget->getTargetLowering(); + if (Subtarget->hasROPProtect()) { 
+ // Create a place on the stack for the ROP Protection Hash. + // The ROP Protection Hash will always be 8 bytes and aligned to 8 + // bytes. + MachineFrameInfo &MFI = MF.getFrameInfo(); + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + const int Result = MFI.CreateStackObject(8, Align(8), false); + FI->setROPProtectionHashSaveIndex(Result); + } SelectionDAGISel::runOnMachineFunction(MF); - if (!PPCSubTarget->isSVR4ABI()) - InsertVRSaveCode(MF); - return true; } @@ -181,7 +194,7 @@ namespace { } /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { + inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } @@ -195,7 +208,7 @@ namespace { /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); - void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. @@ -204,7 +217,6 @@ namespace { bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); bool tryIntCompareInGPR(SDNode *N); - bool tryAndWithMask(SDNode *N); // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into // an X-Form load instruction with the offset being a relocation coming from @@ -217,7 +229,7 @@ namespace { /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl); + const SDLoc &dl, SDValue Chain = SDValue()); /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. 
Note that the operand at this point is already the @@ -232,6 +244,61 @@ namespace { return false; } + /// SelectDSForm - Returns true if address N can be represented by the + /// addressing mode of DSForm instructions (a base register, plus a signed + /// 16-bit displacement that is a multiple of 4. + bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + Align(4)) == PPC::AM_DSForm; + } + + /// SelectDQForm - Returns true if address N can be represented by the + /// addressing mode of DQForm instructions (a base register, plus a signed + /// 16-bit displacement that is a multiple of 16. + bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + Align(16)) == PPC::AM_DQForm; + } + + /// SelectDForm - Returns true if address N can be represented by + /// the addressing mode of DForm instructions (a base register, plus a + /// signed 16-bit immediate. + bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + std::nullopt) == PPC::AM_DForm; + } + + /// SelectPCRelForm - Returns true if address N can be represented by + /// PC-Relative addressing mode. + bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + std::nullopt) == PPC::AM_PCRel; + } + + /// SelectPDForm - Returns true if address N can be represented by Prefixed + /// DForm addressing mode (a base register, plus a signed 34-bit immediate. 
+ bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + std::nullopt) == + PPC::AM_PrefixDForm; + } + + /// SelectXForm - Returns true if address N can be represented by the + /// addressing mode of XForm instructions (an indexed [r+r] operation). + bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, + std::nullopt) == PPC::AM_XForm; + } + + /// SelectForceXForm - Given the specified address, force it to be + /// represented as an indexed [r+r] operation (an XForm instruction). + bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) == + PPC::AM_XForm; + } + /// SelectAddrIdx - Given the specified address, check to see if it can be /// represented as an indexed [r+r] operation. /// This is for xform instructions whose associated displacement form is D. @@ -239,7 +306,8 @@ namespace { /// bit signed displacement. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, + std::nullopt); } /// SelectAddrIdx4 - Given the specified address, check to see if it can be @@ -249,7 +317,8 @@ namespace { /// displacement must be a multiple of 4. /// Returns false if it can be represented by [r+imm], which are preferred. 
bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, + Align(4)); } /// SelectAddrIdx16 - Given the specified address, check to see if it can be @@ -259,7 +328,8 @@ namespace { /// displacement must be a multiple of 16. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, + Align(16)); } /// SelectAddrIdxOnly - Given the specified address, force it to be @@ -267,28 +337,37 @@ namespace { bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } - + /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. /// The last parameter \p 0 means D form has no requirment for 16 bit signed /// displacement. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, + std::nullopt); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of /// 4 (last parameter). Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4)); } /// SelectAddrImmX16 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of /// 16(last parameter). Suitable for use by STXV and friends. 
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, + Align(16)); + } + + /// SelectAddrImmX34 - Returns true if the address N can be represented by + /// a base register plus a signed 34-bit displacement. Suitable for use by + /// PSTXVP and friends. + bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); } // Select an address into a single register. @@ -297,6 +376,10 @@ namespace { return true; } + bool SelectAddrPCRel(SDValue N, SDValue &Base) { + return PPCLowering->SelectAddressPCRel(N, Base); + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a @@ -317,7 +400,7 @@ namespace { case InlineAsm::Constraint_Zy: // We need to make sure that this one operand does not end up in r0 // (because we might end up lowering this as 0(%op)). - const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); @@ -332,17 +415,20 @@ namespace { return true; } - void InsertVRSaveCode(MachineFunction &MF); - - StringRef getPassName() const override { - return "PowerPC DAG->DAG Pattern Instruction Selection"; - } - // Include the pieces autogenerated from the target description. 
#include "PPCGenDAGISel.inc" private: bool trySETCC(SDNode *N); + bool tryFoldSWTestBRCC(SDNode *N); + bool trySelectLoopCountIntrinsic(SDNode *N); + bool tryAsSingleRLDICL(SDNode *N); + bool tryAsSingleRLDICR(SDNode *N); + bool tryAsSingleRLWINM(SDNode *N); + bool tryAsSingleRLWINM8(SDNode *N); + bool tryAsSingleRLWIMI(SDNode *N); + bool tryAsPairOfRLDICL(SDNode *N); + bool tryAsSingleRLDIMI(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -360,76 +446,16 @@ private: } // end anonymous namespace -/// InsertVRSaveCode - Once the entire function has been instruction selected, -/// all virtual registers are created and all machine instructions are built, -/// check to see if we need to save/restore VRSAVE. If so, do it. -void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { - // Check to see if this function uses vector registers, which means we have to - // save and restore the VRSAVE register and update it with the regs we use. - // - // In this case, there will be virtual registers of vector type created - // by the scheduler. Detect them now. - bool HasVectorVReg = false; - for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); - if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { - HasVectorVReg = true; - break; - } - } - if (!HasVectorVReg) return; // nothing to do. +char PPCDAGToDAGISel::ID = 0; - // If we have a vector register, we want to emit code into the entry and exit - // blocks to save and restore the VRSAVE register. We do this here (instead - // of marking all vector instructions as clobbering VRSAVE) for two reasons: - // - // 1. This (trivially) reduces the load on the register allocator, by not - // having to represent the live range of the VRSAVE register. - // 2. 
This (more significantly) allows us to create a temporary virtual - // register to hold the saved VRSAVE value, allowing this temporary to be - // register allocated, instead of forcing it to be spilled to the stack. - - // Create two vregs - one to hold the VRSAVE register that is live-in to the - // function and one for the value after having bits or'd into it. - Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - - const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); - MachineBasicBlock &EntryBB = *Fn.begin(); - DebugLoc dl; - // Emit the following code into the entry block: - // InVRSAVE = MFVRSAVE - // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE - // MTVRSAVE UpdatedVRSAVE - MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point - BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); - BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), - UpdatedVRSAVE).addReg(InVRSAVE); - BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); - - // Find all return blocks, outputting a restore in each epilog. - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (BB->isReturnBlock()) { - IP = BB->end(); --IP; - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = IP; - while (I2 != BB->begin() && (--I2)->isTerminator()) - IP = I2; - - // Emit: MTVRSAVE InVRSave - BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); - } - } -} +INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. 
/// SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (!GlobalBaseReg) { - const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); @@ -437,9 +463,9 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { - if (PPCSubTarget->isTargetELF()) { + if (Subtarget->isTargetELF()) { GlobalBaseReg = PPC::R30; - if (!PPCSubTarget->isSecurePlt() && + if (!Subtarget->isSecurePlt() && M->getPICLevel() == PICLevel::SmallPIC) { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); @@ -480,6 +506,58 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { .getNode(); } +// Check if a SDValue has the toc-data attribute. +static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val); + if (!GA) + return false; + + const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal()); + if (!GV) + return false; + + if (!GV->hasAttribute("toc-data")) + return false; + + // TODO: These asserts should be updated as more support for the toc data + // transformation is added (struct support, etc.). 
+ + assert( + PointerSize >= GV->getAlign().valueOrOne().value() && + "GlobalVariables with an alignment requirement stricter than TOC entry " + "size not supported by the toc data transformation."); + + Type *GVType = GV->getValueType(); + + assert(GVType->isSized() && "A GlobalVariable's size must be known to be " + "supported by the toc data transformation."); + + if (GVType->isVectorTy()) + report_fatal_error("A GlobalVariable of Vector type is not currently " + "supported by the toc data transformation."); + + if (GVType->isArrayTy()) + report_fatal_error("A GlobalVariable of Array type is not currently " + "supported by the toc data transformation."); + + if (GVType->isStructTy()) + report_fatal_error("A GlobalVariable of Struct type is not currently " + "supported by the toc data transformation."); + + assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && + "A GlobalVariable with size larger than a TOC entry is not currently " + "supported by the toc data transformation."); + + if (GV->hasLocalLinkage() || GV->hasPrivateLinkage()) + report_fatal_error("A GlobalVariable with private or local linkage is not " + "currently supported by the toc data transformation."); + + assert(!GV->hasCommonLinkage() && + "Tentative definitions cannot have the mapping class XMC_TD."); + + return true; +} + /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. 
static bool isInt32Immediate(SDNode *N, unsigned &Imm) { @@ -571,7 +649,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { && isInt32Immediate(N->getOperand(1).getNode(), Imm); } -void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { +void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) { SDLoc dl(SN); int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); @@ -633,6 +711,8 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { SDValue Offset = ST->getOffset(); if (!Offset.isUndef()) return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + return false; SDLoc dl(ST); EVT MemVT = ST->getMemoryVT(); @@ -676,6 +756,8 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Offset = LD->getOffset(); if (!Offset.isUndef()) return false; + if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) + return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); @@ -785,251 +867,6 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { return false; } -// Predict the number of instructions that would be generated by calling -// selectI64Imm(N). -static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { - // Assume no remaining bits. - unsigned Remainder = 0; - // Assume no shift required. - unsigned Shift = 0; - - // If it can't be represented as a 32 bit value. - if (!isInt<32>(Imm)) { - Shift = countTrailingZeros<uint64_t>(Imm); - int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; - - // If the shifted value fits 32 bits. - if (isInt<32>(ImmSh)) { - // Go with the shifted value. - Imm = ImmSh; - } else { - // Still stuck with a 64 bit value. - Remainder = Imm; - Shift = 32; - Imm >>= 32; - } - } - - // Intermediate operand. - unsigned Result = 0; - - // Handle first 32 bits. - unsigned Lo = Imm & 0xFFFF; - - // Simple value. - if (isInt<16>(Imm)) { - // Just the Lo bits. 
- ++Result; - } else if (Lo) { - // Handle the Hi bits and Lo bits. - Result += 2; - } else { - // Just the Hi bits. - ++Result; - } - - // If no shift, we're done. - if (!Shift) return Result; - - // If Hi word == Lo word, - // we can use rldimi to insert the Lo word into Hi word. - if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { - ++Result; - return Result; - } - - // Shift for next step if the upper 32-bits were not zero. - if (Imm) - ++Result; - - // Add in the last bits as required. - if ((Remainder >> 16) & 0xFFFF) - ++Result; - if (Remainder & 0xFFFF) - ++Result; - - return Result; -} - -static uint64_t Rot64(uint64_t Imm, unsigned R) { - return (Imm << R) | (Imm >> (64 - R)); -} - -static unsigned selectI64ImmInstrCount(int64_t Imm) { - unsigned Count = selectI64ImmInstrCountDirect(Imm); - - // If the instruction count is 1 or 2, we do not need further analysis - // since rotate + load constant requires at least 2 instructions. - if (Count <= 2) - return Count; - - for (unsigned r = 1; r < 63; ++r) { - uint64_t RImm = Rot64(Imm, r); - unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; - Count = std::min(Count, RCount); - - // See comments in selectI64Imm for an explanation of the logic below. - unsigned LS = findLastSet(RImm); - if (LS != r-1) - continue; - - uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); - uint64_t RImmWithOnes = RImm | OnesMask; - - RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; - Count = std::min(Count, RCount); - } - - return Count; -} - -// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount -// (above) needs to be kept in sync with this function. -static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, - int64_t Imm) { - // Assume no remaining bits. - unsigned Remainder = 0; - // Assume no shift required. - unsigned Shift = 0; - - // If it can't be represented as a 32 bit value. 
- if (!isInt<32>(Imm)) { - Shift = countTrailingZeros<uint64_t>(Imm); - int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; - - // If the shifted value fits 32 bits. - if (isInt<32>(ImmSh)) { - // Go with the shifted value. - Imm = ImmSh; - } else { - // Still stuck with a 64 bit value. - Remainder = Imm; - Shift = 32; - Imm >>= 32; - } - } - - // Intermediate operand. - SDNode *Result; - - // Handle first 32 bits. - unsigned Lo = Imm & 0xFFFF; - unsigned Hi = (Imm >> 16) & 0xFFFF; - - auto getI32Imm = [CurDAG, dl](unsigned Imm) { - return CurDAG->getTargetConstant(Imm, dl, MVT::i32); - }; - - // Simple value. - if (isInt<16>(Imm)) { - uint64_t SextImm = SignExtend64(Lo, 16); - SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); - // Just the Lo bits. - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); - } else if (Lo) { - // Handle the Hi bits. - unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); - // And Lo bits. - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Lo)); - } else { - // Just the Hi bits. - Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); - } - - // If no shift, we're done. - if (!Shift) return Result; - - // If Hi word == Lo word, - // we can use rldimi to insert the Lo word into Hi word. - if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { - SDValue Ops[] = - { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; - return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); - } - - // Shift for next step if the upper 32-bits were not zero. - if (Imm) { - Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, - SDValue(Result, 0), - getI32Imm(Shift), - getI32Imm(63 - Shift)); - } - - // Add in the last bits as required. 
- if ((Hi = (Remainder >> 16) & 0xFFFF)) { - Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Hi)); - } - if ((Lo = Remainder & 0xFFFF)) { - Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Lo)); - } - - return Result; -} - -static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, - int64_t Imm) { - unsigned Count = selectI64ImmInstrCountDirect(Imm); - - // If the instruction count is 1 or 2, we do not need further analysis - // since rotate + load constant requires at least 2 instructions. - if (Count <= 2) - return selectI64ImmDirect(CurDAG, dl, Imm); - - unsigned RMin = 0; - - int64_t MatImm; - unsigned MaskEnd; - - for (unsigned r = 1; r < 63; ++r) { - uint64_t RImm = Rot64(Imm, r); - unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; - if (RCount < Count) { - Count = RCount; - RMin = r; - MatImm = RImm; - MaskEnd = 63; - } - - // If the immediate to generate has many trailing zeros, it might be - // worthwhile to generate a rotated value with too many leading ones - // (because that's free with li/lis's sign-extension semantics), and then - // mask them off after rotation. - - unsigned LS = findLastSet(RImm); - // We're adding (63-LS) higher-order ones, and we expect to mask them off - // after performing the inverse rotation by (64-r). 
So we need that: - // 63-LS == 64-r => LS == r-1 - if (LS != r-1) - continue; - - uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); - uint64_t RImmWithOnes = RImm | OnesMask; - - RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; - if (RCount < Count) { - Count = RCount; - RMin = r; - MatImm = RImmWithOnes; - MaskEnd = LS; - } - } - - if (!RMin) - return selectI64ImmDirect(CurDAG, dl, Imm); - - auto getI32Imm = [CurDAG, dl](unsigned Imm) { - return CurDAG->getTargetConstant(Imm, dl, MVT::i32); - }; - - SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); - return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, - getI32Imm(64 - RMin), getI32Imm(MaskEnd)); -} - static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { unsigned MaxTruncation = 0; // Cannot use range-based for loop here as we need the actual use (i.e. we @@ -1086,6 +923,421 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { return MaxTruncation; } +// For any 32 < Num < 64, check if the Imm contains at least Num consecutive +// zeros and return the number of bits by the left of these consecutive zeros. +static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { + unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm)); + unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm)); + if ((HiTZ + LoLZ) >= Num) + return (32 + HiTZ); + return 0; +} + +// Direct materialization of 64-bit constants by enumerated patterns. 
+static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, + uint64_t Imm, unsigned &InstCnt) { + unsigned TZ = countTrailingZeros<uint64_t>(Imm); + unsigned LZ = countLeadingZeros<uint64_t>(Imm); + unsigned TO = countTrailingOnes<uint64_t>(Imm); + unsigned LO = countLeadingOnes<uint64_t>(Imm); + unsigned Hi32 = Hi_32(Imm); + unsigned Lo32 = Lo_32(Imm); + SDNode *Result = nullptr; + unsigned Shift = 0; + + auto getI32Imm = [CurDAG, dl](unsigned Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + }; + + // Following patterns use 1 instructions to materialize the Imm. + InstCnt = 1; + // 1-1) Patterns : {zeros}{15-bit valve} + // {ones}{15-bit valve} + if (isInt<16>(Imm)) { + SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64); + return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); + } + // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros} + // {ones}{15-bit valve}{16 zeros} + if (TZ > 15 && (LZ > 32 || LO > 32)) + return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, + getI32Imm((Imm >> 16) & 0xffff)); + + // Following patterns use 2 instructions to materialize the Imm. + InstCnt = 2; + assert(LZ < 64 && "Unexpected leading zeros here."); + // Count of ones follwing the leading zeros. + unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ); + // 2-1) Patterns : {zeros}{31-bit value} + // {ones}{31-bit value} + if (isInt<32>(Imm)) { + uint64_t ImmHi16 = (Imm >> 16) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Imm & 0xffff)); + } + // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} + // {zeros}{15-bit value}{zeros} + // {zeros}{ones}{15-bit value} + // {ones}{15-bit value}{zeros} + // We can take advantage of LI's sign-extension semantics to generate leading + // ones, and then use RLDIC to mask off the ones in both sides after rotation. 
+ if ((LZ + FO + TZ) > 48) { + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm((Imm >> TZ) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TZ), getI32Imm(LZ)); + } + // 2-3) Pattern : {zeros}{15-bit value}{ones} + // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value, + // therefore we can take advantage of LI's sign-extension semantics, and then + // mask them off after rotation. + // + // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ + // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // Imm (Imm >> (48 - LZ) & 0xffff) + // +----sext-----|--16-bit--+ +clear-|-----------------+ + // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ + if ((LZ + TO) > 48) { + // Since the immediates with (LZ > 32) have been handled by previous + // patterns, here we have (LZ <= 32) to make sure we will not shift right + // the Imm by a negative value. + assert(LZ <= 32 && "Unexpected shift value."); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm((Imm >> (48 - LZ) & 0xffff))); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(48 - LZ), getI32Imm(LZ)); + } + // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} + // {ones}{15-bit value}{ones} + // We can take advantage of LI's sign-extension semantics to generate leading + // ones, and then use RLDICL to mask off the ones in left sides (if required) + // after rotation. 
+ // + // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ + // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // Imm (Imm >> TO) & 0xffff + // +----sext-----|--16-bit--+ +LZ|---------------------+ + // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ + if ((LZ + FO + TO) > 48) { + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm((Imm >> TO) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TO), getI32Imm(LZ)); + } + // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} + // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit + // value, we can use LI for Lo16 without generating leading ones then add the + // Hi16(in Lo32). + if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm(Lo32 & 0xffff)); + return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Lo32 >> 16)); + } + // 2-6) Patterns : {******}{49 zeros}{******} + // {******}{49 ones}{******} + // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 + // bits remain on both sides. Rotate right the Imm to construct an int<16> + // value, use LI for int<16> value and then use RLDICL without mask to rotate + // it back. 
+ // + // 1) findContiguousZerosAtLeast(Imm, 49) + // +------|--zeros-|------+ +---ones--||---15 bit--+ + // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| + // +----------------------+ +----------------------+ + // 63 0 63 0 + // + // 2) findContiguousZerosAtLeast(~Imm, 49) + // +------|--ones--|------+ +---ones--||---15 bit--+ + // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| + // +----------------------+ +----------------------+ + // 63 0 63 0 + if ((Shift = findContiguousZerosAtLeast(Imm, 49)) || + (Shift = findContiguousZerosAtLeast(~Imm, 49))) { + uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, + getI32Imm(RotImm & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Shift), getI32Imm(0)); + } + + // Following patterns use 3 instructions to materialize the Imm. + InstCnt = 3; + // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} + // {zeros}{31-bit value}{zeros} + // {zeros}{ones}{31-bit value} + // {ones}{31-bit value}{zeros} + // We can take advantage of LIS's sign-extension semantics to generate leading + // ones, add the remaining bits with ORI, and then use RLDIC to mask off the + // ones in both sides after rotation. + if ((LZ + FO + TZ) > 32) { + uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm((Imm >> TZ) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TZ), getI32Imm(LZ)); + } + // 3-2) Pattern : {zeros}{31-bit value}{ones} + // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits + // value, therefore we can take advantage of LIS's sign-extension semantics, + // add the remaining bits with ORI, and then mask them off after rotation. 
+ // This is similar to Pattern 2-3, please refer to the diagram there. + if ((LZ + TO) > 32) { + // Since the immediates with (LZ > 32) have been handled by previous + // patterns, here we have (LZ <= 32) to make sure we will not shift right + // the Imm by a negative value. + assert(LZ <= 32 && "Unexpected shift value."); + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, + getI32Imm((Imm >> (48 - LZ)) & 0xffff)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm((Imm >> (32 - LZ)) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(32 - LZ), getI32Imm(LZ)); + } + // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} + // {ones}{31-bit value}{ones} + // We can take advantage of LIS's sign-extension semantics to generate leading + // ones, add the remaining bits with ORI, and then use RLDICL to mask off the + // ones in left sides (if required) after rotation. + // This is similar to Pattern 2-4, please refer to the diagram there. + if ((LZ + FO + TO) > 32) { + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, + getI32Imm((Imm >> (TO + 16)) & 0xffff)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm((Imm >> TO) & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TO), getI32Imm(LZ)); + } + // 3-4) Patterns : High word == Low word + if (Hi32 == Lo32) { + // Handle the first 32 bits. + uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Lo32 & 0xffff)); + // Use rldimi to insert the Low word into High word. 
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } + // 3-5) Patterns : {******}{33 zeros}{******} + // {******}{33 ones}{******} + // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 + // bits remain on both sides. Rotate right the Imm to construct an int<32> + // value, use LIS + ORI for int<32> value and then use RLDICL without mask to + // rotate it back. + // This is similar to Pattern 2-6, please refer to the diagram there. + if ((Shift = findContiguousZerosAtLeast(Imm, 33)) || + (Shift = findContiguousZerosAtLeast(~Imm, 33))) { + uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); + uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; + unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; + Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(RotImm & 0xffff)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Shift), getI32Imm(0)); + } + + InstCnt = 0; + return nullptr; +} + +// Try to select instructions to generate a 64 bit immediate using prefix as +// well as non prefix instructions. The function will return the SDNode +// to materialize that constant or it will return nullptr if it does not +// find one. The variable InstCnt is set to the number of instructions that +// were selected. +static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, + uint64_t Imm, unsigned &InstCnt) { + unsigned TZ = countTrailingZeros<uint64_t>(Imm); + unsigned LZ = countLeadingZeros<uint64_t>(Imm); + unsigned TO = countTrailingOnes<uint64_t>(Imm); + unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 
0 : (Imm << LZ)); + unsigned Hi32 = Hi_32(Imm); + unsigned Lo32 = Lo_32(Imm); + + auto getI32Imm = [CurDAG, dl](unsigned Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + }; + + auto getI64Imm = [CurDAG, dl](uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i64); + }; + + // Following patterns use 1 instruction to materialize Imm. + InstCnt = 1; + + // The pli instruction can materialize up to 34 bits directly. + // If a constant fits within 34-bits, emit the pli instruction here directly. + if (isInt<34>(Imm)) + return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + CurDAG->getTargetConstant(Imm, dl, MVT::i64)); + + // Require at least two instructions. + InstCnt = 2; + SDNode *Result = nullptr; + // Patterns : {zeros}{ones}{33-bit value}{zeros} + // {zeros}{33-bit value}{zeros} + // {zeros}{ones}{33-bit value} + // {ones}{33-bit value}{zeros} + // We can take advantage of PLI's sign-extension semantics to generate leading + // ones, and then use RLDIC to mask off the ones on both sides after rotation. + if ((LZ + FO + TZ) > 30) { + APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff); + APInt Extended = SignedInt34.sext(64); + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + getI64Imm(*Extended.getRawData())); + return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TZ), getI32Imm(LZ)); + } + // Pattern : {zeros}{33-bit value}{ones} + // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value, + // therefore we can take advantage of PLI's sign-extension semantics, and then + // mask them off after rotation. 
+ // + // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+ + // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // + // +----sext-----|--34-bit--+ +clear-|-----------------+ + // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| + // +------------------------+ +------------------------+ + // 63 0 63 0 + if ((LZ + TO) > 30) { + APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff); + APInt Extended = SignedInt34.sext(64); + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + getI64Imm(*Extended.getRawData())); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(30 - LZ), getI32Imm(LZ)); + } + // Patterns : {zeros}{ones}{33-bit value}{ones} + // {ones}{33-bit value}{ones} + // Similar to LI we can take advantage of PLI's sign-extension semantics to + // generate leading ones, and then use RLDICL to mask off the ones in left + // sides (if required) after rotation. + if ((LZ + FO + TO) > 30) { + APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff); + APInt Extended = SignedInt34.sext(64); + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + getI64Imm(*Extended.getRawData())); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TO), getI32Imm(LZ)); + } + // Patterns : {******}{31 zeros}{******} + // : {******}{31 ones}{******} + // If Imm contains 31 consecutive zeros/ones then the remaining bit count + // is 33. Rotate right the Imm to construct an int<33> value, we can use PLI + // for the int<33> value and then use RLDICL without a mask to rotate it back. 
+ // + // +------|--ones--|------+ +---ones--||---33 bit--+ + // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| + // +----------------------+ +----------------------+ + // 63 0 63 0 + for (unsigned Shift = 0; Shift < 63; ++Shift) { + uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); + if (isInt<34>(RotImm)) { + Result = + CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Shift), + getI32Imm(0)); + } + } + + // Patterns : High word == Low word + // This is basically a splat of a 32 bit immediate. + if (Hi32 == Lo32) { + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } + + InstCnt = 3; + // Catch-all + // This pattern can form any 64 bit immediate in 3 instructions. + SDNode *ResultHi = + CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); + SDNode *ResultLo = + CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32)); + SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); +} + +static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, + unsigned *InstCnt = nullptr) { + unsigned InstCntDirect = 0; + // No more than 3 instructions are used if we can select the i64 immediate + // directly. + SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); + + const PPCSubtarget &Subtarget = + CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>(); + + // If we have prefixed instructions and there is a chance we can + // materialize the constant with fewer prefixed instructions than + // non-prefixed, try that. 
+ if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) { + unsigned InstCntDirectP = 0; + SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP); + // Use the prefix case in either of two cases: + // 1) We have no result from the non-prefix case to use. + // 2) The non-prefix case uses more instructions than the prefix case. + // If the prefix and non-prefix cases use the same number of instructions + // we will prefer the non-prefix case. + if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) { + if (InstCnt) + *InstCnt = InstCntDirectP; + return ResultP; + } + } + + if (Result) { + if (InstCnt) + *InstCnt = InstCntDirect; + return Result; + } + auto getI32Imm = [CurDAG, dl](unsigned Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + }; + // Handle the upper 32 bit value. + Result = + selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); + // Add in the last bits as required. + if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) { + Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Hi16)); + ++InstCntDirect; + } + if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) { + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(Lo16)); + ++InstCntDirect; + } + if (InstCnt) + *InstCnt = InstCntDirect; + return Result; +} + // Select a 64-bit constant. 
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); @@ -1123,8 +1375,7 @@ class BitPermutationSelector { ValueBit(SDValue V, unsigned I, Kind K = Variable) : V(V), Idx(I), K(K) {} - ValueBit(Kind K = Variable) - : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} + ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {} bool isZero() const { return K == ConstZero || K == VariableKnownToBeZero; @@ -1238,6 +1489,7 @@ class BitPermutationSelector { } break; case ISD::SHL: + case PPCISD::SHL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); @@ -1253,6 +1505,7 @@ class BitPermutationSelector { } break; case ISD::SRL: + case PPCISD::SRL: if (isa<ConstantSDNode>(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); @@ -2132,11 +2385,14 @@ class BitPermutationSelector { unsigned NumAndInsts = (unsigned) NeedsRotate + (unsigned) (bool) Res; + unsigned NumOfSelectInsts = 0; + selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts); + assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant."); if (Use32BitInsts) NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else - NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; + NumAndInsts += NumOfSelectInsts + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; @@ -2360,12 +2616,14 @@ class BitPermutationSelector { Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { - if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; - - SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); - Res = - SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - ExtendToInt64(Res, dl), MaskVal), 0); + unsigned NumOfSelectInsts = 0; + SDValue MaskVal = + SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0); + Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, + ExtendToInt64(Res, dl), 
MaskVal), + 0); + if (InstCnt) + *InstCnt += NumOfSelectInsts + /* and */ 1; } } @@ -2396,7 +2654,7 @@ class BitPermutationSelector { } void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { - BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); + erase_if(BitGroups, F); } SmallVector<ValueBit, 64> Bits; @@ -2523,7 +2781,7 @@ public: if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_SextI64) return nullptr; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SIGN_EXTEND: if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_ZextI64) @@ -2950,8 +3208,8 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, // by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; + IsRHSZero = RHSConst && RHSConst->isZero(); + [[fallthrough]]; } case ISD::SETLE: { if (CmpInGPR == ICGPR_NonExtIn) @@ -3000,9 +3258,9 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, // (%b < %a) by swapping inputs and falling through. 
std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::SETLT: { // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) @@ -3037,7 +3295,7 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); @@ -3057,7 +3315,7 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULT: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); @@ -3134,8 +3392,8 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, // by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; + IsRHSZero = RHSConst && RHSConst->isZero(); + [[fallthrough]]; } case ISD::SETLE: { if (CmpInGPR == ICGPR_NonExtIn) @@ -3179,9 +3437,9 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, // (%b < %a) by swapping inputs and falling through. 
std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::SETLT: { // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) @@ -3210,7 +3468,7 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); @@ -3230,7 +3488,7 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULT: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); @@ -3292,8 +3550,8 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; + IsRHSZero = RHSConst && RHSConst->isZero(); + [[fallthrough]]; } case ISD::SETLE: { // {subc.reg, subc.CA} = (subcarry %b, %a) @@ -3334,9 +3592,9 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, } std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::SETLT: { // {subc.reg, subc.CA} = (subcarry %a, %b) @@ -3369,7 +3627,7 @@ 
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) @@ -3386,7 +3644,7 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) @@ -3451,8 +3709,8 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); - LLVM_FALLTHROUGH; + IsRHSZero = RHSConst && RHSConst->isZero(); + [[fallthrough]]; } case ISD::SETLE: { // {subc.reg, subc.CA} = (subcarry %b, %a) @@ -3494,9 +3752,9 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, } std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSZero = RHSConst && RHSConst->isZero(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::SETLT: { // {subc.reg, subc.CA} = (subcarry %a, %b) @@ -3532,7 +3790,7 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, // {subc.reg, subc.CA} = (subcarry %a, %b) // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, 
subc.CA) @@ -3549,7 +3807,7 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, // {subc.reg, subc.CA} = (subcarry %b, %a) // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) std::swap(LHS, RHS); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETULT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) @@ -3576,7 +3834,7 @@ static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { return true; // We want the value in a GPR if it is being extended, used for a select, or // used in logical operations. - for (auto CompareUse : Compare.getNode()->uses()) + for (auto *CompareUse : Compare.getNode()->uses()) if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && CompareUse->getOpcode() != ISD::ZERO_EXTEND && CompareUse->getOpcode() != ISD::SELECT && @@ -3646,6 +3904,12 @@ bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) return false; + // For POWER10, it is more profitable to use the set boolean extension + // instructions rather than the integer compare elimination codegen. + // Users can override this via the command line option, `--ppc-gpr-icmps`. + if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) + return false; + switch (N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: @@ -3673,9 +3937,19 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::SRL: + // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that + // uses the BRH instruction. 
+ if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 && + N->getOperand(0).getOpcode() == ISD::BSWAP) { + auto &OpRight = N->getOperand(1); + ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight); + if (SRLConst && SRLConst->getSExtValue() == 16) + return false; + } + LLVM_FALLTHROUGH; case ISD::ROTL: case ISD::SHL: - case ISD::SRL: case ISD::AND: case ISD::OR: { BitPermutationSelector BPS(CurDAG); @@ -3693,7 +3967,7 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &dl) { + const SDLoc &dl, SDValue Chain) { // Always select the LHS. unsigned Opc; @@ -3788,7 +4062,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, Opc = PPC::CMPD; } } else if (LHS.getValueType() == MVT::f32) { - if (PPCSubTarget->hasSPE()) { + if (Subtarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: @@ -3815,7 +4089,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, } else Opc = PPC::FCMPUS; } else if (LHS.getValueType() == MVT::f64) { - if (PPCSubTarget->hasSPE()) { + if (Subtarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: @@ -3840,13 +4114,18 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } } else - Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; + Opc = Subtarget->hasVSX() ? 
PPC::XSCMPUDP : PPC::FCMPUD; } else { assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); - assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); + assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector"); Opc = PPC::XSCMPUQP; } - return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); + if (Chain) + return SDValue( + CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), + 0); + else + return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, @@ -3872,10 +4151,10 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, return UseSPE ? PPC::PRED_GT : PPC::PRED_LT; case ISD::SETULE: case ISD::SETLE: - return UseSPE ? PPC::PRED_LE : PPC::PRED_LE; + return PPC::PRED_LE; case ISD::SETOGT: case ISD::SETGT: - return UseSPE ? PPC::PRED_GT : PPC::PRED_GT; + return PPC::PRED_GT; case ISD::SETUGE: case ISD::SETGE: return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; @@ -3921,7 +4200,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, -// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). +// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, +// and v4f32). 
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; @@ -4002,6 +4282,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPEQUW; else if (VecVT == MVT::v2i64) return PPC::VCMPEQUD; + else if (VecVT == MVT::v1i128) + return PPC::VCMPEQUQ; break; case ISD::SETGT: if (VecVT == MVT::v16i8) @@ -4012,6 +4294,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTSW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTSD; + else if (VecVT == MVT::v1i128) + return PPC::VCMPGTSQ; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) @@ -4022,6 +4306,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTUW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTUD; + else if (VecVT == MVT::v1i128) + return PPC::VCMPGTUQ; break; default: break; @@ -4033,18 +4319,23 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + ISD::CondCode CC = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + + SDValue LHS = N->getOperand(IsStrict ? 1 : 0); + SDValue RHS = N->getOperand(IsStrict ? 2 : 1); - if (!PPCSubTarget->useCRBits() && - isInt32Immediate(N->getOperand(1), Imm)) { + if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
// setcc op, 0 if (Imm == 0) { - SDValue Op = N->getOperand(0); + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: { @@ -4079,7 +4370,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { } } } else if (Imm == ~0U) { // setcc op, -1 - SDValue Op = N->getOperand(0); + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: @@ -4122,26 +4413,23 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { } } - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. - if (LHS.getValueType().isVector()) { - if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) + if (!IsStrict && LHS.getValueType().isVector()) { + if (Subtarget->hasSPE()) return false; EVT VecVT = LHS.getValueType(); bool Swap, Negate; - unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, - PPCSubTarget->hasVSX(), Swap, Negate); + unsigned int VCmpInst = + getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate); if (Swap) std::swap(LHS, RHS); EVT ResVT = VecVT.changeVectorElementTypeToInteger(); if (Negate) { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); - CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, + CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? 
PPC::XXLNOR : PPC::VNOR, ResVT, VCmp, VCmp); return true; } @@ -4150,24 +4438,26 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return true; } - if (PPCSubTarget->useCRBits()) + if (Subtarget->useCRBits()) return false; bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); - SDValue CCReg = SelectCC(LHS, RHS, CC, dl); + SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); + if (IsStrict) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); SDValue IntCR; // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that // The correct compare instruction is already set by SelectCC() - if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { + if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { Idx = 1; } // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); - SDValue InFlag(nullptr, 0); // Null incoming flag value. + SDValue InFlag; // Null incoming flag value. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); @@ -4193,9 +4483,10 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); StoreSDNode *STN = dyn_cast<StoreSDNode>(N); + MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N); SDValue AddrOp; - if (LDN) - AddrOp = LDN->getOperand(1); + if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT)) + AddrOp = N->getOperand(1); else if (STN) AddrOp = STN->getOperand(2); @@ -4209,7 +4500,7 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { // because it is translated to r31 or r1 + slot + offset. We won't know the // slot number until the stack frame is finalized. 
const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); - unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); + unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); if ((SlotAlign % Val) != 0) return false; @@ -4241,13 +4532,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, SDValue TrueRes = N->getOperand(2); SDValue FalseRes = N->getOperand(3); ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); - if (!TrueConst) + if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 && + N->getSimpleValueType(0) != MVT::i32)) return false; - assert((N->getSimpleValueType(0) == MVT::i64 || - N->getSimpleValueType(0) == MVT::i32) && - "Expecting either i64 or i32 here."); - // We are looking for any of: // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) @@ -4261,8 +4549,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) return false; - bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; - SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); + SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC + ? 
FalseRes + : FalseRes.getOperand(0); + bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC; if (SetOrSelCC.getOpcode() != ISD::SETCC && SetOrSelCC.getOpcode() != ISD::SELECT_CC) return false; @@ -4333,7 +4623,7 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, if (!IsUnCmp && InnerCC != ISD::SETNE) return false; IsUnCmp = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETLT: if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || (InnerCC == ISD::SETLT && InnerSwapped)) @@ -4352,7 +4642,7 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, if (!IsUnCmp && InnerCC != ISD::SETNE) return false; IsUnCmp = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETGT: if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || (InnerCC == ISD::SETGT && InnerSwapped)) @@ -4371,142 +4661,380 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, return true; } -bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) { - if (N->getOpcode() != ISD::AND) +// Return true if it's a software square-root/divide operand. +static bool isSWTestOp(SDValue N) { + if (N.getOpcode() == PPCISD::FTSQRT) + return true; + if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) || + N.getOpcode() != ISD::INTRINSIC_WO_CHAIN) + return false; + switch (N.getConstantOperandVal(0)) { + case Intrinsic::ppc_vsx_xvtdivdp: + case Intrinsic::ppc_vsx_xvtdivsp: + case Intrinsic::ppc_vsx_xvtsqrtdp: + case Intrinsic::ppc_vsx_xvtsqrtsp: + return true; + } + return false; +} + +bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { + assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected."); + // We are looking for following patterns, where `truncate to i1` actually has + // the same semantic with `and 1`. 
+ // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) + // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) + // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) + // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) + // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) + // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) + // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) + // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return false; + + SDValue CmpRHS = N->getOperand(3); + if (!isa<ConstantSDNode>(CmpRHS) || + cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0) + return false; + + SDValue CmpLHS = N->getOperand(2); + if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0))) + return false; + + unsigned PCC = 0; + bool IsCCNE = CC == ISD::SETNE; + if (CmpLHS.getOpcode() == ISD::AND && + isa<ConstantSDNode>(CmpLHS.getOperand(1))) + switch (CmpLHS.getConstantOperandVal(1)) { + case 1: + PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; + break; + case 2: + PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE; + break; + case 4: + PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE; + break; + case 8: + PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE; + break; + default: + return false; + } + else if (CmpLHS.getOpcode() == ISD::TRUNCATE && + CmpLHS.getValueType() == MVT::i1) + PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; + + if (PCC) { + SDLoc dl(N); + SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4), + N->getOperand(0)}; + CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); + return true; + } + return false; +} + +bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) { + // Sometimes the promoted value of the intrinsic is ANDed by some non-zero + // value, for example when crbits is disabled. 
If so, select the + // loop_decrement intrinsics now. + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); + + if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) || + isNullConstant(LHS.getOperand(1))) + return false; + + if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN || + cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() != + Intrinsic::loop_decrement) + return false; + + if (!isa<ConstantSDNode>(RHS)) + return false; + + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && + "Counter decrement comparison is not EQ or NE"); + + SDValue OldDecrement = LHS.getOperand(0); + assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!"); + + SDLoc DecrementLoc(OldDecrement); + SDValue ChainInput = OldDecrement.getOperand(0); + SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc) + : getI32Imm(1, DecrementLoc)}; + unsigned DecrementOpcode = + Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop; + SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc, + MVT::i1, DecrementOps); + + unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); + bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); + unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn; + + ReplaceUses(LHS.getValue(0), LHS.getOperand(1)); + CurDAG->RemoveDeadNode(LHS.getNode()); + + // Mark the old loop_decrement intrinsic as dead. 
+ ReplaceUses(OldDecrement.getValue(1), ChainInput); + CurDAG->RemoveDeadNode(OldDecrement.getNode()); + + SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, + ChainInput, N->getOperand(0)); + + CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0), + N->getOperand(4), Chain); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + unsigned Imm; + if (!isInt32Immediate(N->getOperand(1), Imm)) return false; SDLoc dl(N); SDValue Val = N->getOperand(0); - unsigned Imm, Imm2, SH, MB, ME; - uint64_t Imm64; - + unsigned SH, MB, ME; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm - if (isInt32Immediate(N->getOperand(1), Imm) && - isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { - SDValue Val = N->getOperand(0).getOperand(0); - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; + if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { + Val = Val.getOperand(0); + SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // If this is just a masked value where the input is not handled, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm - if (isInt32Immediate(N->getOperand(1), Imm)) { - if (isRunOfOnes(Imm, MB, ME) && - N->getOperand(0).getOpcode() != ISD::ROTL) { - SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; - } - // AND X, 0 -> 0, not "rlwinm 32". 
- if (Imm == 0) { - ReplaceUses(SDValue(N, 0), N->getOperand(1)); - return true; - } + if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { + SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; + } - // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a - // bitfield insert. - if (N->getOperand(0).getOpcode() == ISD::OR && - isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { - // The idea here is to check whether this is equivalent to: - // (c1 & m) | (x & ~m) - // where m is a run-of-ones mask. The logic here is that, for each bit in - // c1 and c2: - // - if both are 1, then the output will be 1. - // - if both are 0, then the output will be 0. - // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will - // come from x. - // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will - // be 0. - // If that last condition is never the case, then we can form m from the - // bits that are the same between c1 and c2. - unsigned MB, ME; - if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { - SDValue Ops[] = { N->getOperand(0).getOperand(0), - N->getOperand(0).getOperand(1), - getI32Imm(0, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; - ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); - return true; - } - } - } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) { - // If this is a 64-bit zero-extension mask, emit rldicl. 
- if (isMask_64(Imm64)) { - MB = 64 - countTrailingOnes(Imm64); - SH = 0; - - if (Val.getOpcode() == ISD::ANY_EXTEND) { - auto Op0 = Val.getOperand(0); - if ( Op0.getOpcode() == ISD::SRL && - isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { - - auto ResultType = Val.getNode()->getValueType(0); - auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, - ResultType); - SDValue IDVal (ImDef, 0); - - Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, - ResultType, IDVal, Op0.getOperand(0), - getI32Imm(1, dl)), 0); - SH = 64 - Imm; - } - } + // AND X, 0 -> 0, not "rlwinm 32". + if (Imm == 0) { + ReplaceUses(SDValue(N, 0), N->getOperand(1)); + return true; + } - // If the operand is a logical right shift, we can fold it into this - // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) - // for n <= mb. The right shift is really a left rotate followed by a - // mask, and this mask is a more-restrictive sub-mask of the mask implied - // by the shift. - if (Val.getOpcode() == ISD::SRL && - isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { - assert(Imm < 64 && "Illegal shift amount"); - Val = Val.getOperand(0); - SH = 64 - Imm; - } + return false; +} - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); - return true; - } else if (isMask_64(~Imm64)) { - // If this is a negated 64-bit zero-extension mask, - // i.e. the immediate is a sequence of ones from most significant side - // and all zero for reminder, we should use rldicr. 
- MB = 63 - countTrailingOnes(~Imm64); - SH = 0; - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); - return true; - } +bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; - // It is not 16-bit imm that means we need two instructions at least if - // using "and" instruction. Try to exploit it with rotate mask instructions. - if (isRunOfOnes64(Imm64, MB, ME)) { - if (MB >= 32 && MB <= ME) { - // MB ME - // +----------------------+ - // |xxxxxxxxxxx00011111000| - // +----------------------+ - // 0 32 64 - // We can only do it if the MB is larger than 32 and MB <= ME - // as RLWINM will replace the content of [0 - 32) with [32 - 64) even - // we didn't rotate it. - SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl), - getI64Imm(ME - 32, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); - return true; - } - // TODO - handle it with rldicl + rldicl - } + unsigned MB, ME; + if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { + // MB ME + // +----------------------+ + // |xxxxxxxxxxx00011111000| + // +----------------------+ + // 0 32 64 + // We can only do it if the MB is larger than 32 and MB <= ME + // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even + // we didn't rotate it. 
+ SDLoc dl(N); + SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), + getI64Imm(ME - 32, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); + return true; } return false; } +bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; + + // Do nothing if it is 16-bit imm as the pattern in the .td file handle + // it well with "andi.". + if (isUInt<16>(Imm64)) + return false; + + SDLoc Loc(N); + SDValue Val = N->getOperand(0); + + // Optimized with two rldicl's as follows: + // Add missing bits on left to the mask and check that the mask is a + // wrapped run of ones, i.e. + // Change pattern |0001111100000011111111| + // to |1111111100000011111111|. + unsigned NumOfLeadingZeros = countLeadingZeros(Imm64); + if (NumOfLeadingZeros != 0) + Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros); + + unsigned MB, ME; + if (!isRunOfOnes64(Imm64, MB, ME)) + return false; + + // ME MB MB-ME+63 + // +----------------------+ +----------------------+ + // |1111111100000011111111| -> |0000001111111111111111| + // +----------------------+ +----------------------+ + // 0 63 0 63 + // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. + unsigned OnesOnLeft = ME + 1; + unsigned ZerosInBetween = (MB - ME + 63) & 63; + // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear + // on the left the bits that are already zeros in the mask. + Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, + getI64Imm(OnesOnLeft, Loc), + getI64Imm(ZerosInBetween, Loc)), + 0); + // MB-ME+63 ME MB + // +----------------------+ +----------------------+ + // |0000001111111111111111| -> |0001111100000011111111| + // +----------------------+ +----------------------+ + // 0 63 0 63 + // Rotate back by 64 - OnesOnLeft to undo previous rotate. 
Then clear on the + // left the number of ones we previously added. + SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), + getI64Imm(NumOfLeadingZeros, Loc)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + unsigned Imm; + if (!isInt32Immediate(N->getOperand(1), Imm)) + return false; + + SDValue Val = N->getOperand(0); + unsigned Imm2; + // ISD::OR doesn't get all the bitfield insertion fun. + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. + if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) + return false; + + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. + // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. + // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. + unsigned MB, ME; + if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { + SDLoc dl(N); + SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), + getI32Imm(MB, dl), getI32Imm(ME, dl)}; + ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); + return true; + } + + return false; +} + +bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) + return false; + + // If this is a 64-bit zero-extension mask, emit rldicl. 
+ unsigned MB = 64 - countTrailingOnes(Imm64); + unsigned SH = 0; + unsigned Imm; + SDValue Val = N->getOperand(0); + SDLoc dl(N); + + if (Val.getOpcode() == ISD::ANY_EXTEND) { + auto Op0 = Val.getOperand(0); + if (Op0.getOpcode() == ISD::SRL && + isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { + + auto ResultType = Val.getNode()->getValueType(0); + auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); + SDValue IDVal(ImDef, 0); + + Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, + IDVal, Op0.getOperand(0), + getI32Imm(1, dl)), + 0); + SH = 64 - Imm; + } + } + + // If the operand is a logical right shift, we can fold it into this + // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) + // for n <= mb. The right shift is really a left rotate followed by a + // mask, and this mask is a more-restrictive sub-mask of the mask implied + // by the shift. + if (Val.getOpcode() == ISD::SRL && + isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { + assert(Imm < 64 && "Illegal shift amount"); + Val = Val.getOperand(0); + SH = 64 - Imm; + } + + SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || + !isMask_64(~Imm64)) + return false; + + // If this is a negated 64-bit zero-extension mask, + // i.e. the immediate is a sequence of ones from most significant side + // and all zero for reminder, we should use rldicr. 
+ unsigned MB = 63 - countTrailingOnes(~Imm64); + unsigned SH = 0; + SDLoc dl(N); + SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { + assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected"); + uint64_t Imm64; + unsigned MB, ME; + SDValue N0 = N->getOperand(0); + + // We won't get fewer instructions if the imm is 32-bit integer. + // rldimi requires the imm to have consecutive ones with both sides zero. + // Also, make sure the first Op has only one use, otherwise this may increase + // register pressure since rldimi is destructive. + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || + isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse()) + return false; + + unsigned SH = 63 - ME; + SDLoc Dl(N); + // Use select64Imm for making LI instr instead of directly putting Imm64 + SDValue Ops[] = { + N->getOperand(0), + SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0), + getI32Imm(SH, Dl), getI32Imm(MB, Dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops); + return true; +} + // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void PPCDAGToDAGISel::Select(SDNode *N) { @@ -4541,7 +5069,214 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } break; + case ISD::INTRINSIC_VOID: { + auto IntrinsicID = N->getConstantOperandVal(1); + if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw && + IntrinsicID != Intrinsic::ppc_trapd && + IntrinsicID != Intrinsic::ppc_trap) + break; + unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw || + IntrinsicID == Intrinsic::ppc_trapd) + ? 
PPC::TDI + : PPC::TWI; + SmallVector<SDValue, 4> OpsWithMD; + unsigned MDIndex; + if (IntrinsicID == Intrinsic::ppc_tdw || + IntrinsicID == Intrinsic::ppc_tw) { + SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)}; + int16_t SImmOperand2; + int16_t SImmOperand3; + int16_t SImmOperand4; + bool isOperand2IntS16Immediate = + isIntS16Immediate(N->getOperand(2), SImmOperand2); + bool isOperand3IntS16Immediate = + isIntS16Immediate(N->getOperand(3), SImmOperand3); + // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg + + // reg or imm + imm. The imm + imm form will be optimized to either an + // unconditional trap or a nop in a later pass. + if (isOperand2IntS16Immediate == isOperand3IntS16Immediate) + Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW; + else if (isOperand3IntS16Immediate) + // The 2nd and 3rd operands are reg + imm. + Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl); + else { + // The 2nd and 3rd operands are imm + reg. + bool isOperand4IntS16Immediate = + isIntS16Immediate(N->getOperand(4), SImmOperand4); + (void)isOperand4IntS16Immediate; + assert(isOperand4IntS16Immediate && + "The 4th operand is not an Immediate"); + // We need to flip the condition immediate TO. + int16_t TO = int(SImmOperand4) & 0x1F; + // We swap the first and second bit of TO if they are not same. + if ((TO & 0x1) != ((TO & 0x2) >> 1)) + TO = (TO & 0x1) ? TO + 1 : TO - 1; + // We swap the fourth and fifth bit of TO if they are not same. + if ((TO & 0x8) != ((TO & 0x10) >> 1)) + TO = (TO & 0x8) ? 
TO + 8 : TO - 8; + Ops[0] = getI32Imm(TO, dl); + Ops[1] = N->getOperand(3); + Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl); + } + OpsWithMD = {Ops[0], Ops[1], Ops[2]}; + MDIndex = 5; + } else { + OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)}; + MDIndex = 3; + } + + if (N->getNumOperands() > MDIndex) { + SDValue MDV = N->getOperand(MDIndex); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!"); + assert((isa<MDString>(MD->getOperand(0)) && cast<MDString>( + MD->getOperand(0))->getString().equals("ppc-trap-reason")) + && "Unsupported annotation data type!"); + for (unsigned i = 1; i < MD->getNumOperands(); i++) { + assert(isa<MDString>(MD->getOperand(i)) && + "Invalid data type for annotation ppc-trap-reason!"); + OpsWithMD.push_back( + getI32Imm(std::stoi(cast<MDString>( + MD->getOperand(i))->getString().str()), dl)); + } + } + OpsWithMD.push_back(N->getOperand(0)); // chain + CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD); + return; + } + + case ISD::INTRINSIC_WO_CHAIN: { + // We emit the PPC::FSELS instruction here because of type conflicts with + // the comparison operand. The FSELS instruction is defined to use an 8-byte + // comparison like the FSELD version. The fsels intrinsic takes a 4-byte + // value for the comparison. When selecting through a .td file, a type + // error is raised. Must check this first so we never break on the + // !Subtarget->isISA3_1() check. + auto IntID = N->getConstantOperandVal(0); + if (IntID == Intrinsic::ppc_fsels) { + SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)}; + CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops); + return; + } + + if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) { + auto Pred = N->getConstantOperandVal(1); + unsigned Opcode = + IntID == Intrinsic::ppc_bcdadd_p ? 
PPC::BCDADD_rec : PPC::BCDSUB_rec; + unsigned SubReg = 0; + unsigned ShiftVal = 0; + bool Reverse = false; + switch (Pred) { + case 0: + SubReg = PPC::sub_eq; + ShiftVal = 1; + break; + case 1: + SubReg = PPC::sub_eq; + ShiftVal = 1; + Reverse = true; + break; + case 2: + SubReg = PPC::sub_lt; + ShiftVal = 3; + break; + case 3: + SubReg = PPC::sub_lt; + ShiftVal = 3; + Reverse = true; + break; + case 4: + SubReg = PPC::sub_gt; + ShiftVal = 2; + break; + case 5: + SubReg = PPC::sub_gt; + ShiftVal = 2; + Reverse = true; + break; + case 6: + SubReg = PPC::sub_un; + break; + case 7: + SubReg = PPC::sub_un; + Reverse = true; + break; + } + + EVT VTs[] = {MVT::v16i8, MVT::Glue}; + SDValue Ops[] = {N->getOperand(2), N->getOperand(3), + CurDAG->getTargetConstant(0, dl, MVT::i32)}; + SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0); + SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); + // On Power10, we can use SETBC[R]. On prior architectures, we have to use + // MFOCRF and shift/negate the value. + if (Subtarget->isISA3_1()) { + SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32); + SDValue CRBit = SDValue( + CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, + CR6Reg, SubRegIdx, BCDOp.getValue(1)), + 0); + CurDAG->SelectNodeTo(N, Reverse ? 
PPC::SETBCR : PPC::SETBC, MVT::i32, + CRBit); + } else { + SDValue Move = + SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg, + BCDOp.getValue(1)), + 0); + SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl), + getI32Imm(31, dl), getI32Imm(31, dl)}; + if (!Reverse) + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + else { + SDValue Shift = SDValue( + CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); + CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl)); + } + } + return; + } + + if (!Subtarget->isISA3_1()) + break; + unsigned Opcode = 0; + switch (IntID) { + default: + break; + case Intrinsic::ppc_altivec_vstribr_p: + Opcode = PPC::VSTRIBR_rec; + break; + case Intrinsic::ppc_altivec_vstribl_p: + Opcode = PPC::VSTRIBL_rec; + break; + case Intrinsic::ppc_altivec_vstrihr_p: + Opcode = PPC::VSTRIHR_rec; + break; + case Intrinsic::ppc_altivec_vstrihl_p: + Opcode = PPC::VSTRIHL_rec; + break; + } + if (!Opcode) + break; + + // Generate the appropriate vector string isolate intrinsic to match. + EVT VTs[] = {MVT::v16i8, MVT::Glue}; + SDValue VecStrOp = + SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); + // Vector string isolate instructions update the EQ bit of CR6. + // Generate a SETBC instruction to extract the bit and place it in a GPR. 
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); + SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); + SDValue CRBit = SDValue( + CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, + CR6Reg, SubRegIdx, VecStrOp.getValue(1)), + 0); + CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); + return; + } + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: if (trySETCC(N)) return; break; @@ -4551,17 +5286,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::ADDI_TLSGD_L_ADDR: { const Module *Mod = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || + !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() || Mod->getPICLevel() == PICLevel::SmallPIC) break; // Attach global base pointer on GETtlsADDR32 node in order to // generate secure plt code for TLS symbols. getGlobalBaseReg(); } break; - case PPCISD::CALL: { + case PPCISD::CALL: + case PPCISD::CALL_RM: { if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() || - !PPCSubTarget->isTargetELF()) + !TM.isPositionIndependent() || !Subtarget->isSecurePlt() || + !Subtarget->isTargetELF()) break; SDValue Op = N->getOperand(1); @@ -4625,7 +5361,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: { // Change TLS initial-exec D-form stores to X-form stores. StoreSDNode *ST = cast<StoreSDNode>(N); - if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() && + if (EnableTLSOpt && Subtarget->isELFv2ABI() && ST->getAddressingMode() != ISD::PRE_INC) if (tryTLSXFormStore(ST)) return; @@ -4639,7 +5375,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) { // Change TLS initial-exec D-form loads to X-form loads. 
- if (EnableTLSOpt && PPCSubTarget->isELFv2ABI()) + if (EnableTLSOpt && Subtarget->isELFv2ABI()) if (tryTLSXFormLoad(LD)) return; break; @@ -4693,8 +5429,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); - case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX - case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX case MVT::f64: Opcode = PPC::LFDUX; break; case MVT::f32: Opcode = PPC::LFSUX; break; case MVT::i32: Opcode = PPC::LWZUX; break; @@ -4730,7 +5464,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr - if (tryAndWithMask(N)) + if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || + tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) return; // Other cases are autogenerated. @@ -4748,15 +5483,20 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } } + // If this is 'or' against an imm with consecutive ones and both sides zero, + // try to emit rldimi + if (tryAsSingleRLDIMI(N)) + return; + // OR with a 32-bit immediate can be handled by ori + oris // without creating an immediate in a GPR. uint64_t Imm64 = 0; - bool IsPPC64 = PPCSubTarget->isPPC64(); + bool IsPPC64 = Subtarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. 
@@ -4779,7 +5519,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // XOR with a 32-bit immediate can be handled by xori + xoris // without creating an immediate in a GPR. uint64_t Imm64 = 0; - bool IsPPC64 = PPCSubTarget->isPPC64(); + bool IsPPC64 = Subtarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. @@ -4801,7 +5541,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm); return; } @@ -4835,6 +5575,47 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::MUL: { + SDValue Op1 = N->getOperand(1); + if (Op1.getOpcode() != ISD::Constant || + (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32)) + break; + + // If the multiplier fits int16, we can handle it with mulli. + int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); + unsigned Shift = countTrailingZeros<uint64_t>(Imm); + if (isInt<16>(Imm) || !Shift) + break; + + // If the shifted value fits int16, we can do this transformation: + // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to + // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). 
+ uint64_t ImmSh = Imm >> Shift; + if (!isInt<16>(ImmSh)) + break; + + uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); + if (Op1.getValueType() == MVT::i64) { + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); + SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, + N->getOperand(0), SDImm); + + SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), + getI32Imm(63 - Shift, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return; + } else { + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32); + SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32, + N->getOperand(0), SDImm); + + SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), + getI32Imm(0, dl), getI32Imm(31 - Shift, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return; + } + break; + } // FIXME: Remove this once the ANDI glue bug is fixed: case PPCISD::ANDI_rec_1_EQ_BIT: case PPCISD::ANDI_rec_1_GT_BIT: { @@ -4866,11 +5647,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. 
- if (PPCSubTarget->useCRBits() && - N->getOperand(0).getValueType() == MVT::i1) + if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) break; - if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { + if (Subtarget->isISA3_0() && Subtarget->isPPC64()) { bool NeedSwapOps = false; bool IsUnCmp = false; if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { @@ -4900,8 +5680,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) - if (N1C->isNullValue() && N3C->isNullValue() && - N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && + if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL && + CC == ISD::SETNE && // FIXME: Implement this optzn for PPC64. N->getValueType(0) == MVT::i32) { SDNode *Tmp = @@ -4945,7 +5725,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } unsigned BROpc = - getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget); + getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget); unsigned SelectCCOp; if (N->getValueType(0) == MVT::i32) @@ -4953,29 +5733,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) { else if (N->getValueType(0) == MVT::i64) SelectCCOp = PPC::SELECT_CC_I8; else if (N->getValueType(0) == MVT::f32) { - if (PPCSubTarget->hasP8Vector()) + if (Subtarget->hasP8Vector()) SelectCCOp = PPC::SELECT_CC_VSSRC; - else if (PPCSubTarget->hasSPE()) + else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE4; else SelectCCOp = PPC::SELECT_CC_F4; } else if (N->getValueType(0) == MVT::f64) { - if (PPCSubTarget->hasVSX()) + if (Subtarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; - else if (PPCSubTarget->hasSPE()) + else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; else SelectCCOp = PPC::SELECT_CC_F8; } else if (N->getValueType(0) == MVT::f128) SelectCCOp = 
PPC::SELECT_CC_F16; - else if (PPCSubTarget->hasSPE()) + else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; - else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) - SelectCCOp = PPC::SELECT_CC_QFRC; - else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) - SelectCCOp = PPC::SELECT_CC_QSRC; - else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) - SelectCCOp = PPC::SELECT_CC_QBRC; else if (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64) SelectCCOp = PPC::SELECT_CC_VSRC; @@ -4988,8 +5762,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case ISD::VECTOR_SHUFFLE: - if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || - N->getValueType(0) == MVT::v2i64)) { + if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || + N->getValueType(0) == MVT::v2i64)) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), @@ -5024,7 +5798,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // For little endian, we must swap the input operands and adjust // the mask elements (reverse and invert them). - if (PPCSubTarget->isLittleEndian()) { + if (Subtarget->isLittleEndian()) { std::swap(Op1, Op2); unsigned tmp = DM[0]; DM[0] = 1 - DM[1]; @@ -5041,7 +5815,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; case PPCISD::BDNZ: case PPCISD::BDZ: { - bool IsPPC64 = PPCSubTarget->isPPC64(); + bool IsPPC64 = Subtarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? 
PPC::BDNZ8 : PPC::BDNZ) @@ -5067,9 +5841,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case ISD::BR_CC: { + if (tryFoldSWTestBRCC(N)) + return; + if (trySelectLoopCountIntrinsic(N)) + return; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); unsigned PCC = - getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget); + getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); if (N->getOperand(2).getValueType() == MVT::i1) { unsigned Opc; @@ -5122,11 +5900,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case PPCISD::TOC_ENTRY: { - const bool isPPC64 = PPCSubTarget->isPPC64(); - const bool isELFABI = PPCSubTarget->isSVR4ABI(); - const bool isAIXABI = PPCSubTarget->isAIXABI(); - - assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct"); + const bool isPPC64 = Subtarget->isPPC64(); + const bool isELFABI = Subtarget->isSVR4ABI(); + const bool isAIXABI = Subtarget->isAIXABI(); // PowerPC only support small, medium and large code model. const CodeModel::Model CModel = TM.getCodeModel(); @@ -5136,36 +5912,57 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (isAIXABI && CModel == CodeModel::Medium) report_fatal_error("Medium code model is not supported on AIX."); - // For 64-bit small code model, we allow SelectCodeCommon to handle this, - // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. - if (isPPC64 && CModel == CodeModel::Small) + // For 64-bit ELF small code model, we allow SelectCodeCommon to handle + // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX + // small code model, we need to check for a toc-data attribute. + if (isPPC64 && !isAIXABI && CModel == CodeModel::Small) break; - // Handle 32-bit small code model. - if (!isPPC64) { - // Transforms the ISD::TOC_ENTRY node to a PPCISD::LWZtoc. 
- auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { - SDValue GA = TocEntry->getOperand(0); - SDValue TocBase = TocEntry->getOperand(1); - SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - TocBase); - transferMemOperands(TocEntry, MN); - ReplaceNode(TocEntry, MN); - }; + auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry, + EVT OperandTy) { + SDValue GA = TocEntry->getOperand(0); + SDValue TocBase = TocEntry->getOperand(1); + SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase); + transferMemOperands(TocEntry, MN); + ReplaceNode(TocEntry, MN); + }; + // Handle 32-bit small code model. + if (!isPPC64 && CModel == CodeModel::Small) { + // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either + // PPC::ADDItoc, or PPC::LWZtoc if (isELFABI) { assert(TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code."); // 32-bit ELF always uses a small code model toc access. - replaceWithLWZtoc(N); + replaceWith(PPC::LWZtoc, N, MVT::i32); return; } - if (isAIXABI && CModel == CodeModel::Small) { - replaceWithLWZtoc(N); + assert(isAIXABI && "ELF ABI already handled"); + + if (hasTocDataAttr(N->getOperand(0), + CurDAG->getDataLayout().getPointerSize())) { + replaceWith(PPC::ADDItoc, N, MVT::i32); return; } + + replaceWith(PPC::LWZtoc, N, MVT::i32); + return; + } + + if (isPPC64 && CModel == CodeModel::Small) { + assert(isAIXABI && "ELF ABI handled in common SelectCode"); + + if (hasTocDataAttr(N->getOperand(0), + CurDAG->getDataLayout().getPointerSize())) { + replaceWith(PPC::ADDItoc8, N, MVT::i64); + return; + } + // Break if it doesn't have toc data attribute. Proceed with common + // SelectCode. + break; } assert(CModel != CodeModel::Small && "All small code models handled."); @@ -5177,7 +5974,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We // generate two instructions as described below. 
The first source operand // is a symbol reference. If it must be toc-referenced according to - // PPCSubTarget, we generate: + // Subtarget, we generate: // [32-bit AIX] // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) // [64-bit ELF/AIX] @@ -5209,7 +6006,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. - assert(PPCSubTarget->is32BitELFABI() && + assert(Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), @@ -5288,6 +6085,78 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } } + case PPCISD::LD_SPLAT: { + // Here we want to handle splat load for type v16i8 and v8i16 when there is + // no direct move, we don't need to use stack for this case. If target has + // direct move, we should be able to get the best selection in the .td file. + if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove()) + break; + + EVT Type = N->getValueType(0); + if (Type != MVT::v16i8 && Type != MVT::v8i16) + break; + + // If the alignment for the load is 16 or bigger, we don't need the + // permutated mask to get the required value. The value must be the 0 + // element in big endian target or 7/15 in little endian target in the + // result vsx register of lvx instruction. + // Select the instruction in the .td file. + if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) && + isOffsetMultipleOf(N, 16)) + break; + + SDValue ZeroReg = + CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO, + Subtarget->isPPC64() ? MVT::i64 : MVT::i32); + unsigned LIOpcode = Subtarget->isPPC64() ? 
PPC::LI8 : PPC::LI; + // v16i8 LD_SPLAT addr + // ======> + // Mask = LVSR/LVSL 0, addr + // LoadLow = LVX 0, addr + // Perm = VPERM LoadLow, LoadLow, Mask + // Splat = VSPLTB 15/0, Perm + // + // v8i16 LD_SPLAT addr + // ======> + // Mask = LVSR/LVSL 0, addr + // LoadLow = LVX 0, addr + // LoadHigh = LVX (LI, 1), addr + // Perm = VPERM LoadLow, LoadHigh, Mask + // Splat = VSPLTH 7/0, Perm + unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH; + unsigned SplatElemIndex = + Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0; + + SDNode *Mask = CurDAG->getMachineNode( + Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg, + N->getOperand(1)); + + SDNode *LoadLow = + CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other, + {ZeroReg, N->getOperand(1), N->getOperand(0)}); + + SDNode *LoadHigh = LoadLow; + if (Type == MVT::v8i16) { + LoadHigh = CurDAG->getMachineNode( + PPC::LVX, dl, MVT::v16i8, MVT::Other, + {SDValue(CurDAG->getMachineNode( + LIOpcode, dl, MVT::i32, + CurDAG->getTargetConstant(1, dl, MVT::i8)), + 0), + N->getOperand(1), SDValue(LoadLow, 1)}); + } + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1)); + transferMemOperands(N, LoadHigh); + + SDNode *Perm = + CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0), + SDValue(LoadHigh, 0), SDValue(Mask, 0)); + CurDAG->SelectNodeTo(N, SplatOp, Type, + CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8), + SDValue(Perm, 0)); + return; + } } SelectCode(N); @@ -5306,7 +6175,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { "Only OR nodes are supported for CMPB"); SDValue Res; - if (!PPCSubTarget->hasCMPB()) + if (!Subtarget->hasCMPB()) return Res; if (N->getValueType(0) != MVT::i32 && @@ -5517,7 +6386,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { // only one instruction (like a zero or one), then we should fold in those // operations with the select. 
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { - if (!PPCSubTarget->useCRBits()) + if (!Subtarget->useCRBits()) return; if (N->getOpcode() != ISD::ZERO_EXTEND && @@ -5549,8 +6418,7 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { SDValue O1 = UserO1.getNode() == N ? Val : UserO1; return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, - User->getValueType(0), - O0.getNode(), O1.getNode()); + User->getValueType(0), {O0, O1}); }; // FIXME: When the semantics of the interaction between select and undef @@ -5632,16 +6500,20 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // be folded with the isel so that we don't need to materialize a register // containing zero. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; + for (const SDNode *User : N->uses()) { if (!User->isMachineOpcode()) return false; if (User->getMachineOpcode() != PPC::SELECT_I4 && User->getMachineOpcode() != PPC::SELECT_I8) return false; + SDNode *Op1 = User->getOperand(1).getNode(); SDNode *Op2 = User->getOperand(2).getNode(); + // If we have a degenerate select with two equal operands, swapping will + // not do anything, and we may run into an infinite loop. 
+ if (Op1 == Op2) + return false; + if (!Op2->isMachineOpcode()) return false; @@ -5653,7 +6525,7 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { if (!C) return false; - if (!C->isNullValue()) + if (!C->isZero()) return false; } @@ -5662,18 +6534,14 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { SmallVector<SDNode *, 4> ToReplace; - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : N->uses()) { assert((User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users"); ToReplace.push_back(User); } - for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), - UE = ToReplace.end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : ToReplace) { SDNode *ResNode = CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), User->getValueType(0), User->getOperand(0), @@ -5722,11 +6590,12 @@ void PPCDAGToDAGISel::PeepholeCROps() { Op2Set = true; else if (Op.getMachineOpcode() == PPC::CRUNSET) Op2Unset = true; - else if (Op.getMachineOpcode() == PPC::CRNOR && - Op.getOperand(0) == Op.getOperand(1)) + else if ((Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) || + Op.getMachineOpcode() == PPC::CRNOT) Op2Not = true; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } case PPC::BC: case PPC::BCn: @@ -5734,9 +6603,6 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: - case PPC::SELECT_QFRC: - case PPC::SELECT_QSRC: - case PPC::SELECT_QBRC: case PPC::SELECT_SPE: case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: @@ -5749,8 +6615,9 @@ void PPCDAGToDAGISel::PeepholeCROps() { Op1Set = true; else if (Op.getMachineOpcode() == PPC::CRUNSET) Op1Unset = true; - else if (Op.getMachineOpcode() == PPC::CRNOR && - Op.getOperand(0) == Op.getOperand(1)) + else if ((Op.getMachineOpcode() == 
PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) || + Op.getMachineOpcode() == PPC::CRNOT) Op1Not = true; } } @@ -6055,9 +6922,6 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: - case PPC::SELECT_QFRC: - case PPC::SELECT_QSRC: - case PPC::SELECT_QBRC: case PPC::SELECT_SPE: case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: @@ -6259,7 +7123,7 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, } void PPCDAGToDAGISel::PeepholePPC64ZExt() { - if (!PPCSubTarget->isPPC64()) + if (!Subtarget->isPPC64()) return; // When we zero-extend from i32 to i64, we use a pattern like this: @@ -6427,11 +7291,106 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { CurDAG->RemoveDeadNodes(); } -void PPCDAGToDAGISel::PeepholePPC64() { - // These optimizations are currently supported only for 64-bit SVR4. - if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) +static bool isVSXSwap(SDValue N) { + if (!N->isMachineOpcode()) + return false; + unsigned Opc = N->getMachineOpcode(); + + // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate + // operand is 2. + if (Opc == PPC::XXPERMDIs) { + return isa<ConstantSDNode>(N->getOperand(1)) && + N->getConstantOperandVal(1) == 2; + } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) { + return N->getOperand(0) == N->getOperand(1) && + isa<ConstantSDNode>(N->getOperand(2)) && + N->getConstantOperandVal(2) == 2; + } + + return false; +} + +// TODO: Make this complete and replace with a table-gen bit. 
+static bool isLaneInsensitive(SDValue N) { + if (!N->isMachineOpcode()) + return false; + unsigned Opc = N->getMachineOpcode(); + + switch (Opc) { + default: + return false; + case PPC::VAVGSB: + case PPC::VAVGUB: + case PPC::VAVGSH: + case PPC::VAVGUH: + case PPC::VAVGSW: + case PPC::VAVGUW: + case PPC::VMAXFP: + case PPC::VMAXSB: + case PPC::VMAXUB: + case PPC::VMAXSH: + case PPC::VMAXUH: + case PPC::VMAXSW: + case PPC::VMAXUW: + case PPC::VMINFP: + case PPC::VMINSB: + case PPC::VMINUB: + case PPC::VMINSH: + case PPC::VMINUH: + case PPC::VMINSW: + case PPC::VMINUW: + case PPC::VADDFP: + case PPC::VADDUBM: + case PPC::VADDUHM: + case PPC::VADDUWM: + case PPC::VSUBFP: + case PPC::VSUBUBM: + case PPC::VSUBUHM: + case PPC::VSUBUWM: + case PPC::VAND: + case PPC::VANDC: + case PPC::VOR: + case PPC::VORC: + case PPC::VXOR: + case PPC::VNOR: + case PPC::VMULUWM: + return true; + } +} + +// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is +// lane-insensitive. +static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) { + // Our desired xxswap might be source of COPY_TO_REGCLASS. + // TODO: Can we put this a common method for DAG? + auto SkipRCCopy = [](SDValue V) { + while (V->isMachineOpcode() && + V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) { + // All values in the chain should have single use. + if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode())) + return SDValue(); + V = V->getOperand(0); + } + return V.hasOneUse() ? V : SDValue(); + }; + + SDValue VecOp = SkipRCCopy(N->getOperand(0)); + if (!VecOp || !isLaneInsensitive(VecOp)) + return; + + SDValue LHS = SkipRCCopy(VecOp.getOperand(0)), + RHS = SkipRCCopy(VecOp.getOperand(1)); + if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS)) return; + // These swaps may still have chain-uses here, count on dead code elimination + // in following passes to remove them. 
+ DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0)); + DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0)); + DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0)); +} + +void PPCDAGToDAGISel::PeepholePPC64() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); while (Position != CurDAG->allnodes_begin()) { @@ -6440,6 +7399,9 @@ void PPCDAGToDAGISel::PeepholePPC64() { if (N->use_empty() || !N->isMachineOpcode()) continue; + if (isVSXSwap(SDValue(N, 0))) + reduceVSXSwap(N, CurDAG); + unsigned FirstOp; unsigned StorageOpcode = N->getMachineOpcode(); bool RequiresMod4Offset = false; @@ -6452,7 +7414,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { case PPC::DFLOADf64: case PPC::DFLOADf32: RequiresMod4Offset = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case PPC::LBZ: case PPC::LBZ8: case PPC::LFD: @@ -6470,7 +7432,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { case PPC::DFSTOREf64: case PPC::DFSTOREf32: RequiresMod4Offset = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case PPC::STB: case PPC::STB8: case PPC::STFD: @@ -6544,7 +7506,8 @@ void PPCDAGToDAGISel::PeepholePPC64() { int MaxDisplacement = 7; if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { const GlobalValue *GV = GA->getGlobal(); - MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); + Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); + MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); } bool UpdateHBase = false; @@ -6610,10 +7573,10 @@ void PPCDAGToDAGISel::PeepholePPC64() { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); + Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); // We can't perform this optimization for data whose alignment // is insufficient for the instruction encoding. 
- if (GV->getAlignment() < 4 && - (RequiresMod4Offset || (Offset % 4) != 0)) { + if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } @@ -6621,8 +7584,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { const Constant *C = CP->getConstVal(); - ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, - CP->getAlignment(), + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), Offset, Flags); } } |