diff options
author | Pascal Stumpf <pascal@cvs.openbsd.org> | 2016-09-03 22:47:01 +0000 |
---|---|---|
committer | Pascal Stumpf <pascal@cvs.openbsd.org> | 2016-09-03 22:47:01 +0000 |
commit | a534753628335e3186437f47e6aaf535e31fe07e (patch) | |
tree | 8f73b64c4dd01e37a87244f6a020edc46c8d4d69 | |
parent | 6b3631ebf5744993aff6637b366dbb5654aa8292 (diff) |
Use the space freed up by sparc and zaurus to import LLVM.
ok hackroom@
-rw-r--r-- | gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 464 |
1 files changed, 131 insertions, 333 deletions
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5fec51c095d..4521640e394 100644 --- a/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -11,12 +11,6 @@ // non-loop form. In cases that this kicks in, it can be a significant // performance win. // -// If compiling for code size we avoid idiom recognition if the resulting -// code could be larger than the code for the original loop. One way this could -// happen is if the loop is not removable after idiom recognition due to the -// presence of non-idiom instructions. The initial implementation of the -// heuristics applies to idioms in multi-block loops. -// //===----------------------------------------------------------------------===// // // TODO List: @@ -32,19 +26,21 @@ // i64 and larger types when i64 is legal and the value has few bits set. It // would be good to enhance isel to emit a loop for ctpop in this case. // +// We should enhance the memset/memcpy recognition to handle multiple stores in +// the loop. This would handle things like: +// void foo(_Complex float *P) +// for (i) { __real__(*P) = 0; __imag__(*P) = 0; } +// // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -59,11 +55,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" -#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; #define DEBUG_TYPE "loop-idiom" @@ -71,15 +63,9 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); -static cl::opt<bool> UseLIRCodeSizeHeurs( - "use-lir-code-size-heurs", - cl::desc("Use loop idiom recognition code size heuristics when compiling" - "with -Os/-Oz"), - cl::init(true), cl::Hidden); - namespace { -class LoopIdiomRecognize { +class LoopIdiomRecognize : public LoopPass { Loop *CurLoop; AliasAnalysis *AA; DominatorTree *DT; @@ -88,24 +74,41 @@ class LoopIdiomRecognize { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; const DataLayout *DL; - bool ApplyCodeSizeHeuristics; public: - explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, - TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, - const DataLayout *DL) - : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), - DL(DL) {} + static char ID; + explicit LoopIdiomRecognize() : LoopPass(ID) { + initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - bool runOnLoop(Loop *L); + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } private: typedef SmallVector<StoreInst *, 8> StoreList; - typedef MapVector<Value *, StoreList> StoreListMap; - StoreListMap StoreRefsForMemset; - StoreListMap StoreRefsForMemsetPattern; + StoreList StoreRefsForMemset; StoreList StoreRefsForMemcpy; bool HasMemset; bool HasMemsetPattern; @@ -119,21 +122,15 @@ private: SmallVectorImpl<BasicBlock *> &ExitBlocks); void collectStores(BasicBlock *BB); - bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern, - bool &ForMemcpy); - bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount, - bool ForMemset); + bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy); + bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, - Instruction *TheStore, - SmallPtrSetImpl<Instruction *> &Stores, - const SCEVAddRecExpr *Ev, const SCEV *BECount, - bool NegStride, bool IsLoopMemset = false); + Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount, bool NegStride); bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); - bool avoidLIRForMultiBlockLoop(bool IsMemset = false, - bool IsLoopMemset = false); /// @} /// \name Noncountable Loop Idiom Handling @@ -148,70 +145,38 @@ private: /// @} }; -class LoopIdiomRecognizeLegacyPass : public LoopPass { -public: - static char ID; - explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) { - initializeLoopIdiomRecognizeLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnLoop(Loop *L, LPPassManager &LPM) override { - if (skipLoop(L)) - return false; - - AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - TargetLibraryInfo *TLI = - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - const TargetTransformInfo *TTI = - &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( - *L->getHeader()->getParent()); - const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); - - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL); - return LIR.runOnLoop(L); - } - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - getLoopAnalysisUsage(AU); - } -}; } // End anonymous namespace. -PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &) { - const auto *DL = &L.getHeader()->getModule()->getDataLayout(); - - LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL); - if (!LIR.runOnLoop(&L)) - return PreservedAnalyses::all(); - - return getLoopPassPreservedAnalyses(); -} - -char LoopIdiomRecognizeLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", - "Recognize loop idioms", false, false) -INITIALIZE_PASS_DEPENDENCY(LoopPass) +char LoopIdiomRecognize::ID = 0; +INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", + false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom", - "Recognize loop idioms", false, false) +INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", + false, false) -Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); } +Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } -static void deleteDeadInstruction(Instruction *I) { +/// deleteDeadInstruction - Delete this instruction. Before we do, go through +/// and zero out all the operands of this instruction. If any of them become +/// dead, delete them and the computation tree that feeds them. +/// +static void deleteDeadInstruction(Instruction *I, + const TargetLibraryInfo *TLI) { + SmallVector<Value *, 16> Operands(I->value_op_begin(), I->value_op_end()); I->replaceAllUsesWith(UndefValue::get(I->getType())); I->eraseFromParent(); + for (Value *Op : Operands) + RecursivelyDeleteTriviallyDeadInstructions(Op, TLI); } //===----------------------------------------------------------------------===// @@ -220,7 +185,10 @@ static void deleteDeadInstruction(Instruction *I) { // //===----------------------------------------------------------------------===// -bool LoopIdiomRecognize::runOnLoop(Loop *L) { +bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + CurLoop = L; // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. @@ -232,9 +200,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { if (Name == "memset" || Name == "memcpy") return false; - // Determine if code size heuristics need to be applied. - ApplyCodeSizeHeuristics = - L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs; + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( + *CurLoop->getHeader()->getParent()); + DL = &CurLoop->getHeader()->getModule()->getDataLayout(); HasMemset = TLI->has(LibFunc::memset); HasMemsetPattern = TLI->has(LibFunc::memset_pattern16); @@ -267,14 +240,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() { << CurLoop->getHeader()->getName() << "\n"); bool MadeChange = false; - - // The following transforms hoist stores/memsets into the loop pre-header. - // Give up if the loop has instructions may throw. - LoopSafetyInfo SafetyInfo; - computeLoopSafetyInfo(&SafetyInfo, CurLoop); - if (SafetyInfo.MayThrow) - return MadeChange; - // Scan all the blocks in the loop that are not in subloops. for (auto *BB : CurLoop->getBlocks()) { // Ignore blocks in subloops. @@ -293,9 +258,9 @@ static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) { return (unsigned)SizeInBits >> 3; } -static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) { +static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) { const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1)); - return ConstStride->getAPInt(); + return ConstStride->getAPInt().getZExtValue(); } /// getMemSetPatternValue - If a strided store of the specified value is safe to @@ -340,15 +305,11 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { } bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, - bool &ForMemsetPattern, bool &ForMemcpy) { + bool &ForMemcpy) { // Don't touch volatile stores. if (!SI->isSimple()) return false; - // Avoid merging nontemporal stores. - if (SI->getMetadata(LLVMContext::MD_nontemporal)) - return false; - Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); @@ -392,7 +353,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, StorePtr->getType()->getPointerAddressSpace() == 0 && (PatternValue = getMemSetPatternValue(StoredVal, DL))) { // It looks like we can use PatternValue! - ForMemsetPattern = true; + ForMemset = true; return true; } @@ -400,7 +361,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, if (HasMemcpy) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - APInt Stride = getStoreStride(StoreEv); + unsigned Stride = getStoreStride(StoreEv); unsigned StoreSize = getStoreSizeInBytes(SI, DL); if (StoreSize != Stride && StoreSize != -Stride) return false; @@ -432,7 +393,6 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, void LoopIdiomRecognize::collectStores(BasicBlock *BB) { StoreRefsForMemset.clear(); - StoreRefsForMemsetPattern.clear(); StoreRefsForMemcpy.clear(); for (Instruction &I : *BB) { StoreInst *SI = dyn_cast<StoreInst>(&I); @@ -440,22 +400,15 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) { continue; bool ForMemset = false; - bool ForMemsetPattern = false; bool ForMemcpy = false; // Make sure this is a strided store with a constant stride. - if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy)) + if (!isLegalStore(SI, ForMemset, ForMemcpy)) continue; // Save the store locations. - if (ForMemset) { - // Find the base pointer. - Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); - StoreRefsForMemset[Ptr].push_back(SI); - } else if (ForMemsetPattern) { - // Find the base pointer. - Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); - StoreRefsForMemsetPattern[Ptr].push_back(SI); - } else if (ForMemcpy) + if (ForMemset) + StoreRefsForMemset.push_back(SI); + else if (ForMemcpy) StoreRefsForMemcpy.push_back(SI); } } @@ -477,14 +430,9 @@ bool LoopIdiomRecognize::runOnLoopBlock( // Look for store instructions, which may be optimized to memset/memcpy. collectStores(BB); - // Look for a single store or sets of stores with a common base, which can be - // optimized into a memset (memset_pattern). The latter most commonly happens - // with structs and handunrolled loops. - for (auto &SL : StoreRefsForMemset) - MadeChange |= processLoopStores(SL.second, BECount, true); - - for (auto &SL : StoreRefsForMemsetPattern) - MadeChange |= processLoopStores(SL.second, BECount, false); + // Look for a single store which can be optimized into a memset. + for (auto &SI : StoreRefsForMemset) + MadeChange |= processLoopStore(SI, BECount); // Optimize the store into a memcpy, if it feeds an similarly strided load. for (auto &SI : StoreRefsForMemcpy) @@ -510,144 +458,26 @@ bool LoopIdiomRecognize::runOnLoopBlock( return MadeChange; } -/// processLoopStores - See if this store(s) can be promoted to a memset. -bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, - const SCEV *BECount, - bool ForMemset) { - // Try to find consecutive stores that can be transformed into memsets. - SetVector<StoreInst *> Heads, Tails; - SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; - - // Do a quadratic search on all of the given stores and find - // all of the pairs of stores that follow each other. - SmallVector<unsigned, 16> IndexQueue; - for (unsigned i = 0, e = SL.size(); i < e; ++i) { - assert(SL[i]->isSimple() && "Expected only non-volatile stores."); - - Value *FirstStoredVal = SL[i]->getValueOperand(); - Value *FirstStorePtr = SL[i]->getPointerOperand(); - const SCEVAddRecExpr *FirstStoreEv = - cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr)); - APInt FirstStride = getStoreStride(FirstStoreEv); - unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL); - - // See if we can optimize just this store in isolation. - if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { - Heads.insert(SL[i]); - continue; - } - - Value *FirstSplatValue = nullptr; - Constant *FirstPatternValue = nullptr; - - if (ForMemset) - FirstSplatValue = isBytewiseValue(FirstStoredVal); - else - FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL); - - assert((FirstSplatValue || FirstPatternValue) && - "Expected either splat value or pattern value."); - - IndexQueue.clear(); - // If a store has multiple consecutive store candidates, search Stores - // array according to the sequence: from i+1 to e, then from i-1 to 0. - // This is because usually pairing with immediate succeeding or preceding - // candidate create the best chance to find memset opportunity. - unsigned j = 0; - for (j = i + 1; j < e; ++j) - IndexQueue.push_back(j); - for (j = i; j > 0; --j) - IndexQueue.push_back(j - 1); - - for (auto &k : IndexQueue) { - assert(SL[k]->isSimple() && "Expected only non-volatile stores."); - Value *SecondStorePtr = SL[k]->getPointerOperand(); - const SCEVAddRecExpr *SecondStoreEv = - cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr)); - APInt SecondStride = getStoreStride(SecondStoreEv); - - if (FirstStride != SecondStride) - continue; - - Value *SecondStoredVal = SL[k]->getValueOperand(); - Value *SecondSplatValue = nullptr; - Constant *SecondPatternValue = nullptr; - - if (ForMemset) - SecondSplatValue = isBytewiseValue(SecondStoredVal); - else - SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL); - - assert((SecondSplatValue || SecondPatternValue) && - "Expected either splat value or pattern value."); - - if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) { - if (ForMemset) { - if (FirstSplatValue != SecondSplatValue) - continue; - } else { - if (FirstPatternValue != SecondPatternValue) - continue; - } - Tails.insert(SL[k]); - Heads.insert(SL[i]); - ConsecutiveChain[SL[i]] = SL[k]; - break; - } - } - } - - // We may run into multiple chains that merge into a single chain. We mark the - // stores that we transformed so that we don't visit the same store twice. - SmallPtrSet<Value *, 16> TransformedStores; - bool Changed = false; - - // For stores that start but don't end a link in the chain: - for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); - it != e; ++it) { - if (Tails.count(*it)) - continue; - - // We found a store instr that starts a chain. Now follow the chain and try - // to transform it. - SmallPtrSet<Instruction *, 8> AdjacentStores; - StoreInst *I = *it; - - StoreInst *HeadStore = I; - unsigned StoreSize = 0; - - // Collect the chain into a list. - while (Tails.count(I) || Heads.count(I)) { - if (TransformedStores.count(I)) - break; - AdjacentStores.insert(I); - - StoreSize += getStoreSizeInBytes(I, DL); - // Move to the next value in the chain. - I = ConsecutiveChain[I]; - } - - Value *StoredVal = HeadStore->getValueOperand(); - Value *StorePtr = HeadStore->getPointerOperand(); - const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - APInt Stride = getStoreStride(StoreEv); +/// processLoopStore - See if this store can be promoted to a memset. +bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { + assert(SI->isSimple() && "Expected only non-volatile stores."); - // Check to see if the stride matches the size of the stores. If so, then - // we know that every byte is touched in the loop. - if (StoreSize != Stride && StoreSize != -Stride) - continue; + Value *StoredVal = SI->getValueOperand(); + Value *StorePtr = SI->getPointerOperand(); - bool NegStride = StoreSize == -Stride; + // Check to see if the stride matches the size of the store. If so, then we + // know that every byte is touched in the loop. + const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + if (StoreSize != Stride && StoreSize != -Stride) + return false; - if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(), - StoredVal, HeadStore, AdjacentStores, StoreEv, - BECount, NegStride)) { - TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end()); - Changed = true; - } - } + bool NegStride = StoreSize == -Stride; - return Changed; + // See if we can optimize just this store in isolation. + return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), + StoredVal, SI, StoreEv, BECount, NegStride); } /// processLoopMemSet - See if this memset can be promoted to a large memset. @@ -658,7 +488,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, return false; // If we're not allowed to hack on memset, we fail. - if (!HasMemset) + if (!TLI->has(LibFunc::memset)) return false; Value *Pointer = MSI->getDest(); @@ -677,12 +507,11 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, // Check to see if the stride matches the size of the memset. If so, then we // know that every byte is touched in the loop. - const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - if (!ConstStride) - return false; + const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - APInt Stride = ConstStride->getAPInt(); - if (SizeInBytes != Stride && SizeInBytes != -Stride) + // TODO: Could also handle negative stride here someday, that will require the + // validity check in mayLoopAccessLocation to be updated though. + if (!Stride || MSI->getLength() != Stride->getValue()) return false; // Verify that the memset value is loop invariant. If not, we can't promote @@ -691,22 +520,18 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue)) return false; - SmallPtrSet<Instruction *, 1> MSIs; - MSIs.insert(MSI); - bool NegStride = SizeInBytes == -Stride; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), SplatValue, MSI, MSIs, Ev, - BECount, NegStride, /*IsLoopMemset=*/true); + MSI->getAlignment(), SplatValue, MSI, Ev, + BECount, /*NegStride=*/false); } /// mayLoopAccessLocation - Return true if the specified loop might access the /// specified pointer location, which is a loop-strided access. The 'Access' /// argument specifies what the verboten forms of access are (read or write). -static bool -mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, - const SCEV *BECount, unsigned StoreSize, - AliasAnalysis &AA, - SmallPtrSetImpl<Instruction *> &IgnoredStores) { +static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, + const SCEV *BECount, unsigned StoreSize, + AliasAnalysis &AA, + Instruction *IgnoredStore) { // Get the location that may be stored across the loop. Since the access is // strided positively through memory, we say that the modified location starts // at the pointer and has infinite size. @@ -725,9 +550,8 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; ++BI) - for (Instruction &I : **BI) - if (IgnoredStores.count(&I) == 0 && - (AA.getModRefInfo(&I, StoreLoc) & Access)) + for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) + if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access)) return true; return false; @@ -750,9 +574,8 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, /// transform this into a memset or memset_pattern in the loop preheader, do so. bool LoopIdiomRecognize::processLoopStridedStore( Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, - Value *StoredVal, Instruction *TheStore, - SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, - const SCEV *BECount, bool NegStride, bool IsLoopMemset) { + Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount, bool NegStride) { Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; @@ -786,16 +609,13 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *BasePtr = Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator()); if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, - *AA, Stores)) { + *AA, TheStore)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); return false; } - if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset)) - return false; - // Okay, everything looks good, insert the memset. // The # stored bytes is (BECount+1)*Size. Expand the trip count out to @@ -824,14 +644,13 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr); - inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern"); - GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. + GV->setUnnamedAddr(true); // Ok to merge these. GV->setAlignment(16); Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); @@ -843,8 +662,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - for (auto *I : Stores) - deleteDeadInstruction(I); + deleteDeadInstruction(TheStore, TLI); ++NumMemSet; return true; } @@ -858,7 +676,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - APInt Stride = getStoreStride(StoreEv); + unsigned Stride = getStoreStride(StoreEv); unsigned StoreSize = getStoreSizeInBytes(SI, DL); bool NegStride = StoreSize == -Stride; @@ -896,10 +714,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StoreBasePtr = Expander.expandCodeFor( StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); - SmallPtrSet<Instruction *, 1> Stores; - Stores.insert(SI); if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, - StoreSize, *AA, Stores)) { + StoreSize, *AA, SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); @@ -919,7 +735,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, - *AA, Stores)) { + *AA, SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); @@ -927,9 +743,6 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, return false; } - if (avoidLIRForMultiBlockLoop()) - return false; - // Okay, everything is safe, we can transform this! // The # stored bytes is (BECount+1)*Size. Expand the trip count out to @@ -956,28 +769,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(SI); + deleteDeadInstruction(SI, TLI); ++NumMemCpy; return true; } -// When compiling for codesize we avoid idiom recognition for a multi-block loop -// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop. -// -bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, - bool IsLoopMemset) { - if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) { - if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) { - DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() - << " : LIR " << (IsMemset ? "Memset" : "Memcpy") - << " avoided: multi-block top-level loop\n"); - return true; - } - } - - return false; -} - bool LoopIdiomRecognize::runOnNoncountableLoop() { return recognizePopcount(); } @@ -985,7 +781,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { /// Check if the given conditional branch is based on the comparison between /// a variable and zero, and if the variable is non-zero, the control yields to /// the loop entry. If the branch matches the behavior, the variable involved -/// in the comparison is returned. This function will be called to see if the +/// in the comparion is returned. This function will be called to see if the /// precondition and postcondition of the loop are in desirable form. static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { if (!BI || !BI->isConditional()) @@ -1169,7 +965,9 @@ bool LoopIdiomRecognize::recognizePopcount() { // It should have a preheader containing nothing but an unconditional branch. BasicBlock *PH = CurLoop->getLoopPreheader(); - if (!PH || &PH->front() != PH->getTerminator()) + if (!PH) + return false; + if (&PH->front() != PH->getTerminator()) return false; auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator()); if (!EntryBI || EntryBI->isConditional()) @@ -1195,7 +993,7 @@ bool LoopIdiomRecognize::recognizePopcount() { } static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, - const DebugLoc &DL) { + DebugLoc DL) { Value *Ops[] = {Val}; Type *Tys[] = {Val->getType()}; |