summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPascal Stumpf <pascal@cvs.openbsd.org>2016-09-03 22:47:01 +0000
committerPascal Stumpf <pascal@cvs.openbsd.org>2016-09-03 22:47:01 +0000
commita534753628335e3186437f47e6aaf535e31fe07e (patch)
tree8f73b64c4dd01e37a87244f6a020edc46c8d4d69
parent6b3631ebf5744993aff6637b366dbb5654aa8292 (diff)
Use the space freed up by sparc and zaurus to import LLVM.
ok hackroom@
-rw-r--r--gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp464
1 files changed, 131 insertions, 333 deletions
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5fec51c095d..4521640e394 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -11,12 +11,6 @@
// non-loop form. In cases that this kicks in, it can be a significant
// performance win.
//
-// If compiling for code size we avoid idiom recognition if the resulting
-// code could be larger than the code for the original loop. One way this could
-// happen is if the loop is not removable after idiom recognition due to the
-// presence of non-idiom instructions. The initial implementation of the
-// heuristics applies to idioms in multi-block loops.
-//
//===----------------------------------------------------------------------===//
//
// TODO List:
@@ -32,19 +26,21 @@
// i64 and larger types when i64 is legal and the value has few bits set. It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -59,11 +55,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
#define DEBUG_TYPE "loop-idiom"
@@ -71,15 +63,9 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
-static cl::opt<bool> UseLIRCodeSizeHeurs(
- "use-lir-code-size-heurs",
- cl::desc("Use loop idiom recognition code size heuristics when compiling"
- "with -Os/-Oz"),
- cl::init(true), cl::Hidden);
-
namespace {
-class LoopIdiomRecognize {
+class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
AliasAnalysis *AA;
DominatorTree *DT;
@@ -88,24 +74,41 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
- bool ApplyCodeSizeHeuristics;
public:
- explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE,
- TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- const DataLayout *DL)
- : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI),
- DL(DL) {}
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool runOnLoop(Loop *L);
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
private:
typedef SmallVector<StoreInst *, 8> StoreList;
- typedef MapVector<Value *, StoreList> StoreListMap;
- StoreListMap StoreRefsForMemset;
- StoreListMap StoreRefsForMemsetPattern;
+ StoreList StoreRefsForMemset;
StoreList StoreRefsForMemcpy;
bool HasMemset;
bool HasMemsetPattern;
@@ -119,21 +122,15 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
- bool &ForMemcpy);
- bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
- bool ForMemset);
+ bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment, Value *StoredVal,
- Instruction *TheStore,
- SmallPtrSetImpl<Instruction *> &Stores,
- const SCEVAddRecExpr *Ev, const SCEV *BECount,
- bool NegStride, bool IsLoopMemset = false);
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
- bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
- bool IsLoopMemset = false);
/// @}
/// \name Noncountable Loop Idiom Handling
@@ -148,70 +145,38 @@ private:
/// @}
};
-class LoopIdiomRecognizeLegacyPass : public LoopPass {
-public:
- static char ID;
- explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
- initializeLoopIdiomRecognizeLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent());
- const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
-
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
- return LIR.runOnLoop(L);
- }
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
} // End anonymous namespace.
-PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
- const auto *DL = &L.getHeader()->getModule()->getDataLayout();
-
- LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
- if (!LIR.runOnLoop(&L))
- return PreservedAnalyses::all();
-
- return getLoopPassPreservedAnalyses();
-}
-
-char LoopIdiomRecognizeLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
- "Recognize loop idioms", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
- "Recognize loop idioms", false, false)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
-Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
-static void deleteDeadInstruction(Instruction *I) {
+/// deleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void deleteDeadInstruction(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Value *, 16> Operands(I->value_op_begin(), I->value_op_end());
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
+ for (Value *Op : Operands)
+ RecursivelyDeleteTriviallyDeadInstructions(Op, TLI);
}
//===----------------------------------------------------------------------===//
@@ -220,7 +185,10 @@ static void deleteDeadInstruction(Instruction *I) {
//
//===----------------------------------------------------------------------===//
-bool LoopIdiomRecognize::runOnLoop(Loop *L) {
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
CurLoop = L;
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
@@ -232,9 +200,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
if (Name == "memset" || Name == "memcpy")
return false;
- // Determine if code size heuristics need to be applied.
- ApplyCodeSizeHeuristics =
- L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *CurLoop->getHeader()->getParent());
+ DL = &CurLoop->getHeader()->getModule()->getDataLayout();
HasMemset = TLI->has(LibFunc::memset);
HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
@@ -267,14 +240,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
<< CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
-
- // The following transforms hoist stores/memsets into the loop pre-header.
- // Give up if the loop has instructions may throw.
- LoopSafetyInfo SafetyInfo;
- computeLoopSafetyInfo(&SafetyInfo, CurLoop);
- if (SafetyInfo.MayThrow)
- return MadeChange;
-
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
@@ -293,9 +258,9 @@ static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
return (unsigned)SizeInBits >> 3;
}
-static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
+static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) {
const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
- return ConstStride->getAPInt();
+ return ConstStride->getAPInt().getZExtValue();
}
/// getMemSetPatternValue - If a strided store of the specified value is safe to
@@ -340,15 +305,11 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
}
bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
- bool &ForMemsetPattern, bool &ForMemcpy) {
+ bool &ForMemcpy) {
// Don't touch volatile stores.
if (!SI->isSimple())
return false;
- // Avoid merging nontemporal stores.
- if (SI->getMetadata(LLVMContext::MD_nontemporal))
- return false;
-
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -392,7 +353,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// It looks like we can use PatternValue!
- ForMemsetPattern = true;
+ ForMemset = true;
return true;
}
@@ -400,7 +361,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
if (HasMemcpy) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- APInt Stride = getStoreStride(StoreEv);
+ unsigned Stride = getStoreStride(StoreEv);
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
if (StoreSize != Stride && StoreSize != -Stride)
return false;
@@ -432,7 +393,6 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
StoreRefsForMemset.clear();
- StoreRefsForMemsetPattern.clear();
StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
@@ -440,22 +400,15 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
continue;
bool ForMemset = false;
- bool ForMemsetPattern = false;
bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
+ if (!isLegalStore(SI, ForMemset, ForMemcpy))
continue;
// Save the store locations.
- if (ForMemset) {
- // Find the base pointer.
- Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
- StoreRefsForMemset[Ptr].push_back(SI);
- } else if (ForMemsetPattern) {
- // Find the base pointer.
- Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
- StoreRefsForMemsetPattern[Ptr].push_back(SI);
- } else if (ForMemcpy)
+ if (ForMemset)
+ StoreRefsForMemset.push_back(SI);
+ else if (ForMemcpy)
StoreRefsForMemcpy.push_back(SI);
}
}
@@ -477,14 +430,9 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
- // Look for a single store or sets of stores with a common base, which can be
- // optimized into a memset (memset_pattern). The latter most commonly happens
- // with structs and handunrolled loops.
- for (auto &SL : StoreRefsForMemset)
- MadeChange |= processLoopStores(SL.second, BECount, true);
-
- for (auto &SL : StoreRefsForMemsetPattern)
- MadeChange |= processLoopStores(SL.second, BECount, false);
+ // Look for a single store which can be optimized into a memset.
+ for (auto &SI : StoreRefsForMemset)
+ MadeChange |= processLoopStore(SI, BECount);
// Optimize the store into a memcpy, if it feeds an similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
@@ -510,144 +458,26 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStores - See if this store(s) can be promoted to a memset.
-bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
- const SCEV *BECount,
- bool ForMemset) {
- // Try to find consecutive stores that can be transformed into memsets.
- SetVector<StoreInst *> Heads, Tails;
- SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
-
- // Do a quadratic search on all of the given stores and find
- // all of the pairs of stores that follow each other.
- SmallVector<unsigned, 16> IndexQueue;
- for (unsigned i = 0, e = SL.size(); i < e; ++i) {
- assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
-
- Value *FirstStoredVal = SL[i]->getValueOperand();
- Value *FirstStorePtr = SL[i]->getPointerOperand();
- const SCEVAddRecExpr *FirstStoreEv =
- cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
- APInt FirstStride = getStoreStride(FirstStoreEv);
- unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
-
- // See if we can optimize just this store in isolation.
- if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
- Heads.insert(SL[i]);
- continue;
- }
-
- Value *FirstSplatValue = nullptr;
- Constant *FirstPatternValue = nullptr;
-
- if (ForMemset)
- FirstSplatValue = isBytewiseValue(FirstStoredVal);
- else
- FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
-
- assert((FirstSplatValue || FirstPatternValue) &&
- "Expected either splat value or pattern value.");
-
- IndexQueue.clear();
- // If a store has multiple consecutive store candidates, search Stores
- // array according to the sequence: from i+1 to e, then from i-1 to 0.
- // This is because usually pairing with immediate succeeding or preceding
- // candidate create the best chance to find memset opportunity.
- unsigned j = 0;
- for (j = i + 1; j < e; ++j)
- IndexQueue.push_back(j);
- for (j = i; j > 0; --j)
- IndexQueue.push_back(j - 1);
-
- for (auto &k : IndexQueue) {
- assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
- Value *SecondStorePtr = SL[k]->getPointerOperand();
- const SCEVAddRecExpr *SecondStoreEv =
- cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
- APInt SecondStride = getStoreStride(SecondStoreEv);
-
- if (FirstStride != SecondStride)
- continue;
-
- Value *SecondStoredVal = SL[k]->getValueOperand();
- Value *SecondSplatValue = nullptr;
- Constant *SecondPatternValue = nullptr;
-
- if (ForMemset)
- SecondSplatValue = isBytewiseValue(SecondStoredVal);
- else
- SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
-
- assert((SecondSplatValue || SecondPatternValue) &&
- "Expected either splat value or pattern value.");
-
- if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
- if (ForMemset) {
- if (FirstSplatValue != SecondSplatValue)
- continue;
- } else {
- if (FirstPatternValue != SecondPatternValue)
- continue;
- }
- Tails.insert(SL[k]);
- Heads.insert(SL[i]);
- ConsecutiveChain[SL[i]] = SL[k];
- break;
- }
- }
- }
-
- // We may run into multiple chains that merge into a single chain. We mark the
- // stores that we transformed so that we don't visit the same store twice.
- SmallPtrSet<Value *, 16> TransformedStores;
- bool Changed = false;
-
- // For stores that start but don't end a link in the chain:
- for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
- it != e; ++it) {
- if (Tails.count(*it))
- continue;
-
- // We found a store instr that starts a chain. Now follow the chain and try
- // to transform it.
- SmallPtrSet<Instruction *, 8> AdjacentStores;
- StoreInst *I = *it;
-
- StoreInst *HeadStore = I;
- unsigned StoreSize = 0;
-
- // Collect the chain into a list.
- while (Tails.count(I) || Heads.count(I)) {
- if (TransformedStores.count(I))
- break;
- AdjacentStores.insert(I);
-
- StoreSize += getStoreSizeInBytes(I, DL);
- // Move to the next value in the chain.
- I = ConsecutiveChain[I];
- }
-
- Value *StoredVal = HeadStore->getValueOperand();
- Value *StorePtr = HeadStore->getPointerOperand();
- const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- APInt Stride = getStoreStride(StoreEv);
+/// processLoopStore - See if this store can be promoted to a memset.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ assert(SI->isSimple() && "Expected only non-volatile stores.");
- // Check to see if the stride matches the size of the stores. If so, then
- // we know that every byte is touched in the loop.
- if (StoreSize != Stride && StoreSize != -Stride)
- continue;
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
- bool NegStride = StoreSize == -Stride;
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ if (StoreSize != Stride && StoreSize != -Stride)
+ return false;
- if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
- StoredVal, HeadStore, AdjacentStores, StoreEv,
- BECount, NegStride)) {
- TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
- Changed = true;
- }
- }
+ bool NegStride = StoreSize == -Stride;
- return Changed;
+ // See if we can optimize just this store in isolation.
+ return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount, NegStride);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -658,7 +488,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
return false;
// If we're not allowed to hack on memset, we fail.
- if (!HasMemset)
+ if (!TLI->has(LibFunc::memset))
return false;
Value *Pointer = MSI->getDest();
@@ -677,12 +507,11 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
// Check to see if the stride matches the size of the memset. If so, then we
// know that every byte is touched in the loop.
- const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
- if (!ConstStride)
- return false;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
- APInt Stride = ConstStride->getAPInt();
- if (SizeInBytes != Stride && SizeInBytes != -Stride)
+ // TODO: Could also handle negative stride here someday, that will require the
+ // validity check in mayLoopAccessLocation to be updated though.
+ if (!Stride || MSI->getLength() != Stride->getValue())
return false;
// Verify that the memset value is loop invariant. If not, we can't promote
@@ -691,22 +520,18 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
return false;
- SmallPtrSet<Instruction *, 1> MSIs;
- MSIs.insert(MSI);
- bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
- BECount, NegStride, /*IsLoopMemset=*/true);
+ MSI->getAlignment(), SplatValue, MSI, Ev,
+ BECount, /*NegStride=*/false);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
-static bool
-mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
- const SCEV *BECount, unsigned StoreSize,
- AliasAnalysis &AA,
- SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -725,9 +550,8 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
- for (Instruction &I : **BI)
- if (IgnoredStores.count(&I) == 0 &&
- (AA.getModRefInfo(&I, StoreLoc) & Access))
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
return true;
return false;
@@ -750,9 +574,8 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
- Value *StoredVal, Instruction *TheStore,
- SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
+ Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride) {
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
@@ -786,16 +609,13 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
- *AA, Stores)) {
+ *AA, TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
return false;
}
- if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
- return false;
-
// Okay, everything looks good, insert the memset.
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
@@ -824,14 +644,13 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *MSP =
M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
- inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+ GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
@@ -843,8 +662,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- for (auto *I : Stores)
- deleteDeadInstruction(I);
+ deleteDeadInstruction(TheStore, TLI);
++NumMemSet;
return true;
}
@@ -858,7 +676,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *StorePtr = SI->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- APInt Stride = getStoreStride(StoreEv);
+ unsigned Stride = getStoreStride(StoreEv);
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
bool NegStride = StoreSize == -Stride;
@@ -896,10 +714,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
- SmallPtrSet<Instruction *, 1> Stores;
- Stores.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
+ StoreSize, *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@@ -919,7 +735,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
- *AA, Stores)) {
+ *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -927,9 +743,6 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
return false;
}
- if (avoidLIRForMultiBlockLoop())
- return false;
-
// Okay, everything is safe, we can transform this!
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
@@ -956,28 +769,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI);
+ deleteDeadInstruction(SI, TLI);
++NumMemCpy;
return true;
}
-// When compiling for codesize we avoid idiom recognition for a multi-block loop
-// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
-//
-bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
- bool IsLoopMemset) {
- if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
- if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
- DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
- << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
- << " avoided: multi-block top-level loop\n");
- return true;
- }
- }
-
- return false;
-}
-
bool LoopIdiomRecognize::runOnNoncountableLoop() {
return recognizePopcount();
}
@@ -985,7 +781,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
/// Check if the given conditional branch is based on the comparison between
/// a variable and zero, and if the variable is non-zero, the control yields to
/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparison is returned. This function will be called to see if the
+/// in the comparion is returned. This function will be called to see if the
/// precondition and postcondition of the loop are in desirable form.
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!BI || !BI->isConditional())
@@ -1169,7 +965,9 @@ bool LoopIdiomRecognize::recognizePopcount() {
// It should have a preheader containing nothing but an unconditional branch.
BasicBlock *PH = CurLoop->getLoopPreheader();
- if (!PH || &PH->front() != PH->getTerminator())
+ if (!PH)
+ return false;
+ if (&PH->front() != PH->getTerminator())
return false;
auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
if (!EntryBI || EntryBI->isConditional())
@@ -1195,7 +993,7 @@ bool LoopIdiomRecognize::recognizePopcount() {
}
static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
- const DebugLoc &DL) {
+ DebugLoc DL) {
Value *Ops[] = {Val};
Type *Tys[] = {Val->getType()};