[llvm] r320823 - [LICM] Allow sinking when foldable in loop
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 15 08:09:54 PST 2017
Author: junbuml
Date: Fri Dec 15 08:09:54 2017
New Revision: 320823
URL: http://llvm.org/viewvc/llvm-project?rev=320823&view=rev
Log:
[LICM] Allow sinking when foldable in loop
Summary:
Continue trying to sink an instruction if its users in the loop is foldable.
This will allow the instruction to be folded in the loop by decoupling it from
the user outside of the loop.
Reviewers: hfinkel, majnemer, davidxl, efriedma, danielcdh, bmakam, mcrosier
Reviewed By: hfinkel
Subscribers: javed.absar, bmakam, mcrosier, llvm-commits
Differential Revision: https://reviews.llvm.org/D37076
Added:
llvm/trunk/test/Transforms/LICM/sink-foldable.ll
Modified:
llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
llvm/trunk/lib/Transforms/Scalar/LICM.cpp
Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=320823&r1=320822&r2=320823&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Fri Dec 15 08:09:54 2017
@@ -436,8 +436,9 @@ bool formLCSSARecursively(Loop &L, Domin
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
- TargetLibraryInfo *, Loop *, AliasSetTracker *,
- LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
+ TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ AliasSetTracker *, LoopSafetyInfo *,
+ OptimizationRemarkEmitter *ORE);
/// \brief Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=320823&r1=320822&r2=320823&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Fri Dec 15 08:09:54 2017
@@ -90,14 +90,15 @@ static cl::opt<uint32_t> MaxNumUsesTrave
"invariance in loop using invariant start (default = 8)"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo);
+static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo,
+ TargetTransformInfo *TTI, bool &FreeInLoop);
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE);
+ OptimizationRemarkEmitter *ORE, bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -115,7 +116,8 @@ CloneInstructionInExitBlock(Instruction
namespace {
struct LoopInvariantCodeMotion {
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);
DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
@@ -159,6 +161,8 @@ struct LegacyLICMPass : public LoopPass
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
}
@@ -170,6 +174,7 @@ struct LegacyLICMPass : public LoopPass
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (EnableMSSALoopDependency)
AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
@@ -210,8 +215,8 @@ PreservedAnalyses LICMPass::run(Loop &L,
"cached at a higher level");
LoopInvariantCodeMotion LICM;
- if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE,
- true))
+ if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
+ AR.MSSA, ORE, true))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
@@ -224,6 +229,7 @@ INITIALIZE_PASS_BEGIN(LegacyLICMPass, "l
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
@@ -236,12 +242,10 @@ Pass *llvm::createLICMPass() { return ne
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
-bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
- LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI,
- ScalarEvolution *SE, MemorySSA *MSSA,
- OptimizationRemarkEmitter *ORE,
- bool DeleteAST) {
+bool LoopInvariantCodeMotion::runOnLoop(
+ Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
+ MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -266,7 +270,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST, &SafetyInfo, ORE);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
@@ -359,7 +363,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ TargetTransformInfo *TTI, Loop *CurLoop,
AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
@@ -400,12 +405,15 @@ bool llvm::sinkRegion(DomTreeNode *N, Al
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
+ bool FreeInLoop = false;
+ if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) {
- ++II;
- CurAST->deleteValue(&I);
- I.eraseFromParent();
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
+ if (!FreeInLoop) {
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ }
Changed = true;
}
}
@@ -708,13 +716,39 @@ static bool isTriviallyReplacablePHI(con
return true;
}
+/// Return true if the instruction is free in the loop.
+static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
+ const TargetTransformInfo *TTI) {
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (TTI->getUserCost(&I) != TargetTransformInfo::TCC_Free)
+ return false;
+ // For a GEP, we cannot simply use getUserCost because currently it
+ // optimistically assume that a GEP will fold into addressing mode
+ // regardless of its users.
+ const BasicBlock *BB = I.getParent();
+ for (const User *U : I.users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (CurLoop->contains(UI) &&
+ (BB != UI->getParent() ||
+ (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
+ return false;
+ }
+ return true;
+ } else
+ return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
+}
+
/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
-static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo) {
+/// We also return true if the instruction could be folded away in lowering.
+/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
+static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo,
+ TargetTransformInfo *TTI, bool &FreeInLoop) {
const auto &BlockColors = SafetyInfo->BlockColors;
+ bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
@@ -731,8 +765,13 @@ static bool isNotUsedInLoop(const Instru
return false;
}
- if (CurLoop->contains(UI))
+ if (CurLoop->contains(UI)) {
+ if (IsFree) {
+ FreeInLoop = true;
+ continue;
+ }
return false;
+ }
}
return true;
}
@@ -888,7 +927,7 @@ static void splitPredecessorsOfLoopExit(
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -900,7 +939,6 @@ static bool sink(Instruction &I, LoopInf
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumSunk;
- Changed = true;
// Iterate over users to be ready for actual sinking. Replace users via
// unrechable blocks with undef and make all user PHIs trivially replcable.
@@ -910,11 +948,12 @@ static bool sink(Instruction &I, LoopInf
Use &U = UI.getUse();
++UI;
- if (VisitedUsers.count(User))
+ if (VisitedUsers.count(User) || CurLoop->contains(User))
continue;
if (!DT->isReachableFromEntry(User->getParent())) {
U = UndefValue::get(I.getType());
+ Changed = true;
continue;
}
@@ -927,6 +966,7 @@ static bool sink(Instruction &I, LoopInf
BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {
U = UndefValue::get(I.getType());
+ Changed = true;
continue;
}
@@ -935,7 +975,7 @@ static bool sink(Instruction &I, LoopInf
continue;
if (!canSplitPredecessors(PN))
- return false;
+ return Changed;
// Split predecessors of the PHI so that we can make users trivially
// replacable.
@@ -947,6 +987,9 @@ static bool sink(Instruction &I, LoopInf
UE = I.user_end();
}
+ if (VisitedUsers.empty())
+ return Changed;
+
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -960,9 +1003,14 @@ static bool sink(Instruction &I, LoopInf
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
- while (!I.use_empty()) {
- Value::user_iterator UI = I.user_begin();
- PHINode *PN = cast<PHINode>(*UI);
+ SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
+ for (auto *UI : Users) {
+ auto *User = cast<Instruction>(UI);
+
+ if (CurLoop->contains(User))
+ continue;
+
+ PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
// The PHI must be trivially replacable.
@@ -970,6 +1018,7 @@ static bool sink(Instruction &I, LoopInf
SafetyInfo, CurLoop);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
+ Changed = true;
}
return Changed;
}
Added: llvm/trunk/test/Transforms/LICM/sink-foldable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/sink-foldable.ll?rev=320823&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LICM/sink-foldable.ll (added)
+++ llvm/trunk/test/Transforms/LICM/sink-foldable.ll Fri Dec 15 08:09:54 2017
@@ -0,0 +1,147 @@
+; RUN: opt < %s -licm -S | FileCheck %s
+target triple = "aarch64--linux-gnueabi"
+
+; CHECK-LABEL:@test1
+; CHECK-LABEL:loopexit1:
+; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
+; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1
+define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) {
+entry:
+ %cmp0 = icmp slt i32 0, %j
+ br i1 %cmp0, label %for.body.lr.ph, label %return
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
+ %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
+
+ %i0.ext = sext i32 %i0 to i64
+ %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
+ %l0 = load i8*, i8** %arrayidx0, align 8
+ %cmp1 = icmp ugt i8* %l0, %Q
+ br i1 %cmp1, label %loopexit0, label %if.end
+
+if.end: ; preds = %for.body
+ %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1
+ %l1 = load i8*, i8** %arrayidx1, align 8
+ %cmp4 = icmp ugt i8* %l1, %Q
+ %i.add = add nsw i32 %i0, 2
+ br i1 %cmp4, label %loopexit1, label %for.body
+
+loopexit0:
+ %p1 = phi i8** [%arrayidx0, %for.body]
+ br label %return
+
+loopexit1:
+ %p2 = phi i8** [%arrayidx1, %if.end]
+ br label %return
+
+return:
+ %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
+ ret i8** %retval.0
+}
+
+; CHECK-LABEL: @test2
+; CHECK-LABEL: loopexit2:
+; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ]
+; CHECK: getelementptr inbounds i8*, i8** %[[PHI]]
+define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) {
+
+entry:
+ br label %for.body
+
+for.cond:
+ %i.addr.0 = phi i32 [ %add, %if.end ]
+ %P.addr.0 = phi i8** [ %add.ptr, %if.end ]
+ %cmp = icmp slt i32 %i.addr.0, %j
+ br i1 %cmp, label %for.body, label %loopexit0
+
+for.body:
+ %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ]
+ %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ]
+
+ %idx.ext = sext i32 %i.addr to i64
+ %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext
+ %l0 = load i8*, i8** %add.ptr, align 8
+
+ %cmp1 = icmp ugt i8* %l0, %Q
+ br i1 %cmp1, label %loopexit1, label %if.end
+
+if.end:
+ %add.i = add i32 %i.addr, 1
+ %idx2.ext = sext i32 %add.i to i64
+ %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext
+ %l1 = load i8*, i8** %arrayidx2, align 8
+ %cmp2 = icmp ugt i8* %l1, %Q
+ %add = add nsw i32 %add.i, 1
+ br i1 %cmp2, label %loopexit2, label %for.cond
+
+loopexit0:
+ %p0 = phi i8** [ null, %for.cond ]
+ br label %return
+
+loopexit1:
+ %p1 = phi i8** [ %add.ptr, %for.body ]
+ br label %return
+
+loopexit2:
+ %p2 = phi i8** [ %arrayidx2, %if.end ]
+ br label %return
+
+return:
+ %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ]
+ ret i8** %retval.0
+}
+
+
+; CHECK-LABEL: @test3
+; CHECK-LABEL: loopexit1:
+; CHECK: %[[ADD:.*]] = phi i64 [ %add, %if.end ]
+; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ]
+; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32
+; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]]
+; CHECK: call void @dummy(i32 %[[TRUNC]])
+define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) {
+entry:
+ %cmp0 = icmp slt i64 0, %j
+ br i1 %cmp0, label %for.body.lr.ph, label %return
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
+ %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
+
+ %i0.ext = sext i32 %i0 to i64
+ %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
+ %l0 = load i8*, i8** %arrayidx0, align 8
+ %cmp1 = icmp ugt i8* %l0, %Q
+ br i1 %cmp1, label %loopexit0, label %if.end
+
+if.end: ; preds = %for.body
+ %add = add i64 %i0.ext, 1
+ %trunc = trunc i64 %add to i32
+ %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc
+ %l1 = load i8*, i8** %arrayidx1, align 8
+ %cmp4 = icmp ugt i8* %l1, %Q
+ %i.add = add nsw i32 %i0, 2
+ br i1 %cmp4, label %loopexit1, label %for.body
+
+loopexit0:
+ %p1 = phi i8** [%arrayidx0, %for.body]
+ br label %return
+
+loopexit1:
+ %p2 = phi i8** [%arrayidx1, %if.end]
+ call void @dummy(i32 %trunc)
+ br label %return
+
+return:
+ %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
+ ret i8** %retval.0
+}
+
+declare void @dummy(i32)
More information about the llvm-commits
mailing list