[llvm] r268147 - Reroll loops with multiple IV and negative step part 3
Lawrence Hu via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 29 17:51:22 PDT 2016
Author: lawrence
Date: Fri Apr 29 19:51:22 2016
New Revision: 268147
URL: http://llvm.org/viewvc/llvm-project?rev=268147&view=rev
Log:
Reroll loops with multiple IV and negative step part 3
support multiple induction variables
This patch enable loop reroll for the following case:
for(int i=0; i<N; i += 2) {
S += *a++;
S += *a++;
};
Differential Revision: http://reviews.llvm.org/D16550
Added:
llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp?rev=268147&r1=268146&r2=268147&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp Fri Apr 29 19:51:22 2016
@@ -163,6 +163,9 @@ namespace {
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> IVToIncMap;
+ // For loop with multiple induction variable, remember the one used only to
+ // control the loop.
+ Instruction *LoopControlIV;
// A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
@@ -350,9 +353,11 @@ namespace {
ScalarEvolution *SE, AliasAnalysis *AA,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA,
- DenseMap<Instruction *, int64_t> &IncrMap)
+ DenseMap<Instruction *, int64_t> &IncrMap,
+ Instruction *LoopCtrlIV)
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
- PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
+ PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
+ LoopControlIV(LoopCtrlIV) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -391,6 +396,7 @@ namespace {
UsesTy::iterator Start,
UsesTy::iterator End);
void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
+ void updateNonLoopCtrlIncr();
LoopReroll *Parent;
@@ -421,8 +427,18 @@ namespace {
UsesTy Uses;
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> &IVToIncMap;
+ Instruction *LoopControlIV;
};
+ // Check if it is a compare-like instruction whose user is a branch
+ bool isCompareUsedByBranch(Instruction *I) {
+ auto *TI = I->getParent()->getTerminator();
+ if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
+ return false;
+ return I->hasOneUse() && TI->getOperand(0) == I;
+ };
+
+ bool isLoopControlIV(Loop *L, Instruction *IV);
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
void collectPossibleReductions(Loop *L,
ReductionTracker &Reductions);
@@ -494,6 +510,60 @@ static const SCEVConstant *getIncrmentFa
return CIncSCEV;
}
+// Check if an IV is only used to control the loop. There are two cases:
+// 1. It only has one use which is loop increment, and the increment is only
+// used by comparison and the PHI, and the comparison is only used by branch.
+// 2. It is used by loop increment and the comparison, the loop increment is
+// only used by the PHI, and the comparison is used only by the branch.
+bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
+
+ unsigned IVUses = IV->getNumUses();
+ if (IVUses != 2 && IVUses != 1)
+ return false;
+
+ for (auto *User : IV->users()) {
+ int32_t IncOrCmpUses = User->getNumUses();
+ bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
+
+ // User can only have one or two uses.
+ if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
+ return false;
+
+ // Case 1
+ if (IVUses == 1) {
+ // The only user must be the loop increment.
+ // The loop increment must have two uses.
+ if (IsCompInst || IncOrCmpUses != 2)
+ return false;
+ }
+
+ // Case 2
+ if (IVUses == 2 && IncOrCmpUses != 1)
+ return false;
+
+ // The users of the IV must be a binary operation or a comparison
+ if (auto *BO = dyn_cast<BinaryOperator>(User)) {
+ if (BO->getOpcode() == Instruction::Add) {
+ // Loop Increment
+ // User of Loop Increment should be either PHI or CMP
+ for (auto *UU : User->users()) {
+ if (PHINode *PN = dyn_cast<PHINode>(UU)) {
+ if (PN != IV)
+ return false;
+ }
+ // Must be a CMP
+ else if (!isCompareUsedByBranch(dyn_cast<Instruction>(UU)))
+ return false;
+ }
+ } else
+ return false;
+ // Compare : can only have one use, and must be branch
+ } else if (!IsCompInst)
+ return false;
+ }
+ return true;
+}
+
// Collect the list of loop induction variables with respect to which it might
// be possible to reroll the loop.
void LoopReroll::collectPossibleIVs(Loop *L,
@@ -525,7 +595,14 @@ void LoopReroll::collectPossibleIVs(Loop
IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
<< "\n");
- PossibleIVs.push_back(&*I);
+
+ if (isLoopControlIV(L, &*I)) {
+ assert(!LoopControlIV && "Found two loop control only IV");
+ LoopControlIV = &(*I);
+ DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
+ << *PHISCEV << "\n");
+ } else
+ PossibleIVs.push_back(&*I);
}
}
}
@@ -1072,6 +1149,28 @@ bool LoopReroll::DAGRootTracker::validat
Uses[I].set(IL_All);
}
+ // Make sure we mark loop-control-only PHIs as used in all iterations. See
+ // comment above LoopReroll::isLoopControlIV for more information.
+ BasicBlock *Header = L->getHeader();
+ if (LoopControlIV && LoopControlIV != IV) {
+ for (auto *U : LoopControlIV->users()) {
+ Instruction *IVUser = dyn_cast<Instruction>(U);
+ // IVUser could be loop increment or compare
+ Uses[IVUser].set(IL_All);
+ for (auto *UU : IVUser->users()) {
+ Instruction *UUser = dyn_cast<Instruction>(UU);
+ // UUser could be compare, PHI or branch
+ Uses[UUser].set(IL_All);
+ // Is UUser a compare instruction?
+ if (UU->hasOneUse()) {
+ Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
+ if (BI == cast<BranchInst>(Header->getTerminator()))
+ Uses[BI].set(IL_All);
+ }
+ }
+ }
+ }
+
// Make sure all instructions in the loop are in one and only one
// set.
for (auto &KV : Uses) {
@@ -1314,25 +1413,65 @@ void LoopReroll::DAGRootTracker::replace
++J;
}
- // We need to create a new induction variable for each different BaseInst.
- for (auto &DRS : RootSets)
- // Insert the new induction variable.
- replaceIV(DRS.BaseInst, IV, IterCount);
+ bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
+
+ if (HasTwoIVs) {
+ updateNonLoopCtrlIncr();
+ replaceIV(LoopControlIV, LoopControlIV, IterCount);
+ } else
+ // We need to create a new induction variable for each different BaseInst.
+ for (auto &DRS : RootSets)
+ // Insert the new induction variable.
+ replaceIV(DRS.BaseInst, IV, IterCount);
SimplifyInstructionsInBlock(Header, TLI);
DeleteDeadPHIs(Header, TLI);
}
+// For non-loop-control IVs, we only need to update the last increment
+// with right amount, then we are done.
+void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
+ const SCEV *NewInc = nullptr;
+ for (auto *LoopInc : LoopIncs) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
+ const SCEVConstant *COp = nullptr;
+ if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
+ COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
+ } else {
+ COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
+ if (!COp)
+ COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
+ }
+
+ assert(COp && "Didn't find constant operand of LoopInc!\n");
+
+ const APInt &AInt = COp->getValue()->getValue();
+ const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
+ if (AInt.isNegative()) {
+ NewInc = SE->getNegativeSCEV(COp);
+ NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
+ NewInc = SE->getNegativeSCEV(NewInc);
+ } else
+ NewInc = SE->getUDivExpr(COp, ScaleSCEV);
+
+ LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
+ }
+}
+
void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
Instruction *InstIV,
const SCEV *IterCount) {
BasicBlock *Header = L->getHeader();
int64_t Inc = IVToIncMap[InstIV];
- bool Negative = Inc < 0;
+ bool NeedNewIV = InstIV == LoopControlIV;
+ bool Negative = !NeedNewIV && Inc < 0;
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
const SCEV *Start = RealIVSCEV->getStart();
+ if (NeedNewIV)
+ Start = SE->getConstant(Start->getType(), 0);
+
const SCEV *SizeOfExpr = nullptr;
const SCEV *IncrExpr =
SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
@@ -1360,6 +1499,12 @@ void LoopReroll::DAGRootTracker::replace
if (Uses[BI].find_first() == IL_All) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+ if (NeedNewIV)
+ ICSCEV = SE->getMulExpr(IterCount,
+ SE->getConstant(IterCount->getType(), Scale));
+ else
+ ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+
// Iteration count SCEV minus or plus 1
const SCEV *MinusPlus1SCEV =
SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
@@ -1514,7 +1659,7 @@ bool LoopReroll::reroll(Instruction *IV,
const SCEV *IterCount,
ReductionTracker &Reductions) {
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
- IVToIncMap);
+ IVToIncMap, LoopControlIV);
if (!DAGRoots.findRoots())
return false;
@@ -1566,6 +1711,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPa
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
IVToIncMap.clear();
+ LoopControlIV = nullptr;
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
Added: llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll?rev=268147&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll Fri Apr 29 19:51:22 2016
@@ -0,0 +1,134 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+declare i32 @goo(i32, i32)
+
+ at buf = external global i8*
+ at aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
+
+define i32 @test1(i32 %len) {
+entry:
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
+;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
+;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
+;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
+;CHECK-NEXT: %add = add i64 %conv, %sum44.020
+;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
+;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1
+;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body
+
+ %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
+ %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+ %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
+ %0 = load i8, i8* %buf.021, align 1
+ %conv = zext i8 %0 to i64
+ %add = add i64 %conv, %sum44.020
+ %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
+ %1 = load i8, i8* %arrayidx1, align 1
+ %conv2 = zext i8 %1 to i64
+ %add3 = add i64 %add, %conv2
+ %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
+ %2 = load i8, i8* %arrayidx4, align 1
+ %conv5 = zext i8 %2 to i64
+ %add6 = add i64 %add3, %conv5
+ %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
+ %3 = load i8, i8* %arrayidx7, align 1
+ %conv8 = zext i8 %3 to i64
+ %add9 = add i64 %add6, %conv8
+ %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
+ %dec = add nsw i32 %dec22, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body
+ %conv11 = trunc i64 %add9 to i32
+ %call = tail call i32 @goo(i32 0, i32 %conv11)
+ unreachable
+}
+
+define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
+entry:
+ %cmp.9 = icmp sgt i32 %N, 0
+ br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
+ ret i32 %S.addr.0.lcssa
+
+for.body:
+;CHECK-LABEL: for.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
+;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
+;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
+;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4
+;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011
+;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
+;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
+;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+
+ %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
+ %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
+ %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
+ %incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
+ %0 = load i32, i32* %a.addr.010, align 4
+ %add = add nsw i32 %0, %S.addr.011
+ %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
+ %1 = load i32, i32* %incdec.ptr, align 4
+ %add2 = add nsw i32 %add, %1
+ %add3 = add nsw i32 %i.012, 2
+ %cmp = icmp slt i32 %add3, %N
+ br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
+}
+
+define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
+entry:
+ %cmp10 = icmp sgt i32 %len, 1
+ br i1 %cmp10, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
+;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
+;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4
+;CHECK-NEXT: %add = add nsw i32 %4, %S.012
+;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
+;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
+;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
+
+ %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
+ %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
+ %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+ %0 = load i32, i32* %buf.addr.011, align 4
+ %add = add nsw i32 %0, %S.012
+ %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %add2 = add nsw i32 %add, %1
+ %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
+ %sub = add nsw i32 %i.013, -2
+ %cmp = icmp sgt i32 %sub, 1
+ br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
+ ret i32 %S.0.lcssa
+}
+
More information about the llvm-commits
mailing list