[llvm-commits] [llvm] r132038 - in /llvm/trunk: lib/Transforms/Scalar/IndVarSimplify.cpp test/Transforms/IndVarSimplify/elim-extend.ll test/Transforms/IndVarSimplify/no-iv-rewrite.ll
Andrew Trick
atrick at apple.com
Tue May 24 21:42:22 PDT 2011
Author: atrick
Date: Tue May 24 23:42:22 2011
New Revision: 132038
URL: http://llvm.org/viewvc/llvm-project?rev=132038&view=rev
Log:
indvars: fixed IV cloning in -disable-iv-rewrite mode with associated
cleanup and overdue test cases.
Added:
llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll
llvm/trunk/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=132038&r1=132037&r2=132038&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Tue May 24 23:42:22 2011
@@ -67,6 +67,9 @@
STATISTIC(NumInserted, "Number of canonical indvars added");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
// and referenced here.
@@ -117,9 +120,6 @@
PHINode *IVPhi);
void RewriteNonIntegerIVs(Loop *L);
- bool canExpandBackedgeTakenCount(Loop *L,
- const SCEV *BackedgeTakenCount);
-
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter);
@@ -200,9 +200,8 @@
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
-bool IndVarSimplify::
-canExpandBackedgeTakenCount(Loop *L,
- const SCEV *BackedgeTakenCount) {
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
return false;
@@ -235,6 +234,36 @@
return true;
}
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
+static const Type *getBackedgeIVType(Loop *L) {
+ if (!L->getExitingBlock())
+ return 0;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return 0;
+
+ const Type *Ty = 0;
+ for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+ OI != OE; ++OI) {
+ assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+ TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+ if (!Trunc)
+ continue;
+
+ return Trunc->getSrcTy();
+ }
+ return Ty;
+}
+
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
@@ -245,7 +274,7 @@
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, BackedgeTakenCount) && "precondition");
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
// If the exiting block is not the same as the backedge block, we must compare
@@ -536,12 +565,12 @@
bool CreateWideIV(SCEVExpander &Rewriter);
protected:
- const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
-
Instruction *CloneIVUser(Instruction *NarrowUse,
Instruction *NarrowDef,
Instruction *WideDef);
+ const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+
Instruction *WidenIVUse(Instruction *NarrowUse,
Instruction *NarrowDef,
Instruction *WideDef);
@@ -594,24 +623,10 @@
}
}
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
- if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
-
- const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
- const SCEV *WideExpr = IsSigned ?
- SE->getSignExtendExpr(NarrowExpr, WideType) :
- SE->getZeroExtendExpr(NarrowExpr, WideType);
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return 0;
-
- return AddRec;
+static Value *getExtend( Value *NarrowOper, const Type *WideType,
+ bool IsSigned, IRBuilder<> &Builder) {
+ return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
+ Builder.CreateZExt(NarrowOper, WideType);
}
/// CloneIVUser - Instantiate a wide operation to replace a narrow
@@ -636,36 +651,51 @@
case Instruction::AShr:
DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
+ IRBuilder<> Builder(NarrowUse);
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know
+ // anything about the narrow operand yet so must insert a [sz]ext. It is
+ // probably loop invariant and will be folded or hoisted. If it actually
+ // comes from a widened IV, it should be removed during a future call to
+ // WidenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
+ getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+
BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
- NarrowBO->getOperand(0),
- NarrowBO->getOperand(1),
+ LHS, RHS,
NarrowBO->getName());
- IRBuilder<> Builder(NarrowUse);
Builder.Insert(WideBO);
if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
- for (unsigned i = 0; i < NarrowBO->getNumOperands(); ++i) {
- Value *NarrowOper = NarrowBO->getOperand(i);
- if (NarrowOper == NarrowDef) {
- WideBO->setOperand(i, WideDef);
- continue;
- }
- // We don't know anything about the other operand here so must insert a
- // [sz]ext. It is probably loop invariant and will be folded or
- // hoisted. If it actually comes from a widened IV, it should be removed
- // during a future call to WidenIVUse.
- IRBuilder<> Builder(NarrowUse);
- Value *Extend = IsSigned ?
- Builder.CreateSExt(NarrowOper, WideType) :
- Builder.CreateZExt(NarrowOper, WideType);
- WideBO->setOperand(i, Extend);
- }
+ return WideBO;
}
llvm_unreachable(0);
}
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening it's type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
@@ -682,9 +712,32 @@
// Our raison d'etre! Eliminate sign and zero extension.
if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
- NarrowUse->replaceAllUsesWith(WideDef);
- DeadInsts.push_back(NarrowUse);
-
+ Value *NewDef = WideDef;
+ if (NarrowUse->getType() != WideType) {
+ unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ if (CastWidth < IVWidth) {
+ // The cast isn't as wide as the IV, so insert a Trunc.
+ IRBuilder<> Builder(NarrowUse);
+ NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ }
+ else {
+ // A wider extend was hidden behind a narrower one. This may induce
+ // another round of IV widening in which the intermediate IV becomes
+ // dead. It should be very rare.
+ DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
+ << " not wide enough to subsume " << *NarrowUse << "\n");
+ NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
+ NewDef = NarrowUse;
+ }
+ }
+ if (NewDef != NarrowUse) {
+ DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
+ << " replaced by " << *WideDef << "\n");
+ ++NumElimExt;
+ NarrowUse->replaceAllUsesWith(NewDef);
+ DeadInsts.push_back(NarrowUse);
+ }
// Now that the extend is gone, expose it's uses to IVUsers for potential
// further simplification within SimplifyIVUsers.
IU->AddUsersIfInteresting(WideDef, WidePhi);
@@ -698,7 +751,7 @@
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
IRBuilder<> Builder(NarrowUse);
- Value *Trunc = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+ Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
return 0;
}
@@ -834,6 +887,7 @@
return;
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
Changed = true;
DeadInsts.push_back(ICmp);
}
@@ -888,6 +942,7 @@
IU->AddUsersIfInteresting(I, IVPhi);
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
Changed = true;
DeadInsts.push_back(Rem);
}
@@ -940,13 +995,20 @@
// a canonical induction variable should be inserted.
const Type *LargestType = 0;
bool NeedCannIV = false;
- bool ExpandBECount = canExpandBackedgeTakenCount(L, BackedgeTakenCount);
+ bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
if (ExpandBECount) {
// If we have a known trip count and a single exit block, we'll be
// rewriting the loop exit test condition below, which requires a
// canonical induction variable.
NeedCannIV = true;
const Type *Ty = BackedgeTakenCount->getType();
+ if (DisableIVRewrite) {
+ // In this mode, SimplifyIVUsers may have already widened the IV used by
+ // the backedge test and inserted a Trunc on the compare's operand. Get
+ // the wider type to avoid creating a redundant narrow IV only used by the
+ // loop test.
+ LargestType = getBackedgeIVType(L);
+ }
if (!LargestType ||
SE->getTypeSizeInBits(Ty) >
SE->getTypeSizeInBits(LargestType))
@@ -1002,7 +1064,7 @@
// using it. We can currently only handle loops with a single exit.
ICmpInst *NewICmp = 0;
if (ExpandBECount) {
- assert(canExpandBackedgeTakenCount(L, BackedgeTakenCount) &&
+ assert(canExpandBackedgeTakenCount(L, SE) &&
"canonical IV disrupted BackedgeTaken expansion");
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
Added: llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll?rev=132038&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll Tue May 24 23:42:22 2011
@@ -0,0 +1,93 @@
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Tests sign extend elimination in the inner and outer loop.
+; %outercount is straightforward to widen, besides being in an outer loop.
+; %innercount is currently blocked by lcssa, so is not widened.
+; %inneriv can be widened only after proving it has no signed-overflow
+; based on the loop test.
+define void @nestedIV(i8* %address, i32 %limit) nounwind {
+entry:
+ %limitdec = add i32 %limit, -1
+ br label %outerloop
+
+; CHECK: outerloop:
+;
+; Eliminate %ofs1 after widening outercount.
+; CHECK-NOT: sext
+; CHECK: getelementptr
+;
+; IV rewriting hoists a gep into this block. We don't like that.
+; CHECK-NOT: getelementptr
+outerloop:
+ %outercount = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ]
+ %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ]
+
+ %outercountdec = add i32 %outercount, -1
+ %ofs1 = sext i32 %outercountdec to i64
+ %adr1 = getelementptr i8* %address, i64 %ofs1
+ store i8 0, i8* %adr1
+
+ br label %innerpreheader
+
+innerpreheader:
+ %innerprecmp = icmp sgt i32 %limitdec, %innercount
+ br i1 %innerprecmp, label %innerloop, label %outermerge
+
+; CHECK: innerloop:
+;
+; Eliminate %ofs2 after widening inneriv.
+; CHECK-NOT: sext
+; CHECK: getelementptr
+;
+; FIXME: We should not increase the number of IVs in this loop.
+; sext elimination plus LFTR results in 3 final IVs.
+;
+; FIXME: eliminate %ofs3 based the loop pre/post conditions
+; even though innerpostiv is not NSW, thus sign extending innerpostiv
+; does not yield the same expression as incrementing the widened inneriv.
+innerloop:
+ %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
+ %innerpostiv = add i32 %inneriv, 1
+
+ %ofs2 = sext i32 %inneriv to i64
+ %adr2 = getelementptr i8* %address, i64 %ofs2
+ store i8 0, i8* %adr2
+
+ %ofs3 = sext i32 %innerpostiv to i64
+ %adr3 = getelementptr i8* %address, i64 %ofs3
+ store i8 0, i8* %adr3
+
+ %innercmp = icmp sgt i32 %limitdec, %innerpostiv
+ br i1 %innercmp, label %innerloop, label %innerexit
+
+innerexit:
+ %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ]
+ br label %outermerge
+
+; CHECK: outermerge:
+;
+; Eliminate %ofs4 after widening outercount
+; CHECK-NOT: sext
+; CHECK: getelementptr
+;
+; TODO: Eliminate %ofs5 after removing lcssa
+outermerge:
+ %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ]
+
+ %ofs4 = sext i32 %outercount to i64
+ %adr4 = getelementptr i8* %address, i64 %ofs4
+ store i8 0, i8* %adr3
+
+ %ofs5 = sext i32 %innercount.merge to i64
+ %adr5 = getelementptr i8* %address, i64 %ofs5
+ store i8 0, i8* %adr4
+
+ %outerpostcount = add i32 %outercount, 1
+ %tmp47 = icmp slt i32 %outerpostcount, %limit
+ br i1 %tmp47, label %outerloop, label %return
+
+return:
+ ret void
+}
Added: llvm/trunk/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/no-iv-rewrite.ll?rev=132038&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/no-iv-rewrite.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/no-iv-rewrite.ll Tue May 24 23:42:22 2011
@@ -0,0 +1,123 @@
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+;
+; Make sure that indvars isn't inserting canonical IVs.
+; This is kinda hard to do until linear function test replacement is removed.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i32 @sum(i32* %arr, i32 %n) nounwind {
+entry:
+ %precond = icmp slt i32 0, %n
+ br i1 %precond, label %ph, label %return
+
+ph:
+ br label %loop
+
+; CHECK: loop:
+;
+; We should only have 2 IVs.
+; CHECK: phi
+; CHECK: phi
+; CHECK-NOT: phi
+;
+; sext should be eliminated while preserving gep inboundsness.
+; CHECK-NOT: sext
+; CHECK: getelementptr inbounds
+loop:
+ %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
+ %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ]
+ %ofs = sext i32 %i.02 to i64
+ %adr = getelementptr inbounds i32* %arr, i64 %ofs
+ %val = load i32* %adr
+ %sinc = add nsw i32 %s.01, %val
+ %iinc = add nsw i32 %i.02, 1
+ %cond = icmp slt i32 %iinc, %n
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ %s.lcssa = phi i32 [ %sinc, %loop ]
+ br label %return
+
+return:
+ %s.0.lcssa = phi i32 [ %s.lcssa, %exit ], [ 0, %entry ]
+ ret i32 %s.0.lcssa
+}
+
+define i64 @suml(i32* %arr, i32 %n) nounwind {
+entry:
+ %precond = icmp slt i32 0, %n
+ br i1 %precond, label %ph, label %return
+
+ph:
+ br label %loop
+
+; CHECK: loop:
+;
+; We should only have 2 IVs.
+; CHECK: phi
+; CHECK: phi
+; CHECK-NOT: phi
+;
+; %ofs sext should be eliminated while preserving gep inboundsness.
+; CHECK-NOT: sext
+; CHECK: getelementptr inbounds
+; %vall sext should obviously not be eliminated
+; CHECK: sext
+loop:
+ %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
+ %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ]
+ %ofs = sext i32 %i.02 to i64
+ %adr = getelementptr inbounds i32* %arr, i64 %ofs
+ %val = load i32* %adr
+ %vall = sext i32 %val to i64
+ %sinc = add nsw i64 %s.01, %vall
+ %iinc = add nsw i32 %i.02, 1
+ %cond = icmp slt i32 %iinc, %n
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ %s.lcssa = phi i64 [ %sinc, %loop ]
+ br label %return
+
+return:
+ %s.0.lcssa = phi i64 [ %s.lcssa, %exit ], [ 0, %entry ]
+ ret i64 %s.0.lcssa
+}
+
+define void @outofbounds(i32* %first, i32* %last, i32 %idx) nounwind {
+ %precond = icmp ne i32* %first, %last
+ br i1 %precond, label %ph, label %return
+
+; CHECK: ph:
+; It's not indvars' job to perform LICM on %ofs
+; CHECK-NOT: sext
+ph:
+ br label %loop
+
+; CHECK: loop:
+;
+; Preserve exactly one pointer type IV.
+; CHECK: phi i32*
+; CHECK-NOT: phi
+;
+; Don't create any extra adds.
+; CHECK-NOT: add
+;
+; Preserve gep inboundsness, and don't factor it.
+; CHECK: getelementptr inbounds i32* %ptriv, i32 1
+; CHECK-NOT: add
+loop:
+ %ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ]
+ %ofs = sext i32 %idx to i64
+ %adr = getelementptr inbounds i32* %ptriv, i64 %ofs
+ store i32 3, i32* %adr
+ %ptrpost = getelementptr inbounds i32* %ptriv, i32 1
+ %cond = icmp ne i32* %ptrpost, %last
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
More information about the llvm-commits
mailing list