[llvm-branch-commits] [llvm-branch] r89119 - /llvm/branches/Apple/Leela/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Evan Cheng
evan.cheng at apple.com
Tue Nov 17 10:18:03 PST 2009
Author: evancheng
Date: Tue Nov 17 12:18:02 2009
New Revision: 89119
URL: http://llvm.org/viewvc/llvm-project?rev=89119&view=rev
Log:
Merge 89116.
Modified:
llvm/branches/Apple/Leela/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Modified: llvm/branches/Apple/Leela/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Leela/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=89119&r1=89118&r2=89119&view=diff
==============================================================================
--- llvm/branches/Apple/Leela/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/Apple/Leela/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Nov 17 12:18:02 2009
@@ -108,7 +108,7 @@
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {
}
@@ -134,7 +134,7 @@
private:
void OptimizeIndvars(Loop *L);
- /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
@@ -225,11 +225,11 @@
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
-
+
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
DeadInsts.pop_back();
-
+
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -240,14 +240,14 @@
DeadInsts.push_back(U);
}
}
-
+
I->eraseFromParent();
Changed = true;
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
@@ -273,7 +273,7 @@
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
// need this when it is.
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
@@ -345,7 +345,7 @@
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
const SCEV *Base;
-
+
/// Inst - The instruction using the induction variable.
Instruction *Inst;
@@ -369,11 +369,11 @@
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
-
+
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
@@ -384,8 +384,8 @@
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
SmallVectorImpl<WeakVH> &DeadInsts);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -400,7 +400,7 @@
errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -410,10 +410,10 @@
// want to insert this expression before the user, we'd rather pull it out as
// many loops as possible.
Instruction *BaseInsertPt = IP;
-
+
// Figure out the most-nested loop that IP is in.
Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
+
// If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
// the preheader of the outer-most loop where NewBase is not loop invariant.
if (L->contains(IP->getParent()))
@@ -421,7 +421,7 @@
BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
InsertLoop = InsertLoop->getParentLoop();
}
-
+
Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
const SCEV *NewValSCEV = SE->getUnknown(Base);
@@ -447,7 +447,7 @@
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
-
+
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
@@ -459,7 +459,7 @@
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
+ // artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
@@ -494,7 +494,7 @@
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
+ // which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
@@ -590,11 +590,11 @@
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
+
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
@@ -612,7 +612,7 @@
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
+
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
@@ -635,11 +635,11 @@
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
+
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
@@ -658,7 +658,7 @@
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
+
if (Start != SARE->getStart()) {
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
@@ -674,8 +674,8 @@
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
+
+ // If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
@@ -684,7 +684,7 @@
if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
-
+
// Update what is left of 'Val'.
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
return;
@@ -732,7 +732,7 @@
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
-
+
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
@@ -742,7 +742,7 @@
}
}
-// This is logically local to the following function, but C++ says we have
+// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
@@ -780,7 +780,7 @@
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
+
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
SmallVector<const SCEV *, 16> UniqueSubExprs;
@@ -797,7 +797,7 @@
if (!L->contains(Uses[i].Inst->getParent()))
continue;
NumUsesInsideLoop++;
-
+
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
@@ -829,13 +829,13 @@
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
- else
+ else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
@@ -867,13 +867,13 @@
// If we found no CSE's, return now.
if (Result == Zero) return Result;
-
+
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
@@ -900,7 +900,7 @@
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
-
+
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
Uses[i].Base = Zero;
@@ -908,11 +908,11 @@
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
-
+
return Result;
}
-/// ValidScale - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
@@ -929,7 +929,7 @@
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
-
+
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
@@ -1001,13 +1001,13 @@
/// reuse is possible. Factors can be negative on same targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (StrideNoReuse.count(Stride))
@@ -1017,7 +1017,7 @@
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
@@ -1075,7 +1075,7 @@
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
@@ -1095,9 +1095,9 @@
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
+ if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
@@ -1127,11 +1127,11 @@
static bool isNonConstantNegative(const SCEV *const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
-
+
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
-
+
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
@@ -1168,7 +1168,7 @@
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
+ // possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
@@ -1188,11 +1188,11 @@
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
- UsersToProcess[i].Base =
+ UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
@@ -1206,11 +1206,11 @@
if (isAddress)
HasAddress = true;
-
+
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
-
+
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
@@ -1221,7 +1221,7 @@
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
-
+
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
@@ -1611,7 +1611,7 @@
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
+ SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
@@ -1629,7 +1629,7 @@
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
+ // target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
@@ -1672,7 +1672,7 @@
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
@@ -1746,7 +1746,7 @@
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
@@ -1838,7 +1838,7 @@
}
}
return false;
-}
+}
namespace {
// Constant strides come first which in turns are sorted by their absolute
@@ -2255,7 +2255,7 @@
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -2274,13 +2274,13 @@
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
- for (unsigned i = 0; i < n; ++i)
+ for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
+ for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
@@ -2302,7 +2302,7 @@
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
+ if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
@@ -2314,12 +2314,12 @@
Entry = 1;
Latch = 0;
}
-
+
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- BinaryOperator *Incr =
+ BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
@@ -2346,7 +2346,7 @@
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
+ BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
@@ -2414,7 +2414,7 @@
/// conditional branch or it's and / or with other conditions before being used
/// as the condition.
static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
- BasicBlock *CondBB = Cond->getParent();
+ BasicBlock *CondBB = Cond->getParent();
if (!L->isLoopExiting(CondBB))
return false;
BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
@@ -2448,7 +2448,7 @@
return false;
if (!isUsedByExitBranch(Cond, L))
return false;
-
+
Value *CondOp0 = Cond->getOperand(0);
const SCEV *IV = SE->getSCEV(CondOp0);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
@@ -2479,109 +2479,122 @@
return true;
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitingBlock = L->getExitingBlock();
-
- if (!ExitingBlock)
- // Multiple exits, just look at the exit in the latch block if there is one.
- ExitingBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
- return;
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
- return;
+ bool LatchExit = L->isLoopExiting(LatchBlock);
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
- return;
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative, termination condition could be an 'or' etc..
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
- bool UsePostInc = true;
- if (ExitingBlock != LatchBlock) {
- if (Cond->hasOneUse()) {
- // See below, we don't want the condition to be cloned.
-
- // If exiting block is the latch block, we know it's safe and profitable
- // to transform the icmp to use post-inc iv. Otherwise do so only if it
- // would not reuse another iv and its iv would be reused by other uses.
- // We are optimizing for the case where the icmp is the only use of the
- // iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue()) {
- UsePostInc = false;
- break;
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEV *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ continue;
+
+ // If the latch block is exiting and it's not a single block loop, it's
+ // not safe to use postinc iv in other exiting blocks. FIXME: overly
+ // conservative? How about icmp stride optimization?
+ bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
+ if (UsePostInc && ExitingBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ UsePostInc = false;
+ else {
+ // If exiting block is the latch block, we know it's safe and profitable
+ // to transform the icmp to use post-inc iv. Otherwise do so only if it
+ // would not reuse another iv and its iv would be reused by other uses.
+ // We are optimizing for the case where the icmp is the only use of the
+ // iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue()) {
+ UsePostInc = false;
+ break;
+ }
}
}
+
+ // If iv for the stride might be shared and any of the users use pre-inc
+ // iv might be used, then it's not safe to use post-inc iv.
+ if (UsePostInc &&
+ isa<SCEVConstant>(CondStride) &&
+ StrideMightBeShared(CondStride, L, true))
+ UsePostInc = false;
+ }
+
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride. However, avoid rewriting the compare instruction
+ // with an iv of new stride if it's likely the new stride uses will be
+ // rewritten using the stride of the compare instruction.
+ if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
+ // If the condition stride is a constant and it's the only use, we might
+ // want to optimize it first by turning it to count toward zero.
+ if (!StrideMightBeShared(CondStride, L, false) &&
+ !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
}
- // If iv for the stride might be shared and any of the users use pre-inc iv
- // might be used, then it's not safe to use post-inc iv.
- if (UsePostInc &&
- isa<SCEVConstant>(CondStride) &&
- StrideMightBeShared(CondStride, L, true))
- UsePostInc = false;
- }
-
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride. However, avoid rewriting the compare instruction with
- // an iv of new stride if it's likely the new stride uses will be rewritten
- // using the stride of the compare instruction.
- if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
- // If the condition stride is a constant and it's the only use, we might
- // want to optimize it first by turning it to count toward zero.
- if (!StrideMightBeShared(CondStride, L, false) &&
- !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
- }
+ if (!UsePostInc)
+ continue;
- if (!UsePostInc)
- return;
+ DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the latch block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
- } else {
- // Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- LatchBlock->getInstList().insert(TermBr, Cond);
-
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the
+ // loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ }
}
- }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- ++NumLoopCond;
+ ++NumLoopCond;
+ }
}
bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
@@ -2638,7 +2651,7 @@
Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
+ // original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
@@ -2728,7 +2741,6 @@
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
-
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
More information about the llvm-branch-commits
mailing list