[llvm-commits] [llvm] r64407 - in /llvm/trunk: include/llvm/Analysis/ScalarEvolution.h lib/Analysis/ScalarEvolution.cpp lib/Transforms/Scalar/IndVarSimplify.cpp test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll

Thu Feb 12 14:19:27 PST 2009

Author: djg
Date: Thu Feb 12 16:19:27 2009
New Revision: 64407

URL: http://llvm.org/viewvc/llvm-project?rev=64407&view=rev
Log:
Teach IndVarSimplify to optimize code using the C "int" type for
loop induction on LP64 targets. When the induction variable is
used in addressing, IndVars is now usually able to insert a
64-bit induction variable and eliminate the sign-extending cast.
This is also useful for code using C "short" types for
induction variables on targets with 32-bit addressing.

Inserting a wider induction variable is easy; the tricky part is
determining when trunc(sext(i)) expressions are no-ops. This
requires range analysis of the loop trip count. A common case is
when the original loop iteration starts at 0 and exits when the
induction variable is signed-less-than a fixed value; this case
is now handled.

This replaces IndVarSimplify's OptimizeCanonicalIVType. It was
doing the same optimization, but it was limited to loops with
constant trip counts, because it was running after the loop
rewrite, and the information about the original induction
variable is lost by that point.

Rename ScalarEvolution's executesAtLeastOnce to
isLoopGuardedByCond, generalize it to be able to test for
ICMP_NE conditions, and move it to be a public function so that
IndVars can use it.

Added:
    llvm/trunk/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll
Modified:
    llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
    llvm/trunk/lib/Analysis/ScalarEvolution.cpp
    llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp

Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=64407&r1=64406&r2=64407&view=diff

==============================================================================

--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Thu Feb 12 16:19:27 2009
@@ -29,9 +29,7 @@
 namespace llvm {
   class APInt;
   class ConstantInt;
-  class Instruction;
   class Type;
-  class ConstantRange;
   class SCEVHandle;
   class ScalarEvolution;
 
@@ -282,6 +280,11 @@
     /// object is returned.
     SCEVHandle getSCEVAtScope(Value *V, const Loop *L) const;
 
+    /// isLoopGuardedByCond - Test whether entry to the loop is protected by
+    /// a conditional between LHS and RHS.
+    bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                             SCEV *LHS, SCEV *RHS);
+
     /// getIterationCount - If the specified loop has a predictable iteration
     /// count, return it, otherwise return a SCEVCouldNotCompute object.
     SCEVHandle getIterationCount(const Loop *L) const;

Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=64407&r1=64406&r2=64407&view=diff

==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Thu Feb 12 16:19:27 2009
@@ -1404,6 +1404,11 @@
     SCEVHandle getSCEVAtScope(SCEV *V, const Loop *L);
 
 
+    /// isLoopGuardedByCond - Test whether entry to the loop is protected by
+    /// a conditional between LHS and RHS.
+    bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                             SCEV *LHS, SCEV *RHS);
+
     /// hasLoopInvariantIterationCount - Return true if the specified loop has
     /// an analyzable loop-invariant iteration count.
     bool hasLoopInvariantIterationCount(const Loop *L);
@@ -1476,10 +1481,6 @@
     /// found.
     BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
 
-    /// executesAtLeastOnce - Test whether entry to the loop is protected by
-    /// a conditional between LHS and RHS.
-    bool executesAtLeastOnce(const Loop *L, bool isSigned, SCEV *LHS, SCEV *RHS);
-
     /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
     /// in the header of its containing loop, we know the loop executes a
     /// constant number of times, and the PHI node is just a recurrence
@@ -2726,9 +2727,10 @@
   return 0;
 }
 
-/// executesAtLeastOnce - Test whether entry to the loop is protected by
+/// isLoopGuardedByCond - Test whether entry to the loop is protected by
 /// a conditional between LHS and RHS.
-bool ScalarEvolutionsImpl::executesAtLeastOnce(const Loop *L, bool isSigned,
+bool ScalarEvolutionsImpl::isLoopGuardedByCond(const Loop *L,
+                                               ICmpInst::Predicate Pred,
                                                SCEV *LHS, SCEV *RHS) {
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *PreheaderDest = L->getHeader();
@@ -2759,26 +2761,62 @@
     else
       Cond = ICI->getInversePredicate();
 
-    switch (Cond) {
-    case ICmpInst::ICMP_UGT:
-      if (isSigned) continue;
-      std::swap(PreCondLHS, PreCondRHS);
-      Cond = ICmpInst::ICMP_ULT;
-      break;
-    case ICmpInst::ICMP_SGT:
-      if (!isSigned) continue;
-      std::swap(PreCondLHS, PreCondRHS);
-      Cond = ICmpInst::ICMP_SLT;
-      break;
-    case ICmpInst::ICMP_ULT:
-      if (isSigned) continue;
-      break;
-    case ICmpInst::ICMP_SLT:
-      if (!isSigned) continue;
-      break;
-    default:
-      continue;
-    }
+    if (Cond == Pred)
+      ; // An exact match.
+    else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+      ; // The actual condition is beyond sufficient.
+    else
+      // Check a few special cases.
+      switch (Cond) {
+      case ICmpInst::ICMP_UGT:
+        if (Pred == ICmpInst::ICMP_ULT) {
+          std::swap(PreCondLHS, PreCondRHS);
+          Cond = ICmpInst::ICMP_ULT;
+          break;
+        }
+        continue;
+      case ICmpInst::ICMP_SGT:
+        if (Pred == ICmpInst::ICMP_SLT) {
+          std::swap(PreCondLHS, PreCondRHS);
+          Cond = ICmpInst::ICMP_SLT;
+          break;
+        }
+        continue;
+      case ICmpInst::ICMP_NE:
+        // Expressions like (x >u 0) are often canonicalized to (x != 0),
+        // so check for this case by checking if the NE is comparing against
+        // a minimum or maximum constant.
+        if (!ICmpInst::isTrueWhenEqual(Pred))
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+            const APInt &A = CI->getValue();
+            switch (Pred) {
+            case ICmpInst::ICMP_SLT:
+              if (A.isMaxSignedValue()) break;
+              continue;
+            case ICmpInst::ICMP_SGT:
+              if (A.isMinSignedValue()) break;
+              continue;
+            case ICmpInst::ICMP_ULT:
+              if (A.isMaxValue()) break;
+              continue;
+            case ICmpInst::ICMP_UGT:
+              if (A.isMinValue()) break;
+              continue;
+            default:
+              continue;
+            }
+            Cond = ICmpInst::ICMP_NE;
+            // NE is symmetric but the original comparison may not be. Swap
+            // the operands if necessary so that they match below.
+            if (isa<SCEVConstant>(LHS))
+              std::swap(PreCondLHS, PreCondRHS);
+            break;
+          }
+        continue;
+      default:
+        // We weren't able to reconcile the condition.
+        continue;
+      }
 
     if (!PreCondLHS->getType()->isInteger()) continue;
 
@@ -2819,7 +2857,8 @@
     // First, we get the value of the LHS in the first iteration: n
     SCEVHandle Start = AddRec->getOperand(0);
 
-    if (executesAtLeastOnce(L, isSigned,
+    if (isLoopGuardedByCond(L,
+                            isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
                             SE.getMinusSCEV(AddRec->getOperand(0), One), RHS)) {
       // Since we know that the condition is true in order to enter the loop,
       // we know that it will run exactly m-n times.
@@ -2997,6 +3036,13 @@
 }
 
 
+bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
+                                          ICmpInst::Predicate Pred,
+                                          SCEV *LHS, SCEV *RHS) {
+  return ((ScalarEvolutionsImpl*)Impl)->isLoopGuardedByCond(L, Pred,
+                                                            LHS, RHS);
+}
+
 SCEVHandle ScalarEvolution::getIterationCount(const Loop *L) const {
   return ((ScalarEvolutionsImpl*)Impl)->getIterationCount(L);
 }

Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=64407&r1=64406&r2=64407&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Thu Feb 12 16:19:27 2009
@@ -53,6 +53,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -89,13 +90,14 @@
 
     void EliminatePointerRecurrence(PHINode *PN, BasicBlock *Preheader,
                                     SmallPtrSet<Instruction*, 16> &DeadInsts);
-    Instruction *LinearFunctionTestReplace(Loop *L, SCEV *IterationCount,
-                                           SCEVExpander &RW);
+    void LinearFunctionTestReplace(Loop *L, SCEVHandle IterationCount, Value *IndVar,
+                                   BasicBlock *ExitingBlock,
+                                   BranchInst *BI,
+                                   SCEVExpander &Rewriter);
     void RewriteLoopExitValues(Loop *L, SCEV *IterationCount);
 
     void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts);
 
-    void OptimizeCanonicalIVType(Loop *L);
     void HandleFloatingPointIV(Loop *L, PHINode *PH, 
                                SmallPtrSet<Instruction*, 16> &DeadInsts);
   };
@@ -225,68 +227,54 @@
 /// variable.  This pass is able to rewrite the exit tests of any loop where the
 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
 /// is actually a much broader range than just linear tests.
-///
-/// This method returns a "potentially dead" instruction whose computation chain
-/// should be deleted when convenient.
-Instruction *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
-                                                       SCEV *IterationCount,
-                                                       SCEVExpander &RW) {
-  // Find the exit block for the loop.  We can currently only handle loops with
-  // a single exit.
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() != 1) return 0;
-  BasicBlock *ExitBlock = ExitBlocks[0];
-
-  // Make sure there is only one predecessor block in the loop.
-  BasicBlock *ExitingBlock = 0;
-  for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
-       PI != PE; ++PI)
-    if (L->contains(*PI)) {
-      if (ExitingBlock == 0)
-        ExitingBlock = *PI;
-      else
-        return 0;  // Multiple exits from loop to this block.
-    }
-  assert(ExitingBlock && "Loop info is broken");
-
-  if (!isa<BranchInst>(ExitingBlock->getTerminator()))
-    return 0;  // Can't rewrite non-branch yet
-  BranchInst *BI = cast<BranchInst>(ExitingBlock->getTerminator());
-  assert(BI->isConditional() && "Must be conditional to be part of loop!");
-
-  Instruction *PotentiallyDeadInst = dyn_cast<Instruction>(BI->getCondition());
-  
+void IndVarSimplify::LinearFunctionTestReplace(Loop *L,
+                                   SCEVHandle IterationCount,
+                                   Value *IndVar,
+                                   BasicBlock *ExitingBlock,
+                                   BranchInst *BI,
+                                   SCEVExpander &Rewriter) {
   // If the exiting block is not the same as the backedge block, we must compare
   // against the preincremented value, otherwise we prefer to compare against
   // the post-incremented value.
-  BasicBlock *Header = L->getHeader();
-  pred_iterator HPI = pred_begin(Header);
-  assert(HPI != pred_end(Header) && "Loop with zero preds???");
-  if (!L->contains(*HPI)) ++HPI;
-  assert(HPI != pred_end(Header) && L->contains(*HPI) &&
-         "No backedge in loop?");
-
-  SCEVHandle TripCount = IterationCount;
-  Value *IndVar;
-  if (*HPI == ExitingBlock) {
+  Value *CmpIndVar;
+  if (ExitingBlock == L->getLoopLatch()) {
+    // What ScalarEvolution calls the "iteration count" is actually the
+    // number of times the branch is taken. Add one to get the number
+    // of times the branch is executed. If this addition may overflow,
+    // we have to be more pessimistic and cast the induction variable
+    // before doing the add.
+    SCEVHandle Zero = SE->getIntegerSCEV(0, IterationCount->getType());
+    SCEVHandle N =
+      SE->getAddExpr(IterationCount,
+                     SE->getIntegerSCEV(1, IterationCount->getType()));
+    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+        SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+      // No overflow. Cast the sum.
+      IterationCount = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+    } else {
+      // Potential overflow. Cast before doing the add.
+      IterationCount = SE->getTruncateOrZeroExtend(IterationCount,
+                                                   IndVar->getType());
+      IterationCount =
+        SE->getAddExpr(IterationCount,
+                       SE->getIntegerSCEV(1, IndVar->getType()));
+    }
+
     // The IterationCount expression contains the number of times that the
     // backedge actually branches to the loop header.  This is one less than the
     // number of times the loop executes, so add one to it.
-    ConstantInt *OneC = ConstantInt::get(IterationCount->getType(), 1);
-    TripCount = SE->getAddExpr(IterationCount, SE->getConstant(OneC));
-    IndVar = L->getCanonicalInductionVariableIncrement();
+    CmpIndVar = L->getCanonicalInductionVariableIncrement();
   } else {
     // We have to use the preincremented value...
-    IndVar = L->getCanonicalInductionVariable();
+    IterationCount = SE->getTruncateOrZeroExtend(IterationCount,
+                                                 IndVar->getType());
+    CmpIndVar = IndVar;
   }
-  
-  DOUT << "INDVARS: LFTR: TripCount = " << *TripCount
-       << "  IndVar = " << *IndVar << "\n";
 
   // Expand the code for the iteration count into the preheader of the loop.
   BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = RW.expandCodeFor(TripCount, Preheader->getTerminator());
+  Value *ExitCnt = Rewriter.expandCodeFor(IterationCount,
+                                          Preheader->getTerminator());
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   ICmpInst::Predicate Opcode;
@@ -295,14 +283,18 @@
   else
     Opcode = ICmpInst::ICMP_EQ;
 
-  Value *Cond = new ICmpInst(Opcode, IndVar, ExitCnt, "exitcond", BI);
+  DOUT << "INDVARS: Rewriting loop exit condition to:\n"
+       << "      LHS:" << *CmpIndVar // includes a newline
+       << "       op:\t"
+       << (Opcode == ICmpInst::ICMP_NE ? "!=" : "=") << "\n"
+       << "      RHS:\t" << *IterationCount << "\n";
+
+  Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
   BI->setCondition(Cond);
   ++NumLFTR;
   Changed = true;
-  return PotentiallyDeadInst;
 }
 
-
 /// RewriteLoopExitValues - Check to see if this loop has a computable
 /// loop-invariant execution count.  If so, this means that we can compute the
 /// final value of any expressions that are recurrent in the loop, and
@@ -444,15 +436,100 @@
   return Changed;
 }
 
-bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// getEffectiveIndvarType - Determine the widest type that the
+/// induction-variable PHINode Phi is cast to.
+///
+static const Type *getEffectiveIndvarType(const PHINode *Phi) {
+  const Type *Ty = Phi->getType();
+
+  for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+       UI != UE; ++UI) {
+    const Type *CandidateType = NULL;
+    if (const ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
+      CandidateType = ZI->getDestTy();
+    else if (const SExtInst *SI = dyn_cast<SExtInst>(UI))
+      CandidateType = SI->getDestTy();
+    if (CandidateType &&
+        CandidateType->getPrimitiveSizeInBits() >
+          Ty->getPrimitiveSizeInBits())
+      Ty = CandidateType;
+  }
 
+  return Ty;
+}
+
+/// isOrigIVAlwaysNonNegative - Analyze the original induction variable
+/// in the loop to determine whether it would ever have a negative
+/// value.
+///
+/// TODO: This duplicates a fair amount of ScalarEvolution logic.
+/// Perhaps this can be merged with ScalarEvolution::getIterationCount.
+///
+static bool isOrigIVAlwaysNonNegative(const Loop *L,
+                                      const Instruction *OrigCond) {
+  // Verify that the loop is sane and find the exit condition.
+  const ICmpInst *Cmp = dyn_cast<ICmpInst>(OrigCond);
+  if (!Cmp) return false;
+
+  // For now, analyze only SLT loops for signed overflow.
+  if (Cmp->getPredicate() != ICmpInst::ICMP_SLT) return false;
+
+  // Get the increment instruction. Look past SExtInsts if we will
+  // be able to prove that the original induction variable doesn't
+  // undergo signed overflow.
+  const Value *OrigIncrVal = Cmp->getOperand(0);
+  const Value *IncrVal = OrigIncrVal;
+  if (SExtInst *SI = dyn_cast<SExtInst>(Cmp->getOperand(0))) {
+    if (!isa<ConstantInt>(Cmp->getOperand(1)) ||
+        !cast<ConstantInt>(Cmp->getOperand(1))->getValue()
+          .isSignedIntN(IncrVal->getType()->getPrimitiveSizeInBits()))
+      return false;
+    IncrVal = SI->getOperand(0);
+  }
+
+  // For now, only analyze induction variables that have simple increments.
+  const BinaryOperator *IncrOp = dyn_cast<BinaryOperator>(IncrVal);
+  if (!IncrOp ||
+      IncrOp->getOpcode() != Instruction::Add ||
+      !isa<ConstantInt>(IncrOp->getOperand(1)) ||
+      !cast<ConstantInt>(IncrOp->getOperand(1))->equalsInt(1))
+    return false;
+
+  // Make sure the PHI looks like a normal IV.
+  const PHINode *PN = dyn_cast<PHINode>(IncrOp->getOperand(0));
+  if (!PN || PN->getNumIncomingValues() != 2)
+    return false;
+  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+  unsigned BackEdge = !IncomingEdge;
+  if (!L->contains(PN->getIncomingBlock(BackEdge)) ||
+      PN->getIncomingValue(BackEdge) != IncrOp)
+    return false;
+
+  // For now, only analyze loops with a constant start value, so that
+  // we can easily determine if the start value is non-negative and
+  // not a maximum value which would wrap on the first iteration.
+  const Value *InitialVal = PN->getIncomingValue(IncomingEdge);
+  if (!isa<ConstantInt>(InitialVal) ||
+      cast<ConstantInt>(InitialVal)->getValue().isNegative() ||
+      cast<ConstantInt>(InitialVal)->getValue().isMaxSignedValue())
+    return false;
+
+  // The original induction variable will start at some non-negative
+  // non-max value, it counts up by one, and the loop iterates only
+  // while it remans less than (signed) some value in the same type.
+  // As such, it will always be non-negative.
+  return true;
+}
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
 
   Changed = false;
-  BasicBlock *Header    = L->getHeader();
+  BasicBlock *Header       = L->getHeader();
+  BasicBlock *ExitingBlock = L->getExitingBlock();
   SmallPtrSet<Instruction*, 16> DeadInsts;
-  
+
   // Verify the input to the pass in already in LCSSA form.
   assert(L->isLCSSAForm());
 
@@ -486,35 +563,23 @@
     }
   }
 
-  // If there are no induction variables in the loop, there is nothing more to
-  // do.
-  if (IndVars.empty()) {
-    // Actually, if we know how many times the loop iterates, lets insert a
-    // canonical induction variable to help subsequent passes.
-    if (!isa<SCEVCouldNotCompute>(IterationCount)) {
-      SCEVExpander Rewriter(*SE, *LI);
-      Rewriter.getOrInsertCanonicalInductionVariable(L,
-                                                     IterationCount->getType());
-      if (Instruction *I = LinearFunctionTestReplace(L, IterationCount,
-                                                     Rewriter)) {
-        SmallPtrSet<Instruction*, 16> InstructionsToDelete;
-        InstructionsToDelete.insert(I);
-        DeleteTriviallyDeadInstructions(InstructionsToDelete);
-      }
-    }
-    return Changed;
+  // Compute the type of the largest recurrence expression, and collect
+  // the set of the types of the other recurrence expressions.
+  const Type *LargestType = 0;
+  SmallSetVector<const Type *, 4> SizesToInsert;
+  if (!isa<SCEVCouldNotCompute>(IterationCount)) {
+    LargestType = IterationCount->getType();
+    SizesToInsert.insert(IterationCount->getType());
   }
-
-  // Compute the type of the largest recurrence expression.
-  //
-  const Type *LargestType = IndVars[0].first->getType();
-  bool DifferingSizes = false;
-  for (unsigned i = 1, e = IndVars.size(); i != e; ++i) {
-    const Type *Ty = IndVars[i].first->getType();
-    DifferingSizes |= 
-      Ty->getPrimitiveSizeInBits() != LargestType->getPrimitiveSizeInBits();
-    if (Ty->getPrimitiveSizeInBits() > LargestType->getPrimitiveSizeInBits())
-      LargestType = Ty;
+  for (unsigned i = 0, e = IndVars.size(); i != e; ++i) {
+    const PHINode *PN = IndVars[i].first;
+    SizesToInsert.insert(PN->getType());
+    const Type *EffTy = getEffectiveIndvarType(PN);
+    SizesToInsert.insert(EffTy);
+    if (!LargestType ||
+        EffTy->getPrimitiveSizeInBits() >
+          LargestType->getPrimitiveSizeInBits())
+      LargestType = EffTy;
   }
 
   // Create a rewriter object which we'll use to transform the code with.
@@ -522,17 +587,32 @@
 
   // Now that we know the largest of of the induction variables in this loop,
   // insert a canonical induction variable of the largest size.
-  Value *IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
-  ++NumInserted;
-  Changed = true;
-  DOUT << "INDVARS: New CanIV: " << *IndVar;
-
-  if (!isa<SCEVCouldNotCompute>(IterationCount)) {
-    IterationCount = SE->getTruncateOrZeroExtend(IterationCount, LargestType);
-    if (Instruction *DI = LinearFunctionTestReplace(L, IterationCount,Rewriter))
-      DeadInsts.insert(DI);
+  Value *IndVar = 0;
+  if (!SizesToInsert.empty()) {
+    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+    ++NumInserted;
+    Changed = true;
+    DOUT << "INDVARS: New CanIV: " << *IndVar;
   }
 
+  // If we have a trip count expression, rewrite the loop's exit condition
+  // using it.  We can currently only handle loops with a single exit.
+  bool OrigIVAlwaysNonNegative = false;
+  if (!isa<SCEVCouldNotCompute>(IterationCount) && ExitingBlock)
+    // Can't rewrite non-branch yet.
+    if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) {
+      if (Instruction *OrigCond = dyn_cast<Instruction>(BI->getCondition())) {
+        // Determine if the OrigIV will ever have a non-zero sign bit.
+        OrigIVAlwaysNonNegative = isOrigIVAlwaysNonNegative(L, OrigCond);
+
+        // We'll be replacing the original condition, so it'll be dead.
+        DeadInsts.insert(OrigCond);
+      }
+
+      LinearFunctionTestReplace(L, IterationCount, IndVar,
+                                ExitingBlock, BI, Rewriter);
+    }
+
   // Now that we have a canonical induction variable, we can rewrite any
   // recurrences in terms of the induction variable.  Start with the auxillary
   // induction variables, and recursively rewrite any of their uses.
@@ -541,21 +621,13 @@
   // If there were induction variables of other sizes, cast the primary
   // induction variable to the right size for them, avoiding the need for the
   // code evaluation methods to insert induction variables of different sizes.
-  if (DifferingSizes) {
-    SmallVector<unsigned,4> InsertedSizes;
-    InsertedSizes.push_back(LargestType->getPrimitiveSizeInBits());
-    for (unsigned i = 0, e = IndVars.size(); i != e; ++i) {
-      unsigned ithSize = IndVars[i].first->getType()->getPrimitiveSizeInBits();
-      if (std::find(InsertedSizes.begin(), InsertedSizes.end(), ithSize)
-          == InsertedSizes.end()) {
-        PHINode *PN = IndVars[i].first;
-        InsertedSizes.push_back(ithSize);
-        Instruction *New = new TruncInst(IndVar, PN->getType(), "indvar",
-                                         InsertPt);
-        Rewriter.addInsertedValue(New, SE->getSCEV(New));
-        DOUT << "INDVARS: Made trunc IV for " << *PN
-             << "   NewVal = " << *New << "\n";
-      }
+  for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) {
+    const Type *Ty = SizesToInsert[i];
+    if (Ty != LargestType) {
+      Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt);
+      Rewriter.addInsertedValue(New, SE->getSCEV(New));
+      DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": "
+           << *New << "\n";
     }
   }
 
@@ -568,6 +640,23 @@
          << "   into = " << *NewVal << "\n";
     NewVal->takeName(PN);
 
+    /// If the new canonical induction variable is wider than the original,
+    /// and the original has uses that are casts to wider types, see if the
+    /// truncate and extend can be omitted.
+    if (isa<TruncInst>(NewVal))
+      for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+           UI != UE; ++UI)
+        if (isa<ZExtInst>(UI) ||
+            (isa<SExtInst>(UI) && OrigIVAlwaysNonNegative)) {
+          Value *TruncIndVar = IndVar;
+          if (TruncIndVar->getType() != UI->getType())
+            TruncIndVar = new TruncInst(IndVar, UI->getType(), "truncindvar",
+                                        InsertPt);
+          UI->replaceAllUsesWith(TruncIndVar);
+          if (Instruction *DeadUse = dyn_cast<Instruction>(*UI))
+            DeadInsts.insert(DeadUse);
+        }
+
     // Replace the old PHI Node with the inserted computation.
     PN->replaceAllUsesWith(NewVal);
     DeadInsts.insert(PN);
@@ -603,125 +692,10 @@
 #endif
 
   DeleteTriviallyDeadInstructions(DeadInsts);
-  OptimizeCanonicalIVType(L);
   assert(L->isLCSSAForm());
   return Changed;
 }
 
-/// OptimizeCanonicalIVType - If loop induction variable is always
-/// sign or zero extended then extend the type of the induction 
-/// variable.
-void IndVarSimplify::OptimizeCanonicalIVType(Loop *L) {
-  PHINode *PH = L->getCanonicalInductionVariable();
-  if (!PH) return;
-  
-  // Check loop iteration count.
-  SCEVHandle IC = SE->getIterationCount(L);
-  if (isa<SCEVCouldNotCompute>(IC)) return;
-  SCEVConstant *IterationCount = dyn_cast<SCEVConstant>(IC);
-  if (!IterationCount) return;
-
-  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
-  unsigned BackEdge     = IncomingEdge^1;
-  
-  // Check IV uses. If all IV uses are either SEXT or ZEXT (except
-  // IV increment instruction) then this IV is suitable for this
-  // transformation.
-  bool isSEXT = false;
-  BinaryOperator *Incr = NULL;
-  const Type *NewType = NULL;
-  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end(); 
-      UI != UE; ++UI) {
-    const Type *CandidateType = NULL;
-    if (ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
-      CandidateType = ZI->getDestTy();
-    else if (SExtInst *SI = dyn_cast<SExtInst>(UI)) {
-      CandidateType = SI->getDestTy();
-      isSEXT = true;
-    }
-    else if ((Incr = dyn_cast<BinaryOperator>(UI))) {
-      // Validate IV increment instruction.
-      if (PH->getIncomingValue(BackEdge) == Incr)
-        continue;
-    }
-    if (!CandidateType) {
-      NewType = NULL;
-      break;
-    }
-    if (!NewType)
-      NewType = CandidateType;
-    else if (NewType != CandidateType) {
-      NewType = NULL;
-      break;
-    }
-  }
-
-  // IV uses are not suitable then avoid this transformation.
-  if (!NewType || !Incr)
-    return;
-
-  // IV increment instruction has two uses, one is loop exit condition
-  // and second is the IV (phi node) itself.
-  ICmpInst *Exit = NULL;
-  for(Value::use_iterator II = Incr->use_begin(), IE = Incr->use_end();
-      II != IE; ++II) {
-    if (PH == *II)  continue;
-    Exit = dyn_cast<ICmpInst>(*II);
-    break;
-  }
-  if (!Exit) return;
-  ConstantInt *EV = dyn_cast<ConstantInt>(Exit->getOperand(0));
-  if (!EV) 
-    EV = dyn_cast<ConstantInt>(Exit->getOperand(1));
-  if (!EV) return;
-
-  // Check iteration count max value to avoid loops that wrap around IV.
-  APInt ICount = IterationCount->getValue()->getValue();
-  if (ICount.isNegative()) return;
-  uint32_t BW = PH->getType()->getPrimitiveSizeInBits();
-  APInt Max = (isSEXT ? APInt::getSignedMaxValue(BW) : APInt::getMaxValue(BW));
-  if (ICount.getZExtValue() > Max.getZExtValue())  return;                         
-
-  // Extend IV type.
-
-  SCEVExpander Rewriter(*SE, *LI);
-  Value *NewIV = Rewriter.getOrInsertCanonicalInductionVariable(L,NewType);
-  PHINode *NewPH = cast<PHINode>(NewIV);
-  Instruction *NewIncr = cast<Instruction>(NewPH->getIncomingValue(BackEdge));
-
-  // Replace all SEXT or ZEXT uses.
-  SmallVector<Instruction *, 4> PHUses;
-  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end(); 
-      UI != UE; ++UI) {
-      Instruction *I = cast<Instruction>(UI);
-      PHUses.push_back(I);
-  }
-  while (!PHUses.empty()){
-    Instruction *Use = PHUses.back(); PHUses.pop_back();
-    if (Incr == Use) continue;
-    
-    SE->deleteValueFromRecords(Use);
-    Use->replaceAllUsesWith(NewIV);
-    Use->eraseFromParent();
-  }
-
-  // Replace exit condition.
-  ConstantInt *NEV = ConstantInt::get(NewType, EV->getZExtValue());
-  Instruction *NE = new ICmpInst(Exit->getPredicate(),
-                                 NewIncr, NEV, "new.exit", 
-                                 Exit->getParent()->getTerminator());
-  SE->deleteValueFromRecords(Exit);
-  Exit->replaceAllUsesWith(NE);
-  Exit->eraseFromParent();
-  
-  // Remove old IV and increment instructions.
-  SE->deleteValueFromRecords(PH);
-  PH->removeIncomingValue((unsigned)0);
-  PH->removeIncomingValue((unsigned)0);
-  SE->deleteValueFromRecords(Incr);
-  Incr->eraseFromParent();
-}
-
 /// Return true if it is OK to use SIToFPInst for an inducation variable
 /// with given inital and exit values.
 static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,

Added: llvm/trunk/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll?rev=64407&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll (added)
+++ llvm/trunk/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll Thu Feb 12 16:19:27 2009
@@ -0,0 +1,62 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep sext
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i64 @test(i64* nocapture %first, i32 %count) nounwind readonly {
+entry:
+	%t0 = icmp sgt i32 %count, 0		; <i1> [#uses=1]
+	br i1 %t0, label %bb.nph, label %bb2
+
+bb.nph:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb1, %bb.nph
+	%result.02 = phi i64 [ %t5, %bb1 ], [ 0, %bb.nph ]		; <i64> [#uses=1]
+	%n.01 = phi i32 [ %t6, %bb1 ], [ 0, %bb.nph ]		; <i32> [#uses=2]
+	%t1 = sext i32 %n.01 to i64		; <i64> [#uses=1]
+	%t2 = getelementptr i64* %first, i64 %t1		; <i64*> [#uses=1]
+	%t3 = load i64* %t2, align 8		; <i64> [#uses=1]
+	%t4 = lshr i64 %t3, 4		; <i64> [#uses=1]
+	%t5 = add i64 %t4, %result.02		; <i64> [#uses=2]
+	%t6 = add i32 %n.01, 1		; <i32> [#uses=2]
+	br label %bb1
+
+bb1:		; preds = %bb
+	%t7 = icmp slt i32 %t6, %count		; <i1> [#uses=1]
+	br i1 %t7, label %bb, label %bb1.bb2_crit_edge
+
+bb1.bb2_crit_edge:		; preds = %bb1
+	%.lcssa = phi i64 [ %t5, %bb1 ]		; <i64> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %bb1.bb2_crit_edge, %entry
+	%result.0.lcssa = phi i64 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i64> [#uses=1]
+	ret i64 %result.0.lcssa
+}
+
+define void @foo(i16 signext %N, i32* nocapture %P) nounwind {
+entry:
+	%t0 = icmp sgt i16 %N, 0		; <i1> [#uses=1]
+	br i1 %t0, label %bb.nph, label %return
+
+bb.nph:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb1, %bb.nph
+	%i.01 = phi i16 [ %t3, %bb1 ], [ 0, %bb.nph ]		; <i16> [#uses=2]
+	%t1 = sext i16 %i.01 to i64		; <i64> [#uses=1]
+	%t2 = getelementptr i32* %P, i64 %t1		; <i32*> [#uses=1]
+	store i32 123, i32* %t2, align 4
+	%t3 = add i16 %i.01, 1		; <i16> [#uses=2]
+	br label %bb1
+
+bb1:		; preds = %bb
+	%t4 = icmp slt i16 %t3, %N		; <i1> [#uses=1]
+	br i1 %t4, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge:		; preds = %bb1
+	br label %return
+
+return:		; preds = %bb1.return_crit_edge, %entry
+	ret void
+}