[llvm-commits] [llvm] r135420 - in /llvm/trunk: lib/Transforms/Scalar/IndVarSimplify.cpp test/Transforms/IndVarSimplify/ada-loops.ll test/Transforms/IndVarSimplify/lftr-reuse.ll

Andrew Trick atrick at apple.com
Mon Jul 18 13:32:32 PDT 2011


Author: atrick
Date: Mon Jul 18 15:32:31 2011
New Revision: 135420

URL: http://llvm.org/viewvc/llvm-project?rev=135420&view=rev
Log:
indvars: LinearFunctionTestReplace for non-canonical IVs.

For -disable-iv-rewrite, perform LFTR without generating a new
"canonical" induction variable. Instead find the "best" existing
induction variable for use in the loop exit test and compute the final
value of that IV for use in the new loop exit test. In short,
convert to a simple eq/ne exit test as long as it's cheap to do so.
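
To illustrate (a rough sketch based on the @ptriv function in the new
lftr-reuse.ll test; the limit operand below is a placeholder, not verbatim
compiler output): for a pointer-IV loop of the form
"for (char *p = base; p < base + n; ++p)", the existing latch exit test

  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
  br i1 %cmp, label %for.body, label %for.end

is rewritten to compare the same, pre-existing pointer IV against a limit
expanded once in the preheader, roughly

  %lftr.limit = getelementptr i8* %base, i64 %tripcount ; %tripcount is a
                                                        ; placeholder here
  %exitcond = icmp ne i8* %incdec.ptr, %lftr.limit
  br i1 %exitcond, label %for.body, label %for.end

without materializing a new canonical integer IV just to drive the exit test.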

Added:
    llvm/trunk/test/Transforms/IndVarSimplify/lftr-reuse.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll

Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=135420&r1=135419&r2=135420&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Mon Jul 18 15:32:31 2011
@@ -79,6 +79,12 @@
   "disable-iv-rewrite", cl::Hidden,
   cl::desc("Disable canonical induction variable rewriting"));
 
+// Temporary flag for use with -disable-iv-rewrite to force a canonical IV for
+// LFTR purposes.
+static cl::opt<bool> ForceLFTR(
+  "force-lftr", cl::Hidden,
+  cl::desc("Enable forced linear function test replacement"));
+
 namespace {
   class IndVarSimplify : public LoopPass {
     IVUsers         *IU;
@@ -140,9 +146,8 @@
 
     void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
 
-    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
-                                        PHINode *IndVar,
-                                        SCEVExpander &Rewriter);
+    Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+                                     PHINode *IndVar, SCEVExpander &Rewriter);
 
     void SinkUnusedInvariants(Loop *L);
   };
@@ -1014,7 +1019,7 @@
     NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
     return 0;
   }
-  // We assume that block terminators are not SCEVable. We wouldn't want to
+  // Assume block terminators cannot evaluate to a recurrence. We can't
   // insert a Trunc after a terminator if there happens to be a critical edge.
   assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
          "SCEV is not expected to evaluate a block terminator");
@@ -1302,10 +1307,6 @@
   // Get the symbolic expression for this instruction.
   const SCEV *S = SE->getSCEV(I);
 
-  // We assume that terminators are not SCEVable.
-  assert((!S || I != I->getParent()->getTerminator()) &&
-         "can't fold terminators");
-
   // Only consider affine recurrences.
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
   if (AR && AR->getLoop() == L)
@@ -1471,7 +1472,7 @@
     }
   }
 
-  if (!DisableIVRewrite)
+  if (!DisableIVRewrite || ForceLFTR)
     return false;
 
   // Recurse past add expressions, which commonly occur in the
@@ -1522,7 +1523,7 @@
 /// getBackedgeIVType - Get the widest type used by the loop test after peeking
 /// through Truncs.
 ///
-/// TODO: Unnecessary if LFTR does not force a canonical IV.
+/// TODO: Unnecessary when ForceLFTR is removed.
 static Type *getBackedgeIVType(Loop *L) {
   if (!L->getExitingBlock())
     return 0;
@@ -1549,12 +1550,198 @@
   return Ty;
 }
 
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. Loop::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) {
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return true;
+
+  return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
+/// invariant value to the phi.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+  Instruction *IncI = dyn_cast<Instruction>(IncV);
+  if (!IncI)
+    return 0;
+
+  switch (IncI->getOpcode()) {
+  case Instruction::Add:
+  case Instruction::Sub:
+    break;
+  case Instruction::GetElementPtr:
+    // An IV counter must preserve its type.
+    if (IncI->getNumOperands() == 2)
+      break;
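+    // Fall through and reject any GEP that is not a single-index increment.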
+  default:
+    return 0;
+  }
+
+  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
+  if (Phi && Phi->getParent() == L->getHeader()) {
+    if (isLoopInvariant(IncI->getOperand(1), L, DT))
+      return Phi;
+    return 0;
+  }
+  if (IncI->getOpcode() == Instruction::GetElementPtr)
+    return 0;
+
+  // Allow add/sub to be commuted.
+  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
+  if (Phi && Phi->getParent() == L->getHeader()) {
+    if (isLoopInvariant(IncI->getOperand(0), L, DT))
+      return Phi;
+  }
+  return 0;
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
+  assert(L->getExitingBlock() && "expected loop exit");
+
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  // Don't bother with LFTR if the loop is not properly simplified.
+  if (!LatchBlock)
+    return false;
+
+  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  assert(BI && "expected exit branch");
+
+  // Do LFTR to simplify the exit condition to an ICMP.
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return true;
+
+  // Do LFTR to simplify the exit ICMP to EQ/NE
+  ICmpInst::Predicate Pred = Cond->getPredicate();
+  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+    return true;
+
+  // Look for a loop invariant RHS
+  Value *LHS = Cond->getOperand(0);
+  Value *RHS = Cond->getOperand(1);
+  if (!isLoopInvariant(RHS, L, DT)) {
+    if (!isLoopInvariant(LHS, L, DT))
+      return true;
+    std::swap(LHS, RHS);
+  }
+  // Look for a simple IV counter LHS
+  PHINode *Phi = dyn_cast<PHINode>(LHS);
+  if (!Phi)
+    Phi = getLoopPhiForCounter(LHS, L, DT);
+
+  if (!Phi)
+    return true;
+
+  // Do LFTR if the exit condition's IV is *not* a simple counter.
+  Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
+  return Phi != getLoopPhiForCounter(IncV, L, DT);
+}
+
+/// AlmostDeadIV - Return true if this IV has no uses other than its own
+/// increment and the (soon to be rewritten) loop exit test.
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
+  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+  Value *IncV = Phi->getIncomingValue(LatchIdx);
+
+  for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+       UI != UE; ++UI) {
+    if (*UI != Cond && *UI != IncV) return false;
+  }
+
+  for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
+       UI != UE; ++UI) {
+    if (*UI != Cond && *UI != Phi) return false;
+  }
+  return true;
+}
+
+/// FindLoopCounter - Find an affine IV in canonical form.
+///
+/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
+///
+/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
+/// This is difficult in general for SCEV because of potential overflow. But we
+/// could at least handle constant BECounts.
+static PHINode *
+FindLoopCounter(Loop *L, const SCEV *BECount,
+                ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
+  // I'm not sure how BECount could be a pointer type, but we definitely don't
+  // want to LFTR that.
+  if (BECount->getType()->isPointerTy())
+    return 0;
+
+  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
+
+  Value *Cond =
+    cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+
+  // Loop over all of the PHI nodes, looking for a simple counter.
+  PHINode *BestPhi = 0;
+  const SCEV *BestInit = 0;
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    PHINode *Phi = cast<PHINode>(I);
+    if (!SE->isSCEVable(Phi->getType()))
+      continue;
+
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+    if (!AR || AR->getLoop() != L || !AR->isAffine())
+      continue;
+
+    // AR may be a pointer type, while BECount is an integer type.
+    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
+    // AR must not be a narrower type, or we may never exit.
+    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
+    if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+      continue;
+
+    const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+    if (!Step || !Step->isOne())
+      continue;
+
+    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+    Value *IncV = Phi->getIncomingValue(LatchIdx);
+    if (getLoopPhiForCounter(IncV, L, DT) != Phi)
+      continue;
+
+    const SCEV *Init = AR->getStart();
+
+    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+      // Don't force a live loop counter if another IV can be used.
+      if (AlmostDeadIV(Phi, LatchBlock, Cond))
+        continue;
+
+      // Prefer to count-from-zero. This is a more "canonical" counter form. It
+      // also prefers integer to pointer IVs.
+      if (BestInit->isZero() != Init->isZero()) {
+        if (BestInit->isZero())
+          continue;
+      }
+      // If two IVs both count from zero or both count from nonzero then the
+      // narrower is likely a dead phi that has been widened. Use the wider phi
+      // to allow the other to be eliminated.
+      if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+        continue;
+    }
+    BestPhi = Phi;
+    BestInit = Init;
+  }
+  return BestPhi;
+}
+
 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
 /// loop to be a canonical != comparison against the incremented loop induction
 /// variable.  This pass is able to rewrite the exit tests of any loop where the
 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
 /// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
+Value *IndVarSimplify::
 LinearFunctionTestReplace(Loop *L,
                           const SCEV *BackedgeTakenCount,
                           PHINode *IndVar,
@@ -1562,62 +1749,118 @@
   assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
 
+  // In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. In this
+  // mode, LFTR can ignore IV overflow and truncate to the width of
+  // BECount. This avoids materializing the add(zext(add)) expression.
+  Type *CntTy = DisableIVRewrite ?
+    BackedgeTakenCount->getType() : IndVar->getType();
+
+  const SCEV *IVLimit = BackedgeTakenCount;
+
   // If the exiting block is not the same as the backedge block, we must compare
   // against the preincremented value, otherwise we prefer to compare against
   // the post-incremented value.
   Value *CmpIndVar;
-  const SCEV *RHS = BackedgeTakenCount;
   if (L->getExitingBlock() == L->getLoopLatch()) {
     // Add one to the "backedge-taken" count to get the trip count.
     // If this addition may overflow, we have to be more pessimistic and
     // cast the induction variable before doing the add.
-    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
     const SCEV *N =
-      SE->getAddExpr(BackedgeTakenCount,
-                     SE->getConstant(BackedgeTakenCount->getType(), 1));
-    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
-        SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
-      // No overflow. Cast the sum.
-      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
-    } else {
-      // Potential overflow. Cast before doing the add.
-      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                        IndVar->getType());
-      RHS = SE->getAddExpr(RHS,
-                           SE->getConstant(IndVar->getType(), 1));
+      SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
+    if (CntTy == IVLimit->getType())
+      IVLimit = N;
+    else {
+      const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+      if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+          SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+        // No overflow. Cast the sum.
+        IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+      } else {
+        // Potential overflow. Cast before doing the add.
+        IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+        IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+      }
     }
-
     // The BackedgeTaken expression contains the number of times that the
     // backedge branches to the loop header.  This is one less than the
     // number of times the loop executes, so use the incremented indvar.
     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
   } else {
     // We have to use the preincremented value...
-    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                      IndVar->getType());
+    IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
     CmpIndVar = IndVar;
   }
 
+  // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
+  // So for a non-zero start, compute the IVLimit here.
+  bool isPtrIV = false;
+  Type *CmpTy = CntTy;
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+  if (!AR->getStart()->isZero()) {
+    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+    const SCEV *IVInit = AR->getStart();
+
+    // For pointer types, sign extend BECount in order to materialize a GEP.
+    // Note that for DisableIVRewrite, we never run SCEVExpander on a
+    // pointer type, because we must preserve the existing GEPs. Instead we
+    // directly generate a GEP later.
+    if (IVInit->getType()->isPointerTy()) {
+      isPtrIV = true;
+      CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
+      IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
+    }
+    // For integer types, truncate the IV before computing IVInit + BECount.
+    else {
+      if (SE->getTypeSizeInBits(IVInit->getType())
+          > SE->getTypeSizeInBits(CmpTy))
+        IVInit = SE->getTruncateExpr(IVInit, CmpTy);
+
+      IVLimit = SE->getAddExpr(IVInit, IVLimit);
+    }
+  }
   // Expand the code for the iteration count.
-  assert(SE->isLoopInvariant(RHS, L) &&
+  IRBuilder<> Builder(BI);
+
+  assert(SE->isLoopInvariant(IVLimit, L) &&
          "Computed iteration count is not loop invariant!");
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+  Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
+
+  // Create a gep for IVInit + IVLimit from an existing pointer base.
+  assert(isPtrIV == IndVar->getType()->isPointerTy() &&
+         "IndVar type must match IVInit type");
+  if (isPtrIV) {
+      Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+      assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
+      const PointerType *PointerTy = cast<PointerType>(IVStart->getType());
+      assert(SE->getSizeOfExpr(PointerTy->getElementType())->isOne() &&
+             "unit stride pointer IV must be i8*");
+
+      Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+      ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
+      Builder.SetInsertPoint(BI);
+  }
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
-  ICmpInst::Predicate Opcode;
+  ICmpInst::Predicate P;
   if (L->contains(BI->getSuccessor(0)))
-    Opcode = ICmpInst::ICMP_NE;
+    P = ICmpInst::ICMP_NE;
   else
-    Opcode = ICmpInst::ICMP_EQ;
+    P = ICmpInst::ICMP_EQ;
 
   DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                << "      LHS:" << *CmpIndVar << '\n'
                << "       op:\t"
-               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
-               << "      RHS:\t" << *RHS << "\n");
+               << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+               << "      RHS:\t" << *ExitCnt << "\n"
+               << "     Expr:\t" << *IVLimit << "\n");
 
-  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
-  Cond->setDebugLoc(BI->getDebugLoc());
+  if (SE->getTypeSizeInBits(CmpIndVar->getType())
+      > SE->getTypeSizeInBits(CmpTy)) {
+    CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+  }
+
+  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   Value *OrigCond = BI->getCondition();
   // It's tempting to use replaceAllUsesWith here to fully replace the old
   // comparison, but that's not immediately safe, since users of the old
@@ -1784,8 +2027,9 @@
   // a canonical induction variable should be inserted.
   Type *LargestType = 0;
   bool NeedCannIV = false;
+  bool ReuseIVForExit = DisableIVRewrite && !ForceLFTR;
   bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
-  if (ExpandBECount) {
+  if (ExpandBECount && !ReuseIVForExit) {
     // If we have a known trip count and a single exit block, we'll be
     // rewriting the loop exit test condition below, which requires a
     // canonical induction variable.
@@ -1848,15 +2092,13 @@
       OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
     }
   }
-
+  else if (ExpandBECount && ReuseIVForExit && needsLFTR(L, DT)) {
+    IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+  }
   // If we have a trip count expression, rewrite the loop's exit condition
   // using it.  We can currently only handle loops with a single exit.
-  ICmpInst *NewICmp = 0;
-  if (ExpandBECount) {
-    assert(canExpandBackedgeTakenCount(L, SE) &&
-           "canonical IV disrupted BackedgeTaken expansion");
-    assert(NeedCannIV &&
-           "LinearFunctionTestReplace requires a canonical induction variable");
+  Value *NewICmp = 0;
+  if (ExpandBECount && IndVar) {
     // Check preconditions for proper SCEVExpander operation. SCEV does not
     // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
     // pass that uses the SCEVExpander must do it. This does not work well for
@@ -1894,9 +2136,11 @@
 
   // For completeness, inform IVUsers of the IV use in the newly-created
   // loop exit test instruction.
-  if (NewICmp && IU)
-    IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
-
+  if (IU && NewICmp) {
+    ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
+    if (NewICmpInst)
+      IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+  }
   // Clean up dead instructions.
   Changed |= DeleteDeadPHIs(L->getHeader());
   // Check a post-condition.

Modified: llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll?rev=135420&r1=135419&r2=135420&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll Mon Jul 18 15:32:31 2011
@@ -9,10 +9,9 @@
 ; Note that all four functions should actually be converted to
 ; memset. However, this test case validates indvars behavior.  We
 ; don't check that phis are "folded together" because that is a job
-; for loop strength reduction. But indvars must remove sext, zext,
-; trunc, and add i8.
+; for loop strength reduction. But indvars must remove sext, zext, and add i8.
 ;
-; CHECK-NOT: {{sext|zext|trunc|add i8}}
+; CHECK-NOT: {{sext|zext|add i8}}
 
 ; ModuleID = 'ada.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"

Added: llvm/trunk/test/Transforms/IndVarSimplify/lftr-reuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/lftr-reuse.ll?rev=135420&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/lftr-reuse.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/lftr-reuse.ll Mon Jul 18 15:32:31 2011
@@ -0,0 +1,230 @@
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+;
+; Make sure that indvars can perform LFTR without a canonical IV.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Perform LFTR using the original pointer-type IV.
+
+;  for(char* p = base; p < base + n; ++p) {
+;    *p = p-base;
+;  }
+define void @ptriv(i8* %base, i32 %n) nounwind {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i8* %base, i64 %idx.ext
+  %cmp1 = icmp ult i8* %base, %add.ptr
+  br i1 %cmp1, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK-NOT: add
+; CHECK: icmp ne i8*
+; CHECK: br i1
+for.body:
+  %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+  ; cruft to make the IV useful
+  %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64
+  %sub.ptr.rhs.cast = ptrtoint i8* %base to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %conv = trunc i64 %sub.ptr.sub to i8
+  store i8 %conv, i8* %p.02
+  %incdec.ptr = getelementptr inbounds i8* %p.02, i32 1
+  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; It would be nice if SCEV and any loop analysis could assume that
+; preheaders exist. Unfortunately it is not always the case. This test
+; checks that SCEVExpander can handle an outer loop that has not yet
+; been simplified. As a result, the inner loop's exit test will not be
+; rewritten.
+define void @expandOuterRecurrence(i32 %arg) nounwind {
+entry:
+  %sub1 = sub nsw i32 %arg, 1
+  %cmp1 = icmp slt i32 0, %sub1
+  br i1 %cmp1, label %outer, label %exit
+
+outer:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
+  %sub2 = sub nsw i32 %arg, %i
+  %sub3 = sub nsw i32 %sub2, 1
+  %cmp2 = icmp slt i32 0, %sub3
+  br i1 %cmp2, label %inner.ph, label %outer.inc
+
+inner.ph:
+  br label %inner
+
+; CHECK: inner:
+; CHECK: icmp slt
+; CHECK: br i1
+inner:
+  %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
+  %j.inc = add nsw i32 %j, 1
+  %cmp3 = icmp slt i32 %j.inc, %sub3
+  br i1 %cmp3, label %inner, label %outer.inc
+
+; CHECK: outer.inc:
+; CHECK: icmp ne
+; CHECK: br i1
+outer.inc:
+  %i.inc = add nsw i32 %i, 1
+  %cmp4 = icmp slt i32 %i.inc, %sub1
+  br i1 %cmp4, label %outer, label %exit
+
+exit:
+  ret void
+}
+
+; Force SCEVExpander to look for an existing well-formed phi.
+; Perform LFTR without generating extra preheader code.
+define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
+                         i32 %irow, i32 %ilead) nounwind {
+; CHECK: entry:
+; CHECK-NOT: zext
+; CHECK-NOT: add
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: icmp ne
+; CHECK: br i1
+entry:
+  %cmp = icmp slt i32 1, %irow
+  br i1 %cmp, label %loop, label %return
+
+loop:
+  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %diagidx = add nsw i32 %rowidx, %i
+  %diagidxw = sext i32 %diagidx to i64
+  %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
+  %v1 = load double* %matrixp
+  %iw = sext i32 %i to i64
+  %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
+  %v2 = load double* %vectorp
+  %row.inc = add nsw i32 %rowidx, %ilead
+  %i.inc = add nsw i32 %i, 1
+  %cmp196 = icmp slt i32 %i.inc, %irow
+  br i1 %cmp196, label %loop, label %return
+
+return:
+  ret void
+}
+
+; Avoid generating extra code to materialize a trip count. Skip LFTR.
+define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector,
+                           i32 %irow, i32 %ilead) nounwind {
+entry:
+  br label %loop
+
+; CHECK: entry:
+; CHECK-NOT: zext
+; CHECK-NOT: add
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: icmp slt
+; CHECK: br i1
+loop:
+  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %diagidx = add nsw i32 %rowidx, %i
+  %diagidxw = sext i32 %diagidx to i64
+  %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
+  %v1 = load double* %matrixp
+  %iw = sext i32 %i to i64
+  %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
+  %v2 = load double* %vectorp
+  %row.inc = add nsw i32 %rowidx, %ilead
+  %i.inc = add nsw i32 %i, 1
+  %cmp196 = icmp slt i32 %i.inc, %irow
+  br i1 %cmp196, label %loop, label %return
+
+return:
+  ret void
+}
+
+; Remove %i which is only used by the exit test.
+; Verify that SCEV can still compute a backedge count from the sign
+; extended %n, used for pointer comparison by LFTR.
+define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
+entry:
+  %x.ext = sext i32 %x to i64
+  %add.ptr = getelementptr inbounds i8* %base, i64 %x.ext
+  %y.ext = sext i32 %y to i64
+  %add.ptr10 = getelementptr inbounds i8* %add.ptr, i64 %y.ext
+  %lim = add i32 %x, %n
+  %cmp.ph = icmp ult i32 %x, %lim
+  br i1 %cmp.ph, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: icmp ne i8*
+; CHECK: br i1
+loop:
+  %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
+  %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ]
+  %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
+  store i8 3, i8* %aptr
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, %lim
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Exercise backedge taken count verification with a never-taken loop.
+define void @nevertaken() nounwind uwtable ssp {
+entry:
+  br label %loop
+
+; CHECK: loop:
+; CHECK-NOT: phi
+; CHECK-NOT: add
+; CHECK-NOT: icmp
+; CHECK: exit:
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp sle i32 %inc, 0
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Test LFTR on an IV whose recurrence start is a non-unit pointer type.
+define void @aryptriv([256 x i8]* %base, i32 %n) nounwind {
+entry:
+  %ivstart = getelementptr inbounds [256 x i8]* %base, i32 0, i32 0
+  %ivend = getelementptr inbounds [256 x i8]* %base, i32 0, i32 %n
+  %cmp.ph = icmp ult i8* %ivstart, %ivend
+  br i1 %cmp.ph, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: icmp ne i8*
+; CHECK: br i1
+loop:
+  %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ]
+  %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
+  store i8 3, i8* %aptr
+  %cmp = icmp ult i8* %incdec.ptr, %ivend
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}

More information about the llvm-commits mailing list