[llvm] r245118 - [SCEV] Apply NSW and NUW flags via poison value analysis for sub, mul and shl

Fri Aug 14 15:45:26 PDT 2015

Author: broune
Date: Fri Aug 14 17:45:26 2015
New Revision: 245118

URL: http://llvm.org/viewvc/llvm-project?rev=245118&view=rev
Log:
[SCEV] Apply NSW and NUW flags via poison value analysis for sub, mul and shl

Summary:
http://reviews.llvm.org/D11212 made Scalar Evolution able to propagate NSW and NUW flags from instructions to SCEVs for add instructions. This patch expands that to sub, mul and shl instructions.

This change makes LSR able to generate pointer induction variables for loops like these, where the index is 32 bit and the pointer is 64 bit:

  for (int i = 0; i < numIterations; ++i)
    sum += ptr[i - offset];

  for (int i = 0; i < numIterations; ++i)
    sum += ptr[i * stride];

  for (int i = 0; i < numIterations; ++i)
    sum += ptr[3 * (i << 7)];


Reviewers: atrick, sanjoy

Subscribers: sanjoy, majnemer, hfinkel, llvm-commits, meheff, jingyue, eliben

Differential Revision: http://reviews.llvm.org/D11860

Modified:
    llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
    llvm/trunk/lib/Analysis/ScalarEvolution.cpp
    llvm/trunk/test/Analysis/Delinearization/a.ll
    llvm/trunk/test/Analysis/ScalarEvolution/flags-from-poison.ll
    llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll
    llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll

Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=245118&r1=245117&r2=245118&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Fri Aug 14 17:45:26 2015
@@ -712,7 +712,8 @@ namespace llvm {
 
     /// getNegativeSCEV - Return the SCEV object corresponding to -V.
     ///
-    const SCEV *getNegativeSCEV(const SCEV *V);
+    const SCEV *getNegativeSCEV(const SCEV *V,
+                                SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
 
     /// getNotSCEV - Return the SCEV object corresponding to ~V.
     ///

Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=245118&r1=245117&r2=245118&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Fri Aug 14 17:45:26 2015
@@ -3339,15 +3339,16 @@ const SCEV *ScalarEvolution::getExisting
 
 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
 ///
-const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+                                             SCEV::NoWrapFlags Flags) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
     return getConstant(
                cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
 
   Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
-  return getMulExpr(V,
-                  getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+  return getMulExpr(
+      V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
 }
 
 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
@@ -3366,15 +3367,40 @@ const SCEV *ScalarEvolution::getNotSCEV(
 /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
                                           SCEV::NoWrapFlags Flags) {
-  assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
-
   // Fast path: X - X --> 0.
   if (LHS == RHS)
     return getConstant(LHS->getType(), 0);
 
-  // X - Y --> X + -Y.
-  // X -(nsw || nuw) Y --> X + -Y.
-  return getAddExpr(LHS, getNegativeSCEV(RHS));
+  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+  // makes it so that we cannot make much use of NUW.
+  auto AddFlags = SCEV::FlagAnyWrap;
+  const bool RHSIsNotMinSigned =
+      !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+    // Let M be the minimum representable signed value. Then (-1)*RHS
+    // signed-wraps if and only if RHS is M. That can happen even for
+    // a NSW subtraction because e.g. (-1)*M signed-wraps even though
+    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+    // (-1)*RHS, we need to prove that RHS != M.
+    //
+    // If LHS is non-negative and we know that LHS - RHS does not
+    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+    // either by proving that RHS > M or that LHS >= 0.
+    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+      AddFlags = SCEV::FlagNSW;
+    }
+  }
+
+  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
+  // RHS is NSW and LHS >= 0.
+  //
+  // The difficulty here is that the NSW flag may have been proven
+  // relative to a loop that is to be found in a recurrence in LHS and
+  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
+  // larger scope than intended.
+  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
 }
 
 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -4094,6 +4120,7 @@ ScalarEvolution::getRange(const SCEV *S,
 }
 
 SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+  if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
   const BinaryOperator *BinOp = cast<BinaryOperator>(V);
 
   // Return early if there are no flags to propagate to the SCEV.
@@ -4185,9 +4212,6 @@ const SCEV *ScalarEvolution::createSCEV(
     // because it leads to N-1 getAddExpr calls for N ultimate operands.
     // Instead, gather up all the operands and make a single getAddExpr call.
     // LLVM IR canonical form means we need only traverse the left operands.
-    //
-    // FIXME: Expand this handling of NSW and NUW to other instructions, like
-    // sub and mul.
     SmallVector<const SCEV *, 4> AddOps;
     for (Value *Op = U;; Op = U->getOperand(0)) {
       U = dyn_cast<Operator>(Op);
@@ -4198,7 +4222,7 @@ const SCEV *ScalarEvolution::createSCEV(
         break;
       }
 
-      if (auto *OpSCEV = getExistingSCEV(Op)) {
+      if (auto *OpSCEV = getExistingSCEV(U)) {
         AddOps.push_back(OpSCEV);
         break;
       }
@@ -4210,45 +4234,57 @@ const SCEV *ScalarEvolution::createSCEV(
       // since the flags are only known to apply to this particular
       // addition - they may not apply to other additions that can be
       // formed with operands from AddOps.
-      //
-      // FIXME: Expand this to sub instructions.
-      if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
-        SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
-        if (Flags != SCEV::FlagAnyWrap) {
-          AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
-                                      getSCEV(U->getOperand(1)), Flags));
-          break;
-        }
+      const SCEV *RHS = getSCEV(U->getOperand(1));
+      SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+      if (Flags != SCEV::FlagAnyWrap) {
+        const SCEV *LHS = getSCEV(U->getOperand(0));
+        if (Opcode == Instruction::Sub)
+          AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+        else
+          AddOps.push_back(getAddExpr(LHS, RHS, Flags));
+        break;
       }
 
-      const SCEV *Op1 = getSCEV(U->getOperand(1));
       if (Opcode == Instruction::Sub)
-        AddOps.push_back(getNegativeSCEV(Op1));
+        AddOps.push_back(getNegativeSCEV(RHS));
       else
-        AddOps.push_back(Op1);
+        AddOps.push_back(RHS);
     }
     return getAddExpr(AddOps);
   }
 
   case Instruction::Mul: {
-    // FIXME: Transfer NSW/NUW as in AddExpr.
     SmallVector<const SCEV *, 4> MulOps;
-    MulOps.push_back(getSCEV(U->getOperand(1)));
-    for (Value *Op = U->getOperand(0);
-         Op->getValueID() == Instruction::Mul + Value::InstructionVal;
-         Op = U->getOperand(0)) {
-      U = cast<Operator>(Op);
+    for (Value *Op = U;; Op = U->getOperand(0)) {
+      U = dyn_cast<Operator>(Op);
+      if (!U || U->getOpcode() != Instruction::Mul) {
+        assert(Op != V && "V should be a mul");
+        MulOps.push_back(getSCEV(Op));
+        break;
+      }
+
+      if (auto *OpSCEV = getExistingSCEV(U)) {
+        MulOps.push_back(OpSCEV);
+        break;
+      }
+
+      SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+      if (Flags != SCEV::FlagAnyWrap) {
+        MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+                                    getSCEV(U->getOperand(1)), Flags));
+        break;
+      }
+
       MulOps.push_back(getSCEV(U->getOperand(1)));
     }
-    MulOps.push_back(getSCEV(U->getOperand(0)));
     return getMulExpr(MulOps);
   }
   case Instruction::UDiv:
     return getUDivExpr(getSCEV(U->getOperand(0)),
                        getSCEV(U->getOperand(1)));
   case Instruction::Sub:
-    return getMinusSCEV(getSCEV(U->getOperand(0)),
-                        getSCEV(U->getOperand(1)));
+    return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+                        getNoWrapFlagsFromUB(U));
   case Instruction::And:
     // For an expression like x&255 that merely masks off the high bits,
     // use zext(trunc(x)) as the SCEV expression.
@@ -4368,9 +4404,18 @@ const SCEV *ScalarEvolution::createSCEV(
       if (SA->getValue().uge(BitWidth))
         break;
 
+      // It is currently not resolved how to interpret NSW for left
+      // shift by BitWidth - 1, so we avoid applying flags in that
+      // case. Remove this check (or this comment) once the situation
+      // is resolved. See
+      // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+      // and http://reviews.llvm.org/D8890 .
+      auto Flags = SCEV::FlagAnyWrap;
+      if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
       Constant *X = ConstantInt::get(getContext(),
         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
-      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
     }
     break;
 

Modified: llvm/trunk/test/Analysis/Delinearization/a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/Delinearization/a.ll?rev=245118&r1=245117&r2=245118&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/Delinearization/a.ll (original)
+++ llvm/trunk/test/Analysis/Delinearization/a.ll Fri Aug 14 17:45:26 2015
@@ -10,7 +10,7 @@
 ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
 ; CHECK: Base offset: %A
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
+; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<nw><%for.k>]
 
 define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
 entry:

Modified: llvm/trunk/test/Analysis/ScalarEvolution/flags-from-poison.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ScalarEvolution/flags-from-poison.ll?rev=245118&r1=245117&r2=245118&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/flags-from-poison.ll (original)
+++ llvm/trunk/test/Analysis/ScalarEvolution/flags-from-poison.ll Fri Aug 14 17:45:26 2015
@@ -356,3 +356,237 @@ loop:
 exit:
   ret void
 }
+
+; Example where a mul should get the nsw flag, so that a sext can be
+; distributed over the mul.
+define void @test-mul-nsw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nsw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nsw>
+  %index32 = mul nsw i32 %i, %stride
+
+; CHECK: %index64 =
+; CHECK: --> {0,+,(sext i32 %stride to i64)}<nsw>
+  %index64 = sext i32 %index32 to i64
+
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Example where a mul should get the nuw flag.
+define void @test-mul-nuw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nuw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nuw>
+  %index32 = mul nuw i32 %i, %stride
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nuw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Example where a shl should get the nsw flag, so that a sext can be
+; distributed over the shl.
+define void @test-shl-nsw(float* %input, i32 %start, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nsw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(256 * %start),+,256}<nsw>
+  %index32 = shl nsw i32 %i, 8
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (256 * %start) to i64),+,256}<nsw>
+  %index64 = sext i32 %index32 to i64
+
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Example where a shl should get the nuw flag.
+define void @test-shl-nuw(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nuw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,512}<nuw>
+  %index32 = shl nuw i32 %i, 9
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nuw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Example where a sub should *not* get the nsw flag, because of how
+; scalar evolution represents A - B as A + (-B) and -B can wrap even
+; in cases where A - B does not.
+define void @test-sub-no-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-no-nsw
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %sub) + %start),+,1}<nw>
+  %index32 = sub nsw i32 %i, %sub
+  %index64 = sext i32 %index32 to i64
+
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Example where a sub should get the nsw flag as the RHS cannot be the
+; minimal signed value.
+define void @test-sub-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw
+entry:
+  %halfsub = ashr i32 %sub, 1
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %halfsub)<nsw> + %start),+,1}<nsw>
+  %index32 = sub nsw i32 %i, %halfsub
+  %index64 = sext i32 %index32 to i64
+
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Example where a sub should get the nsw flag, since the LHS is non-negative,
+; which implies that the RHS cannot be the minimal signed value.
+define void @test-sub-nsw-lhs-non-negative(float* %input, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw-lhs-non-negative
+entry:
+  br label %loop
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(-1 * %sub),+,1}<nsw>
+  %index32 = sub nsw i32 %i, %sub
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (-1 * %sub) to i64),+,1}<nsw>
+  %index64 = sext i32 %index32 to i64
+
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %nexti = add nsw i32 %i, 1
+  %f = load float, float* %ptr, align 4
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+exit:
+  ret void
+}
+
+; Two adds with a sub in the middle and the sub should have nsw. There is
+; a special case for sequential adds/subs and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-sub-with-add(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-with-add
+entry:
+  br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + (-1 * %offset)),+,1}<nw>
+  %seq = add nsw nuw i32 %index32, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+  %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + (-1 * %offset)),+,1}<nsw>
+  %index32 = sub nsw i32 %j, %offset
+
+  %ptr = getelementptr inbounds float, float* %input, i32 %index32
+  %nexti = add nsw i32 %i, 1
+  store float 1.0, float* %ptr, align 4
+  br label %loop2
+exit:
+  ret void
+}
+
+
+; Subtraction of two recurrences. The addition in the SCEV that this
+; maps to is NSW, but the negation of the RHS does not since that
+; recurrence could be the most negative representable value.
+define void @subrecurrences(i32 %outer_l, i32 %inner_l, i32 %val) {
+; CHECK-LABEL: @subrecurrences
+ entry:
+  br label %outer
+
+outer:
+  %o_idx = phi i32 [ 0, %entry ], [ %o_idx.inc, %outer.be ]
+  %o_idx.inc = add nsw i32 %o_idx, 1
+  %cond = icmp eq i32 %o_idx, %val
+  br i1 %cond, label %inner, label %outer.be
+
+inner:
+  %i_idx = phi i32 [ 0, %outer ], [ %i_idx.inc, %inner ]
+  %i_idx.inc = add nsw i32 %i_idx, 1
+; CHECK: %v =
+; CHECK-NEXT: --> {{[{][{]}}-1,+,-1}<nw><%outer>,+,1}<nsw><%inner>
+  %v = sub nsw i32 %i_idx, %o_idx.inc
+  %forub = udiv i32 1, %v
+  %cond2 = icmp eq i32 %i_idx, %inner_l
+  br i1 %cond2, label %outer.be, label %inner
+
+outer.be:
+  %cond3 = icmp eq i32 %o_idx, %outer_l
+  br i1 %cond3, label %exit, label %outer
+
+exit:
+  ret void
+}

Modified: llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll?rev=245118&r1=245117&r2=245118&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll (original)
+++ llvm/trunk/test/Analysis/ScalarEvolution/min-max-exprs.ll Fri Aug 14 17:45:26 2015
@@ -33,7 +33,7 @@ bb2:
   %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
 ;                  min(N, i+3)
 ; CHECK:           select i1 %tmp4, i64 %tmp5, i64 %tmp6
-; CHECK-NEXT:  --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
+; CHECK-NEXT:  --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))<nsw>) smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)))<nsw>)
   %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
   %tmp12 = load i32, i32* %tmp11, align 4
   %tmp13 = shl nsw i32 %tmp12, 1

Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll?rev=245118&r1=245117&r2=245118&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll Fri Aug 14 17:45:26 2015
@@ -8,6 +8,11 @@ target triple = "nvptx64-unknown-unknown
 ; instruction to the SCEV, preventing distributing sext into the
 ; corresponding addrec.
 
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[i + offset];
+;
 define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
 ; CHECK-LABEL: @testadd
 ; CHECK: sext i32 %offset to i64
@@ -27,6 +32,105 @@ loop:
   %ptr = getelementptr inbounds float, float* %input, i64 %index64
   %addend = load float, float* %ptr, align 4
   %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[i - offset];
+;
+define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testsub
+; CHECK: sub i32 0, %offset
+; CHECK: sext i32
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = sub nuw nsw i32 %i, %offset
+  %index64 = sext i32 %index32 to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[i * stride];
+;
+define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @testmul
+; CHECK: sext i32 %stride to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = mul nuw nsw i32 %i, %stride
+  %index64 = sext i32 %index32 to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[3 * (i << 7)];
+;
+; The multiplication by 3 is to make the address calculation expensive
+; enough to force the introduction of a pointer induction variable.
+define float @testshl(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @testshl
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = shl nuw nsw i32 %i, 7
+  %index32mul = mul nuw nsw i32 %index32, 3
+  %index64 = sext i32 %index32mul to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
   %nexti = add nuw nsw i32 %i, 1
   %exitcond = icmp eq i32 %nexti, %numIterations
   br i1 %exitcond, label %exit, label %loop