[llvm] r269023 - [LoopVectorize] Handling induction variable with non-constant step.

Tue May 10 00:33:36 PDT 2016

Author: delena
Date: Tue May 10 02:33:35 2016
New Revision: 269023

URL: http://llvm.org/viewvc/llvm-project?rev=269023&view=rev
Log:
[LoopVectorize] Handling induction variable with non-constant step.

Allow vectorization when the step is a loop-invariant variable.
This is the loop example that is getting vectorized after the patch:

 int int_inc;
 int bar(int init, int *restrict A, int N) {

  int x = init;
  for (int i=0;i<N;i++){
    A[i] = x;
    x += int_inc;
  }
  return x;
 }

"x" is an induction variable with *loop-invariant* step.
But it is not a primary induction. Primary induction variable with non-constant step is not handled yet.

Differential Revision: http://reviews.llvm.org/D19258


Added:
    llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll
Modified:
    llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=269023&r1=269022&r2=269023&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Tue May 10 02:33:35 2016
@@ -269,7 +269,7 @@ public:
 public:
   /// Default constructor - creates an invalid induction.
   InductionDescriptor()
-      : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
+      : StartValue(nullptr), IK(IK_NoInduction), Step(nullptr) {}
 
   /// Get the consecutive direction. Returns:
   ///   0 - unknown or non-consecutive.
@@ -283,11 +283,13 @@ public:
   /// For pointer induction, returns StartValue[Index * StepValue].
   /// FIXME: The newly created binary instructions should contain nsw/nuw
   /// flags, which can be found from the original scalar operations.
-  Value *transform(IRBuilder<> &B, Value *Index) const;
+  Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
+                   const DataLayout& DL) const;
 
   Value *getStartValue() const { return StartValue; }
   InductionKind getKind() const { return IK; }
-  ConstantInt *getStepValue() const { return StepValue; }
+  const SCEV *getStep() const { return Step; }
+  ConstantInt *getConstIntStepValue() const;
 
   /// Returns true if \p Phi is an induction. If \p Phi is an induction,
   /// the induction descriptor \p D will contain the data describing this
@@ -308,14 +310,14 @@ public:
 
 private:
   /// Private constructor - used by \c isInductionPHI.
-  InductionDescriptor(Value *Start, InductionKind K, ConstantInt *Step);
+  InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step);
 
   /// Start value.
   TrackingVH<Value> StartValue;
   /// Induction kind.
   InductionKind IK;
   /// Step value.
-  ConstantInt *StepValue;
+  const SCEV *Step;
 };
 
 BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,

Modified: llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp?rev=269023&r1=269022&r2=269023&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp Tue May 10 02:33:35 2016
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/IR/Dominators.h"
@@ -653,45 +654,77 @@ Value *RecurrenceDescriptor::createMinMa
 }
 
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         ConstantInt *Step)
-  : StartValue(Start), IK(K), StepValue(Step) {
+                                         const SCEV *Step)
+  : StartValue(Start), IK(K), Step(Step) {
   assert(IK != IK_NoInduction && "Not an induction");
+
+  // Start value type should match the induction kind and the value
+  // itself should not be null.
   assert(StartValue && "StartValue is null");
-  assert(StepValue && !StepValue->isZero() && "StepValue is zero");
   assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
          "StartValue is not a pointer for pointer induction");
   assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
          "StartValue is not an integer for integer induction");
-  assert(StepValue->getType()->isIntegerTy() &&
-         "StepValue is not an integer");
+
+  // Check the Step Value. It should be non-zero integer value.
+  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+         "Step value is zero");
+
+  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+         "Step value should be constant for pointer induction");
+  assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
 }
 
 int InductionDescriptor::getConsecutiveDirection() const {
-  if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
-    return StepValue->getSExtValue();
+  ConstantInt *ConstStep = getConstIntStepValue();
+  if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+    return ConstStep->getSExtValue();
   return 0;
 }
 
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+  if (isa<SCEVConstant>(Step))
+    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+  return nullptr;
+}
+
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
+                                      ScalarEvolution *SE,
+                                      const DataLayout& DL) const {
+
+  SCEVExpander Exp(*SE, DL, "induction");
   switch (IK) {
-  case IK_IntInduction:
+  case IK_IntInduction: {
     assert(Index->getType() == StartValue->getType() &&
            "Index type does not match StartValue type");
-    if (StepValue->isMinusOne())
-      return B.CreateSub(StartValue, Index);
-    if (!StepValue->isOne())
-      Index = B.CreateMul(Index, StepValue);
-    return B.CreateAdd(StartValue, Index);
 
-  case IK_PtrInduction:
-    assert(Index->getType() == StepValue->getType() &&
+    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
+    // and calculate (Start + Index * Step) for all cases, without
+    // special handling for "isOne" and "isMinusOne".
+    // But in the real life the result code getting worse. We mix SCEV
+    // expressions and ADD/SUB operations and receive redundant
+    // intermediate values being calculated in different ways and
+    // Instcombine is unable to reduce them all.
+
+    if (getConstIntStepValue() &&
+        getConstIntStepValue()->isMinusOne())
+      return B.CreateSub(StartValue, Index);
+    if (getConstIntStepValue() &&
+        getConstIntStepValue()->isOne())
+      return B.CreateAdd(StartValue, Index);
+    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
+                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
+    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
+  }
+  case IK_PtrInduction: {
+    assert(Index->getType() == Step->getType() &&
            "Index type does not match StepValue type");
-    if (StepValue->isMinusOne())
-      Index = B.CreateNeg(Index);
-    else if (!StepValue->isOne())
-      Index = B.CreateMul(Index, StepValue);
+    assert(isa<SCEVConstant>(Step) &&
+           "Expected constant step for pointer induction");
+    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
+    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
     return B.CreateGEP(nullptr, StartValue, Index);
-
+  }
   case IK_NoInduction:
     return nullptr;
   }
@@ -746,17 +779,22 @@ bool InductionDescriptor::isInductionPHI
     Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
   const SCEV *Step = AR->getStepRecurrence(*SE);
   // Calculate the pointer stride and check if it is consecutive.
-  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
-  if (!C)
+  // The stride may be a constant or a loop invariant integer value.
+  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+  if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
     return false;
 
-  ConstantInt *CV = C->getValue();
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, CV);
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
     return true;
   }
 
   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+  // Pointer induction should be a constant.
+  if (!ConstStep)
+    return false;
+
+  ConstantInt *CV = ConstStep->getValue();
   Type *PointerElementType = PhiTy->getPointerElementType();
   // The pointer stride cannot be determined if the pointer element type is not
   // sized.
@@ -771,8 +809,8 @@ bool InductionDescriptor::isInductionPHI
   int64_t CVSize = CV->getSExtValue();
   if (CVSize % Size)
     return false;
-  auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
-
+  auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
+                                    true /* signed */);
   D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
   return true;
 }

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=269023&r1=269022&r2=269023&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue May 10 02:33:35 2016
@@ -416,6 +416,12 @@ protected:
   /// to each vector element of Val. The sequence starts at StartIndex.
   virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
 
+  /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
+  /// to each vector element of Val. The sequence starts at StartIndex.
+  /// Step is a SCEV. In order to get StepValue it takes the existing value
+  /// from SCEV or creates a new using SCEVExpander.
+  virtual Value *getStepVector(Value *Val, int StartIdx, const SCEV *Step);
+
   /// When we go over instructions in the basic block we rely on previous
   /// values within the current basic block or on loop invariant values.
   /// When we widen (vectorize) values we place them in the map. If the values
@@ -600,6 +606,7 @@ private:
   void vectorizeMemoryInstruction(Instruction *Instr) override;
   Value *getBroadcastInstrs(Value *V) override;
   Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
+  Value *getStepVector(Value *Val, int StartIdx, const SCEV *StepSCEV) override;
   Value *reverseVector(Value *Vec) override;
 };
 
@@ -2076,6 +2083,15 @@ Value *InnerLoopVectorizer::getBroadcast
 }
 
 Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
+                                          const SCEV *StepSCEV) {
+  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+  SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+  Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(),
+                                       &*Builder.GetInsertPoint());
+  return getStepVector(Val, StartIdx, StepValue);
+}
+
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
                                           Value *Step) {
   assert(Val->getType()->isVectorTy() && "Must be a vector");
   assert(Val->getType()->getScalarType()->isIntegerTy() &&
@@ -3141,9 +3157,10 @@ void InnerLoopVectorizer::createEmptyLoo
       EndValue = CountRoundDown;
     } else {
       IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
-      Value *CRD = B.CreateSExtOrTrunc(
-          CountRoundDown, II.getStepValue()->getType(), "cast.crd");
-      EndValue = II.transform(B, CRD);
+      Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
+                                       II.getStep()->getType(), "cast.crd");
+      const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+      EndValue = II.transform(B, CRD, PSE.getSE(), DL);
       EndValue->setName("ind.end");
     }
 
@@ -4035,6 +4052,7 @@ void InnerLoopVectorizer::widenPHIInstru
   assert(Legal->getInductionVars()->count(P) && "Not an induction variable");
 
   InductionDescriptor II = Legal->getInductionVars()->lookup(P);
+  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
 
   // FIXME: The newly created binary instructions should contain nsw/nuw flags,
   // which can be found from the original scalar operations.
@@ -4048,14 +4066,14 @@ void InnerLoopVectorizer::widenPHIInstru
     Value *V = Induction;
     if (P != OldInduction) {
       V = Builder.CreateSExtOrTrunc(Induction, P->getType());
-      V = II.transform(Builder, V);
+      V = II.transform(Builder, V, PSE.getSE(), DL);
       V->setName("offset.idx");
     }
     Value *Broadcasted = getBroadcastInstrs(V);
     // After broadcasting the induction variable we need to make the vector
     // consecutive by adding 0, 1, 2, etc.
     for (unsigned part = 0; part < UF; ++part)
-      Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
+      Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep());
     return;
   }
   case InductionDescriptor::IK_PtrInduction:
@@ -4063,7 +4081,7 @@ void InnerLoopVectorizer::widenPHIInstru
     assert(P->getType()->isPointerTy() && "Unexpected type.");
     // This is the normalized GEP that starts counting at zero.
     Value *PtrInd = Induction;
-    PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
+    PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType());
     // This is the vector of results. Notice that we don't generate
     // vector geps because scalar geps result in better code.
     for (unsigned part = 0; part < UF; ++part) {
@@ -4071,7 +4089,7 @@ void InnerLoopVectorizer::widenPHIInstru
         int EltIndex = part;
         Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
         Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-        Value *SclrGep = II.transform(Builder, GlobalIdx);
+        Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
         SclrGep->setName("next.gep");
         Entry[part] = SclrGep;
         continue;
@@ -4082,7 +4100,7 @@ void InnerLoopVectorizer::widenPHIInstru
         int EltIndex = i + part * VF;
         Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
         Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
-        Value *SclrGep = II.transform(Builder, GlobalIdx);
+        Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
         SclrGep->setName("next.gep");
         VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
                                              Builder.getInt32(i), "insert.gep");
@@ -4218,23 +4236,26 @@ void InnerLoopVectorizer::vectorizeBlock
     case Instruction::BitCast: {
       CastInst *CI = dyn_cast<CastInst>(it);
       setDebugLocFromInst(Builder, &*it);
-      /// Optimize the special case where the source is the induction
-      /// variable. Notice that we can only optimize the 'trunc' case
+      /// Optimize the special case where the source is a constant integer
+      /// induction variable. Notice that we can only optimize the 'trunc' case
       /// because: a. FP conversions lose precision, b. sext/zext may wrap,
       /// c. other casts depend on pointer size.
+
       if (CI->getOperand(0) == OldInduction &&
           it->getOpcode() == Instruction::Trunc) {
-        Value *ScalarCast =
-            Builder.CreateCast(CI->getOpcode(), Induction, CI->getType());
-        Value *Broadcasted = getBroadcastInstrs(ScalarCast);
         InductionDescriptor II =
-            Legal->getInductionVars()->lookup(OldInduction);
-        Constant *Step = ConstantInt::getSigned(
-            CI->getType(), II.getStepValue()->getSExtValue());
-        for (unsigned Part = 0; Part < UF; ++Part)
-          Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
-        addMetadata(Entry, &*it);
-        break;
+          Legal->getInductionVars()->lookup(OldInduction);
+        if (auto StepValue = II.getConstIntStepValue()) {
+          StepValue = ConstantInt::getSigned(cast<IntegerType>(CI->getType()),
+                                             StepValue->getSExtValue());
+          Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+                                                 CI->getType());
+          Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+          for (unsigned Part = 0; Part < UF; ++Part)
+            Entry[Part] = getStepVector(Broadcasted, VF * Part, StepValue);
+          addMetadata(Entry, &*it);
+          break;
+        }
       }
       /// Vectorize casts.
       Type *DestTy =
@@ -4596,9 +4617,11 @@ bool LoopVectorizationLegality::addInduc
 
   // Int inductions are special because we only allow one IV.
   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
-      ID.getStepValue()->isOne() &&
+      ID.getConstIntStepValue() &&
+      ID.getConstIntStepValue()->isOne() &&
       isa<Constant>(ID.getStartValue()) &&
-        cast<Constant>(ID.getStartValue())->isNullValue()) {
+      cast<Constant>(ID.getStartValue())->isNullValue()) {
+
     // Use the phi node with the widest type as induction. Use the last
     // one if there are multiple (no good reason for doing this other
     // than it is expedient). We've checked that it begins at zero and
@@ -6235,6 +6258,15 @@ Value *InnerLoopUnroller::reverseVector(
 
 Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
 
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx,
+                                        const SCEV *StepSCEV) {
+  const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+  SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+  Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(),
+                                       &*Builder.GetInsertPoint());
+  return getStepVector(Val, StartIdx, StepValue);
+}
+
 Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
   // When unrolling and the VF is 1, we only need to add a simple scalar.
   Type *ITy = Val->getType();

Added: llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll?rev=269023&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll Tue May 10 02:33:35 2016
@@ -0,0 +1,124 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -S | FileCheck %s
+
+; int int_inc;
+;
+;int induction_with_global(int init, int *restrict A, int N) {
+;  int x = init;
+;  for (int i=0;i<N;i++){
+;    A[i] = x;
+;    x += int_inc;
+;  }
+;  return x;
+;}
+
+; CHECK-LABEL: @induction_with_global(
+; CHECK: %[[INT_INC:.*]] = load i32, i32* @int_inc, align 4
+; CHECK: vector.body:
+; CHECK:  %[[VAR1:.*]] = insertelement <8 x i32> undef, i32 %[[INT_INC]], i32 0
+; CHECK:  %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK:  mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VAR2]]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+
+ at int_inc = common global i32 0, align 4
+
+define i32 @induction_with_global(i32 %init, i32* noalias nocapture %A, i32 %N) {
+entry:
+  %cmp4 = icmp sgt i32 %N, 0
+  br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = load i32, i32* @int_inc, align 4
+  %1 = mul i32 %0, %N
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %x.05 = phi i32 [ %init, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %x.05, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %x.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  %2 = add i32 %1, %init
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %x.0.lcssa = phi i32 [ %init, %entry ], [ %2, %for.end.loopexit ]
+  ret i32 %x.0.lcssa
+}
+
+
+;int induction_with_loop_inv(int init, int *restrict A, int N, int M) {
+;  int x = init;
+;  for (int j = 0; j < M; j++) {
+;    for (int i=0; i<N; i++){
+;      A[i] = x;
+;      x += j; // induction step is a loop invariant variable
+;    }
+;  }
+;  return x;
+;}
+
+; CHECK-LABEL: @induction_with_loop_inv(
+; CHECK: for.cond1.preheader:                            
+; CHECK: %[[INDVAR0:.*]] = phi i32 [ 0,
+; CHECK: %[[INDVAR1:.*]] = phi i32 [ 0,
+; CHECK: vector.body:
+; CHECK:  %[[VAR1:.*]] = insertelement <8 x i32> undef, i32 %[[INDVAR1]], i32 0
+; CHECK:  %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK:  mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VAR2]]
+
+define i32 @induction_with_loop_inv(i32 %init, i32* noalias nocapture %A, i32 %N, i32 %M) {
+entry:
+  %cmp10 = icmp sgt i32 %M, 0
+  br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %for.end6
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp27 = icmp sgt i32 %N, 0
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc4, %for.cond1.preheader.lr.ph
+  %indvars.iv15 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next16, %for.inc4 ]
+  %j.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc5, %for.inc4 ]
+  %x.011 = phi i32 [ %init, %for.cond1.preheader.lr.ph ], [ %x.1.lcssa, %for.inc4 ]
+  br i1 %cmp27, label %for.body3.preheader, label %for.inc4
+
+for.body3.preheader:                              ; preds = %for.cond1.preheader
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.preheader, %for.body3
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
+  %x.18 = phi i32 [ %add, %for.body3 ], [ %x.011, %for.body3.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %x.18, i32* %arrayidx, align 4
+  %add = add nsw i32 %x.18, %j.012
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.inc4.loopexit, label %for.body3
+
+for.inc4.loopexit:                                ; preds = %for.body3
+  %0 = add i32 %x.011, %indvars.iv15
+  br label %for.inc4
+
+for.inc4:                                         ; preds = %for.inc4.loopexit, %for.cond1.preheader
+  %x.1.lcssa = phi i32 [ %x.011, %for.cond1.preheader ], [ %0, %for.inc4.loopexit ]
+  %inc5 = add nuw nsw i32 %j.012, 1
+  %indvars.iv.next16 = add i32 %indvars.iv15, %N
+  %exitcond17 = icmp eq i32 %inc5, %M
+  br i1 %exitcond17, label %for.end6.loopexit, label %for.cond1.preheader
+
+for.end6.loopexit:                                ; preds = %for.inc4
+  %x.1.lcssa.lcssa = phi i32 [ %x.1.lcssa, %for.inc4 ]
+  br label %for.end6
+
+for.end6:                                         ; preds = %for.end6.loopexit, %entry
+  %x.0.lcssa = phi i32 [ %init, %entry ], [ %x.1.lcssa.lcssa, %for.end6.loopexit ]
+  ret i32 %x.0.lcssa
+}