[llvm] c416f67 - [IVDescriptors] Add pointer InductionDescriptors with non-constant strides (try 2)

Wed Apr 5 09:32:45 PDT 2023

Author: Philip Reames
Date: 2023-04-05T09:32:35-07:00
New Revision: c416f6700f513d86226482f47ad956ceb4e7c927

URL: https://github.com/llvm/llvm-project/commit/c416f6700f513d86226482f47ad956ceb4e7c927
DIFF: https://github.com/llvm/llvm-project/commit/c416f6700f513d86226482f47ad956ceb4e7c927.diff

LOG: [IVDescriptors] Add pointer InductionDescriptors with non-constant strides (try 2)

(JFYI - This has been heavily reframed since original attempt at landing.)

This change updates the InductionDescriptor logic to allow matching a pointer IV with a non-constant stride, but also updates the LoopVectorizer to bailout on such descriptors by default. This preserves the default vectorizer behavior.

In review, it was pointed out that there's multiple unfortunate performance implications which need to be addressed before this can be enabled. Having a flag allows us to exercise the behavior, and write test cases for logic which is otherwise unreachable (or hard to reach).

This will also enable non-constant stride pointer recurrences for other consumers. I've audited said code, and don't see any obvious issues.

Differential Revision: https://reviews.llvm.org/D147336

Added: 
    

Modified: 
    llvm/lib/Analysis/IVDescriptors.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
    llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll
    llvm/test/Transforms/LoopVectorize/pointer-induction.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 156d141c461ed..22b9f64ef88d8 100644

--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1288,8 +1288,6 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
   assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
          "Step value is zero");
 
-  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
-         "Step value should be constant for pointer induction");
   assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
          "StepValue is not an integer");
 
@@ -1570,15 +1568,25 @@ bool InductionDescriptor::isInductionPHI(
   }
 
   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+  PointerType *PtrTy = cast<PointerType>(PhiTy);
+
+  // Always use i8 element type for opaque pointer inductions.
+  // This allows induction variables w/non-constant steps.
+  if (PtrTy->isOpaque()) {
+    D = InductionDescriptor(StartValue, IK_PtrInduction, Step,
+                            /* BinOp */ nullptr,
+                            Type::getInt8Ty(PtrTy->getContext()));
+    return true;
+  }
+
   // Pointer induction should be a constant.
+  // TODO: This could be generalized, but should probably just
+  // be dropped instead once the migration to opaque ptrs is
+  // complete.
   if (!ConstStep)
     return false;
 
-  // Always use i8 element type for opaque pointer inductions.
-  PointerType *PtrTy = cast<PointerType>(PhiTy);
-  Type *ElementType = PtrTy->isOpaque()
-                          ? Type::getInt8Ty(PtrTy->getContext())
-                          : PtrTy->getNonOpaquePointerElementType();
+  Type *ElementType = PtrTy->getNonOpaquePointerElementType();
   if (!ElementType->isSized())
     return false;
 

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3bfa54a073bb6..64290f0749b9a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -37,6 +37,11 @@ static cl::opt<bool>
     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                        cl::desc("Enable if-conversion during vectorization."));
 
+static cl::opt<bool>
+AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
+                       cl::desc("Enable recognition of non-constant strided "
+                                "pointer induction variables."));
+
 namespace llvm {
 cl::opt<bool>
     HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
@@ -700,6 +705,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           continue;
         }
 
+        // We prevent matching non-constant strided pointer IVS to preserve
+        // historical vectorizer behavior after a generalization of the
+        // IVDescriptor code.  The intent is to remove this check, but we
+        // have to fix issues around code quality for such loops first.
+        auto isDisallowedStridedPointerInduction =
+          [](const InductionDescriptor &ID) {
+          if (AllowStridedPointerIVs)
+            return false;
+          return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
+            ID.getConstIntStepValue() == nullptr;
+        };
+
         // TODO: Instead of recording the AllowedExit, it would be good to
         // record the complementary set: NotAllowedExit. These include (but may
         // not be limited to):
@@ -715,7 +732,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         // By recording these, we can then reason about ways to vectorize each
         // of these NotAllowedExit.
         InductionDescriptor ID;
-        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
+        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
+            !isDisallowedStridedPointerInduction(ID)) {
           addInductionPhi(Phi, ID, AllowedExit);
           Requirements->addExactFPMathInst(ID.getExactFPMathInst());
           continue;
@@ -730,7 +748,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
         // As a last resort, coerce the PHI to a AddRec expression
         // and re-try classifying it a an induction PHI.
-        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
+        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
+            !isDisallowedStridedPointerInduction(ID)) {
           addInductionPhi(Phi, ID, AllowedExit);
           continue;
         }

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b174ab893efac..1cbbb8fe101df 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2491,8 +2491,6 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
     return CreateAdd(StartValue, Offset);
   }
   case InductionDescriptor::IK_PtrInduction: {
-    assert(isa<Constant>(Step) &&
-           "Expected constant step for pointer induction");
     return B.CreateGEP(ID.getElementType(), StartValue, CreateMul(Index, Step));
   }
   case InductionDescriptor::IK_FpInduction: {
@@ -8275,7 +8273,6 @@ VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
   if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
     VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
                                                            *PSE.getSE());
-    assert(isa<SCEVConstant>(II->getStep()));
     return new VPWidenPointerInductionRecipe(
         Phi, Operands[0], Step, *II,
         LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -9380,7 +9377,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
             PartStart, ConstantInt::get(PtrInd->getType(), Lane));
         Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
 
-        Value *Step = State.get(getOperand(1), VPIteration(0, Part));
+        Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
         Value *SclrGep = emitTransformedIndex(
             State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
         SclrGep->setName("next.gep");
@@ -9390,8 +9387,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
     return;
   }
 
-  assert(isa<SCEVConstant>(IndDesc.getStep()) &&
-         "Induction step not a SCEV constant!");
   Type *PhiType = IndDesc.getStep()->getType();
 
   // Build a pointer phi
@@ -9434,7 +9429,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
     StartOffset = State.Builder.CreateAdd(
         StartOffset, State.Builder.CreateStepVector(VecPhiType));
 
-    assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, Part)) &&
+    assert(ScalarStepValue == State.get(getOperand(1), VPIteration(Part, 0)) &&
            "scalar step must be the same across all parts");
     Value *GEP = State.Builder.CreateGEP(
         IndDesc.getElementType(), NewPointerPhi,

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 6cfdc62a65fed..d44e9461e5537 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck --check-prefixes=CHECK,NOSTRIDED %s
+; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -lv-strided-pointer-ivs=true -S | FileCheck --check-prefixes=CHECK,STRIDED %s
 
 
 define void @single_constant_stride_int_scaled(ptr %p) {
@@ -547,23 +548,111 @@ exit:
 
 
 define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
-; CHECK-LABEL: @double_stride_ptr_iv(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[P2:%.*]], [[ENTRY]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
-; CHECK-NEXT:    store i32 [[Y0]], ptr [[PTR2]], align 4
-; CHECK-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
-; CHECK-NEXT:    [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
-; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
-; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
+; NOSTRIDED-LABEL: @double_stride_ptr_iv(
+; NOSTRIDED-NEXT:  entry:
+; NOSTRIDED-NEXT:    br label [[LOOP:%.*]]
+; NOSTRIDED:       loop:
+; NOSTRIDED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT:    [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT:    [[PTR2:%.*]] = phi ptr [ [[P2:%.*]], [[ENTRY]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
+; NOSTRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
+; NOSTRIDED-NEXT:    store i32 [[Y0]], ptr [[PTR2]], align 4
+; NOSTRIDED-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; NOSTRIDED-NEXT:    [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
+; NOSTRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; NOSTRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; NOSTRIDED-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
+; NOSTRIDED:       exit:
+; NOSTRIDED-NEXT:    ret void
+;
+; STRIDED-LABEL: @double_stride_ptr_iv(
+; STRIDED-NEXT:  entry:
+; STRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
+; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
+; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; STRIDED:       vector.scevcheck:
+; STRIDED-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
+; STRIDED-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; STRIDED:       vector.memcheck:
+; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 1027
+; STRIDED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1027
+; STRIDED-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP1]]
+; STRIDED-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]]
+; STRIDED-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; STRIDED-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; STRIDED:       vector.ph:
+; STRIDED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; STRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
+; STRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; STRIDED-NEXT:    [[TMP5:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
+; STRIDED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP5]]
+; STRIDED-NEXT:    [[TMP6:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
+; STRIDED-NEXT:    [[IND_END3:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP6]]
+; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; STRIDED:       vector.body:
+; STRIDED-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[POINTER_PHI7:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND8:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
+; STRIDED-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; STRIDED-NEXT:    [[TMP10:%.*]] = mul i64 [[STRIDE]], [[TMP9]]
+; STRIDED-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; STRIDED-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; STRIDED-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; STRIDED-NEXT:    [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; STRIDED-NEXT:    [[TMP13:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP12]]
+; STRIDED-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
+; STRIDED-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; STRIDED-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP13]], [[DOTSPLAT6]]
+; STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
+; STRIDED-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; STRIDED-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 1
+; STRIDED-NEXT:    [[TMP18:%.*]] = mul i64 [[STRIDE]], [[TMP17]]
+; STRIDED-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP16]], 0
+; STRIDED-NEXT:    [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP19]], i64 0
+; STRIDED-NEXT:    [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; STRIDED-NEXT:    [[TMP20:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; STRIDED-NEXT:    [[TMP21:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT10]], [[TMP20]]
+; STRIDED-NEXT:    [[VECTOR_GEP13:%.*]] = mul <vscale x 4 x i64> [[TMP21]], [[DOTSPLAT6]]
+; STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[POINTER_PHI7]], <vscale x 4 x i64> [[VECTOR_GEP13]]
+; STRIDED-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope !16
+; STRIDED-NEXT:    [[TMP23:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; STRIDED-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP23]], <vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope !19, !noalias !16
+; STRIDED-NEXT:    [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; STRIDED-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP24]], 4
+; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP25]]
+; STRIDED-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
+; STRIDED-NEXT:    [[PTR_IND8]] = getelementptr i8, ptr [[POINTER_PHI7]], i64 [[TMP18]]
+; STRIDED-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; STRIDED:       middle.block:
+; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; STRIDED:       scalar.ph:
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ], [ [[P]], [[VECTOR_MEMCHECK]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[ENTRY]] ], [ [[P2]], [[VECTOR_SCEVCHECK]] ], [ [[P2]], [[VECTOR_MEMCHECK]] ]
+; STRIDED-NEXT:    br label [[LOOP:%.*]]
+; STRIDED:       loop:
+; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; STRIDED-NEXT:    [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
+; STRIDED-NEXT:    [[PTR2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
+; STRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
+; STRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
+; STRIDED-NEXT:    store i32 [[Y0]], ptr [[PTR2]], align 4
+; STRIDED-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE]]
+; STRIDED-NEXT:    [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
+; STRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
+; STRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
+; STRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
+; STRIDED:       exit:
+; STRIDED-NEXT:    ret void
 ;
 entry:
   br label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll
index 82574635f74ce..bdb005b0c265c 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll
@@ -1,24 +1,85 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S | FileCheck --check-prefixes=CHECK,DEFAULT %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -lv-strided-pointer-ivs=true -S | FileCheck --check-prefixes=CHECK,STRIDED %s
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Test the scalar expansion of a non-constant stride pointer IV
 define void @non_constant_scalar_expansion(i32 %0, ptr %call) {
-; CHECK-LABEL: @non_constant_scalar_expansion(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
-; CHECK-NEXT:    br label [[FOR_COND:%.*]]
-; CHECK:       for.cond:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 30, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT:    [[P_0:%.*]] = phi ptr [ null, [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP1]]
-; CHECK-NEXT:    store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[INC]] = add i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_COND]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
+; DEFAULT-LABEL: @non_constant_scalar_expansion(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
+; DEFAULT-NEXT:    br label [[FOR_COND:%.*]]
+; DEFAULT:       for.cond:
+; DEFAULT-NEXT:    [[TMP1:%.*]] = phi i32 [ 30, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
+; DEFAULT-NEXT:    [[P_0:%.*]] = phi ptr [ null, [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
+; DEFAULT-NEXT:    [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
+; DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP1]]
+; DEFAULT-NEXT:    store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
+; DEFAULT-NEXT:    [[INC]] = add i32 [[TMP1]], 1
+; DEFAULT-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
+; DEFAULT-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_COND]]
+; DEFAULT:       for.end:
+; DEFAULT-NEXT:    ret void
+;
+; STRIDED-LABEL: @non_constant_scalar_expansion(
+; STRIDED-NEXT:  entry:
+; STRIDED-NEXT:    [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
+; STRIDED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; STRIDED:       vector.ph:
+; STRIDED-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; STRIDED-NEXT:    [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]]
+; STRIDED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
+; STRIDED-NEXT:    [[TMP3:%.*]] = sext i32 [[MUL]] to i64
+; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; STRIDED:       vector.body:
+; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; STRIDED-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP3]]
+; STRIDED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
+; STRIDED-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 1
+; STRIDED-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], [[TMP3]]
+; STRIDED-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
+; STRIDED-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; STRIDED-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], [[TMP3]]
+; STRIDED-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
+; STRIDED-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 3
+; STRIDED-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], [[TMP3]]
+; STRIDED-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
+; STRIDED-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
+; STRIDED-NEXT:    [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]]
+; STRIDED-NEXT:    [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0
+; STRIDED-NEXT:    [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], 1
+; STRIDED-NEXT:    [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 2
+; STRIDED-NEXT:    [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 3
+; STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP12]]
+; STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP13]]
+; STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP14]]
+; STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP15]]
+; STRIDED-NEXT:    store ptr [[NEXT_GEP]], ptr [[TMP16]], align 4
+; STRIDED-NEXT:    store ptr [[NEXT_GEP2]], ptr [[TMP17]], align 4
+; STRIDED-NEXT:    store ptr [[NEXT_GEP3]], ptr [[TMP18]], align 4
+; STRIDED-NEXT:    store ptr [[NEXT_GEP4]], ptr [[TMP19]], align 4
+; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264
+; STRIDED-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; STRIDED:       middle.block:
+; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4294967267, 4294967264
+; STRIDED-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; STRIDED:       scalar.ph:
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ -2, [[MIDDLE_BLOCK]] ], [ 30, [[ENTRY:%.*]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY]] ]
+; STRIDED-NEXT:    br label [[FOR_COND:%.*]]
+; STRIDED:       for.cond:
+; STRIDED-NEXT:    [[TMP21:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
+; STRIDED-NEXT:    [[P_0:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
+; STRIDED-NEXT:    [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
+; STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP21]]
+; STRIDED-NEXT:    store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
+; STRIDED-NEXT:    [[INC]] = add i32 [[TMP21]], 1
+; STRIDED-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP21]], 0
+; STRIDED-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+; STRIDED:       for.end:
+; STRIDED-NEXT:    ret void
 ;
 entry:
   %mul = shl i32 %0, 1
@@ -38,3 +99,5 @@ for.cond:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.cond
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

diff  --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 9c5bedfb7b019..91f2156301560 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck --check-prefixes=CHECK,DEFAULT %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -lv-strided-pointer-ivs=true -S | FileCheck --check-prefixes=CHECK,STRIDED %s
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
 
@@ -208,21 +209,70 @@ exit:                            ; preds = %loop.body
 
 ; Test the vector expansion of a non-constant stride pointer IV
 define void @non_constant_vector_expansion(i32 %0, ptr %call) {
-; CHECK-LABEL: @non_constant_vector_expansion(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
-; CHECK-NEXT:    br label [[FOR_COND:%.*]]
-; CHECK:       for.cond:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 30, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT:    [[P_0:%.*]] = phi ptr [ null, [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP1]]
-; CHECK-NEXT:    store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[INC]] = add i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_COND]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
+; DEFAULT-LABEL: @non_constant_vector_expansion(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
+; DEFAULT-NEXT:    br label [[FOR_COND:%.*]]
+; DEFAULT:       for.cond:
+; DEFAULT-NEXT:    [[TMP1:%.*]] = phi i32 [ 30, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
+; DEFAULT-NEXT:    [[P_0:%.*]] = phi ptr [ null, [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
+; DEFAULT-NEXT:    [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
+; DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP1]]
+; DEFAULT-NEXT:    store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
+; DEFAULT-NEXT:    [[INC]] = add i32 [[TMP1]], 1
+; DEFAULT-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
+; DEFAULT-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END:%.*]], label [[FOR_COND]]
+; DEFAULT:       for.end:
+; DEFAULT-NEXT:    ret void
+;
+; STRIDED-LABEL: @non_constant_vector_expansion(
+; STRIDED-NEXT:  entry:
+; STRIDED-NEXT:    [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1
+; STRIDED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; STRIDED:       vector.scevcheck:
+; STRIDED-NEXT:    br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; STRIDED:       vector.ph:
+; STRIDED-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; STRIDED-NEXT:    [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]]
+; STRIDED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
+; STRIDED-NEXT:    [[TMP3:%.*]] = sext i32 [[MUL]] to i64
+; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; STRIDED:       vector.body:
+; STRIDED-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; STRIDED-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
+; STRIDED-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT:    [[VECTOR_GEP:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
+; STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]]
+; STRIDED-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
+; STRIDED-NEXT:    [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]]
+; STRIDED-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 0
+; STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP6]]
+; STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP7]], i32 0
+; STRIDED-NEXT:    store <4 x ptr> [[TMP5]], ptr [[TMP8]], align 4
+; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]]
+; STRIDED-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264
+; STRIDED-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; STRIDED:       middle.block:
+; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4294967267, 4294967264
+; STRIDED-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; STRIDED:       scalar.ph:
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ -2, [[MIDDLE_BLOCK]] ], [ 30, [[ENTRY:%.*]] ], [ 30, [[VECTOR_SCEVCHECK]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY]] ], [ null, [[VECTOR_SCEVCHECK]] ]
+; STRIDED-NEXT:    br label [[FOR_COND:%.*]]
+; STRIDED:       for.cond:
+; STRIDED-NEXT:    [[TMP10:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
+; STRIDED-NEXT:    [[P_0:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_COND]] ]
+; STRIDED-NEXT:    [[ADD_PTR]] = getelementptr i8, ptr [[P_0]], i32 [[MUL]]
+; STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP10]]
+; STRIDED-NEXT:    store ptr [[P_0]], ptr [[ARRAYIDX]], align 4
+; STRIDED-NEXT:    [[INC]] = add i32 [[TMP10]], 1
+; STRIDED-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0
+; STRIDED-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+; STRIDED:       for.end:
+; STRIDED-NEXT:    ret void
 ;
 entry:
   %mul = shl i32 %0, 1