[llvm-branch-commits] [llvm] [VPlan] Start implementing VPlan-based stride multiversioning (PR #182595)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 20 13:23:04 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Andrei Elovikov (eas)
<details>
<summary>Changes</summary>
This commit only implements the run-time guard without actually optimizing the vector loop. That would come in a separate PR to ease review.
Stacked on top of https://github.com/llvm/llvm-project/pull/182594.
---
Patch is 96.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182595.diff
10 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+111)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+43)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+14-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.cpp (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll (+137-65)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-based-stride-mv.ll (+227-59)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 54bb073eb4f81..189dcf121e65b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -382,6 +382,11 @@ class VPBuilder {
return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
}
+ VPExpandStridePredicatesRecipe *
+ createExpandSCEVPredicate(const SCEVUnionPredicate &StridePredicates) {
+ return tryInsertInstruction(new VPExpandStridePredicatesRecipe(StridePredicates));
+ }
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5ea9fa7ac3288..86d98ce58e87c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -418,6 +418,11 @@ static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
+static cl::opt<bool> EnableVPlanBasedStrideMV(
+ "enable-vplan-based-stride-mv", cl::init(false), cl::Hidden,
+ cl::desc("Perform stride multiversioning directly on VPlan instead of in "
+ "LoopAccessAnalysis."));
+
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -10121,6 +10126,10 @@ void VPlanTransforms::makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
return false;
});
+ if (EnableVPlanBasedStrideMV)
+ RUN_VPLAN_PASS(VPlanTransforms::multiversionForUnitStridedMemOps, Plan,
+ CostCtx, MemOps);
+
VPlanTransforms::runPass("delegateMemOpWideningToLegacyCM", ProcessSubset,
Plan, [&](VPInstruction *VPI) {
VPRecipeBase *Recipe =
@@ -10132,3 +10141,105 @@ void VPlanTransforms::makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
return ReplaceWith(VPI, Recipe);
});
}
+
+void VPlanTransforms::multiversionForUnitStridedMemOps(
+ VPlan &Plan, VPCostContext &CostCtx,
+ SmallVectorImpl<VPInstruction *> &MemOps) {
+ SmallVector<VPInstruction *> RemainingOps;
+ // Makes a copy of VPTypeAnalysis (not sure where the problem is).
+ auto Types = CostCtx.Types;
+
+ ScalarEvolution *SE = CostCtx.PSE.getSE();
+
+ PredicatedScalarEvolution StrideMVPSE(*SE, const_cast<Loop &>(*CostCtx.L));
+
+ SCEVUnionPredicate StridePredicates({}, *SE);
+
+  // Use `for_each` so that we can do `return Skip();`.
+ for_each(MemOps, [&](VPInstruction *VPI) {
+ auto Skip = [&]() { RemainingOps.push_back(VPI); };
+ auto *PtrOp = VPI->getOpcode() == Instruction::Load ? VPI->getOperand(0)
+ : VPI->getOperand(1);
+
+ const SCEV *PtrSCEV =
+ vputils::getSCEVExprForVPValue(PtrOp, CostCtx.PSE, CostCtx.L);
+ const SCEV *Start = nullptr;
+ const SCEV *Stride = nullptr;
+
+ if (!match(PtrSCEV, m_scev_AffineAddRec(m_SCEV(Start), m_SCEV(Stride),
+ m_SpecificLoop(CostCtx.L)))) {
+ return Skip();
+ }
+
+ Type *ScalarTy = Types.inferScalarType(
+ VPI->getOpcode() == Instruction::Load ? VPI : VPI->getOperand(0));
+
+ const SCEV *TypeSize = SE->getSizeOfExpr(
+ Stride->getType(), SE->getDataLayout().getTypeStoreSize(ScalarTy));
+
+ if (isa<SCEVConstant>(Stride)) {
+    // TODO: Process non-MV unit strided accesses prior to this pass so that
+    // we could be sure this one is due to another MemOp MV.
+ return Skip();
+ }
+
+ const SCEVConstant *StrideConstantMultiplier;
+ const SCEV *StrideNonConstantMultiplier;
+
+ const SCEV *ToMultiVersion = Stride;
+ const SCEV *MVConst = TypeSize;
+ if (match(Stride, m_scev_c_Mul(m_SCEVConstant(StrideConstantMultiplier),
+ m_SCEV(StrideNonConstantMultiplier)))) {
+ if (TypeSize != StrideConstantMultiplier) {
+      // TODO: Support `TypeSize = N * StrideConstantMultiplier`,
+      // including negative `N`. For now, only process when they're equal,
+      // which matches the useful part of the legacy behavior that
+      // multiversions the GEP index for stride one.
+ return Skip();
+ }
+ ToMultiVersion = StrideNonConstantMultiplier;
+ MVConst = SE->getOne(ToMultiVersion->getType());
+ } else if (!TypeSize->isOne()) {
+ // Likewise - try to match legacy behavior.
+ return Skip();
+ }
+
+ if (!isa<SCEVUnknown>(ToMultiVersion)) {
+ // Match legacy behavior.
+ return Skip();
+ }
+
+ StridePredicates = StridePredicates.getUnionWith(
+ SE->getComparePredicate(CmpInst::ICMP_EQ, ToMultiVersion, MVConst),
+ *SE);
+
+ return Skip();
+ });
+
+ MemOps.swap(RemainingOps);
+
+ if (StridePredicates.isAlwaysTrue())
+ return;
+
+ VPBasicBlock *Entry = Plan.getEntry();
+ VPBuilder Builder(Entry);
+
+ auto *Pred =
+ Builder.createExpandSCEVPredicate(StridePredicates);
+
+ auto *StridesCheckBB = Plan.createVPBasicBlock("strides.check");
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockUtils::insertBlockBefore(StridesCheckBB, Plan.getVectorPreheader());
+ VPBlockUtils::connectBlocks(StridesCheckBB, ScalarPH);
+ // SCEVExpander::expandCodeForPredicate would negate the condition, so scalar
+ // preheader should be the first successor.
+ std::swap(StridesCheckBB->getSuccessors()[0],
+ StridesCheckBB->getSuccessors()[1]);
+ Builder.setInsertPoint(StridesCheckBB);
+ Builder.createNaryOp(VPInstruction::BranchOnCond, Pred);
+
+ for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
+ auto &Phi = cast<VPPhi>(R);
+ Phi.addOperand(Phi.getIncomingValueForBlock(Entry));
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a0c23df0b3c38..2a99be00daaac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -406,6 +406,7 @@ class LLVM_ABI_FOR_TEST VPRecipeBase
VPBranchOnMaskSC,
VPDerivedIVSC,
VPExpandSCEVSC,
+ VPExpandStridePredicatesSC,
VPExpressionSC,
VPIRInstructionSC,
VPInstructionSC,
@@ -599,6 +600,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPRecipeValue {
switch (R->getVPRecipeID()) {
case VPRecipeBase::VPDerivedIVSC:
case VPRecipeBase::VPExpandSCEVSC:
+ case VPRecipeBase::VPExpandStridePredicatesSC:
case VPRecipeBase::VPExpressionSC:
case VPRecipeBase::VPInstructionSC:
case VPRecipeBase::VPReductionEVLSC:
@@ -3742,6 +3744,47 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
#endif
};
+class VPExpandStridePredicatesRecipe : public VPSingleDefRecipe {
+ SCEVUnionPredicate StridePredicates;
+
+public:
+ VPExpandStridePredicatesRecipe(const SCEVUnionPredicate &StridePredicates)
+ : VPSingleDefRecipe(VPRecipeBase::VPExpandStridePredicatesSC, {}),
+ StridePredicates(StridePredicates) {}
+
+ VPExpandStridePredicatesRecipe(SCEVUnionPredicate &&StridePredicates)
+ : VPSingleDefRecipe(VPRecipeBase::VPExpandStridePredicatesSC, {}),
+ StridePredicates(std::move(StridePredicates)) {}
+
+ ~VPExpandStridePredicatesRecipe() override = default;
+
+ VPExpandStridePredicatesRecipe *clone() override {
+ return new VPExpandStridePredicatesRecipe(StridePredicates);
+ }
+
+ VP_CLASSOF_IMPL(VPRecipeBase::VPExpandStridePredicatesSC)
+
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("SCEVPredicates must be expanded before final execute");
+ }
+
+  /// Return the cost of this VPExpandStridePredicatesRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
+ const SCEVPredicate *getSCEVPredicate() const { return &StridePredicates; }
+
+protected:
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void printRecipe(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 4b744b9128171..524ef4211e862 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -316,6 +316,9 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
.Case([](const VPExpandSCEVRecipe *R) {
return R->getSCEV()->getType();
})
+ .Case([this](const VPExpandStridePredicatesRecipe *R) {
+ return Type::getInt1Ty(Ctx);
+ })
.Case([this](const VPReductionRecipe *R) {
return inferScalarType(R->getChainOp());
})
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 33cb1509565d5..695bc7758c054 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -4508,6 +4508,13 @@ void VPExpandSCEVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = EXPAND SCEV " << *Expr;
}
+
+void VPExpandStridePredicatesRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ printAsOperand(O, SlotTracker);
+ O << " = EXPAND SCEVPredicate " << StridePredicates;
+}
#endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 22a8edaf30eb6..b90a9eb342241 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -5122,6 +5122,16 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
if (isa<VPIRInstruction, VPIRPhi>(&R))
continue;
+ if (auto *ExpStrides = dyn_cast<VPExpandStridePredicatesRecipe>(&R)) {
+ Value *Res = Expander.expandCodeForPredicate(
+ ExpStrides->getSCEVPredicate(), EntryBB->getTerminator());
+ Res->setName("strides.mv.check");
+ VPValue *Exp = Plan.getOrAddLiveIn(Res);
+
+ ExpStrides->replaceAllUsesWith(Exp);
+ ExpStrides->eraseFromParent();
+ continue;
+ }
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpSCEV)
break;
@@ -5135,9 +5145,10 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
Plan.resetTripCount(Exp);
ExpSCEV->eraseFromParent();
}
- assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) &&
- "VPExpandSCEVRecipes must be at the beginning of the entry block, "
- "before any VPIRInstructions");
+ assert(none_of(*Entry,
+ IsaPred<VPExpandSCEVRecipe, VPExpandStridePredicatesRecipe>) &&
+ "VPExpandSCEVRecipes/VPExpandStridePredicatesRecipe must be at the "
+ "beginning of the entry block, before any VPIRInstructions");
// Add IR instructions in the entry basic block but not in the VPIRBasicBlock
// to the VPIRBasicBlock.
auto EI = Entry->begin();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 787a687f19cdd..ddc31659b0514 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -487,6 +487,11 @@ struct VPlanTransforms {
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
VPRecipeBuilder &RecipeBuilder,
VPCostContext &CostCtx);
+
+ /// \p MemOps must be updated to contain ones that haven't been processed by
+ /// the pass.
+ static void multiversionForUnitStridedMemOps(VPlan &Plan, VPCostContext &CostCtx,
+ SmallVectorImpl<VPInstruction *> &MemOps);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index f5318bb1c6515..af08539129362 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -389,8 +389,9 @@ bool vputils::isSingleScalar(const VPValue *VPV) {
if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
return Expr->isSingleScalar();
- // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
- return isa<VPExpandSCEVRecipe>(VPV);
+ // VPExpandSCEVRecipes and VPExpandStridePredicatesRecipe must be placed in
+ // the entry and are always uniform.
+ return isa<VPExpandSCEVRecipe, VPExpandStridePredicatesRecipe>(VPV);
}
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll
index 33eac6bc08b26..610d8cc63217f 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -p loop-vectorize -force-vector-width=4 -disable-output \
-; RUN: -vplan-print-after=scalarizeMemOpsWithIrregularTypes \
-; RUN: -enable-mem-access-versioning=false 2>&1 | FileCheck %s
+; RUN: -vplan-print-after=multiversionForUnitStridedMemOps \
+; RUN: -enable-mem-access-versioning=false -enable-vplan-based-stride-mv 2>&1 | FileCheck %s
define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'basic'
@@ -12,6 +12,12 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = EXPAND SCEVPredicate Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Successor(s): scalar.ph, strides.check
+; CHECK-EMPTY:
+; CHECK-NEXT: strides.check:
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP3]]>
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -19,7 +25,7 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
@@ -28,8 +34,8 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
-; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
+; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%exitcond>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -44,11 +50,11 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<[[VP7:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[VP9:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ], [ ir<0>, strides.check ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
-; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP7]]> from scalar.ph)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP9]]> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
@@ -90,6 +96,12 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = EXPAND SCEVPredicate Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Successor(s): scalar.ph, strides.check
+; CHECK-EMPTY:
+; CHECK-NEXT: strides.check:
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP3]]>
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -97,7 +109,7 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<8>
@@ -107,8 +119,8 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
-; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
+; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%exitcond>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -123,11 +135,11 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<[[VP7:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[VP9:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ], [ ir<0>, strides.check ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
-; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP7]]> from scalar.ph)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP9]]> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 8
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8
@@ -573,6 +585,12 @@ define void @shared_stride(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = EXPAND SCEVPredicate Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Successor(s): scalar.ph, strides.check
+; CHECK-EMPTY:
+; CHECK-NEXT: strides.check:
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP3]]>
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -580,7 +598,7 @@ define void...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/182595
More information about the llvm-branch-commits
mailing list