[llvm-branch-commits] [llvm] [VPlan] Start implementing VPlan-based stride multiversioning (PR #182595)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 20 13:23:04 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Andrei Elovikov (eas)
<details>
<summary>Changes</summary>
This commit only implements the run-time guard without actually optimizing the vector loop. That would come in a separate PR to ease review.
Stacked on top of https://github.com/llvm/llvm-project/pull/182594.
---
Patch is 96.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182595.diff
10 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+111)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+43)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+14-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.cpp (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll (+137-65)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-based-stride-mv.ll (+227-59)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 54bb073eb4f81..189dcf121e65b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -382,6 +382,11 @@ class VPBuilder {
return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
}
+ VPExpandStridePredicatesRecipe *
+ createExpandSCEVPredicate(const SCEVUnionPredicate &StridePredicates) {
+ return tryInsertInstruction(new VPExpandStridePredicatesRecipe(StridePredicates));
+ }
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5ea9fa7ac3288..86d98ce58e87c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -418,6 +418,11 @@ static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
+static cl::opt<bool> EnableVPlanBasedStrideMV(
+ "enable-vplan-based-stride-mv", cl::init(false), cl::Hidden,
+ cl::desc("Perform stride multiversioning directly on VPlan instead of in "
+ "LoopAccessAnalysis."));
+
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -10121,6 +10126,10 @@ void VPlanTransforms::makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
return false;
});
+ if (EnableVPlanBasedStrideMV)
+ RUN_VPLAN_PASS(VPlanTransforms::multiversionForUnitStridedMemOps, Plan,
+ CostCtx, MemOps);
+
VPlanTransforms::runPass("delegateMemOpWideningToLegacyCM", ProcessSubset,
Plan, [&](VPInstruction *VPI) {
VPRecipeBase *Recipe =
@@ -10132,3 +10141,105 @@ void VPlanTransforms::makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
return ReplaceWith(VPI, Recipe);
});
}
+
+void VPlanTransforms::multiversionForUnitStridedMemOps(
+ VPlan &Plan, VPCostContext &CostCtx,
+ SmallVectorImpl<VPInstruction *> &MemOps) {
+ SmallVector<VPInstruction *> RemainingOps;
+ // Makes a copy of VPTypeAnalysis (not sure where the problem is).
+ auto Types = CostCtx.Types;
+
+ ScalarEvolution *SE = CostCtx.PSE.getSE();
+
+ PredicatedScalarEvolution StrideMVPSE(*SE, const_cast<Loop &>(*CostCtx.L));
+
+ SCEVUnionPredicate StridePredicates({}, *SE);
+
+  // Use `for_each` so that we can do `return Skip();`.
+ for_each(MemOps, [&](VPInstruction *VPI) {
+ auto Skip = [&]() { RemainingOps.push_back(VPI); };
+ auto *PtrOp = VPI->getOpcode() == Instruction::Load ? VPI->getOperand(0)
+ : VPI->getOperand(1);
+
+ const SCEV *PtrSCEV =
+ vputils::getSCEVExprForVPValue(PtrOp, CostCtx.PSE, CostCtx.L);
+ const SCEV *Start = nullptr;
+ const SCEV *Stride = nullptr;
+
+ if (!match(PtrSCEV, m_scev_AffineAddRec(m_SCEV(Start), m_SCEV(Stride),
+ m_SpecificLoop(CostCtx.L)))) {
+ return Skip();
+ }
+
+ Type *ScalarTy = Types.inferScalarType(
+ VPI->getOpcode() == Instruction::Load ? VPI : VPI->getOperand(0));
+
+ const SCEV *TypeSize = SE->getSizeOfExpr(
+ Stride->getType(), SE->getDataLayout().getTypeStoreSize(ScalarTy));
+
+ if (isa<SCEVConstant>(Stride)) {
+    // TODO: Process non-MV unit strided accesses prior to this pass so that
+    // we could be sure this one is due to another MemOp MV.
+ return Skip();
+ }
+
+ const SCEVConstant *StrideConstantMultiplier;
+ const SCEV *StrideNonConstantMultiplier;
+
+ const SCEV *ToMultiVersion = Stride;
+ const SCEV *MVConst = TypeSize;
+ if (match(Stride, m_scev_c_Mul(m_SCEVConstant(StrideConstantMultiplier),
+ m_SCEV(StrideNonConstantMultiplier)))) {
+ if (TypeSize != StrideConstantMultiplier) {
+      // TODO: Support `TypeSize = N * StrideConstantMultiplier`,
+      // including negative `N`. For now, only process when they're equal,
+      // which matches the useful part of the legacy behavior that
+      // multiversions the GEP index for stride one.
+ return Skip();
+ }
+ ToMultiVersion = StrideNonConstantMultiplier;
+ MVConst = SE->getOne(ToMultiVersion->getType());
+ } else if (!TypeSize->isOne()) {
+ // Likewise - try to match legacy behavior.
+ return Skip();
+ }
+
+ if (!isa<SCEVUnknown>(ToMultiVersion)) {
+ // Match legacy behavior.
+ return Skip();
+ }
+
+ StridePredicates = StridePredicates.getUnionWith(
+ SE->getComparePredicate(CmpInst::ICMP_EQ, ToMultiVersion, MVConst),
+ *SE);
+
+ return Skip();
+ });
+
+ MemOps.swap(RemainingOps);
+
+ if (StridePredicates.isAlwaysTrue())
+ return;
+
+ VPBasicBlock *Entry = Plan.getEntry();
+ VPBuilder Builder(Entry);
+
+ auto *Pred =
+ Builder.createExpandSCEVPredicate(StridePredicates);
+
+ auto *StridesCheckBB = Plan.createVPBasicBlock("strides.check");
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockUtils::insertBlockBefore(StridesCheckBB, Plan.getVectorPreheader());
+ VPBlockUtils::connectBlocks(StridesCheckBB, ScalarPH);
+ // SCEVExpander::expandCodeForPredicate would negate the condition, so scalar
+ // preheader should be the first successor.
+ std::swap(StridesCheckBB->getSuccessors()[0],
+ StridesCheckBB->getSuccessors()[1]);
+ Builder.setInsertPoint(StridesCheckBB);
+ Builder.createNaryOp(VPInstruction::BranchOnCond, Pred);
+
+ for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
+ auto &Phi = cast<VPPhi>(R);
+ Phi.addOperand(Phi.getIncomingValueForBlock(Entry));
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a0c23df0b3c38..2a99be00daaac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -406,6 +406,7 @@ class LLVM_ABI_FOR_TEST VPRecipeBase
VPBranchOnMaskSC,
VPDerivedIVSC,
VPExpandSCEVSC,
+ VPExpandStridePredicatesSC,
VPExpressionSC,
VPIRInstructionSC,
VPInstructionSC,
@@ -599,6 +600,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPRecipeValue {
switch (R->getVPRecipeID()) {
case VPRecipeBase::VPDerivedIVSC:
case VPRecipeBase::VPExpandSCEVSC:
+ case VPRecipeBase::VPExpandStridePredicatesSC:
case VPRecipeBase::VPExpressionSC:
case VPRecipeBase::VPInstructionSC:
case VPRecipeBase::VPReductionEVLSC:
@@ -3742,6 +3744,47 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
#endif
};
+class VPExpandStridePredicatesRecipe : public VPSingleDefRecipe {
+ SCEVUnionPredicate StridePredicates;
+
+public:
+ VPExpandStridePredicatesRecipe(const SCEVUnionPredicate &StridePredicates)
+ : VPSingleDefRecipe(VPRecipeBase::VPExpandStridePredicatesSC, {}),
+ StridePredicates(StridePredicates) {}
+
+ VPExpandStridePredicatesRecipe(SCEVUnionPredicate &&StridePredicates)
+ : VPSingleDefRecipe(VPRecipeBase::VPExpandStridePredicatesSC, {}),
+ StridePredicates(std::move(StridePredicates)) {}
+
+ ~VPExpandStridePredicatesRecipe() override = default;
+
+ VPExpandStridePredicatesRecipe *clone() override {
+ return new VPExpandStridePredicatesRecipe(StridePredicates);
+ }
+
+ VP_CLASSOF_IMPL(VPRecipeBase::VPExpandStridePredicatesSC)
+
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("SCEVPredicates must be expanded before final execute");
+ }
+
+  /// Return the cost of this VPExpandStridePredicatesRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
+ const SCEVPredicate *getSCEVPredicate() const { return &StridePredicates; }
+
+protected:
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void printRecipe(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 4b744b9128171..524ef4211e862 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -316,6 +316,9 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
.Case([](const VPExpandSCEVRecipe *R) {
return R->getSCEV()->getType();
})
+ .Case([this](const VPExpandStridePredicatesRecipe *R) {
+ return Type::getInt1Ty(Ctx);
+ })
.Case([this](const VPReductionRecipe *R) {
return inferScalarType(R->getChainOp());
})
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 33cb1509565d5..695bc7758c054 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -4508,6 +4508,13 @@ void VPExpandSCEVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = EXPAND SCEV " << *Expr;
}
+
+void VPExpandStridePredicatesRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ printAsOperand(O, SlotTracker);
+ O << " = EXPAND SCEVPredicate " << StridePredicates;
+}
#endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 22a8edaf30eb6..b90a9eb342241 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -5122,6 +5122,16 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
if (isa<VPIRInstruction, VPIRPhi>(&R))
continue;
+ if (auto *ExpStrides = dyn_cast<VPExpandStridePredicatesRecipe>(&R)) {
+ Value *Res = Expander.expandCodeForPredicate(
+ ExpStrides->getSCEVPredicate(), EntryBB->getTerminator());
+ Res->setName("strides.mv.check");
+ VPValue *Exp = Plan.getOrAddLiveIn(Res);
+
+ ExpStrides->replaceAllUsesWith(Exp);
+ ExpStrides->eraseFromParent();
+ continue;
+ }
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpSCEV)
break;
@@ -5135,9 +5145,10 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
Plan.resetTripCount(Exp);
ExpSCEV->eraseFromParent();
}
- assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) &&
- "VPExpandSCEVRecipes must be at the beginning of the entry block, "
- "before any VPIRInstructions");
+ assert(none_of(*Entry,
+ IsaPred<VPExpandSCEVRecipe, VPExpandStridePredicatesRecipe>) &&
+ "VPExpandSCEVRecipes/VPExpandStridePredicatesRecipe must be at the "
+ "beginning of the entry block, before any VPIRInstructions");
// Add IR instructions in the entry basic block but not in the VPIRBasicBlock
// to the VPIRBasicBlock.
auto EI = Entry->begin();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 787a687f19cdd..ddc31659b0514 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -487,6 +487,11 @@ struct VPlanTransforms {
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
VPRecipeBuilder &RecipeBuilder,
VPCostContext &CostCtx);
+
+ /// \p MemOps must be updated to contain ones that haven't been processed by
+ /// the pass.
+ static void multiversionForUnitStridedMemOps(VPlan &Plan, VPCostContext &CostCtx,
+ SmallVectorImpl<VPInstruction *> &MemOps);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index f5318bb1c6515..af08539129362 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -389,8 +389,9 @@ bool vputils::isSingleScalar(const VPValue *VPV) {
if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
return Expr->isSingleScalar();
- // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
- return isa<VPExpandSCEVRecipe>(VPV);
+ // VPExpandSCEVRecipes and VPExpandStridePredicatesRecipe must be placed in
+ // the entry and are always uniform.
+ return isa<VPExpandSCEVRecipe, VPExpandStridePredicatesRecipe>(VPV);
}
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll
index 33eac6bc08b26..610d8cc63217f 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-based-stride-mv.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -p loop-vectorize -force-vector-width=4 -disable-output \
-; RUN: -vplan-print-after=scalarizeMemOpsWithIrregularTypes \
-; RUN: -enable-mem-access-versioning=false 2>&1 | FileCheck %s
+; RUN: -vplan-print-after=multiversionForUnitStridedMemOps \
+; RUN: -enable-mem-access-versioning=false -enable-vplan-based-stride-mv 2>&1 | FileCheck %s
define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'basic'
@@ -12,6 +12,12 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = EXPAND SCEVPredicate Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Successor(s): scalar.ph, strides.check
+; CHECK-EMPTY:
+; CHECK-NEXT: strides.check:
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP3]]>
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -19,7 +25,7 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
@@ -28,8 +34,8 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
-; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
+; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%exitcond>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -44,11 +50,11 @@ define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<[[VP7:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[VP9:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ], [ ir<0>, strides.check ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
-; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP7]]> from scalar.ph)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP9]]> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
@@ -90,6 +96,12 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = EXPAND SCEVPredicate Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Successor(s): scalar.ph, strides.check
+; CHECK-EMPTY:
+; CHECK-NEXT: strides.check:
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP3]]>
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -97,7 +109,7 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<8>
@@ -107,8 +119,8 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
-; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
+; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%exitcond>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -123,11 +135,11 @@ define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT-SCALAR vp<[[VP7:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[VP9:%[0-9]+]]> = phi [ ir<%iv>, middle.block ], [ ir<0>, ir-bb<entry> ], [ ir<0>, strides.check ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
-; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP7]]> from scalar.ph)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<[[VP9]]> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 8
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8
@@ -573,6 +585,12 @@ define void @shared_stride(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = EXPAND SCEVPredicate Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT: Successor(s): scalar.ph, strides.check
+; CHECK-EMPTY:
+; CHECK-NEXT: strides.check:
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP3]]>
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -580,7 +598,7 @@ define void...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/182595
More information about the llvm-branch-commits
mailing list