[llvm] [VPlan] Optimize more IV increment exit users by using a map (PR #147016)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 15:46:47 PDT 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/147016
>From f3ffd6a011161b33fec817375cdf492f91376695 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 4 Jul 2025 01:36:38 -0700
Subject: [PATCH 1/2] [VPlan] Optimize more IV increment exit users by using a
map
The current pattern matching in getOptimizableIVOf() doesn't cover all
cases for wide IV increments, missing optimization opportunities for certain
exit users.
This patch adds a mapping between wide IV increments and their corresponding
IVs to enable optimization of previously unhandled cases.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 13 +++---
.../Transforms/Vectorize/VPlanTransforms.cpp | 24 ++++++----
.../Transforms/Vectorize/VPlanTransforms.h | 6 +--
.../LoopVectorize/X86/induction-step.ll | 3 +-
.../LoopVectorize/induction-step.ll | 8 ++--
.../LoopVectorize/iv_outside_user.ll | 44 +++++--------------
.../LoopVectorize/scalable-iv-outside-user.ll | 20 +--------
7 files changed, 44 insertions(+), 74 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 907839711a39c..00b2dafab397f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8887,15 +8887,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Update wide induction increments to use the same step as the corresponding
// wide induction. This enables detecting induction increments directly in
// VPlan and removes redundant splats.
+ DenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs;
for (const auto &[Phi, ID] : Legal->getInductionVars()) {
auto *IVInc = cast<Instruction>(
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
- if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
- continue;
VPWidenInductionRecipe *WideIV =
cast<VPWidenInductionRecipe>(RecipeBuilder.getRecipe(Phi));
- VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc);
- R->setOperand(1, WideIV->getStepValue());
+ VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IVInc);
+ if (!dyn_cast<PHINode>(IVInc))
+ MapIVs[V] = WideIV;
+ if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
+ continue;
+ V->getDefiningRecipe()->setOperand(1, WideIV->getStepValue());
}
DenseMap<VPValue *, VPValue *> IVEndValues;
@@ -8992,7 +8995,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
WithoutRuntimeCheck);
}
- VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues);
+ VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, MapIVs);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8e05b0138eeed..20d91a39162f6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -823,10 +823,10 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
/// Attempts to optimize the induction variable exit values for users in the
/// exit block coming from the latch in the original scalar loop.
-static VPValue *
-optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
- VPBlockBase *PredVPBB, VPValue *Op,
- DenseMap<VPValue *, VPValue *> &EndValues) {
+static VPValue *optimizeLatchExitInductionUser(
+ VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op,
+ DenseMap<VPValue *, VPValue *> &EndValues,
+ DenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs) {
using namespace VPlanPatternMatch;
VPValue *Incoming;
@@ -834,7 +834,13 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
m_VPValue(Incoming))))
return nullptr;
- auto *WideIV = getOptimizableIVOf(Incoming);
+ // Truncated IV is not handled here.
+ auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Incoming);
+ if (IntOrFpIV && IntOrFpIV->getTruncInst())
+ return nullptr;
+ auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Incoming);
+ if (!WideIV)
+ WideIV = MapIVs.lookup(Incoming);
if (!WideIV)
return nullptr;
@@ -873,7 +879,8 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
}
void VPlanTransforms::optimizeInductionExitUsers(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
+ VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
+ DenseMap<VPValue *, VPWidenInductionRecipe *> &MapIVs) {
VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -883,8 +890,9 @@ void VPlanTransforms::optimizeInductionExitUsers(
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
VPValue *Escape = nullptr;
if (PredVPBB == MiddleVPBB)
- Escape = optimizeLatchExitInductionUser(
- Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), EndValues);
+ Escape = optimizeLatchExitInductionUser(Plan, TypeInfo, PredVPBB,
+ ExitIRI->getOperand(Idx),
+ EndValues, MapIVs);
else
Escape = optimizeEarlyExitInductionUser(Plan, TypeInfo, PredVPBB,
ExitIRI->getOperand(Idx));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 8d2eded45da22..22a69c864095c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -211,9 +211,9 @@ struct VPlanTransforms {
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
- static void
- optimizeInductionExitUsers(VPlan &Plan,
- DenseMap<VPValue *, VPValue *> &EndValues);
+ static void optimizeInductionExitUsers(
+ VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
+ DenseMap<VPValue *, VPWidenInductionRecipe *> &MapIVs);
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
index 196c7552d0852..5f7b630560ab3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
@@ -113,7 +113,6 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -130,7 +129,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[SUB_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index 036d5f5886234..5163b9dfc7423 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -368,11 +368,11 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ADD]] = add i16 [[IV_2]], [[O_1]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_DST]], align 2
@@ -439,11 +439,11 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i16 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[SUB]] = sub i16 [[IV_2]], [[O_1]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i16 [[SUB]], ptr [[GEP_DST]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 7b7735434e67d..0ec0241ea2515 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -1095,14 +1095,11 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC: [[VECTOR_PH]]:
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
-; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
+; VEC-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
-; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
-; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; VEC: [[MIDDLE_BLOCK]]:
@@ -1118,7 +1115,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
; VEC: [[E_EXIT]]:
-; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: ret i32 [[RES]]
;
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
@@ -1136,7 +1133,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP1]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1153,7 +1149,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[E_EXIT]]:
-; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[RES]]
;
entry:
@@ -1182,12 +1178,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VEC: [[VECTOR_PH]]:
-; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP_2]], i64 0
-; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
@@ -1195,14 +1188,10 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
-; VEC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1)
-; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; VEC: [[MIDDLE_BLOCK]]:
-; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; VEC-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
; VEC: [[SCALAR_PH]]:
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -1216,7 +1205,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
; VEC: [[E_EXIT]]:
-; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: ret i32 [[RES]]
;
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
@@ -1235,8 +1224,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP4]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1254,7 +1241,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[E_EXIT]]:
-; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[RES]]
;
entry:
@@ -1286,16 +1273,11 @@ define i32 @iv_ext_used_outside( ptr %dst) {
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[OFFSET_IDX]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
; VEC-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4
-; VEC-NEXT: [[TMP5:%.*]] = add nuw nsw <2 x i16> [[VEC_IND]], splat (i16 1)
-; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
-; VEC-NEXT: [[TMP7:%.*]] = zext nneg i16 [[TMP8]] to i32
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; VEC: [[MIDDLE_BLOCK]]:
@@ -1314,7 +1296,7 @@ define i32 @iv_ext_used_outside( ptr %dst) {
; VEC-NEXT: [[EC:%.*]] = icmp samesign ult i16 [[IV_1]], 128
; VEC-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}}
; VEC: [[EXIT]]:
-; VEC-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: ret i32 [[IV_1_EXT_LCSSA]]
;
; INTERLEAVE-LABEL: define i32 @iv_ext_used_outside(
@@ -1331,8 +1313,6 @@ define i32 @iv_ext_used_outside( ptr %dst) {
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[TMP1]]
; INTERLEAVE-NEXT: store i32 0, ptr [[TMP2]], align 4
; INTERLEAVE-NEXT: store i32 0, ptr [[TMP3]], align 4
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = add nuw nsw i16 [[TMP1]], 1
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = zext nneg i16 [[TMP4]] to i32
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1352,7 +1332,7 @@ define i32 @iv_ext_used_outside( ptr %dst) {
; INTERLEAVE-NEXT: [[EC:%.*]] = icmp samesign ult i16 [[IV_1]], 128
; INTERLEAVE-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[EXIT]]:
-; INTERLEAVE-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[IV_1_EXT_LCSSA]]
;
entry:
@@ -1386,8 +1366,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1
; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2
-; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1
-; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VEC: [[MIDDLE_BLOCK]]:
; VEC-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -1405,7 +1383,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; VEC-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1
; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
; VEC: [[EXIT]]:
-; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA]]
;
; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented(
@@ -1419,8 +1397,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 2
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 1
; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; INTERLEAVE: [[MIDDLE_BLOCK]]:
; INTERLEAVE-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -1438,7 +1414,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1
; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[EXIT]]:
-; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index b0029a4e0d069..2df8645b5305a 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -17,20 +17,10 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
-; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP4]], 4
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
@@ -38,16 +28,10 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP11]], align 2
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
-; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT]], [[STEP_ADD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2
-; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i32> [[TMP15]], i32 [[TMP19]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -61,7 +45,7 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000
; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[E_EXIT]]:
-; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
>From 0c9267968a2d964e9943bcbbeea275e712552689 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 4 Jul 2025 15:45:53 -0700
Subject: [PATCH 2/2] Address comments
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++--
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 00b2dafab397f..32846cad8ac42 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8887,14 +8887,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Update wide induction increments to use the same step as the corresponding
// wide induction. This enables detecting induction increments directly in
// VPlan and removes redundant splats.
- DenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs;
+ SmallDenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs;
for (const auto &[Phi, ID] : Legal->getInductionVars()) {
auto *IVInc = cast<Instruction>(
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
VPWidenInductionRecipe *WideIV =
cast<VPWidenInductionRecipe>(RecipeBuilder.getRecipe(Phi));
VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IVInc);
- if (!dyn_cast<PHINode>(IVInc))
+ if (!isa<PHINode>(IVInc))
MapIVs[V] = WideIV;
if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 20d91a39162f6..cbf13f46b37b2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -826,7 +826,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
static VPValue *optimizeLatchExitInductionUser(
VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op,
DenseMap<VPValue *, VPValue *> &EndValues,
- DenseMap<VPValue *, VPWidenInductionRecipe *> MapIVs) {
+ DenseMap<VPValue *, VPWidenInductionRecipe *> &MapIVs) {
using namespace VPlanPatternMatch;
VPValue *Incoming;
More information about the llvm-commits
mailing list