[llvm] [VPlan] Add narrowToSingleScalarRecipe transform. (PR #139150)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat May 17 14:54:54 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/139150
From 50be0596277ed0ff5fd4e38eec2dd75f8fd56a66 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 22 Apr 2025 21:17:27 +0100
Subject: [PATCH 1/5] [VPlan] Add convertToUniformRecipe transform.
Add a new convertToUniformRecipes transform which uses VPlan-based
uniformity analysis to determine if wide recipes and replicate recipes
can be converted to uniform recipes.
There are a few places where we convert recipes to uniform recipes ad
hoc; this transform will eventually replace them. A few more
generalizations are required before it can do so, which I plan to
address in follow-ups.
By converting the recipes to uniform recipes, we effectively materialize
the information from the VPlan-based analysis.
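To illustrate the intended effect, here is a hand-simplified sketch
based on the cost_assume test change below (value names are
illustrative, not taken from the actual output). Without the transform,
a compare of a uniform operand gets widened and a single lane extracted:

    %splatinsert = insertelement <2 x i64> poison, i64 %N, i64 0
    %splat = shufflevector <2 x i64> %splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
    %cmp.vec = icmp ne <2 x i64> %splat, zeroinitializer
    %cmp = extractelement <2 x i1> %cmp.vec, i32 0
    call void @llvm.assume(i1 %cmp)

With the transform, the recipe becomes uniform and only the scalar
result is emitted:

    %cmp = icmp ne i64 %N, 0
    call void @llvm.assume(i1 %cmp)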
Note that there is currently one regression, in SystemZ/pr47665.ll: the
input IR contains trivial constant-folding opportunities that are left
unfolded after the transform. This will be fixed by VPlan-based constant
folding (https://github.com/llvm/llvm-project/pull/125365/).
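Concretely (sketched from the pr47665.ll diff below), a compare of
constant operands that used to be folded to a stored constant is now
kept as a scalar instruction:

    %0 = icmp sgt i1 true, false    ; trivially false, but no longer folded
    ...
    store i1 %0, ptr %p, align 1    ; was: store i1 false, ptr %p, align 1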
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 35 +++++++++++++++++++
.../LoopVectorize/SystemZ/pr47665.ll | 33 ++++++++---------
.../LoopVectorize/X86/cost-model.ll | 5 +--
.../version-stride-with-integer-casts.ll | 11 +++---
4 files changed, 57 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 79ddb8bf0b09b..50552c843cd59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1084,6 +1084,40 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
}
}
+static void convertToUniformRecipes(VPlan &Plan) {
+ auto TryToNarrow = [](VPBasicBlock *VPBB) {
+ for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
+ // Try to narrow wide and replicating recipes to uniform recipes, based on
+ // VPlan analysis.
+ auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
+ if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
+ !Def->getUnderlyingValue())
+ continue;
+
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ if (RepR && RepR->isUniform())
+ continue;
+
+ // Skip recipes that aren't uniform and don't have only their scalar
+ // results used. In the later case, we would introduce extra broadcasts.
+ if (!vputils::isUniformAfterVectorization(Def) ||
+ any_of(Def->users(),
+ [Def](VPUser *U) { return !U->usesScalars(Def); }))
+ continue;
+
+ auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
+ Def->operands(), /*IsUniform*/ true);
+ Clone->insertBefore(Def);
+ Def->replaceAllUsesWith(Clone);
+ Def->eraseFromParent();
+ }
+ };
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())))
+ TryToNarrow(VPBB);
+}
+
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
/// to make sure the masks are simplified.
static void simplifyBlends(VPlan &Plan) {
@@ -1778,6 +1812,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
runPass(simplifyBlends, Plan);
runPass(removeDeadRecipes, Plan);
+ runPass(convertToUniformRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index 02a876a3fda67..bb96c166f894c 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -7,86 +7,87 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i1 true, false
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
; CHECK: pred.store.if15:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
; CHECK: pred.store.if17:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
; CHECK: pred.store.continue18:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
; CHECK: pred.store.if19:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
; CHECK: pred.store.continue20:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
; CHECK: pred.store.if21:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.continue22:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
; CHECK: pred.store.if23:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue24:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
; CHECK: pred.store.if25:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.continue26:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
; CHECK: pred.store.if27:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
; CHECK: pred.store.continue28:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
; CHECK: pred.store.if29:
-; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
; CHECK: pred.store.continue30:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index f8b1cc2d775f5..7c42c3d9cd52e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -890,9 +890,7 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[N:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[N:%.*]], 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -904,7 +902,6 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], splat (i64 1)
; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], splat (i64 1)
; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], splat (i64 1)
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index fb84739881010..30e0acb4d7bf6 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -159,9 +159,6 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[SCALE]], 1
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
-; CHECK-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
-; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
; CHECK-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -174,10 +171,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
-; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP11]], align 8
-; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8
-; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8
+; CHECK-NEXT: store ptr [[TMP83]], ptr [[TMP11]], align 8
+; CHECK-NEXT: store ptr [[TMP83]], ptr [[TMP13]], align 8
+; CHECK-NEXT: store ptr [[TMP83]], ptr [[TMP15]], align 8
+; CHECK-NEXT: store ptr [[TMP83]], ptr [[TMP17]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
From a8dda3f680ae7148440747694ee189d2cf131aba Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 15 May 2025 21:18:46 +0100
Subject: [PATCH 2/5] !fixup address latest comments.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 38 +++++++++----------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3803dd20d866b..d40f8d59d4f24 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1087,37 +1087,37 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
}
static void convertToUniformRecipes(VPlan &Plan) {
- auto TryToNarrow = [](VPBasicBlock *VPBB) {
+ if (Plan.hasScalarVFOnly())
+ return;
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
// Try to narrow wide and replicating recipes to uniform recipes, based on
// VPlan analysis.
- auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
- if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
- !Def->getUnderlyingValue())
- continue;
-
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ if (!RepR && !isa<VPWidenRecipe>(&R))
+ continue;
if (RepR && RepR->isUniform())
continue;
+ auto *RepOrWiden = cast<VPSingleDefRecipe>(&R);
// Skip recipes that aren't uniform and don't have only their scalar
// results used. In the later case, we would introduce extra broadcasts.
- if (!vputils::isUniformAfterVectorization(Def) ||
- any_of(Def->users(),
- [Def](VPUser *U) { return !U->usesScalars(Def); }))
+ if (!vputils::isUniformAfterVectorization(RepOrWiden) ||
+ any_of(RepOrWiden->users(), [RepOrWiden](VPUser *U) {
+ return !U->usesScalars(RepOrWiden);
+ }))
continue;
- auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
- Def->operands(), /*IsUniform*/ true);
- Clone->insertBefore(Def);
- Def->replaceAllUsesWith(Clone);
- Def->eraseFromParent();
+ auto *Clone =
+ new VPReplicateRecipe(RepOrWiden->getUnderlyingInstr(),
+ RepOrWiden->operands(), /*IsUniform*/ true);
+ Clone->insertBefore(RepOrWiden);
+ RepOrWiden->replaceAllUsesWith(Clone);
+ RepOrWiden->eraseFromParent();
}
- };
-
- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
- vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())))
- TryToNarrow(VPBB);
+ }
}
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
From a7e9545a415359c9f82f146eee1faa8ec9fa621a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 15 May 2025 21:37:35 +0100
Subject: [PATCH 3/5] !fixup naming, later->latter
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d40f8d59d4f24..9270f48718233 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1101,21 +1101,21 @@ static void convertToUniformRecipes(VPlan &Plan) {
if (RepR && RepR->isUniform())
continue;
- auto *RepOrWiden = cast<VPSingleDefRecipe>(&R);
+ auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
// Skip recipes that aren't uniform and don't have only their scalar
- // results used. In the later case, we would introduce extra broadcasts.
- if (!vputils::isUniformAfterVectorization(RepOrWiden) ||
- any_of(RepOrWiden->users(), [RepOrWiden](VPUser *U) {
- return !U->usesScalars(RepOrWiden);
+ // results used. In the latter case, we would introduce extra broadcasts.
+ if (!vputils::isUniformAfterVectorization(RepOrWidenR) ||
+ any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
+ return !U->usesScalars(RepOrWidenR);
}))
continue;
auto *Clone =
- new VPReplicateRecipe(RepOrWiden->getUnderlyingInstr(),
- RepOrWiden->operands(), /*IsUniform*/ true);
- Clone->insertBefore(RepOrWiden);
- RepOrWiden->replaceAllUsesWith(Clone);
- RepOrWiden->eraseFromParent();
+ new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
+ RepOrWidenR->operands(), /*IsUniform*/ true);
+ Clone->insertBefore(RepOrWidenR);
+ RepOrWidenR->replaceAllUsesWith(Clone);
+ RepOrWidenR->eraseFromParent();
}
}
}
From 451d82a044d47e7eb24754300103f304cdc379bb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 16 May 2025 21:05:21 +0100
Subject: [PATCH 4/5] !fixup address comments, thanks
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b7555ff1e3e44..eff5724667227 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1085,36 +1085,36 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
}
}
-static void convertToUniformRecipes(VPlan &Plan) {
+static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
- // Try to narrow wide and replicating recipes to uniform recipes, based on
- // VPlan analysis.
+ // Try to narrow wide and replicating recipes to single scalar recipes,
+ // based on VPlan analysis.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR && !isa<VPWidenRecipe>(&R))
continue;
- if (RepR && RepR->isUniform())
+ if (RepR && RepR->isSingleScalar())
continue;
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
- // Skip recipes that aren't uniform and don't have only their scalar
- // results used. In the latter case, we would introduce extra broadcasts.
- if (!vputils::isUniformAfterVectorization(RepOrWidenR) ||
+ // Skip recipes that aren't single scalars and don't have only their
+ // scalar results used. In the latter case, we would introduce extra
+ // broadcasts.
+ if (!vputils::isSingleScalar(RepOrWidenR) ||
any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
return !U->usesScalars(RepOrWidenR);
}))
continue;
- auto *Clone =
- new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
- RepOrWidenR->operands(), /*IsUniform*/ true);
+ auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
+ RepOrWidenR->operands(),
+ true /*IsSingleScalar*/);
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
- RepOrWidenR->eraseFromParent();
}
}
}
@@ -1813,7 +1813,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
runPass(simplifyBlends, Plan);
runPass(removeDeadRecipes, Plan);
- runPass(convertToUniformRecipes, Plan);
+ runPass(narrowToSingleScalarRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
From 5b15a9be581fb9da82df18ad5c8f2cae7af69f3e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 17 May 2025 22:53:11 +0100
Subject: [PATCH 5/5] !fixup update comment, add comment re regions.
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index eff5724667227..8c8297bb1ae94 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1089,11 +1089,13 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
+ // Try to narrow wide and replicating recipes to single scalar recipes,
+ // based on VPlan analysis. Only process blocks in the loop region for now,
+ // without traversing into nested regions, as recipes in replicate regions
+ // cannot be converted yet.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
- // Try to narrow wide and replicating recipes to single scalar recipes,
- // based on VPlan analysis.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR && !isa<VPWidenRecipe>(&R))
continue;
@@ -1101,7 +1103,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
continue;
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
- // Skip recipes that aren't single scalars and don't have only their
+ // Skip recipes that aren't single scalars or don't have only their
// scalar results used. In the latter case, we would introduce extra
// broadcasts.
if (!vputils::isSingleScalar(RepOrWidenR) ||