[llvm] [VPlan] Consolidate logic for narrow to single scalars (PR #151506)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 05:45:30 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/151506
>From f41be4eccb33c964031fa482c84e12fab91079a2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 31 Jul 2025 12:58:49 +0100
Subject: [PATCH 1/2] [VPlan] Consolidate logic for narrow to single scalars
The logic for narrowing to single scalar recipes is in two different
places: narrowToSingleScalarRecipes and legalizeAndOptimizeInductions.
Consolidate them, with minor test changes.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 48 +++++--------------
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 3 ++
.../uniform_across_vf_induction1.ll | 6 +--
.../uniform_across_vf_induction1_and.ll | 25 +++++++++-
.../uniform_across_vf_induction1_div_urem.ll | 6 +--
.../uniform_across_vf_induction1_lshr.ll | 18 +++----
.../uniform_across_vf_induction2.ll | 6 +--
7 files changed, 55 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1d12a3a01e5e..3e7061584f2eb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -631,30 +631,6 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
if (!PhiR)
continue;
- // Try to narrow wide and replicating recipes to uniform recipes, based on
- // VPlan analysis.
- // TODO: Apply to all recipes in the future, to replace legacy uniformity
- // analysis.
- auto Users = collectUsersRecursively(PhiR);
- for (VPUser *U : reverse(Users)) {
- auto *Def = dyn_cast<VPSingleDefRecipe>(U);
- auto *RepR = dyn_cast<VPReplicateRecipe>(U);
- // Skip recipes that shouldn't be narrowed.
- if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
- Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
- (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
- continue;
-
- // Skip recipes that may have other lanes than their first used.
- if (!vputils::isSingleScalar(Def) && !vputils::onlyFirstLaneUsed(Def))
- continue;
-
- auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
- Def->operands(), /*IsUniform*/ true);
- Clone->insertAfter(Def);
- Def->replaceAllUsesWith(Clone);
- }
-
// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1238,21 +1214,19 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
continue;
- auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
- // Skip recipes that aren't single scalars or don't have only their
- // scalar results used. In the latter case, we would introduce extra
- // broadcasts.
- if (!vputils::isSingleScalar(RepOrWidenR) ||
- any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
- return !U->usesScalars(RepOrWidenR);
- }))
+ auto *Def = cast<VPSingleDefRecipe>(&R);
+ // Skip recipes that aren't single scalars or that don't have users or
+ // that don't have only their scalar results used (would introduce extra
+ // broadcasts in this case).
+ if (!vputils::isSingleScalar(Def) || Def->getNumUsers() == 0 ||
+ any_of(Def->users(),
+ [Def](VPUser *U) { return !U->usesScalars(Def); }))
continue;
- auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
- RepOrWidenR->operands(),
- true /*IsSingleScalar*/);
- Clone->insertBefore(RepOrWidenR);
- RepOrWidenR->replaceAllUsesWith(Clone);
+ auto *Clone = new VPReplicateRecipe(
+ Def->getUnderlyingInstr(), Def->operands(), true /*IsSingleScalar*/);
+ Clone->insertBefore(Def);
+ Def->replaceAllUsesWith(Clone);
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 8dcd57f1b3598..5f80214973158 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -40,6 +40,9 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
inline bool isSingleScalar(const VPValue *VPV) {
+ if (onlyFirstLaneUsed(VPV))
+ return true;
+
auto PreservesUniformity = [](unsigned Opcode) -> bool {
if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
index 82f2fdd431238..cc6d70331e69d 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
@@ -57,11 +57,11 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
index af2b238105925..0775ef9b8de75 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
@@ -15,6 +15,13 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[INDEX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
@@ -50,10 +57,14 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[INDEX]], -2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
@@ -97,6 +108,11 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP8]], splat (i64 42)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
@@ -351,6 +367,11 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP8]], splat (i64 42)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
index 61f511c16e88b..ab31abd970814 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
@@ -240,11 +240,11 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
+; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
index e412d130e115f..93e7581a8ba3d 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
@@ -79,11 +79,11 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -159,11 +159,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 2
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -182,11 +182,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 2
; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; VF4-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; VF4-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
-; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
+; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0
; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; VF4-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
+; VF4-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP4]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF4-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
index ef6ce08da5230..1d140ffc14d14 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
@@ -115,11 +115,11 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; VF2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
-; VF2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
+; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
-; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
+; VF2-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP6]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
>From 61ec5b3403b9a9ba02648a8adfbb81f7a9e33a2f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 31 Jul 2025 13:44:38 +0100
Subject: [PATCH 2/2] [VPlan] Revert rename
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3e7061584f2eb..ca5a42c529591 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1214,19 +1214,22 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
continue;
- auto *Def = cast<VPSingleDefRecipe>(&R);
+ auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
// Skip recipes that aren't single scalars or that don't have users or
// that don't have only their scalar results used (would introduce extra
// broadcasts in this case).
- if (!vputils::isSingleScalar(Def) || Def->getNumUsers() == 0 ||
- any_of(Def->users(),
- [Def](VPUser *U) { return !U->usesScalars(Def); }))
+ if (!vputils::isSingleScalar(RepOrWidenR) ||
+ RepOrWidenR->getNumUsers() == 0 ||
+ any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) {
+ return !U->usesScalars(RepOrWidenR);
+ }))
continue;
- auto *Clone = new VPReplicateRecipe(
- Def->getUnderlyingInstr(), Def->operands(), true /*IsSingleScalar*/);
- Clone->insertBefore(Def);
- Def->replaceAllUsesWith(Clone);
+ auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
+ RepOrWidenR->operands(),
+ true /*IsSingleScalar*/);
+ Clone->insertBefore(RepOrWidenR);
+ RepOrWidenR->replaceAllUsesWith(Clone);
}
}
}
More information about the llvm-commits
mailing list