[llvm] [VPlan] Allow VPWidenPHI in non-native path and copy DebugLoc (PR #118662)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 20:54:29 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/118662
>From 431d6d914f4a9d190e6e3d0ee5d5e77fcfb7690a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 00:09:46 +0800
Subject: [PATCH 1/5] [LV] Allow VPWidenPHI in non-native path and copy
DebugLoc
We can reuse VPWidenPHI in #118638, but doing so requires allowing it in the non-native path. We also need to propagate the DebugLoc and use a different name for the generated PHI, so this patch splits those parts off in case we want to land them separately.
We lose some debug info in dbg-outer-loop-vect.ll, but I think this is because the underlying phi node didn't have a DebugLoc to begin with. I think the behaviour before this patch was just carrying over whatever DebugLoc was left over from the previous state.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 ++++-----
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++++++--
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++----
.../Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++--
4 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f2f8a85b7cc233..5f924e25fcf208 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -516,7 +516,7 @@ class InnerLoopVectorizer {
VPTransformState &State);
/// Fix the non-induction PHIs in \p Plan.
- void fixNonInductionPHIs(VPTransformState &State);
+ void fixWidenedPHIs(VPTransformState &State);
/// Returns the original loop trip count.
Value *getTripCount() const { return TripCount; }
@@ -2977,9 +2977,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
}
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
- // Fix widened non-induction PHIs by setting up the PHI operands.
- if (EnableVPlanNativePath)
- fixNonInductionPHIs(State);
+ // Fix widened PHIs by setting up the PHI operands.
+ fixWidenedPHIs(State);
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
@@ -3116,7 +3115,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
-void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
+void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &P : VPBB->phis()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 88f3f672d3aa38..b42dbe9208672f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2287,10 +2287,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
/// List of incoming blocks. Only used in the VPlan native path.
SmallVector<VPBasicBlock *, 2> IncomingBlocks;
+ /// Name to use for the generated IR instruction for the widened PHI.
+ std::string Name;
+
public:
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+ VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
+ const Twine &Name = "vec.phi")
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
+ Phi->getDebugLoc()),
+ Name(Name.str()) {
if (Start)
addOperand(Start);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 77c08839dbfa95..bfe945d73bf145 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3440,12 +3440,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
- assert(EnableVPlanNativePath &&
- "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
+ State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
- Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
+ Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
State.set(this, VecPhi);
}
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 66aceab9fb27c8..44afa34100c299 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
; CHECK: for.cond5.preheader1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]
>From cda41c6532be6aa56f66e093c44cb521cb64343e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 00:20:20 +0800
Subject: [PATCH 2/5] Update comment
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5f924e25fcf208..0eaaa711a5f95c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -515,7 +515,7 @@ class InnerLoopVectorizer {
VPReplicateRecipe *RepRecipe, const VPLane &Lane,
VPTransformState &State);
- /// Fix the non-induction PHIs in \p Plan.
+ /// Fix the widened PHIs in \p Plan.
void fixWidenedPHIs(VPTransformState &State);
/// Returns the original loop trip count.
>From f31878cde04c29d2f76bc82879d49e0f37566e35 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 04:04:21 +0800
Subject: [PATCH 3/5] Rename fixNonInductionPHIs back
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0eaaa711a5f95c..ef8abdaa8aa76e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -515,8 +515,8 @@ class InnerLoopVectorizer {
VPReplicateRecipe *RepRecipe, const VPLane &Lane,
VPTransformState &State);
- /// Fix the widened PHIs in \p Plan.
- void fixWidenedPHIs(VPTransformState &State);
+ /// Fix the non-induction PHIs in \p Plan.
+ void fixNonInductionPHIs(VPTransformState &State);
/// Returns the original loop trip count.
Value *getTripCount() const { return TripCount; }
@@ -2977,8 +2977,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
}
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
- // Fix widened PHIs by setting up the PHI operands.
- fixWidenedPHIs(State);
+ // Fix widened non-induction PHIs by setting up the PHI operands.
+ fixNonInductionPHIs(State);
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
@@ -3115,7 +3115,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
-void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) {
+void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &P : VPBB->phis()) {
>From 69d40761f44ddd666d6d0da97e2d791210425a53 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 13 Dec 2024 01:22:27 +0800
Subject: [PATCH 4/5] Update comments to clarify which parts can only be used
in the vplan-native path, also add back the assertion but check that the
incomingblocks is empty
---
llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++--
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b42dbe9208672f..d15418ab7415e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2318,13 +2318,15 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
VPSlotTracker &SlotTracker) const override;
#endif
- /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
+ /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. Only used in the
+ /// VPlan native path.
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
addOperand(IncomingV);
IncomingBlocks.push_back(IncomingBlock);
}
- /// Returns the \p I th incoming VPBasicBlock.
+ /// Returns the \p I th incoming VPBasicBlock. Only used in the VPlan native
+ /// path.
VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
/// Returns the \p I th incoming VPValue.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bfe945d73bf145..6a35289c4dfa24 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3440,6 +3440,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
+ assert((EnableVPlanNativePath || IncomingBlocks.empty()) &&
+ "Non-native vplans are not expected to have VPWidenPHIRecipes with "
+ "incoming blocks.");
+
State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
>From 46ae40263fa35accbaf52f2f738e0262dd048d60 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 3 Jan 2025 12:54:04 +0800
Subject: [PATCH 5/5] Remove DebugLoc changes
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 -
llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++--
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6a35289c4dfa24..b470e228e023ae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3444,7 +3444,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
"Non-native vplans are not expected to have VPWidenPHIRecipes with "
"incoming blocks.");
- State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 44afa34100c299..66aceab9fb27c8 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
; CHECK: for.cond5.preheader1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]
More information about the llvm-commits
mailing list