[llvm] [VPlan] Allow VPWidenPHI in non-native path and copy DebugLoc (PR #118662)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 20:54:29 PST 2025


https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/118662

>From 431d6d914f4a9d190e6e3d0ee5d5e77fcfb7690a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 00:09:46 +0800
Subject: [PATCH 1/5] [LV] Allow VPWidenPHI in non-native path and copy
 DebugLoc

We can reuse VPWidenPHI in #118638, but it requires us to allow it in the non-native path. We also need to propagate the DebugLoc and use a different name in the generated PHI, so this splits these parts off in case we want it.

We lose some debug info in dbg-outer-loop-vect.ll, but I think this is because the underlying phi node didn't have a DebugLoc to begin with. I think the current version is just carrying over the DebugLoc from the previous state.
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp        |  9 ++++-----
 llvm/lib/Transforms/Vectorize/VPlan.h                  | 10 ++++++++--
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp         |  6 ++----
 .../Transforms/LoopVectorize/dbg-outer-loop-vect.ll    |  4 ++--
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f2f8a85b7cc233..5f924e25fcf208 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -516,7 +516,7 @@ class InnerLoopVectorizer {
                             VPTransformState &State);
 
   /// Fix the non-induction PHIs in \p Plan.
-  void fixNonInductionPHIs(VPTransformState &State);
+  void fixWidenedPHIs(VPTransformState &State);
 
   /// Returns the original loop trip count.
   Value *getTripCount() const { return TripCount; }
@@ -2977,9 +2977,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
 }
 
 void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
-  // Fix widened non-induction PHIs by setting up the PHI operands.
-  if (EnableVPlanNativePath)
-    fixNonInductionPHIs(State);
+  // Fix widened PHIs by setting up the PHI operands.
+  fixWidenedPHIs(State);
 
   // Forget the original basic block.
   PSE.getSE()->forgetLoop(OrigLoop);
@@ -3116,7 +3115,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
   } while (Changed);
 }
 
-void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
+void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) {
   auto Iter = vp_depth_first_deep(Plan.getEntry());
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
     for (VPRecipeBase &P : VPBB->phis()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 88f3f672d3aa38..b42dbe9208672f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2287,10 +2287,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
   /// List of incoming blocks. Only used in the VPlan native path.
   SmallVector<VPBasicBlock *, 2> IncomingBlocks;
 
+  /// Name to use for the generated IR instruction for the widened IV.
+  std::string Name;
+
 public:
   /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
-  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
-      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
+                   const Twine &Name = "vec.phi")
+      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
+                          Phi->getDebugLoc()),
+        Name(Name.str()) {
     if (Start)
       addOperand(Start);
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 77c08839dbfa95..bfe945d73bf145 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3440,12 +3440,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
-  assert(EnableVPlanNativePath &&
-         "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
+  State.setDebugLocFrom(getDebugLoc());
   Value *Op0 = State.get(getOperand(0));
   Type *VecTy = Op0->getType();
-  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
+  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
   State.set(this, VecPhi);
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 66aceab9fb27c8..44afa34100c299 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
 ; CHECK:       for.cond5.preheader1:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]

>From cda41c6532be6aa56f66e093c44cb521cb64343e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 00:20:20 +0800
Subject: [PATCH 2/5] Update comment

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5f924e25fcf208..0eaaa711a5f95c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -515,7 +515,7 @@ class InnerLoopVectorizer {
                             VPReplicateRecipe *RepRecipe, const VPLane &Lane,
                             VPTransformState &State);
 
-  /// Fix the non-induction PHIs in \p Plan.
+  /// Fix the widened PHIs in \p Plan.
   void fixWidenedPHIs(VPTransformState &State);
 
   /// Returns the original loop trip count.

>From f31878cde04c29d2f76bc82879d49e0f37566e35 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 04:04:21 +0800
Subject: [PATCH 3/5] Rename fixNonInductionPHIs back

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0eaaa711a5f95c..ef8abdaa8aa76e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -515,8 +515,8 @@ class InnerLoopVectorizer {
                             VPReplicateRecipe *RepRecipe, const VPLane &Lane,
                             VPTransformState &State);
 
-  /// Fix the widened PHIs in \p Plan.
-  void fixWidenedPHIs(VPTransformState &State);
+  /// Fix the non-induction PHIs in \p Plan.
+  void fixNonInductionPHIs(VPTransformState &State);
 
   /// Returns the original loop trip count.
   Value *getTripCount() const { return TripCount; }
@@ -2977,8 +2977,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
 }
 
 void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
-  // Fix widened PHIs by setting up the PHI operands.
-  fixWidenedPHIs(State);
+  // Fix widened non-induction PHIs by setting up the PHI operands.
+  fixNonInductionPHIs(State);
 
   // Forget the original basic block.
   PSE.getSE()->forgetLoop(OrigLoop);
@@ -3115,7 +3115,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
   } while (Changed);
 }
 
-void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) {
+void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
   auto Iter = vp_depth_first_deep(Plan.getEntry());
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
     for (VPRecipeBase &P : VPBB->phis()) {

>From 69d40761f44ddd666d6d0da97e2d791210425a53 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 13 Dec 2024 01:22:27 +0800
Subject: [PATCH 4/5] Update comments to clarify which parts can only be used
 in the vplan-native path, also add back the assertion but check that the
 incomingblocks is empty

---
 llvm/lib/Transforms/Vectorize/VPlan.h          | 6 ++++--
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b42dbe9208672f..d15418ab7415e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2318,13 +2318,15 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
              VPSlotTracker &SlotTracker) const override;
 #endif
 
-  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
+  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. Only used in the
+  /// VPlan native path.
   void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
     addOperand(IncomingV);
     IncomingBlocks.push_back(IncomingBlock);
   }
 
-  /// Returns the \p I th incoming VPBasicBlock.
+  /// Returns the \p I th incoming VPBasicBlock. Only used in the VPlan native
+  /// path.
   VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
 
   /// Returns the \p I th incoming VPValue.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bfe945d73bf145..6a35289c4dfa24 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3440,6 +3440,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
+  assert((EnableVPlanNativePath || IncomingBlocks.empty()) &&
+         "Non-native vplans are not expected to have VPWidenPHIRecipes with "
+         "incoming blocks.");
+
   State.setDebugLocFrom(getDebugLoc());
   Value *Op0 = State.get(getOperand(0));
   Type *VecTy = Op0->getType();

>From 46ae40263fa35accbaf52f2f738e0262dd048d60 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 3 Jan 2025 12:54:04 +0800
Subject: [PATCH 5/5] Remove DebugLoc changes

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp            | 1 -
 llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6a35289c4dfa24..b470e228e023ae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3444,7 +3444,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
          "Non-native vplans are not expected to have VPWidenPHIRecipes with "
          "incoming blocks.");
 
-  State.setDebugLocFrom(getDebugLoc());
   Value *Op0 = State.get(getOperand(0));
   Type *VecTy = Op0->getType();
   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 44afa34100c299..66aceab9fb27c8 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
 ; CHECK:       for.cond5.preheader1:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]



More information about the llvm-commits mailing list