[llvm] [LV] Limits the splat operations be hoisted must not be defined by a recipe. (PR #117138)

Mel Chen via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 11 00:36:27 PDT 2025


https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/117138

>From 8a67a65dd86b4955fd087ba2902492301479a379 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 21 Nov 2024 02:05:59 -0800
Subject: [PATCH 1/2] [LV] Limits the splat operations be hoisted must not be
 defined by a recipe.

---
 llvm/lib/Transforms/Vectorize/VPlan.cpp             | 13 +++++++++++--
 llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h  |  2 ++
 llvm/lib/Transforms/Vectorize/VPlanHelpers.h        |  4 ++++
 .../extractvalue-no-scalarization-required.ll       |  4 ++--
 .../LoopVectorize/AArch64/sve-widen-extractvalue.ll |  2 +-
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 944a11b96325d..e595347d62bf5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -19,6 +19,7 @@
 #include "VPlan.h"
 #include "LoopVectorizationPlanner.h"
 #include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
 #include "VPlanHelpers.h"
 #include "VPlanPatternMatch.h"
 #include "VPlanTransforms.h"
@@ -221,7 +222,7 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
                                    Loop *CurrentParentLoop, Type *CanonicalIVTy)
     : TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
       CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
-      TypeAnalysis(CanonicalIVTy) {}
+      TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
 
 Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
   if (Def->isLiveIn())
@@ -264,7 +265,11 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
     return Data.VPV2Vector[Def];
 
   auto GetBroadcastInstrs = [this, Def](Value *V) {
-    bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
+    bool SafeToHoist =
+        !Def->hasDefiningRecipe() ||
+        VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
+                               Plan->getVectorPreheader());
+
     if (VF.isScalar())
       return V;
     // Place the code for broadcasting invariant variables in the new preheader.
@@ -929,6 +934,10 @@ void VPlan::execute(VPTransformState *State) {
   State->CFG.PrevVPBB = nullptr;
   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
 
+  // Update VPDominatorTree since VPBasicBlock may be removed after State was
+  // constructed.
+  State->VPDT.recalculate(*this);
+
   // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
   cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 787cd940c4e26..995c6b8b2c2fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -39,8 +39,10 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
 
 public:
   VPDominatorTree() = default;
+  explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
 
   /// Returns true if \p A properly dominates \p B.
+  using Base::properlyDominates;
   bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B);
 };
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 8bdbf556efbb3..d53b72bb2258a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -16,6 +16,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
 
 #include "VPlanAnalysis.h"
+#include "VPlanDominatorTree.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -360,6 +361,9 @@ struct VPTransformState {
 
   /// VPlan-based type analysis.
   VPTypeAnalysis TypeAnalysis;
+
+  /// VPlan-based dominator tree.
+  VPDominatorTree VPDT;
 };
 
 /// Struct to hold various analysis needed for cost computations.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index f969be80c3c00..ab21794baa924 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -23,9 +23,9 @@
 ; Check that the extractvalue operands are actually free in vector code.
 
 ; FORCED:         [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT:    [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
 ; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
 ; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
 ; FORCED-NEXT:    [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind
 ; FORCED-LABEL: define void @test_getVectorCallCost
 
 ; FORCED:         [[E1:%.+]] = extractvalue { float, float } %sv, 0
-; FORCED-NEXT:    [[E2:%.+]] = extractvalue { float, float } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
 ; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT:    [[E2:%.+]] = extractvalue { float, float } %sv, 1
 ; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
 ; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 91dd6e475ec47..473fabfc9fecc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
 ; CHECK-LABEL: @widen_extractvalue(
 ; CHECK:        [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
-; CHECK-NEXT:   [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
 ; CHECK-NEXT:   [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
 ; CHECK-NEXT:   [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:   [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
 ; CHECK-NEXT:   [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
 ; CHECK-NEXT:   [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK:        [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]

>From 68426e063de1abec3222c6d099e1e3ba6bbc8397 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 10 Mar 2025 07:13:41 -0700
Subject: [PATCH 2/2] Rebase and update test case

---
 .../LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index 000c1570ef92d..36ae8493951bd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -276,10 +276,10 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[X]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]



More information about the llvm-commits mailing list