[llvm] [LV] Limit hoisted splat operations to values not defined by a recipe. (PR #117138)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 28 00:47:44 PST 2024
https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/117138
>From 069c7620dae7dfa15eeb926b0aea4c18c71074d9 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 21 Nov 2024 02:05:59 -0800
Subject: [PATCH 1/3] [LV] Limit hoisted splat operations to values not
defined by a recipe.
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +-
.../AArch64/extractvalue-no-scalarization-required.ll | 4 ++--
.../LoopVectorize/AArch64/sve-widen-extractvalue.ll | 2 +-
.../LoopVectorize/X86/epilog-vectorization-inductions.ll | 4 ++--
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 529108a5aaa97f..af9c6b55122124 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -267,7 +267,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
- bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
+ bool SafeToHoist = !Def->hasDefiningRecipe();
if (VF.isScalar())
return V;
// Place the code for broadcasting invariant variables in the new preheader.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index aa78113ebaa48c..b1c202eab9dd3d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -23,9 +23,9 @@
; Check that the extractvalue operands are actually free in vector code.
; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind
; FORCED-LABEL: define void @test_getVectorCallCost
; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 7778f01c58dc34..fb5087db254b23 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
; CHECK-LABEL: @widen_extractvalue(
; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
-; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index e94bd841360256..7840a9dec794b3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -137,6 +137,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-LABEL: @test_induction_step_needs_expansion(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = sub i16 0, [[OFF:%.*]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[L:%.*]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
@@ -145,8 +147,6 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[L]], 64
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
>From 140de9d171558c39ef232d0061d59a3f2bd7f6ae Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 26 Nov 2024 07:37:48 -0800
Subject: [PATCH 2/3] Use VPDom
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 +++++++-
llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h | 2 ++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index af9c6b55122124..51eb0ba26ff89c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -19,6 +19,7 @@
#include "VPlan.h"
#include "LoopVectorizationPlanner.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
@@ -267,7 +268,12 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
- bool SafeToHoist = !Def->hasDefiningRecipe();
+ VPDominatorTree VPDT(*Plan);
+ bool SafeToHoist =
+ !Def->hasDefiningRecipe() ||
+ VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
+ Plan->getVectorPreheader());
+
if (VF.isScalar())
return V;
// Place the code for broadcasting invariant variables in the new preheader.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 787cd940c4e266..995c6b8b2c2fb3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -39,8 +39,10 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
public:
VPDominatorTree() = default;
+ explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
/// Returns true if \p A properly dominates \p B.
+ using Base::properlyDominates;
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B);
};
>From 2153bf3f95e732b05ae99a6ae9862a953eefef2d Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 26 Nov 2024 11:45:33 -0800
Subject: [PATCH 3/3] Put VPDT as pointer into State
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 10 ++++++----
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +++++
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 51eb0ba26ff89c..c49c4e3fd180c1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -225,7 +225,10 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan)
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
- LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
+ LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()),
+ VPDT(new VPDominatorTree(*Plan)) {}
+
+VPTransformState::~VPTransformState() { delete VPDT; }
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
if (Def->isLiveIn())
@@ -268,11 +271,10 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
- VPDominatorTree VPDT(*Plan);
bool SafeToHoist =
!Def->hasDefiningRecipe() ||
- VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
- Plan->getVectorPreheader());
+ VPDT->properlyDominates(Def->getDefiningRecipe()->getParent(),
+ Plan->getVectorPreheader());
if (VF.isScalar())
return V;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 1b1630ebc6c23a..cc89660ff73210 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -67,6 +67,7 @@ class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;
+class VPDominatorTree;
struct VPCostContext;
@@ -237,6 +238,7 @@ struct VPTransformState {
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan);
+ ~VPTransformState();
/// Target Transform Info.
const TargetTransformInfo *TTI;
@@ -389,6 +391,9 @@ struct VPTransformState {
/// VPlan-based type analysis.
VPTypeAnalysis TypeAnalysis;
+
+ /// VPlan-based dominator tree.
+ VPDominatorTree *VPDT = nullptr;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
More information about the llvm-commits
mailing list