[llvm] [LV] Limits the splat operations be hoisted must not be defined by a recipe. (PR #117138)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 04:32:22 PST 2025
https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/117138
>From 8af36bc93ee2bac830448b14eb9394008db48221 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 21 Nov 2024 02:05:59 -0800
Subject: [PATCH 1/4] [LV] Limits the splat operations be hoisted must not be
defined by a recipe.
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +-
.../AArch64/extractvalue-no-scalarization-required.ll | 4 ++--
.../LoopVectorize/AArch64/sve-widen-extractvalue.ll | 2 +-
.../LoopVectorize/X86/epilog-vectorization-inductions.ll | 8 ++++----
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index e804f81c36dba0..4d3ea59011a0ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -262,7 +262,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
- bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
+ bool SafeToHoist = !Def->hasDefiningRecipe();
if (VF.isScalar())
return V;
// Place the code for broadcasting invariant variables in the new preheader.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index f969be80c3c00a..ab21794baa9240 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -23,9 +23,9 @@
; Check that the extractvalue operands are actually free in vector code.
; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind
; FORCED-LABEL: define void @test_getVectorCallCost
; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 7778f01c58dc34..fb5087db254b23 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu"
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
; CHECK-LABEL: @widen_extractvalue(
; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
-; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index c14ddca6c913d8..7ba4b522b06780 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -137,6 +137,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-LABEL: @test_induction_step_needs_expansion(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = sub i16 0, [[OFF:%.*]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[L:%.*]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
@@ -147,15 +149,13 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP11]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
>From 415d657a3f94c3191df5db00fd14e9b7eb931ab8 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 26 Nov 2024 07:37:48 -0800
Subject: [PATCH 2/4] Use VPDom
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 +++++++-
llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h | 2 ++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4d3ea59011a0ce..4ba3ab0cee2418 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -19,6 +19,7 @@
#include "VPlan.h"
#include "LoopVectorizationPlanner.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
@@ -262,7 +263,12 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
- bool SafeToHoist = !Def->hasDefiningRecipe();
+ VPDominatorTree VPDT(*Plan);
+ bool SafeToHoist =
+ !Def->hasDefiningRecipe() ||
+ VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
+ Plan->getVectorPreheader());
+
if (VF.isScalar())
return V;
// Place the code for broadcasting invariant variables in the new preheader.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 787cd940c4e266..995c6b8b2c2fb3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -39,8 +39,10 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
public:
VPDominatorTree() = default;
+ explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
/// Returns true if \p A properly dominates \p B.
+ using Base::properlyDominates;
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B);
};
>From 4c104c35a6e7c097ad576086cdb80151d594f5df Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 26 Nov 2024 11:45:33 -0800
Subject: [PATCH 3/4] Put VPDT as pointer into State
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 9 +++++----
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +++++
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4ba3ab0cee2418..26a4c4e7346506 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -220,7 +220,9 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
Loop *CurrentParentLoop, Type *CanonicalIVTy)
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
- TypeAnalysis(CanonicalIVTy) {}
+ TypeAnalysis(CanonicalIVTy), VPDT(new VPDominatorTree(*Plan)) {}
+
+VPTransformState::~VPTransformState() { delete VPDT; }
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
if (Def->isLiveIn())
@@ -263,11 +265,10 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
- VPDominatorTree VPDT(*Plan);
bool SafeToHoist =
!Def->hasDefiningRecipe() ||
- VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
- Plan->getVectorPreheader());
+ VPDT->properlyDominates(Def->getDefiningRecipe()->getParent(),
+ Plan->getVectorPreheader());
if (VF.isScalar())
return V;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9d7bf97d305ed1..290737fc201e0b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -67,6 +67,7 @@ class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;
+class VPDominatorTree;
struct VPCostContext;
@@ -238,6 +239,7 @@ struct VPTransformState {
LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan,
Loop *CurrentParentLoop, Type *CanonicalIVTy);
+ ~VPTransformState();
/// Target Transform Info.
const TargetTransformInfo *TTI;
@@ -390,6 +392,9 @@ struct VPTransformState {
/// VPlan-based type analysis.
VPTypeAnalysis TypeAnalysis;
+
+ /// VPlan-based dominator tree.
+ VPDominatorTree *VPDT = nullptr;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
>From 40c2deeccbead6ad64559a0bcb7d5d5997550210 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 7 Jan 2025 04:31:56 -0800
Subject: [PATCH 4/4] Recalculate VPDT in VPlan::execute
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 4 ++++
.../LoopVectorize/X86/epilog-vectorization-inductions.ll | 4 ++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 26a4c4e7346506..cd0864af349230 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -965,6 +965,10 @@ void VPlan::execute(VPTransformState *State) {
State->CFG.PrevVPBB = nullptr;
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
+ // Update VPDominatorTree since VPBasicBlock may be removed after State wsa
+ // constucted.
+ State->VPDT->recalculate(*this);
+
// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index 7ba4b522b06780..91026f88430fa1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -137,8 +137,6 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-LABEL: @test_induction_step_needs_expansion(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = sub i16 0, [[OFF:%.*]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[L:%.*]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
@@ -149,6 +147,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
More information about the llvm-commits
mailing list