[llvm] [SLP][REVEC] Make ShuffleCostEstimator and ShuffleInstructionBuilder can vectorize vector instructions. (PR #99606)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 6 06:08:57 PDT 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/99606
>From a45ce7312cfcb7e2295b9db1dcd337cc18573b78 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 01:19:03 -0700
Subject: [PATCH 1/6] [SLP][REVEC] NFC. Add
transformScalarShuffleIndiciesToVector.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8d2ce6bad6af7..4407e6188f13f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -253,6 +253,21 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
VF * getNumElements(ScalarTy));
}
+static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
+ SmallVectorImpl<int> &Mask) {
+ // The ShuffleBuilder implementation uses shufflevector to splat an "element",
+ // but an "element" is a scalar for SLP and a whole vector for REVEC. Expand
+ // every index in Mask into VecTyNumElements consecutive indices (poison stays
+ // poison) so that shufflevector can use the resulting mask directly.
+ SmallVector<int> NewMask(Mask.size() * VecTyNumElements);
+ for (unsigned I : seq<unsigned>(Mask.size()))
+ for (auto [J, MaskV] : enumerate(MutableArrayRef(NewMask).slice(
+ I * VecTyNumElements, VecTyNumElements)))
+ MaskV = Mask[I] == PoisonMaskElem ? PoisonMaskElem
+ : Mask[I] * VecTyNumElements + J;
+ Mask.swap(NewMask);
+}
+
/// \returns True if the value is a constant (but not globals/constant
/// expressions).
static bool isConstant(Value *V) {
>From ee179a3267ce37d57d12e5c2d432e3f11e79aa1d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 01:22:42 -0700
Subject: [PATCH 2/6] [SLP][REVEC] Make ShuffleInstructionBuilder::finalize
support vector instructions.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4407e6188f13f..2f499cc891436 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12216,6 +12216,14 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
+ SmallVector<int> NewExtMask(ExtMask);
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ CommonMask);
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ NewExtMask);
+ ExtMask = NewExtMask;
+ }
if (Action) {
Value *Vec = InVectors.front();
if (InVectors.size() == 2) {
>From e06a402873d4edc787d649d2eadb2bb4731a6554 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 5 Aug 2024 23:08:30 -0700
Subject: [PATCH 3/6] [SLP][REVEC] NFC. Add a helper function to get VF for
ShuffleCostEstimator.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2f499cc891436..95085dc11e12f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8286,6 +8286,27 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
/// to move most of the long shuffles cost estimation to TTI.
bool SameNodesEstimated = true;
+ /// V is expected to be a vectorized value.
+ /// When REVEC is disabled, there is no difference between VF and
+ /// VNumElements.
+ /// When REVEC is enabled, VF is VNumElements / ScalarTyNumElements.
+ /// e.g., if ScalarTy is <4 x Ty> and V1 is <8 x Ty>, 2 is returned instead
+ /// of 8.
+ unsigned getVF(Value *V) const {
+ assert(V && "V cannot be nullptr");
+ assert(isa<FixedVectorType>(V->getType()) &&
+ "V does not have FixedVectorType");
+ assert(ScalarTy && "ScalarTy cannot be nullptr");
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ unsigned VNumElements =
+ cast<FixedVectorType>(V->getType())->getNumElements();
+ assert(VNumElements > ScalarTyNumElements &&
+ "the number of elements of V is not large enough");
+ assert(VNumElements % ScalarTyNumElements == 0 &&
+ "the number of elements of V is not a vectorized value");
+ return VNumElements / ScalarTyNumElements;
+ }
+
static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) {
if (Ty->getScalarType()->isPointerTy()) {
Constant *Res = ConstantExpr::getIntToPtr(
>From ae865fc3b075afec76b042f74dadc099cbc73f82 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 6 Aug 2024 05:59:59 -0700
Subject: [PATCH 4/6] [SLP][REVEC] NFC. Refactor getVF.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 52 ++++++++++---------
1 file changed, 27 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 95085dc11e12f..55991532ebe98 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7777,6 +7777,31 @@ namespace {
/// The base class for shuffle instruction emission and shuffle cost estimation.
class BaseShuffleAnalysis {
protected:
+ Type *ScalarTy = nullptr;
+
+ BaseShuffleAnalysis(Type *ScalarTy) : ScalarTy(ScalarTy) {}
+
+ /// \returns the vector factor of \p V, which is expected to be a vectorized
+ /// value. When REVEC is disabled, there is no difference between VF and
+ /// VNumElements.
+ /// When REVEC is enabled, VF is VNumElements / ScalarTyNumElements.
+ /// E.g., if ScalarTy is <4 x Ty> and V is <8 x Ty>, 2 is returned instead
+ /// of 8.
+ unsigned getVF(Value *V) const {
+ assert(V && "V cannot be nullptr");
+ assert(isa<FixedVectorType>(V->getType()) &&
+ "V does not have FixedVectorType");
+ assert(ScalarTy && "ScalarTy cannot be nullptr");
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ unsigned VNumElements =
+ cast<FixedVectorType>(V->getType())->getNumElements();
+ assert(VNumElements > ScalarTyNumElements &&
+ "the number of elements of V is not large enough");
+ assert(VNumElements % ScalarTyNumElements == 0 &&
+ "the number of elements of V is not a vectorized value");
+ return VNumElements / ScalarTyNumElements;
+ }
+
/// Checks if the mask is an identity mask.
/// \param IsStrict if is true the function returns false if mask size does
/// not match vector size.
@@ -8273,7 +8298,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool IsFinalized = false;
SmallVector<int> CommonMask;
SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
- Type *ScalarTy = nullptr;
const TargetTransformInfo &TTI;
InstructionCost Cost = 0;
SmallDenseSet<Value *> VectorizedVals;
@@ -8286,27 +8310,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
/// to move most of the long shuffles cost estimation to TTI.
bool SameNodesEstimated = true;
- /// V is expected to be a vectorized value.
- /// When REVEC is disabled, there is no difference between VF and
- /// VNumElements.
- /// When REVEC is enabled, VF is VNumElements / ScalarTyNumElements.
- /// e.g., if ScalarTy is <4 x Ty> and V1 is <8 x Ty>, 2 is returned instead
- /// of 8.
- unsigned getVF(Value *V) const {
- assert(V && "V cannot be nullptr");
- assert(isa<FixedVectorType>(V->getType()) &&
- "V does not have FixedVectorType");
- assert(ScalarTy && "ScalarTy cannot be nullptr");
- unsigned ScalarTyNumElements = getNumElements(ScalarTy);
- unsigned VNumElements =
- cast<FixedVectorType>(V->getType())->getNumElements();
- assert(VNumElements > ScalarTyNumElements &&
- "the number of elements of V is not large enough");
- assert(VNumElements % ScalarTyNumElements == 0 &&
- "the number of elements of V is not a vectorized value");
- return VNumElements / ScalarTyNumElements;
- }
-
static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) {
if (Ty->getScalarType()->isPointerTy()) {
Constant *Res = ConstantExpr::getIntToPtr(
@@ -8969,7 +8972,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
ShuffleCostEstimator(Type *ScalarTy, TargetTransformInfo &TTI,
ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
SmallPtrSetImpl<Value *> &CheckedExtracts)
- : ScalarTy(ScalarTy), TTI(TTI),
+ : BaseShuffleAnalysis(ScalarTy), TTI(TTI),
VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()), R(R),
CheckedExtracts(CheckedExtracts) {}
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
@@ -11831,7 +11834,6 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// resulting shuffle and the second operand sets to be the newly added
/// operand. The \p CommonMask is transformed in the proper way after that.
SmallVector<Value *, 2> InVectors;
- Type *ScalarTy = nullptr;
IRBuilderBase &Builder;
BoUpSLP &R;
@@ -11957,7 +11959,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
public:
ShuffleInstructionBuilder(Type *ScalarTy, IRBuilderBase &Builder, BoUpSLP &R)
- : ScalarTy(ScalarTy), Builder(Builder), R(R) {}
+ : BaseShuffleAnalysis(ScalarTy), Builder(Builder), R(R) {}
/// Adjusts extractelements after reusing them.
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
>From efeaf08378a3e475af0e7f576bf36c1ccf89d9ef Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 1 Jul 2024 11:46:06 -0700
Subject: [PATCH 5/6] [SLP][REVEC] Make ShuffleCostEstimator::add and
ShuffleInstructionBuilder::add support vector instructions.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 55991532ebe98..97208836ee3ce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9177,7 +9177,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
assert(!InVectors.empty() && !CommonMask.empty() &&
"Expected only tree entries from extracts/reused buildvectors.");
- unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ unsigned VF = getVF(V1);
if (InVectors.size() == 2) {
Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
@@ -12216,7 +12216,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
break;
}
}
- int VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ int VF = getVF(V1);
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);
>From 2e951cf32cc2c9b81515588782f9b08e5d7a21ae Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 1 Jul 2024 10:44:57 -0700
Subject: [PATCH 6/6] [SLP][REVEC] Make ShuffleCostEstimator::createShuffle
support vector instructions.
The VF is relative to the number of elements in ScalarTy instead of the
size of mask.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 97208836ee3ce..1e8d34fde6cc0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8879,14 +8879,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
} else if (V1 && P2.isNull()) {
// Shuffle single vector.
ExtraCost += GetValueMinBWAffectedCost(V1);
- CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ CommonVF = getVF(V1);
assert(
all_of(Mask,
[=](int Idx) { return Idx < static_cast<int>(CommonVF); }) &&
"All elements in mask must be less than CommonVF.");
} else if (V1 && !V2) {
// Shuffle vector and tree node.
- unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ unsigned VF = getVF(V1);
const TreeEntry *E2 = P2.get<const TreeEntry *>();
CommonVF = std::max(VF, E2->getVectorFactor());
assert(all_of(Mask,
@@ -8912,7 +8912,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else if (!V1 && V2) {
// Shuffle vector and tree node.
- unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
+ unsigned VF = getVF(V2);
const TreeEntry *E1 = P1.get<const TreeEntry *>();
CommonVF = std::max(VF, E1->getVectorFactor());
assert(all_of(Mask,
@@ -8940,9 +8940,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else {
assert(V1 && V2 && "Expected both vectors.");
- unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
- CommonVF =
- std::max(VF, cast<FixedVectorType>(V2->getType())->getNumElements());
+ unsigned VF = getVF(V1);
+ CommonVF = std::max(VF, getVF(V2));
assert(all_of(Mask,
[=](int Idx) {
return Idx < 2 * static_cast<int>(CommonVF);
@@ -8960,6 +8959,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
}
}
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ CommonMask);
InVectors.front() =
Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
More information about the llvm-commits
mailing list