[llvm] [SLP][REVEC] Initial commits. (PR #98269)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 10:12:30 PDT 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/98269
>From 199dbe928f610f9ac9a26c1c9e30c972c0c1fbf7 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Jun 2024 01:58:51 -0700
Subject: [PATCH 1/7] [SLP][REVEC] Add an option to control SLP
revectorization. NFC.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1e9dd8c1e2287..35be1488dc90d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -113,6 +113,10 @@ static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
+static cl::opt<bool>
+ RunSLPReVectorization("revectorize-slp", cl::init(false), cl::Hidden,
+ cl::desc("Run the SLP revectorization passes"));
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
>From 48ca8bffc74c56518ceda3a17b37ae8a836ec41b Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 27 Jun 2024 01:59:46 -0700
Subject: [PATCH 2/7] [SLP][REVEC] Pre-commit test.
---
llvm/test/Transforms/SLPVectorizer/revec.ll | 32 +++++++++++++++++++++
1 file changed, 32 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/revec.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
new file mode 100644
index 0000000000000..cb6ed9c983b5c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -revectorize-slp -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
+
+define void @test1(ptr %a, ptr %b, ptr %c) {
+entry:
+ %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
+ %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
+ %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 12
+ %0 = load <4 x i32>, ptr %a, align 4
+ %1 = load <4 x i32>, ptr %arrayidx3, align 4
+ %2 = load <4 x i32>, ptr %arrayidx7, align 4
+ %3 = load <4 x i32>, ptr %arrayidx11, align 4
+ %arrayidx19 = getelementptr inbounds i32, ptr %b, i64 4
+ %arrayidx23 = getelementptr inbounds i32, ptr %b, i64 8
+ %arrayidx27 = getelementptr inbounds i32, ptr %b, i64 12
+ %4 = load <4 x i32>, ptr %b, align 4
+ %5 = load <4 x i32>, ptr %arrayidx19, align 4
+ %6 = load <4 x i32>, ptr %arrayidx23, align 4
+ %7 = load <4 x i32>, ptr %arrayidx27, align 4
+ %add.i = add <4 x i32> %4, %0
+ %add.i63 = add <4 x i32> %5, %1
+ %add.i64 = add <4 x i32> %6, %2
+ %add.i65 = add <4 x i32> %7, %3
+ %arrayidx36 = getelementptr inbounds i32, ptr %c, i64 4
+ %arrayidx39 = getelementptr inbounds i32, ptr %c, i64 8
+ %arrayidx42 = getelementptr inbounds i32, ptr %c, i64 12
+ store <4 x i32> %add.i, ptr %c, align 4
+ store <4 x i32> %add.i63, ptr %arrayidx36, align 4
+ store <4 x i32> %add.i64, ptr %arrayidx39, align 4
+ store <4 x i32> %add.i65, ptr %arrayidx42, align 4
+ ret void
+}
>From 940254179f053dd25ca39a7f5351c2b4e73f6175 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 13 Jun 2024 01:58:02 -0700
Subject: [PATCH 3/7] [SLP][REVEC] Apply RunSLPReVectorization to the existing
code.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 ++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 35be1488dc90d..3e1083b4ac481 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -231,12 +231,18 @@ static const unsigned MaxPHINumOperands = 128;
/// avoids spending time checking the cost model and realizing that they will
/// be inevitably scalarized.
static bool isValidElementType(Type *Ty) {
+ // TODO: Support ScalableVectorType.
+ if (isa<FixedVectorType>(Ty))
+ Ty = Ty->getScalarType();
return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
!Ty->isPPC_FP128Ty();
}
/// \returns the vector type of ScalarTy based on vectorization factor.
static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ return FixedVectorType::get(VecTy->getElementType(),
+ VF * VecTy->getNumElements());
return FixedVectorType::get(ScalarTy, VF);
}
@@ -6784,19 +6790,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Don't handle vectors.
- if (S.OpValue->getType()->isVectorTy() &&
+ if (!RunSLPReVectorization && S.OpValue->getType()->isVectorTy() &&
!isa<InsertElementInst>(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
- if (SI->getValueOperand()->getType()->isVectorTy()) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
- return;
- }
+ if (!RunSLPReVectorization)
+ if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
+ if (SI->getValueOperand()->getType()->isVectorTy()) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
+ return;
+ }
// If all of the operands are identical or constant we have a simple solution.
// If we deal with insert/extract instructions, they all must have constant
>From 0b53076aa17765d6f39e025de48a6a64156c8d05 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 27 Jun 2024 02:22:59 -0700
Subject: [PATCH 4/7] [SLP][REVEC] Make castToScalarTyElem support vector
instructions.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3e1083b4ac481..c0bfad96a3ef8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11818,10 +11818,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
Value *castToScalarTyElem(Value *V,
std::optional<bool> IsSigned = std::nullopt) {
auto *VecTy = cast<VectorType>(V->getType());
- if (VecTy->getElementType() == ScalarTy)
+ if (VecTy->getElementType() == ScalarTy->getScalarType())
return V;
return Builder.CreateIntCast(
- V, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ V, VectorType::get(ScalarTy->getScalarType(), VecTy->getElementCount()),
IsSigned.value_or(!isKnownNonNegative(V, SimplifyQuery(*R.DL))));
}
>From 6016d569750dd437775b264147d4546fbeeefa5e Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 27 Jun 2024 00:09:23 -0700
Subject: [PATCH 5/7] [SLP][REVEC] NFC. Provide a universal interface for
getNumElements.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c0bfad96a3ef8..37410eb0b023b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -238,12 +238,17 @@ static bool isValidElementType(Type *Ty) {
!Ty->isPPC_FP128Ty();
}
+/// \returns the number of elements for Ty.
+static unsigned getNumElements(Type *Ty) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Ty))
+ return VecTy->getNumElements();
+ return 1;
+}
+
/// \returns the vector type of ScalarTy based on vectorization factor.
static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
- if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
- return FixedVectorType::get(VecTy->getElementType(),
- VF * VecTy->getNumElements());
- return FixedVectorType::get(ScalarTy, VF);
+ return FixedVectorType::get(ScalarTy->getScalarType(),
+ VF * getNumElements(ScalarTy));
}
/// \returns True if the value is a constant (but not globals/constant
>From a65f5e160996f856a11a85acf7e8ca29e863c2ab Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 27 Jun 2024 23:59:01 -0700
Subject: [PATCH 6/7] [SLP][REVEC] Make vectorizeOperand support vector
instructions.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 37410eb0b023b..3f7d94d46bd29 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12211,7 +12211,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
return ShuffleBuilder.finalize(std::nullopt);
};
Value *V = vectorizeTree(VE, PostponedPHIs);
- if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
+ if ((VF * getNumElements(VL[0]->getType())) !=
+ cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
// Reshuffle to get only unique values.
// If some of the scalars are duplicated in the vectorization
>From 3d610dffa98fcbf05376c0ee659d2dad5e10ab50 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 28 Jun 2024 01:42:20 -0700
Subject: [PATCH 7/7] [SLP][REVEC] Update test1.
---
llvm/test/Transforms/SLPVectorizer/revec.ll | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index cb6ed9c983b5c..3b9b242eef7ad 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -2,6 +2,14 @@
; RUN: opt -passes=slp-vectorizer -S -revectorize-slp -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
define void @test1(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[C:%.*]], align 4
+; CHECK-NEXT: ret void
+;
entry:
%arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
%arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
More information about the llvm-commits
mailing list