[llvm] [SLP][REVEC] Make MinBWs support vector instructions. (PR #103049)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 13 05:19:45 PDT 2024
https://github.com/HanKuanChen created https://github.com/llvm/llvm-project/pull/103049
If ScalarTy is a FixedVectorType, the type narrowed via MinBWs should remain a FixedVectorType with the same number of elements.
>From 8c1ee3db98e3c13af4c6a22b4a2ababf530b33e7 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 29 Jul 2024 00:44:09 -0700
Subject: [PATCH 1/2] [SLP][REVEC] Pre-commit test.
---
llvm/test/Transforms/SLPVectorizer/revec.ll | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 9ce9db1d360cc..5e0b82ecd59ca 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -272,3 +272,16 @@ for.body:
%5 = phi <2 x float> [ %5, %for.body ], [ zeroinitializer, %entry ]
br i1 false, label %for0, label %for.body
}
+
+define void @test9() {
+entry:
+ br label %for.body13
+
+for.body13: ; preds = %for.body13, %entry
+ %vmovl.i111 = sext <4 x i16> zeroinitializer to <4 x i32>
+ %vmovl.i110 = sext <4 x i16> zeroinitializer to <4 x i32>
+ store <4 x i32> %vmovl.i111, ptr null, align 4
+ %add.ptr29 = getelementptr i8, ptr null, i64 16
+ store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4
+ br label %for.body13
+}
>From 74a94d72531222208033c875041719ddf0d33a49 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 27 Jun 2024 03:17:56 -0700
Subject: [PATCH 2/2] [SLP][REVEC] Make MinBWs support vector instructions.
If ScalarTy is FixedVectorType, it should remain as FixedVectorType.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 26 ++++++++++++++-----
llvm/test/Transforms/SLPVectorizer/revec.ll | 11 ++++++++
2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8a08d766a8ed4..ebfb11f841086 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9527,8 +9527,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// that the costs will be accurate.
auto It = MinBWs.find(E);
Type *OrigScalarTy = ScalarTy;
- if (It != MinBWs.end())
+ if (It != MinBWs.end()) {
+ auto VecTy = dyn_cast<FixedVectorType>(ScalarTy);
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ if (VecTy)
+ ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements());
+ }
auto *VecTy = getWidenedType(ScalarTy, VL.size());
unsigned EntryVF = E->getVectorFactor();
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
@@ -13127,8 +13131,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
else if (auto *IE = dyn_cast<InsertElementInst>(V))
ScalarTy = IE->getOperand(1)->getType();
auto It = MinBWs.find(E);
- if (It != MinBWs.end())
+ if (It != MinBWs.end()) {
+ auto VecTy = dyn_cast<FixedVectorType>(ScalarTy);
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ if (VecTy)
+ ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements());
+ }
auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
if (E->isGather()) {
// Set insert point for non-reduction initial nodes.
@@ -16003,8 +16011,9 @@ void BoUpSLP::computeMinimumValueSizes() {
}
unsigned VF = E.getVectorFactor();
- auto *TreeRootIT =
- dyn_cast<IntegerType>(E.Scalars.front()->getType()->getScalarType());
+ Type *ScalarTy = E.Scalars.front()->getType();
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ auto *TreeRootIT = dyn_cast<IntegerType>(ScalarTy->getScalarType());
if (!TreeRootIT || !Opcode)
return 0u;
@@ -16012,7 +16021,8 @@ void BoUpSLP::computeMinimumValueSizes() {
[&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
return 0u;
- unsigned NumParts = TTI->getNumberOfParts(getWidenedType(TreeRootIT, VF));
+ unsigned NumParts = TTI->getNumberOfParts(
+ getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
// The maximum bit width required to represent all the values that can be
// demoted without loss of precision. It would be safe to truncate the roots
@@ -16034,7 +16044,8 @@ void BoUpSLP::computeMinimumValueSizes() {
// we can truncate the roots to this narrower type.
for (Value *Root : E.Scalars) {
unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT);
- TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType());
+ TypeSize NumTypeBits =
+ DL->getTypeSizeInBits(Root->getType()->getScalarType());
unsigned BitWidth1 = NumTypeBits - NumSignBits;
// If we can't prove that the sign bit is zero, we must add one to the
// maximum bit width to account for the unknown sign bit. This preserves
@@ -16206,7 +16217,8 @@ void BoUpSLP::computeMinimumValueSizes() {
// type, we can proceed with the narrowing. Otherwise, do nothing.
if (MaxBitWidth == 0 ||
MaxBitWidth >=
- cast<IntegerType>(TreeRoot.front()->getType())->getBitWidth()) {
+ cast<IntegerType>(TreeRoot.front()->getType()->getScalarType())
+ ->getBitWidth()) {
if (UserIgnoreList)
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
continue;
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 5e0b82ecd59ca..31ee107c81cd4 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -274,6 +274,17 @@ for.body:
}
define void @test9() {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4)
+; CHECK-NEXT: br label [[FOR_BODY13:%.*]]
+; CHECK: for.body13:
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[TMP2]] to <8 x i32>
+; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4
+; CHECK-NEXT: br label [[FOR_BODY13]]
+;
entry:
br label %for.body13
More information about the llvm-commits mailing list