[llvm] [SLP][REVEC] Fix CompressVectorize does not expand mask when REVEC is enabled. (PR #135174)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 07:37:03 PDT 2025
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/135174
>From 284caa06f4023deb217231b84644be1c7ad28017 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 10 Apr 2025 06:12:00 -0700
Subject: [PATCH 1/3] [SLP][REVEC] Pre-commit test.
---
.../SLPVectorizer/X86/revec-load-compress.ll | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll
new file mode 100644
index 0000000000000..adbef4b99c0d6
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.1-512 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
+
+define void @test(ptr %in) {
+entry:
+ %0 = getelementptr i8, ptr %in, i64 112 ; address of the first <8 x i16> load: bytes [112, 128) of %in
+ %wide.load = load <8 x i16>, ptr %0, align 2
+ %1 = sext <8 x i16> %wide.load to <8 x i32>
+ %2 = getelementptr i8, ptr %in, i64 48 ; address of the first store: bytes [48, 64) of %in
+ %3 = or <8 x i32> zeroinitializer, %1
+ %4 = getelementptr i8, ptr %in, i64 32 ; address of the second store: bytes [32, 48), directly before the first store
+ %5 = getelementptr i8, ptr %in, i64 64 ; address of the second load: bytes [64, 80), not adjacent to the first load
+ %wide.load155 = load <8 x i16>, ptr %5, align 2
+ %6 = sext <8 x i16> %wide.load155 to <8 x i32>
+ %7 = or <8 x i32> zeroinitializer, %6
+ %8 = trunc <8 x i32> %3 to <8 x i16>
+ store <8 x i16> %8, ptr %2, align 2
+ %9 = trunc <8 x i32> %7 to <8 x i16>
+ store <8 x i16> %9, ptr %4, align 2
+ ret void
+}
>From e42e332e7ac4f592d04028e78c2a5d2281944040 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 10 Apr 2025 06:12:51 -0700
Subject: [PATCH 2/3] [SLP][REVEC] Fix CompressVectorize does not expand mask
when REVEC is enabled.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 +++++++++++++++
.../SLPVectorizer/X86/revec-load-compress.ll | 13 +++++++++++++
2 files changed, 28 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4611d162edbe2..731d6d58efbce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17610,6 +17610,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ConstantInt::getFalse(VecTy->getContext()));
for (int I : CompressMask)
MaskValues[I] = ConstantInt::getTrue(VecTy->getContext());
+ if (auto *VecTy = dyn_cast<FixedVectorType>(LI->getType())) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ unsigned VecTyNumElements = VecTy->getNumElements();
+ SmallVector<Constant *> NewMaskValues(
+ MaskValues.size() * VecTyNumElements,
+ ConstantInt::getFalse(VecTy->getContext()));
+ for (auto [I, V] : enumerate(MaskValues))
+ if (V->isOneValue())
+ std::fill_n(NewMaskValues.begin() + I * VecTyNumElements,
+ VecTyNumElements, V);
+ MaskValues.swap(NewMaskValues);
+ }
Constant *MaskValue = ConstantVector::get(MaskValues);
NewLI = Builder.CreateMaskedLoad(LoadVecTy, PO, CommonAlignment,
MaskValue);
@@ -17618,6 +17630,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
NewLI = ::propagateMetadata(NewLI, E->Scalars);
// TODO: include this cost into CommonCost.
+ if (auto *VecTy = dyn_cast<FixedVectorType>(LI->getType()))
+ transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
+ CompressMask);
NewLI =
cast<Instruction>(Builder.CreateShuffleVector(NewLI, CompressMask));
} else if (E->State == TreeEntry::StridedVectorize) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll
index adbef4b99c0d6..069274df396d7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll
@@ -2,6 +2,19 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.1-512 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
define void @test(ptr %in) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 32
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[IN]], i64 64
+; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[TMP1]], i32 2, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP4]], <8 x i32> zeroinitializer, i64 8)
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> [[TMP6]], [[TMP3]]
+; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP0]], align 2
+; CHECK-NEXT: ret void
+;
entry:
%0 = getelementptr i8, ptr %in, i64 112
%wide.load = load <8 x i16>, ptr %0, align 2
>From 9c54379b11b12a9832adff9aa99faf18474b592b Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 10 Apr 2025 07:36:43 -0700
Subject: [PATCH 3/3] apply comment
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 53 +++++++++----------
1 file changed, 25 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 731d6d58efbce..2efe107f03142 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1278,6 +1278,17 @@ static SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, Type *ScalarTy,
return OpcodeMask;
}
+/// Replicates each element of \p Val \p VF times consecutively, keeping element order (e.g. {a, b} with VF == 2 gives {a, a, b, b}).
+static SmallVector<Constant *> replicateMask(ArrayRef<Constant *> Val,
+ unsigned VF) {
+ assert(none_of(Val, [](Constant *C) { return C->getType()->isVectorTy(); }) &&
+ "Expected scalar constants.");
+ SmallVector<Constant *> NewVal(Val.size() * VF);
+ for (auto [I, V] : enumerate(Val))
+ std::fill_n(NewVal.begin() + I * VF, VF, V); // Slots [I * VF, (I + 1) * VF) all receive V.
+ return NewVal;
+}
+
namespace llvm {
static void inversePermutation(ArrayRef<unsigned> Indices,
@@ -12202,32 +12213,24 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
unsigned VF = VL.size();
if (MaskVF != 0)
VF = std::min(VF, MaskVF);
+ Type *VLScalarTy = VL.front()->getType();
for (Value *V : VL.take_front(VF)) {
+ Type *ScalarTy = VLScalarTy->getScalarType();
+ if (isa<PoisonValue>(V)) {
+ Vals.push_back(PoisonValue::get(ScalarTy));
+ continue;
+ }
if (isa<UndefValue>(V)) {
- Vals.push_back(cast<Constant>(V));
+ Vals.push_back(UndefValue::get(ScalarTy));
continue;
}
- Vals.push_back(Constant::getNullValue(V->getType()));
+ Vals.push_back(Constant::getNullValue(ScalarTy));
}
- if (auto *VecTy = dyn_cast<FixedVectorType>(Vals.front()->getType())) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(VLScalarTy)) {
assert(SLPReVec && "FixedVectorType is not expected.");
// When REVEC is enabled, we need to expand vector types into scalar
// types.
- unsigned VecTyNumElements = VecTy->getNumElements();
- SmallVector<Constant *> NewVals(VF * VecTyNumElements, nullptr);
- for (auto [I, V] : enumerate(Vals)) {
- Type *ScalarTy = V->getType()->getScalarType();
- Constant *NewVal;
- if (isa<PoisonValue>(V))
- NewVal = PoisonValue::get(ScalarTy);
- else if (isa<UndefValue>(V))
- NewVal = UndefValue::get(ScalarTy);
- else
- NewVal = Constant::getNullValue(ScalarTy);
- std::fill_n(NewVals.begin() + I * VecTyNumElements, VecTyNumElements,
- NewVal);
- }
- Vals.swap(NewVals);
+ Vals = replicateMask(Vals, VecTy->getNumElements());
}
return ConstantVector::get(Vals);
}
@@ -17612,15 +17615,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
MaskValues[I] = ConstantInt::getTrue(VecTy->getContext());
if (auto *VecTy = dyn_cast<FixedVectorType>(LI->getType())) {
assert(SLPReVec && "Only supported by REVEC.");
- unsigned VecTyNumElements = VecTy->getNumElements();
- SmallVector<Constant *> NewMaskValues(
- MaskValues.size() * VecTyNumElements,
- ConstantInt::getFalse(VecTy->getContext()));
- for (auto [I, V] : enumerate(MaskValues))
- if (V->isOneValue())
- std::fill_n(NewMaskValues.begin() + I * VecTyNumElements,
- VecTyNumElements, V);
- MaskValues.swap(NewMaskValues);
+ MaskValues = replicateMask(MaskValues, VecTy->getNumElements());
}
Constant *MaskValue = ConstantVector::get(MaskValues);
NewLI = Builder.CreateMaskedLoad(LoadVecTy, PO, CommonAlignment,
@@ -17630,9 +17625,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
NewLI = ::propagateMetadata(NewLI, E->Scalars);
// TODO: include this cost into CommonCost.
- if (auto *VecTy = dyn_cast<FixedVectorType>(LI->getType()))
+ if (auto *VecTy = dyn_cast<FixedVectorType>(LI->getType())) {
+ assert(SLPReVec && "FixedVectorType is not expected.");
transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
CompressMask);
+ }
NewLI =
cast<Instruction>(Builder.CreateShuffleVector(NewLI, CompressMask));
} else if (E->State == TreeEntry::StridedVectorize) {
More information about the llvm-commits mailing list