[llvm] 5d2cc8e - [SLP]Fix emission of buildvectors with full match.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 28 13:51:35 PDT 2023
Author: Alexey Bataev
Date: 2023-06-28T13:47:08-07:00
New Revision: 5d2cc8e242004161221f8b313279b83f18e1756c
URL: https://github.com/llvm/llvm-project/commit/5d2cc8e242004161221f8b313279b83f18e1756c
DIFF: https://github.com/llvm/llvm-project/commit/5d2cc8e242004161221f8b313279b83f18e1756c.diff
LOG: [SLP]Fix emission of buildvectors with full match.
If the buildvector node is a full match of another node, we need to
correctly build the mask for the original vector value and build the common
mask for the emitted node.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index efdbc66aef029..77a7852af9828 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2535,6 +2535,14 @@ class BoUpSLP {
using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
TreeEntry(VecTreeTy &Container) : Container(Container) {}
+ /// \returns Mask combining the inverse of ReorderIndices with ReuseShuffleIndices.
+ SmallVector<int> getCommonMask() const {
+ SmallVector<int> Mask;
+ inversePermutation(ReorderIndices, Mask);
+ ::addMask(Mask, ReuseShuffleIndices);
+ return Mask;
+ }
+
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
@@ -9751,16 +9759,23 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
<< "SLP: perfect diamond match for gather bundle that starts with "
<< *E->Scalars.front() << ".\n");
// Restore the mask for previous partially matched values.
- for (auto [I, V] : enumerate(E->Scalars)) {
- if (isa<PoisonValue>(V)) {
- Mask[I] = PoisonMaskElem;
- continue;
- }
- if (Mask[I] == PoisonMaskElem)
+ if (Entries.front()->ReorderIndices.empty() &&
+ ((Entries.front()->ReuseShuffleIndices.empty() &&
+ E->Scalars.size() == Entries.front()->Scalars.size()) ||
+ (E->Scalars.size() ==
+ Entries.front()->ReuseShuffleIndices.size()))) {
+ std::iota(Mask.begin(), Mask.end(), 0);
+ } else {
+ for (auto [I, V] : enumerate(E->Scalars)) {
+ if (isa<PoisonValue>(V)) {
+ Mask[I] = PoisonMaskElem;
+ continue;
+ }
Mask[I] = Entries.front()->findLaneForValue(V);
+ }
}
ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
- Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+ Res = ShuffleBuilder.finalize(E->getCommonMask());
return Res;
}
if (!Resized) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
index 9e25a709b5060..9db7d696c7c7e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
@@ -10,9 +10,8 @@ define void @foo(<4 x float> %vec, float %val, ptr %ptr) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VAL]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP1]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[GEP0]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[GEP0]], align 4
; CHECK-NEXT: ret void
;
%vec_3 = extractelement <4 x float> %vec, i32 3
More information about the llvm-commits
mailing list