[llvm] 5d2cc8e - [SLP]Fix emission of buildvectors with full match.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 28 13:51:35 PDT 2023


Author: Alexey Bataev
Date: 2023-06-28T13:47:08-07:00
New Revision: 5d2cc8e242004161221f8b313279b83f18e1756c

URL: https://github.com/llvm/llvm-project/commit/5d2cc8e242004161221f8b313279b83f18e1756c
DIFF: https://github.com/llvm/llvm-project/commit/5d2cc8e242004161221f8b313279b83f18e1756c.diff

LOG: [SLP]Fix emission of buildvectors with full match.

If the buildvector node is a full match of another node, we need to
correctly build the mask for the original vector value and build the
common mask for the emitted node.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index efdbc66aef029..77a7852af9828 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2535,6 +2535,14 @@ class BoUpSLP {
     using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
     TreeEntry(VecTreeTy &Container) : Container(Container) {}
 
+    /// \returns Common mask for reorder indices and reused scalars.
+    SmallVector<int> getCommonMask() const {
+      SmallVector<int> Mask;
+      inversePermutation(ReorderIndices, Mask);
+      ::addMask(Mask, ReuseShuffleIndices);
+      return Mask;
+    }
+
     /// \returns true if the scalars in VL are equal to this entry.
     bool isSame(ArrayRef<Value *> VL) const {
       auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
@@ -9751,16 +9759,23 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
             << "SLP: perfect diamond match for gather bundle that starts with "
             << *E->Scalars.front() << ".\n");
         // Restore the mask for previous partially matched values.
-        for (auto [I, V] : enumerate(E->Scalars)) {
-          if (isa<PoisonValue>(V)) {
-            Mask[I] = PoisonMaskElem;
-            continue;
-          }
-          if (Mask[I] == PoisonMaskElem)
+        if (Entries.front()->ReorderIndices.empty() &&
+            ((Entries.front()->ReuseShuffleIndices.empty() &&
+              E->Scalars.size() == Entries.front()->Scalars.size()) ||
+             (E->Scalars.size() ==
+              Entries.front()->ReuseShuffleIndices.size()))) {
+          std::iota(Mask.begin(), Mask.end(), 0);
+        } else {
+          for (auto [I, V] : enumerate(E->Scalars)) {
+            if (isa<PoisonValue>(V)) {
+              Mask[I] = PoisonMaskElem;
+              continue;
+            }
             Mask[I] = Entries.front()->findLaneForValue(V);
+          }
         }
         ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
-        Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+        Res = ShuffleBuilder.finalize(E->getCommonMask());
         return Res;
       }
       if (!Resized) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
index 9e25a709b5060..9db7d696c7c7e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll
@@ -10,9 +10,8 @@ define void @foo(<4 x float> %vec, float %val, ptr %ptr) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VAL]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    store <4 x float> [[TMP6]], ptr [[GEP0]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %vec_3 = extractelement <4 x float> %vec, i32 3


        


More information about the llvm-commits mailing list