[llvm] ea7f43e - [SLP]Do not gather node, if the instruction, that does not require

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 12:01:03 PDT 2023


Author: Alexey Bataev
Date: 2023-09-26T11:57:35-07:00
New Revision: ea7f43ec14c5522cec7b787c5fc3c323f7459e89

URL: https://github.com/llvm/llvm-project/commit/ea7f43ec14c5522cec7b787c5fc3c323f7459e89
DIFF: https://github.com/llvm/llvm-project/commit/ea7f43ec14c5522cec7b787c5fc3c323f7459e89.diff

LOG: [SLP]Do not gather node, if the instruction, that does not require
scheduling, is previously vectorized.

If the main node was vectorized already, but does not require
scheduling, we still can try to vectorize it in this new node instead of
gathering.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 77362d610a0c839..9adaaf3fe66d683 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1326,6 +1326,9 @@ class BoUpSLP {
     }
     LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
 #endif
+    bool operator == (const EdgeInfo &Other) const {
+      return UserTE == Other.UserTE && EdgeIdx == Other.EdgeIdx;
+    }
   };
 
   /// A helper class used for scoring candidates for two consecutive lanes.
@@ -2412,12 +2415,25 @@ class BoUpSLP {
   TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
     ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
     TreeEntry *TE = nullptr;
-    const auto *It = find_if(VL, [this, &TE](Value *V) {
+    const auto *It = find_if(VL, [&](Value *V) {
       TE = getTreeEntry(V);
-      return TE;
+      if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
+        return true;
+      auto It = MultiNodeScalars.find(V);
+      if (It != MultiNodeScalars.end()) {
+        for (TreeEntry *E : It->second) {
+          if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
+            TE = E;
+            return true;
+          }
+        }
+      }
+      return false;
     });
-    if (It != VL.end() && TE->isSame(VL))
+    if (It != VL.end()) {
+      assert(TE->isSame(VL) && "Expedted same scalars.");
       return TE;
+    }
     return nullptr;
   }
 
@@ -5806,18 +5822,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
   if (TreeEntry *E = getTreeEntry(S.OpValue)) {
     LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
     if (!E->isSame(VL)) {
-      LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
-      if (TryToFindDuplicates(S))
-        newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
-                     ReuseShuffleIndicies);
+      if (!doesNotNeedToBeScheduled(S.OpValue)) {
+        LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
+        if (TryToFindDuplicates(S))
+          newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+                       ReuseShuffleIndicies);
+        return;
+      }
+    } else {
+      // Record the reuse of the tree node.  FIXME, currently this is only used
+      // to properly draw the graph rather than for the actual vectorization.
+      E->UserTreeIndices.push_back(UserTreeIdx);
+      LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
+                        << ".\n");
       return;
     }
-    // Record the reuse of the tree node.  FIXME, currently this is only used to
-    // properly draw the graph rather than for the actual vectorization.
-    E->UserTreeIndices.push_back(UserTreeIdx);
-    LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
-                      << ".\n");
-    return;
   }
 
   // Check that none of the instructions in the bundle are already in the tree.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll
index 87292700317ead6..3b54b0a058d0c6b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll
@@ -83,13 +83,12 @@ define void @test2(double %0) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[TMP4:%.*]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = fsub double 1.000000e+00, [[TMP0]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 1.000000e+00>, [[TMP3]]
 ; CHECK-NEXT:    br label [[DOTBACKEDGE:%.*]]
 ; CHECK:       .backedge:
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    br label [[TMP4]]
 ;
   br label %2


        


More information about the llvm-commits mailing list