[llvm] 7705342 - [SLP]Do not gather node, if the instruction, that does not require

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 20 13:01:54 PDT 2023


Author: Alexey Bataev
Date: 2023-09-20T12:52:37-07:00
New Revision: 77053421228edd12a3ba73d4eebd970fcdd3b2c0

URL: https://github.com/llvm/llvm-project/commit/77053421228edd12a3ba73d4eebd970fcdd3b2c0
DIFF: https://github.com/llvm/llvm-project/commit/77053421228edd12a3ba73d4eebd970fcdd3b2c0.diff

LOG: [SLP] Do not gather a node if its main instruction was previously
vectorized but does not require scheduling.

If the main instruction of the bundle has already been vectorized in another
tree node but does not require scheduling, we can still try to vectorize it
in the new node instead of gathering it.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index db0df8cce2b7a9c..b5860e34251d12b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5811,18 +5811,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
   if (TreeEntry *E = getTreeEntry(S.OpValue)) {
     LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
     if (!E->isSame(VL)) {
-      LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
-      if (TryToFindDuplicates(S))
-        newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
-                     ReuseShuffleIndicies);
+      if (!doesNotNeedToBeScheduled(S.OpValue)) {
+        LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
+        if (TryToFindDuplicates(S))
+          newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+                       ReuseShuffleIndicies);
+        return;
+      }
+    } else {
+      // Record the reuse of the tree node.  FIXME, currently this is only used
+      // to properly draw the graph rather than for the actual vectorization.
+      E->UserTreeIndices.push_back(UserTreeIdx);
+      LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
+                        << ".\n");
       return;
     }
-    // Record the reuse of the tree node.  FIXME, currently this is only used to
-    // properly draw the graph rather than for the actual vectorization.
-    E->UserTreeIndices.push_back(UserTreeIdx);
-    LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
-                      << ".\n");
-    return;
   }
 
   // Check that none of the instructions in the bundle are already in the tree.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll
index 87292700317ead6..3b54b0a058d0c6b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll
@@ -83,13 +83,12 @@ define void @test2(double %0) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[TMP4:%.*]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = fsub double 1.000000e+00, [[TMP0]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 1.000000e+00>, [[TMP3]]
 ; CHECK-NEXT:    br label [[DOTBACKEDGE:%.*]]
 ; CHECK:       .backedge:
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    br label [[TMP4]]
 ;
   br label %2


        


More information about the llvm-commits mailing list