[llvm] cec8b61 - [SLP]Do not reorder top nodes if they do not require reordering.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 28 09:17:12 PST 2022


Author: Alexey Bataev
Date: 2022-01-28T09:16:18-08:00
New Revision: cec8b614f3843c169dc387520013745e4ff9886a

URL: https://github.com/llvm/llvm-project/commit/cec8b614f3843c169dc387520013745e4ff9886a
DIFF: https://github.com/llvm/llvm-project/commit/cec8b614f3843c169dc387520013745e4ff9886a.diff

LOG: [SLP]Do not reorder top nodes if they do not require reordering.

No need to reorder the top nodes, if they are not stores or
insertelement instructions and each node should be analized only
once, when the bottom-to-top analysis is performed.
We still endup with extractelements for the top node scalars and
the final shuffle just adds an extra cost and currently
crashes the compiler for PHI nodes.

Differential Revision: https://reviews.llvm.org/D116760

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99c265fc5101a..967e527b16d51 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3307,9 +3307,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
       MapVector<OrdersType, unsigned,
                 DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
           OrdersUses;
+      // Do the analysis for each tree entry only once, otherwise the order of
+      // the same node my be considered several times, though might be not
+      // profitable.
       SmallPtrSet<const TreeEntry *, 4> VisitedOps;
       for (const auto &Op : Data.second) {
         TreeEntry *OpTE = Op.second;
+        if (!VisitedOps.insert(OpTE).second)
+          continue;
         if (!OpTE->ReuseShuffleIndices.empty() ||
             (IgnoreReorder && OpTE == VectorizableTree.front().get()))
           continue;
@@ -3333,9 +3338,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
         } else {
           ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
         }
-        if (VisitedOps.insert(OpTE).second)
-          OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
-              OpTE->UserTreeIndices.size();
+        OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+            OpTE->UserTreeIndices.size();
         assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
         --OrdersUses[{}];
       }
@@ -8444,7 +8448,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
       if (R.isTreeTinyAndNotFullyVectorizable())
         continue;
       R.reorderTopToBottom();
-      R.reorderBottomToTop();
+      R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
       R.buildExternalUses();
 
       R.computeMinimumValueSizes();

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
new file mode 100644
index 0000000000000..810e166c40c07
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S --slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s
+
+define i32 @test(i32* %isec) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]]
+; CHECK:       block1:
+; CHECK-NEXT:    br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]]
+; CHECK:       block2:
+; CHECK-NEXT:    br label [[BLOCK3]]
+; CHECK:       block3:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[SHUFFLE]], [[BLOCK1]] ], [ [[SHUFFLE]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    ret i32 [[TMP9]]
+;
+entry:
+  %arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0
+  %0 = bitcast i32* %arrayidx10 to <2 x i32>*
+  %1 = load <2 x i32>, <2 x i32>* %0, align 8
+  %2 = extractelement <2 x i32> %1, i32 1
+  %3 = extractelement <2 x i32> %1, i32 0
+  br i1 false, label %block1, label %block3
+block1:
+  br i1 false, label %block2, label %block3
+block2:
+  br label %block3
+block3:
+  %4 = phi i32 [ %2, %block1 ], [ %2, %block2 ], [ %3, %entry ]
+  %5 = phi i32 [ %3, %block1 ], [ %3, %block2 ], [ %2, %entry ]
+  %6 = mul i32 %4, %5
+  ret i32 %6
+}


        


More information about the llvm-commits mailing list