[PATCH] D116760: [SLP]Do not reorder top nodes if they do not require reordering.

Alexey Bataev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 6 10:35:44 PST 2022


ABataev created this revision.
ABataev added reviewers: vporpo, RKSimon, anton-afanasyev, dtemirbulatov.
Herald added a subscriber: hiraditya.
ABataev requested review of this revision.
Herald added a project: LLVM.

No need to reorder the top nodes, if they are not stores or
insertelement instructions and each node should be analized only
once, when the bottom-to-top analysis is performed.
We still endup with extractelements for the top node scalars and
the final shuffle just adds an extra cost and currently
crashes the compiler for PHI nodes.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116760

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll


Index: llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S --slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s
+
+define i32 @test(i32* %isec) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]]
+; CHECK:       block1:
+; CHECK-NEXT:    br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]]
+; CHECK:       block2:
+; CHECK-NEXT:    br label [[BLOCK3]]
+; CHECK:       block3:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[SHUFFLE]], [[BLOCK1]] ], [ [[SHUFFLE]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    ret i32 [[TMP9]]
+;
+entry:
+  %arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0
+  %0 = bitcast i32* %arrayidx10 to <2 x i32>*
+  %1 = load <2 x i32>, <2 x i32>* %0, align 8
+  %2 = extractelement <2 x i32> %1, i32 1
+  %3 = extractelement <2 x i32> %1, i32 0
+  br i1 false, label %block1, label %block3
+block1:
+  br i1 false, label %block2, label %block3
+block2:
+  br label %block3
+block3:
+  %4 = phi i32 [ %2, %block1 ], [ %2, %block2 ], [ %3, %entry ]
+  %5 = phi i32 [ %3, %block1 ], [ %3, %block2 ], [ %2, %entry ]
+  %6 = mul i32 %4, %5
+  ret i32 %6
+}
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3278,9 +3278,14 @@
       MapVector<OrdersType, unsigned,
                 DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
           OrdersUses;
+      // Do the analysis for each tree entry only once, otherwise the order of
+      // the same node my be considered several times, though might be not
+      // profitable.
       SmallPtrSet<const TreeEntry *, 4> VisitedOps;
       for (const auto &Op : Data.second) {
         TreeEntry *OpTE = Op.second;
+        if (!VisitedOps.insert(OpTE).second)
+          continue;
         if (!OpTE->ReuseShuffleIndices.empty() ||
             (IgnoreReorder && OpTE == VectorizableTree.front().get()))
           continue;
@@ -3304,9 +3309,8 @@
         } else {
           ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
         }
-        if (VisitedOps.insert(OpTE).second)
-          OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
-              OpTE->UserTreeIndices.size();
+        OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+            OpTE->UserTreeIndices.size();
         assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
         --OrdersUses[{}];
       }
@@ -8407,7 +8411,7 @@
       if (R.isTreeTinyAndNotFullyVectorizable())
         continue;
       R.reorderTopToBottom();
-      R.reorderBottomToTop();
+      R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
       R.buildExternalUses();
 
       R.computeMinimumValueSizes();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D116760.397936.patch
Type: text/x-patch
Size: 4073 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220106/03470968/attachment.bin>


More information about the llvm-commits mailing list