[llvm] bf4dcbd - [SLP]Fix PR56251: Do not remove the reordering from the root node, being used as an operand.

Tue Jun 28 13:46:24 PDT 2022

Author: Alexey Bataev
Date: 2022-06-28T13:42:05-07:00
New Revision: bf4dcbd2df009ba71d9032379fe8bc37ba0903ff

URL: https://github.com/llvm/llvm-project/commit/bf4dcbd2df009ba71d9032379fe8bc37ba0903ff
DIFF: https://github.com/llvm/llvm-project/commit/bf4dcbd2df009ba71d9032379fe8bc37ba0903ff.diff

LOG: [SLP]Fix PR56251: Do not remove the reordering from the root node, being used as an operand.

If the root order itself does not require reordering, we can just
remove its reorder mask safely (e.g., if the root node is a vector of
phis). But if this node is used as an operand in the graph, we cannot
delete the reordering, need to keep it. Otherwise the graph nodes are
not synchronized with the operands. It may cause an extra gather
instruction(s) or a compiler crash.
Also, need to be very careful when selecting the gather nodes for
reordering since there might several gather nodes with the same scalars
and we can try to reorder just the same node many times instead of
different nodes.

Differential Revision: https://reviews.llvm.org/D128680

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 54b54d8a662e8..a958c36969d53 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3946,19 +3946,23 @@ bool BoUpSLP::canReorderOperands(
         GatherOps.push_back(TE);
       continue;
     }
-    ArrayRef<Value *> VL = UserTE->getOperand(I);
     TreeEntry *Gather = nullptr;
     if (count_if(ReorderableGathers,
-                 [VL, &Gather](TreeEntry *TE) {
+                 [&Gather, UserTE, I](TreeEntry *TE) {
                    assert(TE->State != TreeEntry::Vectorize &&
                           "Only non-vectorized nodes are expected.");
-                   if (TE->isSame(VL)) {
+                   if (any_of(TE->UserTreeIndices,
+                              [UserTE, I](const EdgeInfo &EI) {
+                                return EI.UserTE == UserTE && EI.EdgeIdx == I;
+                              })) {
+                     assert(TE->isSame(UserTE->getOperand(I)) &&
+                            "Operand entry does not match operands.");
                      Gather = TE;
                      return true;
                    }
                    return false;
                  }) > 1 &&
-        !all_of(VL, isConstant))
+        !all_of(UserTE->getOperand(I), isConstant))
       return false;
     if (Gather)
       GatherOps.push_back(Gather);
@@ -4053,8 +4057,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
         TreeEntry *OpTE = Op.second;
         if (!VisitedOps.insert(OpTE).second)
           continue;
-        if (!OpTE->ReuseShuffleIndices.empty() ||
-            (IgnoreReorder && OpTE == VectorizableTree.front().get()))
+        if (!OpTE->ReuseShuffleIndices.empty())
           continue;
         const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
           if (OpTE->State == TreeEntry::NeedToGather)
@@ -4174,6 +4177,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
                 TE->ReorderIndices.empty()) &&
                "Non-matching sizes of user/operand entries.");
         reorderOrder(TE->ReorderIndices, Mask);
+        if (IgnoreReorder && TE == VectorizableTree.front().get())
+          IgnoreReorder = false;
       }
       // For gathers just need to reorder its scalars.
       for (TreeEntry *Gather : GatherOps) {
@@ -7853,9 +7858,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
   auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
   switch (ShuffleOrOp) {
     case Instruction::PHI: {
-      assert(
-          (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) &&
-          "PHI reordering is free.");
+      assert((E->ReorderIndices.empty() ||
+              E != VectorizableTree.front().get() ||
+              !E->UserTreeIndices.empty()) &&
+             "PHI reordering is free.");
       auto *PH = cast<PHINode>(VL0);
       Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
       Builder.SetCurrentDebugLocation(PH->getDebugLoc());

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
new file mode 100644
index 0000000000000..9e136ee516f5c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define ptr @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GREEN_I:%.*]] = getelementptr inbounds float, ptr null, i32 6
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GREEN_I]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
+; CHECK-NEXT:    br label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP5:%.*]], [[BODY]] ], [ [[TMP1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr null, align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP5]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[SHUFFLE]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    br label [[BODY]]
+;
+entry:
+  %green.i = getelementptr inbounds float, ptr null, i32 6
+  %blue.i = getelementptr inbounds float, ptr null, i32 7
+  %0 = load float, ptr %green.i, align 4
+  %conv197 = fpext float %0 to double
+  %1 = load float, ptr %blue.i, align 8
+  %conv199 = fpext float %1 to double
+  br label %body
+
+body:
+  %phi1 = phi double [ %3, %body ], [ %conv197, %entry ]
+  %phi2 = phi double [ %5, %body ], [ %conv199, %entry ]
+  %green = getelementptr inbounds i16, ptr null, i32 1
+  %2 = load i16, ptr %green, align 2
+  %conv1 = uitofp i16 %2 to double
+  %3 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %conv1, double %phi1)
+  %4 = load i16, ptr null, align 2
+  %conv2 = uitofp i16 %4 to double
+  %5 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %conv2, double %phi2)
+  br label %body
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)
+
+define void @test1(ptr %agg.result, ptr %this) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK:       lor.lhs.false:
+; CHECK-NEXT:    br i1 false, label [[RETURN]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[B_I:%.*]] = getelementptr inbounds float, ptr [[THIS:%.*]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[B_I]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_END]] ], [ <float 1.000000e+00, float 0.000000e+00>, [[LOR_LHS_FALSE]] ], [ <float 1.000000e+00, float 0.000000e+00>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[C_I_I_I:%.*]] = getelementptr inbounds float, ptr [[AGG_RESULT:%.*]], i32 2
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    store <2 x float> [[SHUFFLE]], ptr [[C_I_I_I]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 false, label %return, label %lor.lhs.false
+
+lor.lhs.false:
+  br i1 false, label %return, label %if.end
+
+if.end:
+  %c.i = getelementptr inbounds float, ptr %this, i32 2
+  %0 = load float, ptr %c.i, align 4
+  %add.i = fadd float %0, 0.000000e+00
+  %b.i = getelementptr inbounds float, ptr %this, i32 1
+  %1 = load float, ptr %b.i, align 4
+  %add2.i = fadd float %1, 0.000000e+00
+  br label %return
+
+return:
+  %add.i.sink = phi float [ %add.i, %if.end ], [ 0.000000e+00, %lor.lhs.false ], [ 0.000000e+00, %entry ]
+  %add2.i.sink = phi float [ %add2.i, %if.end ], [ 1.000000e+00, %lor.lhs.false ], [ 1.000000e+00, %entry ]
+  %c.i.i.i = getelementptr inbounds float, ptr %agg.result, i32 2
+  store float %add.i.sink, ptr %c.i.i.i, align 4
+  %d.i.i.i = getelementptr inbounds float, ptr %agg.result, i32 3
+  store float %add2.i.sink, ptr %d.i.i.i, align 4
+  ret void
+}