[llvm] 4b25c11 - [SLP]Fix an assertion for the size of user nodes.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 30 05:47:15 PDT 2021


Author: Alexey Bataev
Date: 2021-07-30T05:46:44-07:00
New Revision: 4b25c113210e579a5346ca0abc0717ab1ce5d9df

URL: https://github.com/llvm/llvm-project/commit/4b25c113210e579a5346ca0abc0717ab1ce5d9df
DIFF: https://github.com/llvm/llvm-project/commit/4b25c113210e579a5346ca0abc0717ab1ce5d9df.diff

LOG: [SLP]Fix an assertion for the size of user nodes.

For nodes with reused scalars, a user node may have not only the size of
the final shuffle but also the size of the scalars themselves, so the
assertion needs to account for this. It is safe to simply relax the check
here, since the order of the scalars themselves is preserved; only the
indices of the reused scalars change. Thus, users whose size matches the
number of scalars in the node are not affected and still get their
operands in the required order.
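
To make the relaxed check easier to read in isolation, here is a minimal,
self-contained C++ sketch. TreeEntry, EdgeInfo, and allUsersHaveValidSize
are simplified, hypothetical stand-ins for the real classes in
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp, not the actual LLVM API:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical, simplified stand-ins for BoUpSLP::TreeEntry and
// BoUpSLP::EdgeInfo; the real definitions live in SLPVectorizer.cpp.
struct TreeEntry {
  std::vector<int> Scalars;             // scalar values grouped in this node
  std::vector<int> ReuseShuffleIndices; // reuse mask when scalars repeat
};

struct EdgeInfo {
  const TreeEntry *UserTE; // the user node on this edge
};

// The relaxed invariant: a user node may match either the final shuffle
// size VF or the node's own scalar count (the latter occurs for nodes
// with reused scalars).
bool allUsersHaveValidSize(const TreeEntry &TE,
                           const std::vector<EdgeInfo> &UserTreeIndices,
                           std::size_t VF) {
  return std::all_of(UserTreeIndices.begin(), UserTreeIndices.end(),
                     [VF, &TE](const EdgeInfo &EI) {
                       return EI.UserTE->Scalars.size() == VF ||
                              EI.UserTE->Scalars.size() == TE.Scalars.size();
                     });
}

int main() {
  TreeEntry Node{{1, 2}, {0, 1, 0, 1}};  // 2 scalars reused into VF = 4
  TreeEntry UserOfVF{{1, 2, 3, 4}, {}};  // user sized like the final shuffle
  TreeEntry UserOfScalars{{5, 6}, {}};   // user sized like the scalars
  std::vector<EdgeInfo> Users{{&UserOfVF}, {&UserOfScalars}};
  // Before the fix only UserOfVF was accepted; the second user would
  // have tripped the assertion.
  assert(allUsersHaveValidSize(Node, Users, 4));
}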

Reported by @mstorsjo in D105020.

Differential Revision: https://reviews.llvm.org/D107080

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ae9d5d6b4fa6..839162b99ace 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2660,12 +2660,14 @@ void BoUpSLP::reorderTopToBottom(bool FreeReorder) {
       if (TE->Scalars.size() != VF) {
         if (TE->ReuseShuffleIndices.size() == VF) {
           // Need to reorder the reuses masks of the operands with smaller VF to
-          // be able to find the math between the graph nodes and scalar
+          // be able to find the match between the graph nodes and scalar
           // operands of the given node during vectorization/cost estimation.
           // Build a list of such operands for future reordering.
           assert(all_of(TE->UserTreeIndices,
-                        [VF](const EdgeInfo &EI) {
-                          return EI.UserTE->Scalars.size() == VF;
+                        [VF, &TE](const EdgeInfo &EI) {
+                          return EI.UserTE->Scalars.size() == VF ||
+                                 EI.UserTE->Scalars.size() ==
+                                     TE->Scalars.size();
                         }) &&
                  "All users must be of VF size.");
           SmallOperandsToReorder.insert(TE.get());

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
new file mode 100644
index 000000000000..a8f3046cd3b4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; ModuleID = 'repro1.ll'
+; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s
+
+define i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND15_PREHEADER:%.*]]
+; CHECK:       for.cond15.preheader:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       for.cond15:
+; CHECK-NEXT:    br label [[IF_END_1:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[FOR_COND15:%.*]]
+; CHECK:       for.end39:
+; CHECK-NEXT:    switch i32 undef, label [[DO_BODY:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB195:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
+; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
+; CHECK-NEXT:    [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.bb195:
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       do.body:
+; CHECK-NEXT:    unreachable
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ]
+; CHECK-NEXT:    ret i32 undef
+; CHECK:       if.end.1:
+; CHECK-NEXT:    br label [[FOR_COND15_1:%.*]]
+; CHECK:       for.cond15.1:
+; CHECK-NEXT:    br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
+;
+entry:
+  %conv = sitofp i32 undef to double
+  %conv2 = sitofp i32 undef to double
+  br label %for.cond15.preheader
+
+for.cond15.preheader:                             ; preds = %for.cond15.1, %entry
+  br label %if.end
+
+for.cond15:                                       ; preds = %if.end
+  br label %if.end.1
+
+if.end:                                           ; preds = %for.cond15.preheader
+  br label %for.cond15
+
+for.end39:                                        ; preds = %for.cond15.1
+  switch i32 undef, label %do.body [
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb195
+  ]
+
+sw.bb:                                            ; preds = %for.end39
+  %arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
+  %0 = load double, double* %arrayidx43, align 8
+  %arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
+  %1 = load double, double* %arrayidx45, align 8
+  %arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
+  %2 = load double, double* %arrayidx51, align 8
+  %arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
+  %3 = load double, double* %arrayidx58, align 8
+  %mul = fmul double undef, %conv2
+  %mul109 = fmul double undef, %conv
+  %mul143 = fmul double %0, %mul
+  %4 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul143)
+  %mul154 = fmul double %1, %mul109
+  %5 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul154)
+  %mul172 = fmul double %3, %mul
+  %6 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul172)
+  %mul183 = fmul double %2, %mul109
+  %7 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul183)
+  br label %sw.epilog
+
+sw.bb195:                                         ; preds = %for.end39
+  br label %sw.epilog
+
+do.body:                                          ; preds = %for.end39
+  unreachable
+
+sw.epilog:                                        ; preds = %sw.bb195, %sw.bb
+  %x4.0 = phi double [ undef, %sw.bb195 ], [ %7, %sw.bb ]
+  %x3.0 = phi double [ undef, %sw.bb195 ], [ %6, %sw.bb ]
+  %x1.0 = phi double [ undef, %sw.bb195 ], [ %5, %sw.bb ]
+  %x0.0 = phi double [ undef, %sw.bb195 ], [ %4, %sw.bb ]
+  ret i32 undef
+
+if.end.1:                                         ; preds = %for.cond15
+  br label %for.cond15.1
+
+for.cond15.1:                                     ; preds = %if.end.1
+  br i1 undef, label %for.end39, label %for.cond15.preheader
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)

