[llvm] 4a0bbbc - [SLP]Fix PR104637: do not create new nodes for fully overlapped non-schedulable nodes

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 16 13:49:54 PDT 2024


Author: Alexey Bataev
Date: 2024-08-16T13:49:44-07:00
New Revision: 4a0bbbcbcf6ebc87e794e7b86b9f4651bffcd806

URL: https://github.com/llvm/llvm-project/commit/4a0bbbcbcf6ebc87e794e7b86b9f4651bffcd806
DIFF: https://github.com/llvm/llvm-project/commit/4a0bbbcbcf6ebc87e794e7b86b9f4651bffcd806.diff

LOG: [SLP]Fix PR104637: do not create new nodes for fully overlapped non-schedulable nodes

If the scalars do not require scheduling and were already vectorized,
but in a different order, the compiler still tries to create a new node.
This may cause a compiler crash for the gathered operands. Instead, such
nodes need to be treated as a full overlap, and the already vectorized
node should just be reshuffled.

Fixes https://github.com/llvm/llvm-project/issues/104637

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/full-non-schedulable-overlap.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 667d0df781aa4c..9ecd8160a97891 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7144,6 +7144,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                        ReuseShuffleIndices);
         return;
       }
+      SmallPtrSet<const TreeEntry *, 4> Nodes;
+      Nodes.insert(getTreeEntry(S.OpValue));
+      for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue))
+        Nodes.insert(E);
+      SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
+      if (any_of(Nodes, [&](const TreeEntry *E) {
+            return all_of(E->Scalars,
+                          [&](Value *V) { return Values.contains(V); });
+          })) {
+        LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
+        if (TryToFindDuplicates(S))
+          newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+                       ReuseShuffleIndices);
+        return;
+      }
     } else {
       // Record the reuse of the tree node.  FIXME, currently this is only used
       // to properly draw the graph rather than for the actual vectorization.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/full-non-schedulable-overlap.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-non-schedulable-overlap.ll
new file mode 100644
index 00000000000000..231a9512c9ee99
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-non-schedulable-overlap.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(double %v) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: double [[V:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[V]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[T50_02:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x double> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd double [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8]] = fadd double [[TMP7]], [[V]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP10]] = fadd <2 x double> zeroinitializer, [[TMP9]]
+; CHECK-NEXT:    br label %[[LOOP]]
+;
+entry:
+  %mul.3 = fmul double 0.000000e+00, %v
+  %mul.4 = fmul double 0.000000e+00, 0.000000e+00
+  br label %loop
+
+loop:
+  %t48.0 = phi double [ 0.000000e+00, %entry ], [ %5, %loop ]
+  %t50.02 = phi double [ 0.000000e+00, %entry ], [ %3, %loop ]
+  %t52.0 = phi double [ 0.000000e+00, %entry ], [ %7, %loop ]
+  %0 = fmul double %t52.0, %mul.3
+  %1 = fmul double %t48.0, %mul.4
+  %2 = fadd double %1, %0
+  %3 = fadd double %2, %v
+  %4 = fmul double 0.000000e+00, %mul.3
+  %5 = fadd double 0.000000e+00, %4
+  %6 = fmul double 0.000000e+00, %mul.4
+  %7 = fadd double 0.000000e+00, %6
+  br label %loop
+}


        


More information about the llvm-commits mailing list