[llvm] 18ef467 - [SLP]Fix PR108709: postpone buildvector clustered nodes, if required

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 09:57:09 PDT 2024


Author: Alexey Bataev
Date: 2024-09-16T09:53:46-07:00
New Revision: 18ef467d73d7dfc471c98d76021f040fef0037bf

URL: https://github.com/llvm/llvm-project/commit/18ef467d73d7dfc471c98d76021f040fef0037bf
DIFF: https://github.com/llvm/llvm-project/commit/18ef467d73d7dfc471c98d76021f040fef0037bf.diff

LOG: [SLP]Fix PR108709: postpone buildvector clustered nodes, if required

The "clustered" nodes for buildvector nodes must be postponed in
accordance with the global PostponedPHIs flag; otherwise, it may cause a
crash because of the dependency between phi nodes.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 282bb8eac7e2e4..377bd08ee8ff18 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2883,7 +2883,8 @@ class BoUpSLP {
   /// Create a new vector from a list of scalar values.  Produces a sequence
   /// which exploits values reused across lanes, and arranges the inserts
   /// for ease of later optimization.
-  Value *createBuildVector(const TreeEntry *E, Type *ScalarTy);
+  Value *createBuildVector(const TreeEntry *E, Type *ScalarTy,
+                           bool PostponedPHIs);
 
   /// Returns the instruction in the bundle, which can be used as a base point
   /// for scheduling. Usually it is the last instruction in the bundle, except
@@ -13198,9 +13199,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
   return Res;
 }
 
-Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy) {
+Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy,
+                                  bool PostponedPHIs) {
   for (auto [EIdx, _] : E->CombinedEntriesWithIndices)
-    (void)vectorizeTree(VectorizableTree[EIdx].get(), /*PostponedPHIs=*/false);
+    (void)vectorizeTree(VectorizableTree[EIdx].get(), PostponedPHIs);
   return processBuildVector<ShuffleInstructionBuilder, Value *>(E, ScalarTy,
                                                                 Builder, *this);
 }
@@ -13231,7 +13233,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
     // Set insert point for non-reduction initial nodes.
     if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
       setInsertPointAfterBundle(E);
-    Value *Vec = createBuildVector(E, ScalarTy);
+    Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs);
     E->VectorizedValue = Vec;
     return Vec;
   }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
new file mode 100644
index 00000000000000..9c22295a1c718e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB6:.*]]
+; CHECK:       [[BB1:.*]]:
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <2 x i32> [[TMP1]], i64 2)
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> <i32 2, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
+; CHECK-NEXT:    br i1 false, label %[[BB2]], label %[[BB6]]
+;
+bb:
+  br label %bb6
+
+bb1:
+  %ashr = ashr i32 0, 0
+  br label %bb2
+
+bb2:
+  %phi = phi i32 [ %ashr, %bb1 ], [ %ashr9, %bb6 ]
+  %phi3 = phi i32 [ 0, %bb1 ], [ %mul10, %bb6 ]
+  %phi4 = phi i32 [ 0, %bb1 ], [ %mul11, %bb6 ]
+  %phi5 = phi i32 [ 0, %bb1 ], [ %mul, %bb6 ]
+  ret void
+
+bb6:
+  %phi7 = phi i32 [ 0, %bb ], [ %mul11, %bb6 ]
+  %phi8 = phi i32 [ 0, %bb ], [ %mul10, %bb6 ]
+  %mul = mul i32 0, %phi8
+  %ashr9 = ashr i32 0, 0
+  %mul10 = mul i32 0, 0
+  %mul11 = mul i32 %phi7, 0
+  br i1 false, label %bb2, label %bb6
+}


        


More information about the llvm-commits mailing list