[llvm] 0e1a9e3 - [SLP]Fix PR74607: Fix dependency between buildvector nodes with user

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 10:17:46 PST 2023


Author: Alexey Bataev
Date: 2023-12-06T10:15:01-08:00
New Revision: 0e1a9e3084cd8dffa5d4f2cf6eabf9e7721e1cdd

URL: https://github.com/llvm/llvm-project/commit/0e1a9e3084cd8dffa5d4f2cf6eabf9e7721e1cdd
DIFF: https://github.com/llvm/llvm-project/commit/0e1a9e3084cd8dffa5d4f2cf6eabf9e7721e1cdd.diff

LOG: [SLP]Fix PR74607: Fix dependency between buildvector nodes with user
nodes, having same last instruction.

If the user nodes have the same last instruction, which is used as the insert
point for the buildvector nodes, finding the proper dependency is crucial.
  Before, it depended on the indices of the buildvector nodes themselves, but
  it looks like it should depend on the indices of the user nodes, because
  they identify the vectorization order and, thus, properly align the
  buildvector nodes in terms of the def-use chain.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1012f8eb863cd..a1e3e76ac852a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9384,7 +9384,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
           continue;
         // If the user instruction is used for some reason in different
         // vectorized nodes - make it depend on index.
-        if (TEUseEI.UserTE != UseEI.UserTE && TE->Idx < TEPtr->Idx)
+        if (TEUseEI.UserTE != UseEI.UserTE &&
+            TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
           continue;
       }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 1cef1032bf5d9..5562291dbb6be 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -15,7 +15,7 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[DOTPRE_PRE]], i32 0
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ]
@@ -29,8 +29,9 @@ define void @test() {
 ; CHECK-NEXT:    tail call void @foo(float [[MUL]])
 ; CHECK-NEXT:    [[I2:%.*]] = load float, ptr poison, align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
-; CHECK-NEXT:    [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
-; CHECK-NEXT:    [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
+; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
new file mode 100644
index 0000000000000..16ede231c200e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s
+
+define i64 @foo() {
+; CHECK-LABEL: define i64 @foo() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
+; CHECK-NEXT:    ret i64 0
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; CHECK-NEXT:    br i1 false, label [[BB3]], label [[BB1:%.*]]
+;
+bb:
+  br label %bb3
+
+bb1:
+  %phi = phi i64 [ %add, %bb3 ]
+  %phi2 = phi i64 [ %or, %bb3 ]
+  ret i64 0
+
+bb3:
+  %phi4 = phi i64 [ 0, %bb ], [ %add7, %bb3 ]
+  %phi5 = phi i64 [ 0, %bb ], [ 0, %bb3 ]
+  %phi6 = phi i64 [ 0, %bb ], [ %add, %bb3 ]
+  %add = add i64 %phi6, %phi5
+  %add7 = add i64 0, 0
+  %getelementptr = getelementptr i64, ptr addrspace(1) null, i64 %add7
+  %or = or i64 %phi4, 0
+  %icmp = icmp ult i64 %or, 0
+  br i1 false, label %bb3, label %bb1
+}
+


        


More information about the llvm-commits mailing list