[llvm] 1520728 - [SLP]Check if the root of the buildvector has one use only.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon May 16 07:34:13 PDT 2022


Author: Alexey Bataev
Date: 2022-05-16T07:30:36-07:00
New Revision: 152072801e24fb1e5cd962b0cb089230bc27b6b9

URL: https://github.com/llvm/llvm-project/commit/152072801e24fb1e5cd962b0cb089230bc27b6b9
DIFF: https://github.com/llvm/llvm-project/commit/152072801e24fb1e5cd962b0cb089230bc27b6b9.diff

LOG: [SLP]Check if the root of the buildvector has one use only.

The root of a buildvector sequence must have only one use; otherwise, it can
be treated only as the final element of the previous buildvector sequence.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc0e164ab99fe..2a8b3cee5c8a3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6618,8 +6618,10 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
   // either VU as the original vector for IE2 or V as the original vector for
   // IE1.
   do {
-    if (IE2 == VU || IE1 == V)
-      return true;
+    if (IE2 == VU)
+      return VU->hasOneUse();
+    if (IE1 == V)
+      return V->hasOneUse();
     if (IE1) {
       if ((IE1 != VU && !IE1->hasOneUse()) ||
           getInsertIndex(IE1).getValueOr(Idx2) == Idx2)

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
index 0d3c7809e868e..77174151e9635 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -36,3 +36,41 @@ define void @test() {
   store <2 x float> zeroinitializer, ptr null, align 4
   ret void
 }
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
+; CHECK-NEXT:    [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1
+; CHECK-NEXT:    store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT:    [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1
+; CHECK-NEXT:    store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = getelementptr inbounds float, ptr undef, i32 2
+  %2 = load float, ptr %1, align 4
+  %3 = load float, ptr undef, align 4
+  %4 = fsub float %2, %3
+  %5 = getelementptr inbounds float, ptr undef, i32 3
+  %6 = load float, ptr %5, align 4
+  %7 = getelementptr inbounds float, ptr undef, i32 1
+  %8 = load float, ptr %7, align 4
+  %9 = fsub float %6, %8
+  %10 = fcmp olt float %9, %4
+  %.sroa.0.0.vec.insert.i5.i10 = insertelement <2 x float> undef, float %3, i64 0
+  %.sroa.0.4.vec.insert.i10.i13 = insertelement <2 x float> %.sroa.0.0.vec.insert.i5.i10, float %8, i64 1
+  store <2 x float> %.sroa.0.4.vec.insert.i10.i13, ptr null, align 4
+  %.sroa.0.4.vec.insert.i10.i13.2 = insertelement <2 x float> %.sroa.0.0.vec.insert.i5.i10, float %6, i64 1
+  store <2 x float> %.sroa.0.4.vec.insert.i10.i13.2, ptr null, align 4
+  ret void
+}
+


        


More information about the llvm-commits mailing list