[llvm] cf62adb - [SLP]Fix delete of the extractelement with users.

Thu Apr 6 09:18:18 PDT 2023

Author: Alexey Bataev
Date: 2023-04-06T09:15:30-07:00
New Revision: cf62adbbd83e593a893a27234636b939870c9658

URL: https://github.com/llvm/llvm-project/commit/cf62adbbd83e593a893a27234636b939870c9658
DIFF: https://github.com/llvm/llvm-project/commit/cf62adbbd83e593a893a27234636b939870c9658.diff

LOG: [SLP]Fix delete of the extractelement with users.

Made the condition for erasing gathered extractelement instructions
stricter: an extractelement is now removed only if it has a single use and
that use is vectorized; otherwise it is left in place for
instcombine/instsimplify to analyze.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2689f5a8f2d07..99ee28c3bda6b 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9411,9 +9411,11 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
         continue;
       auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
       VecBase = EI->getVectorOperand();
-      // If all users are vectorized - can delete the extractelement itself.
-      if (any_of(EI->users(),
-                 [&](User *U) { return !ScalarToTreeEntry.count(U); }))
+      // If the only one use is vectorized - can delete the extractelement
+      // itself.
+      if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) {
+            return !ScalarToTreeEntry.count(U);
+          }))
         continue;
       eraseInstruction(EI);
     }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll
new file mode 100644
index 0000000000000..432a0eda30fe7
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i1 @test(float %0, double %1) {
+; CHECK-LABEL: define i1 @test
+; CHECK-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 undef>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> <double poison, double 0.000000e+00, double poison, double 0.000000e+00>, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP7]], i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x double> [[TMP15]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <8 x double> [[TMP17]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 undef, i32 undef, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP21:%.*]] = fsub <8 x double> [[TMP16]], [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = fmul <8 x double> [[TMP16]], [[TMP20]]
+; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP24:%.*]] = fptrunc <8 x double> [[TMP23]] to <8 x float>
+; CHECK-NEXT:    [[TMP25:%.*]] = fmul <8 x float> [[TMP24]], zeroinitializer
+; CHECK-NEXT:    [[TMP26:%.*]] = fcmp oeq <8 x float> [[TMP25]], zeroinitializer
+; CHECK-NEXT:    [[TMP27:%.*]] = freeze <8 x i1> [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP27]])
+; CHECK-NEXT:    ret i1 [[TMP28]]
+;
+  %3 = fpext float %0 to double
+  %4 = fpext float 0.000000e+00 to double
+  %5 = fpext float 0.000000e+00 to double
+  %6 = fpext float 0.000000e+00 to double
+  %7 = fmul double 0.000000e+00, 0.000000e+00
+  %8 = fmul double 0.000000e+00, %1
+  %9 = fmul double 0.000000e+00, 0.000000e+00
+  %10 = fmul double 0.000000e+00, %5
+  %11 = fmul double 0.000000e+00, %6
+  %12 = fsub double %10, %11
+  %13 = fptrunc double %12 to float
+  %14 = fmul double %9, 0.000000e+00
+  %15 = fmul double 0.000000e+00, %3
+  %16 = fsub double %14, %15
+  %17 = fptrunc double %16 to float
+  %18 = fptrunc double %7 to float
+  %19 = fmul double %1, %6
+  %20 = fmul double 0.000000e+00, %4
+  %21 = fsub double %19, %20
+  %22 = fptrunc double %21 to float
+  %23 = fsub double 0.000000e+00, %8
+  %24 = fptrunc double %23 to float
+  %25 = fmul double 0.000000e+00, 0.000000e+00
+  %26 = fptrunc double %25 to float
+  %27 = fmul double %9, %4
+  %28 = fmul double 0.000000e+00, %5
+  %29 = fsub double %27, %28
+  %30 = fptrunc double %29 to float
+  %31 = fmul double %9, 0.000000e+00
+  %32 = fptrunc double %31 to float
+  %33 = fmul float %13, 0.000000e+00
+  %34 = fcmp oeq float %33, 0.000000e+00
+  %35 = fmul float %22, 0.000000e+00
+  %36 = fcmp oeq float %35, 0.000000e+00
+  %37 = select i1 %34, i1 %36, i1 false
+  %38 = fmul float %30, 0.000000e+00
+  %39 = fcmp oeq float %38, 0.000000e+00
+  %40 = select i1 %37, i1 %39, i1 false
+  %41 = fmul float %17, 0.000000e+00
+  %42 = fcmp oeq float %41, 0.000000e+00
+  %43 = select i1 %40, i1 %42, i1 false
+  %44 = fmul float %24, 0.000000e+00
+  %45 = fcmp oeq float %44, 0.000000e+00
+  %46 = select i1 %43, i1 %45, i1 false
+  %47 = fmul float %32, 0.000000e+00
+  %48 = fcmp oeq float %47, 0.000000e+00
+  %49 = select i1 %46, i1 %48, i1 false
+  %50 = fmul float %18, 0.000000e+00
+  %51 = fcmp oeq float %50, 0.000000e+00
+  %52 = select i1 %49, i1 %51, i1 false
+  %53 = fmul float %26, 0.000000e+00
+  %54 = fcmp oeq float %53, 0.000000e+00
+  %55 = select i1 %52, i1 %54, i1 false
+  ret i1 %55
+}