[llvm] e8e6795 - [SLP]Fix PR88123: use vectorized operands consistently.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 9 08:47:22 PDT 2024


Author: Alexey Bataev
Date: 2024-04-09T08:42:57-07:00
New Revision: e8e67957fa48fd7611adccef1a0449b83649c9f4

URL: https://github.com/llvm/llvm-project/commit/e8e67957fa48fd7611adccef1a0449b83649c9f4
DIFF: https://github.com/llvm/llvm-project/commit/e8e67957fa48fd7611adccef1a0449b83649c9f4.diff

LOG: [SLP]Fix PR88123: use vectorized operands consistently.

The vectorized operands must be used consistently, rather than the original
vector operands of the extractelement instructions, to avoid falsely detecting
an extra vector operand when shuffling the extractelements.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e2be6e0fa36628..da2b61ea6a635e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11755,8 +11755,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
               VecOp = TE->VectorizedValue;
           if (!Vec1) {
             Vec1 = VecOp;
-          } else if (Vec1 != EI->getVectorOperand()) {
-            assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
+          } else if (Vec1 != VecOp) {
+            assert((!Vec2 || Vec2 == VecOp) &&
                    "Expected only 1 or 2 vectors shuffle.");
             Vec2 = VecOp;
           }
@@ -11796,8 +11796,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
         VecMask.assign(VecMask.size(), PoisonMaskElem);
         copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
         if (TEs.size() == 1) {
-          IsUsedInExpr &=
-              FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
+          IsUsedInExpr &= FindReusedSplat(
+              VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
           ShuffleBuilder.add(*TEs.front(), VecMask);
           if (TEs.front()->VectorizedValue)
             IsNonPoisoned &=

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll
new file mode 100644
index 00000000000000..a7dbe7d0b43fe6
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br label [[TMP1:%.*]]
+; CHECK:       1:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x double> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> [[TMP4]], <4 x i32> <i32 0, i32 8, i32 poison, i32 8>
+; CHECK-NEXT:    [[TMP6]] = shufflevector <4 x double> [[TMP5]], <4 x double> <double poison, double poison, double 0.000000e+00, double poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    br label [[TMP1]]
+;
+  br label %1
+
+1:
+  %.i489 = phi double [ 0.000000e+00, %0 ], [ 0.000000e+00, %1 ]
+  %.i1102 = phi double [ 0.000000e+00, %0 ], [ %.i1110, %1 ]
+  %.i4105 = phi double [ 0.000000e+00, %0 ], [ %.i4113, %1 ]
+  %.i14525 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i1102)
+  %.i24526 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i489)
+  %.i44529 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i4105)
+  %.upto16034 = insertelement <8 x double> zeroinitializer, double %.i14525, i64 1
+  %.upto26035 = insertelement <8 x double> %.upto16034, double %.i24526, i64 2
+  %.upto36036 = insertelement <8 x double> %.upto26035, double %.i14525, i64 3
+  %.upto46037 = insertelement <8 x double> %.upto36036, double %.i44529, i64 0
+  %.i1110 = extractelement <8 x double> %.upto46037, i64 0
+  %.i4113 = extractelement <8 x double> zeroinitializer, i64 0
+  br label %1
+}


        


More information about the llvm-commits mailing list