[llvm] e8e6795 - [SLP]Fix PR88123: use vectorized operands consistently.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 9 08:47:22 PDT 2024
Author: Alexey Bataev
Date: 2024-04-09T08:42:57-07:00
New Revision: e8e67957fa48fd7611adccef1a0449b83649c9f4
URL: https://github.com/llvm/llvm-project/commit/e8e67957fa48fd7611adccef1a0449b83649c9f4
DIFF: https://github.com/llvm/llvm-project/commit/e8e67957fa48fd7611adccef1a0449b83649c9f4.diff
LOG: [SLP]Fix PR88123: use vectorized operands consistently.
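When shuffling extractelement instructions, compare against the vectorized
operand (VecOp) consistently instead of the extractelement's original vector
operand (EI->getVectorOperand()). Otherwise an extra vector operand may be
falsely detected while building the shuffle of the extractelements.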
Added:
llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e2be6e0fa36628..da2b61ea6a635e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11755,8 +11755,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
VecOp = TE->VectorizedValue;
if (!Vec1) {
Vec1 = VecOp;
- } else if (Vec1 != EI->getVectorOperand()) {
- assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
+ } else if (Vec1 != VecOp) {
+ assert((!Vec2 || Vec2 == VecOp) &&
"Expected only 1 or 2 vectors shuffle.");
Vec2 = VecOp;
}
@@ -11796,8 +11796,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
VecMask.assign(VecMask.size(), PoisonMaskElem);
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
if (TEs.size() == 1) {
- IsUsedInExpr &=
- FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
+ IsUsedInExpr &= FindReusedSplat(
+ VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
ShuffleBuilder.add(*TEs.front(), VecMask);
if (TEs.front()->VectorizedValue)
IsNonPoisoned &=
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll
new file mode 100644
index 00000000000000..a7dbe7d0b43fe6
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br label [[TMP1:%.*]]
+; CHECK: 1:
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[TMP1]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> [[TMP4]], <4 x i32> <i32 0, i32 8, i32 poison, i32 8>
+; CHECK-NEXT: [[TMP6]] = shufflevector <4 x double> [[TMP5]], <4 x double> <double poison, double poison, double 0.000000e+00, double poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: br label [[TMP1]]
+;
+ br label %1
+
+1:
+ %.i489 = phi double [ 0.000000e+00, %0 ], [ 0.000000e+00, %1 ]
+ %.i1102 = phi double [ 0.000000e+00, %0 ], [ %.i1110, %1 ]
+ %.i4105 = phi double [ 0.000000e+00, %0 ], [ %.i4113, %1 ]
+ %.i14525 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i1102)
+ %.i24526 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i489)
+ %.i44529 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i4105)
+ %.upto16034 = insertelement <8 x double> zeroinitializer, double %.i14525, i64 1
+ %.upto26035 = insertelement <8 x double> %.upto16034, double %.i24526, i64 2
+ %.upto36036 = insertelement <8 x double> %.upto26035, double %.i14525, i64 3
+ %.upto46037 = insertelement <8 x double> %.upto36036, double %.i44529, i64 0
+ %.i1110 = extractelement <8 x double> %.upto46037, i64 0
+ %.i4113 = extractelement <8 x double> zeroinitializer, i64 0
+ br label %1
+}
More information about the llvm-commits
mailing list