[llvm] 641939b - [SLP]Remove CreateShuffle lambda and reuse ShuffleBuilder functions.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 14 10:20:03 PDT 2023
Author: Alexey Bataev
Date: 2023-03-14T10:15:41-07:00
New Revision: 641939baa92835a3f588b2f68e6a8cda96a328ef
URL: https://github.com/llvm/llvm-project/commit/641939baa92835a3f588b2f68e6a8cda96a328ef
DIFF: https://github.com/llvm/llvm-project/commit/641939baa92835a3f588b2f68e6a8cda96a328ef.diff
LOG: [SLP]Remove CreateShuffle lambda and reuse ShuffleBuilder functions.
After merging the main part of the gather/buildvector code, the CreateShuffle
lambda can be removed and the ShuffleBuilder add functions can be used instead.
Also, part of the code from CreateShuffle was migrated to the
BaseShuffleAnalysis::createShuffle function for better code emission.
Differential Revision: https://reviews.llvm.org/D145988
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8ca8da430ded6..6b08c6a49f34d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6629,7 +6629,8 @@ class BaseShuffleAnalysis {
Op = SV->getOperand(1);
}
if (auto *OpTy = dyn_cast<FixedVectorType>(Op->getType());
- !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute)) {
+ !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute) ||
+ ShuffleVectorInst::isZeroEltSplatMask(Mask)) {
if (IdentityOp) {
V = IdentityOp;
assert(Mask.size() == IdentityMask.size() &&
@@ -6661,6 +6662,8 @@ class BaseShuffleAnalysis {
static Value *createShuffle(Value *V1, Value *V2, ArrayRef<int> Mask,
ShuffleBuilderTy &Builder) {
assert(V1 && "Expected at least one vector value.");
+ if (V2)
+ Builder.resizeToMatch(V1, V2);
int VF = Mask.size();
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
VF = FTy->getNumElements();
@@ -6748,6 +6751,15 @@ class BaseShuffleAnalysis {
CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
}
}
+ const int Limit = CombinedMask1.size() * 2;
+ if (Op1 == Op2 && Limit == 2 * VF &&
+ all_of(CombinedMask1, [=](int Idx) { return Idx < Limit; }) &&
+ (ShuffleVectorInst::isIdentityMask(CombinedMask1) ||
+ (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1) &&
+ isa<ShuffleVectorInst>(Op1) &&
+ cast<ShuffleVectorInst>(Op1)->getShuffleMask() ==
+ ArrayRef(CombinedMask1))))
+ return Op1;
return Builder.createShuffleVector(
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
CombinedMask1);
@@ -9294,44 +9306,6 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
}
return VecBase;
};
- auto CreateShuffle = [&](Value *V1, Value *V2, ArrayRef<int> Mask) {
- unsigned VF1 = cast<FixedVectorType>(V1->getType())->getNumElements();
- unsigned VF2 = cast<FixedVectorType>(V2->getType())->getNumElements();
- unsigned VF = std::max(VF1, VF2);
- if (VF1 != VF2) {
- SmallVector<int> ExtMask(VF, UndefMaskElem);
- std::iota(ExtMask.begin(), std::next(ExtMask.begin(), std::min(VF1, VF2)),
- 0);
- if (VF1 < VF2) {
- V1 = Builder.CreateShuffleVector(V1, ExtMask);
- if (auto *I = dyn_cast<Instruction>(V1)) {
- GatherShuffleExtractSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
- } else {
- V2 = Builder.CreateShuffleVector(V2, ExtMask);
- if (auto *I = dyn_cast<Instruction>(V2)) {
- GatherShuffleExtractSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
- }
- }
- const int Limit = Mask.size() * 2;
- if (V1 == V2 && Mask.size() == VF &&
- all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
- (ShuffleVectorInst::isIdentityMask(Mask) ||
- (ShuffleVectorInst::isZeroEltSplatMask(Mask) &&
- isa<ShuffleVectorInst>(V1) &&
- cast<ShuffleVectorInst>(V1)->getShuffleMask() == Mask)))
- return V1;
- Value *Vec = V1 == V2 ? Builder.CreateShuffleVector(V1, Mask)
- : Builder.CreateShuffleVector(V1, V2, Mask);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherShuffleExtractSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
- return Vec;
- };
auto NeedToDelay = [=](const TreeEntry *E,
ArrayRef<const TreeEntry *> Deps) -> Value * {
// No need to delay emission if all deps are ready.
@@ -9438,29 +9412,20 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
}
}
if (Vec2)
- Vec1 = CreateShuffle(Vec1, Vec2, ExtractMask);
+ ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
else if (Vec1)
- Vec1 = CreateShuffle(Vec1, Vec1, ExtractMask);
+ ShuffleBuilder.add(Vec1, ExtractMask);
else
- Vec1 = PoisonValue::get(
- FixedVectorType::get(ScalarTy, GatheredScalars.size()));
+ ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
+ ScalarTy, GatheredScalars.size())),
+ ExtractMask);
}
if (GatherShuffle) {
- Vec = CreateShuffle(Entries.front()->VectorizedValue,
- Entries.back()->VectorizedValue, Mask);
- VF = Mask.size();
- if (Vec1) {
- // Build final mask.
- for (auto [I, Idx] : enumerate(Mask)) {
- if (ExtractMask[I] != UndefMaskElem)
- Idx = I;
- else if (Idx != UndefMaskElem)
- Idx = I + VF;
- }
- Vec = CreateShuffle(Vec1, Vec, Mask);
- }
- } else {
- Vec = Vec1;
+ if (Entries.size() == 1)
+ ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
+ else
+ ShuffleBuilder.add(Entries.front()->VectorizedValue,
+ Entries.back()->VectorizedValue, Mask);
}
} else if (!allConstant(E->Scalars)) {
// TODO: remove this code once able to combine shuffled vectors and build
@@ -9556,12 +9521,13 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
}
// Gather unique scalars and all constants.
Vec = gather(GatheredScalars);
+ ShuffleBuilder.add(Vec, ReuseMask);
} else {
// Gather all constants.
Vec = gather(E->Scalars);
+ ShuffleBuilder.add(Vec, ReuseMask);
}
- ShuffleBuilder.add(Vec, ReuseMask);
Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
if (NeedFreeze)
Vec = Builder.CreateFreeze(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
index 2af7efb3d81fe..48ebff8ebabb6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -211,15 +211,14 @@ define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) {
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> <i32 2, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: call void @use(double [[V1_LANE_0]])
; CHECK-NEXT: call void @use(double [[V1_LANE_1]])
; CHECK-NEXT: call void @use(double [[V1_LANE_2]])
; CHECK-NEXT: call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT: store <9 x double> [[TMP4]], ptr [[PTR_1]], align 8
+; CHECK-NEXT: store <9 x double> [[TMP3]], ptr [[PTR_1]], align 8
; CHECK-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index ae8a52e669d35..abf1d7abdc122 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -13,7 +13,7 @@ define void @Test(i32) {
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0:%.*]], [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP9]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> <i32 0, i32 3>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
index 2827e3ba42af5..001da64c60a93 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-nodes-dependency.ll
@@ -12,7 +12,7 @@ define double @test() {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
index efa37527ce991..d9942887d6c7d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll
@@ -10,14 +10,13 @@ define i1 @test() {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i64> [[TMP1]], i64 0, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> [[TMP2]], i64 0, i32 3
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> <i32 3, i32 undef, i32 1, i32 0>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, <4 x i64> [[TMP4]], <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP3]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = sub <8 x i64> [[TMP3]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <8 x i64> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
-; CHECK-NEXT: ret i1 [[TMP11]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> <i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <8 x i64> [[TMP3]], <8 x i32> <i32 11, i32 11, i32 11, i32 1, i32 9, i32 9, i32 1, i32 8>
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i64> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP9]])
+; CHECK-NEXT: ret i1 [[TMP10]]
;
entry:
%0 = shl i64 0, 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
index e1c816fd383d7..97189e4ef2a14 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll
@@ -10,11 +10,10 @@ define i32 @foo(i32 %a) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], <i32 3, i32 1>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
-; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], <i32 3, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
@@ -23,10 +22,10 @@ define i32 @foo(i32 %a) {
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
; CHECK-NEXT: ret i32 0
; CHECK: bb4:
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP2]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
-; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP10]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP2]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP9]], 0
; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OP_RDX9]]
; CHECK: bb5:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index 469b15d074186..adbeb638c1a46 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -643,12 +643,11 @@ define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]]
-; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]]
-; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]]
+; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
+; SSE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
+; SSE-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP6]], [[TMP7]]
; SSE-NEXT: ret double [[ADD3]]
;
; AVX-LABEL: @splat_loads(
@@ -700,14 +699,13 @@ define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptr
; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]]
-; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]]
-; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]]
-; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
-; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
-; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]]
+; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
+; SSE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
+; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
+; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
+; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
+; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
+; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]]
; SSE-NEXT: ret double [[RES]]
;
; AVX-LABEL: @splat_loads_with_internal_uses(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 6e0aafeacd309..0be5e2d9d536b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -333,16 +333,15 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y1]], i32 5
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y2]], i32 6
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y3]], i32 7
-; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP2]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
-; CHECK-NEXT: ret i1 [[TMP9]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[Y1]], i32 5
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y2]], i32 6
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y3]], i32 7
+; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP7]])
+; CHECK-NEXT: ret i1 [[TMP8]]
;
%x0 = extractelement <8 x i32> %x, i32 0
%x1 = extractelement <8 x i32> %x, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
index 3b4938f220fdd..e378909b49b92 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
@@ -18,11 +18,10 @@ define void @test(ptr %a, ptr %b) {
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP9]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer
-; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[RESULT]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP11]], zeroinitializer
+; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[RESULT]], align 4
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
More information about the llvm-commits
mailing list